From 1a1c9fd0207ea1fca5a90daff16d82c1274a63dd Mon Sep 17 00:00:00 2001 From: Anweshi Anavadya Date: Tue, 9 Mar 2021 13:43:10 -0500 Subject: [PATCH 1/7] loading index from disk trial --- semantic_search/main.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/semantic_search/main.py b/semantic_search/main.py index 09cb1f6..6fd0d9e 100644 --- a/semantic_search/main.py +++ b/semantic_search/main.py @@ -2,6 +2,7 @@ from typing import Dict, List, Optional, Tuple, Union, cast import faiss +from faiss.swigfaiss import read_index import torch from fastapi import FastAPI from pydantic import BaseSettings @@ -32,6 +33,7 @@ class Settings(BaseSettings): pretrained_model_name_or_path: str = "johngiorgi/declutr-sci-base" batch_size: int = 64 max_length: Optional[int] = None + file_path: Optional[str] = None mean_pool: bool = True cuda_device: int = -1 @@ -80,7 +82,10 @@ def app_startup(): settings.pretrained_model_name_or_path, cuda_device=settings.cuda_device ) embedding_dim = model.model.config.hidden_size - model.index = setup_faiss_index(embedding_dim) + if settings.file_path != None: + model.index = read_index(settings.file_path) + else: + model.index = setup_faiss_index(embedding_dim) @app.post("/") From 140d0854f41f3b2ca7231ffec25bc60180e816f5 Mon Sep 17 00:00:00 2001 From: Anweshi Anavadya Date: Wed, 10 Mar 2021 14:08:57 -0500 Subject: [PATCH 2/7] updated flake8 fix --- semantic_search/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic_search/main.py b/semantic_search/main.py index 6fd0d9e..5d7a9bd 100644 --- a/semantic_search/main.py +++ b/semantic_search/main.py @@ -82,7 +82,7 @@ def app_startup(): settings.pretrained_model_name_or_path, cuda_device=settings.cuda_device ) embedding_dim = model.model.config.hidden_size - if settings.file_path != None: + if settings.file_path is not None: model.index = read_index(settings.file_path) else: model.index = setup_faiss_index(embedding_dim) From fd12951a50f20d2f3e0d83104e4ec3b118146f06 Mon Sep 17 00:00:00 2001 From: Anweshi Anavadya <57552053+Anwesh1@users.noreply.github.com> Date: Wed, 10 Mar 2021 14:44:00 -0500 Subject: [PATCH 3/7] Update semantic_search/main.py Co-authored-by: John Giorgi --- semantic_search/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic_search/main.py b/semantic_search/main.py index 5d7a9bd..90817b7 100644 --- a/semantic_search/main.py +++ b/semantic_search/main.py @@ -33,7 +33,7 @@ class Settings(BaseSettings): pretrained_model_name_or_path: str = "johngiorgi/declutr-sci-base" batch_size: int = 64 max_length: Optional[int] = None - file_path: Optional[str] = None + serialized_index_path: Optional[str] = None mean_pool: bool = True cuda_device: int = -1 From f7ad7bd4a722f72516d57f42601f80848ed432ed Mon Sep 17 00:00:00 2001 From: Anweshi Anavadya <57552053+Anwesh1@users.noreply.github.com> Date: Wed, 10 Mar 2021 14:44:07 -0500 Subject: [PATCH 4/7] Update semantic_search/main.py Co-authored-by: John Giorgi --- semantic_search/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic_search/main.py b/semantic_search/main.py index 90817b7..b879e23 100644 --- a/semantic_search/main.py +++ b/semantic_search/main.py @@ -82,7 +82,7 @@ def app_startup(): settings.pretrained_model_name_or_path, cuda_device=settings.cuda_device ) embedding_dim = model.model.config.hidden_size - if settings.file_path is not None: + if settings.serialized_index_path is not None: model.index = read_index(settings.file_path) else: model.index = setup_faiss_index(embedding_dim) From bdd22dbb51f0f73d8066fd3e768601e10ff4cf8f Mon Sep 17 00:00:00 2001 From: Anweshi Anavadya <57552053+Anwesh1@users.noreply.github.com> Date: Wed, 10 Mar 2021 14:44:26 -0500 Subject: [PATCH 5/7] Update semantic_search/main.py Co-authored-by: John Giorgi --- semantic_search/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic_search/main.py b/semantic_search/main.py index b879e23..329d4ce 100644 --- a/semantic_search/main.py +++ b/semantic_search/main.py @@ -83,7 +83,7 @@ def app_startup(): ) embedding_dim = model.model.config.hidden_size if settings.serialized_index_path is not None: - model.index = read_index(settings.file_path) + model.index = faiss.swigfaiss.read_index(settings.file_path) else: model.index = setup_faiss_index(embedding_dim) From c3cd2668060193392a8c24ddea0467924d670556 Mon Sep 17 00:00:00 2001 From: Anweshi Anavadya <57552053+Anwesh1@users.noreply.github.com> Date: Wed, 10 Mar 2021 14:44:57 -0500 Subject: [PATCH 6/7] All required fixes done Co-authored-by: John Giorgi --- semantic_search/main.py | 1 - 1 file changed, 1 deletion(-) diff --git a/semantic_search/main.py b/semantic_search/main.py index 329d4ce..17e8aaa 100644 --- a/semantic_search/main.py +++ b/semantic_search/main.py @@ -2,7 +2,6 @@ from typing import Dict, List, Optional, Tuple, Union, cast import faiss -from faiss.swigfaiss import read_index import torch from fastapi import FastAPI from pydantic import BaseSettings From b357edb81061ee5593595cd110c5e43e0e84f988 Mon Sep 17 00:00:00 2001 From: Anweshi Anavadya <57552053+Anwesh1@users.noreply.github.com> Date: Wed, 10 Mar 2021 14:49:27 -0500 Subject: [PATCH 7/7] Grouped path variable names Co-authored-by: John Giorgi --- semantic_search/main.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/semantic_search/main.py b/semantic_search/main.py index 17e8aaa..c03915b 100644 --- a/semantic_search/main.py +++ b/semantic_search/main.py @@ -30,9 +30,9 @@ class Settings(BaseSettings): """ pretrained_model_name_or_path: str = "johngiorgi/declutr-sci-base" + serialized_index_path: Optional[str] = None batch_size: int = 64 max_length: Optional[int] = None - serialized_index_path: Optional[str] = None mean_pool: bool = True cuda_device: int = -1