-
Notifications
You must be signed in to change notification settings - Fork 1.3k
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changed default local model to nomic #1943
Changes from all commits
19222d7
8a865b6
5839826
5153d2d
4103a07
85568cd
bdfe36e
c129c90
3e9f2a4
1f53351
4bb9ef2
a472f4c
0b2e760
b5886f4
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -12,7 +12,7 @@ | |
# The useable models configured as below must be SentenceTransformer compatible | ||
# NOTE: DO NOT CHANGE SET THESE UNLESS YOU KNOW WHAT YOU ARE DOING | ||
# IDEALLY, YOU SHOULD CHANGE EMBEDDING MODELS VIA THE UI | ||
DEFAULT_DOCUMENT_ENCODER_MODEL = "intfloat/e5-base-v2" | ||
DEFAULT_DOCUMENT_ENCODER_MODEL = "nomic-ai/nomic-embed-text-v1" | ||
DOCUMENT_ENCODER_MODEL = ( | ||
os.environ.get("DOCUMENT_ENCODER_MODEL") or DEFAULT_DOCUMENT_ENCODER_MODEL | ||
) | ||
|
@@ -34,8 +34,8 @@ | |
SIM_SCORE_RANGE_LOW = float(os.environ.get("SIM_SCORE_RANGE_LOW") or 0.0) | ||
SIM_SCORE_RANGE_HIGH = float(os.environ.get("SIM_SCORE_RANGE_HIGH") or 1.0) | ||
# Certain models like e5, BGE, etc use a prefix for asymmetric retrievals (query generally shorter than docs) | ||
ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "query: ") | ||
ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "passage: ") | ||
ASYM_QUERY_PREFIX = os.environ.get("ASYM_QUERY_PREFIX", "search_query: ") | ||
ASYM_PASSAGE_PREFIX = os.environ.get("ASYM_PASSAGE_PREFIX", "search_document: ") | ||
# Purely an optimization, memory limitation consideration | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. these are the defaults for nomic-ai/nomic-embed-text-v1 |
||
BATCH_SIZE_ENCODE_CHUNKS = 8 | ||
# For score display purposes, only way is to know the expected ranges | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -116,8 +116,9 @@ def get_tokenizer(model_name: str | None, provider_type: str | None) -> BaseToke | |
if provider_type.lower() == "openai": | ||
# Used across ada and text-embedding-3 models | ||
return _check_tokenizer_cache("openai") | ||
# If we are given a cloud provider_type that isn't OpenAI, we default to trying to use the model_name | ||
# this means we are approximating the token count which may leave some performance on the table | ||
|
||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. general note |
||
# If we are given a cloud provider_type that isn't OpenAI, we default to trying to use the model_name | ||
if not model_name: | ||
raise ValueError("Need to provide a model_name or provider_type") | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -199,7 +199,14 @@ def get_embedding_model( | |
|
||
if model_name not in _GLOBAL_MODELS_DICT: | ||
logger.info(f"Loading {model_name}") | ||
model = SentenceTransformer(model_name) | ||
# Some model architectures that aren't built into the Transformers or Sentence | ||
# Transformer need to be downloaded to be loaded locally. This does not mean | ||
# data is sent to remote servers for inference, however the remote code can | ||
# be fairly arbitrary so only use trusted models | ||
model = SentenceTransformer( | ||
model_name_or_path=model_name, | ||
trust_remote_code=True, | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I would add a comment here: |
||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This is related to needing to also install nomic-bert-2048. Not 100% sure though |
||
model.max_seq_length = max_context_length | ||
_GLOBAL_MODELS_DICT[model_name] = model | ||
elif max_context_length != _GLOBAL_MODELS_DICT[model_name].max_seq_length: | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,3 +1,4 @@ | ||
einops==0.8.0 | ||
fastapi==0.109.2 | ||
h5py==3.9.0 | ||
pydantic==1.10.13 | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
It's better to combine these into a single layer. If you do a single RUN it creates a single layer that can be cached.