Skip to content

Commit

Permalink
Warm up models before first document indexed (#333)
Browse files Browse the repository at this point in the history
  • Loading branch information
yuhongsun96 authored Aug 25, 2023
1 parent cb13f5b commit 384bf1b
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 1 deletion.
4 changes: 4 additions & 0 deletions backend/danswer/background/update.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@
from danswer.db.models import Connector
from danswer.db.models import IndexAttempt
from danswer.db.models import IndexingStatus
from danswer.search.search_utils import warm_up_models
from danswer.utils.logger import IndexAttemptSingleton
from danswer.utils.logger import setup_logger

Expand Down Expand Up @@ -417,4 +418,7 @@ def update_loop(delay: int = 10, num_workers: int = NUM_INDEXING_WORKERS) -> Non


if __name__ == "__main__":
    # Warm the models before entering the indexing loop so the warm-up runs
    # ahead of the first document being indexed. indexer_only=True makes
    # warm_up_models return after the tokenizer and embedding model calls,
    # before it reaches the reranking model ensemble.
    logger.info("Warming up Embedding Model(s)")
    warm_up_models(indexer_only=True)
    logger.info("Starting Indexing Loop")
    update_loop()
6 changes: 5 additions & 1 deletion backend/danswer/search/search_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,10 +61,14 @@ def get_default_intent_model() -> TFDistilBertForSequenceClassification:
return _INTENT_MODEL


def warm_up_models() -> None:
def warm_up_models(indexer_only: bool = False) -> None:
warm_up_str = "Danswer is amazing"
get_default_tokenizer()(warm_up_str)
get_default_embedding_model().encode(warm_up_str)

if indexer_only:
return

cross_encoders = get_default_reranking_model_ensemble()
[
cross_encoder.predict((warm_up_str, warm_up_str))
Expand Down

1 comment on commit 384bf1b

@vercel
Copy link

@vercel vercel bot commented on 384bf1b Aug 25, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please sign in to comment.