Skip to content

Commit

Permalink
boost added
Browse files: browse the repository at this point in the history
  • Loading branch information
yuhongsun96 committed Aug 31, 2023
1 parent 681eb6e commit 69e7569
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 1 deletion.
3 changes: 3 additions & 0 deletions backend/danswer/chunking/models.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -5,6 +5,7 @@
from typing import cast

from danswer.configs.constants import BLURB
from danswer.configs.constants import BOOST
from danswer.configs.constants import METADATA
from danswer.configs.constants import SEMANTIC_IDENTIFIER
from danswer.configs.constants import SOURCE_LINKS
Expand Down Expand Up @@ -57,6 +58,7 @@ class InferenceChunk(BaseChunk):
document_id: str
source_type: str
semantic_identifier: str
boost: float
metadata: dict[str, Any]

@classmethod
Expand All @@ -78,6 +80,7 @@ def from_dict(cls, init_dict: dict[str, Any]) -> "InferenceChunk":
init_kwargs[METADATA] = json.loads(init_kwargs[METADATA])
else:
init_kwargs[METADATA] = {}
init_kwargs[BOOST] = init_kwargs.get(BOOST, 1)
if init_kwargs.get(SEMANTIC_IDENTIFIER) is None:
logger.error(
f"Chunk with blurb: {init_kwargs.get(BLURB, 'Unknown')[:50]}... has no Semantic Identifier"
Expand Down
3 changes: 2 additions & 1 deletion backend/danswer/search/semantic_search.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -57,7 +57,8 @@ def semantic_reranking(
encoder.predict([(query, chunk.content) for chunk in chunks]) # type: ignore
for encoder in cross_encoders
]
averaged_sim_scores = sum(sim_scores) / len(sim_scores)
boosts = [chunk.boost for chunk in chunks]
averaged_sim_scores = sum(sim_scores) * boosts / len(sim_scores)
scored_results = list(zip(averaged_sim_scores, chunks))
scored_results.sort(key=lambda x: x[0], reverse=True)
ranked_sim_scores, ranked_chunks = zip(*scored_results)
Expand Down

0 comments on commit 69e7569

Please sign in to comment.