Skip to content

Commit

Permalink
fix(search): Use proposed decay values and add a min_score value
Browse files Browse the repository at this point in the history
  • Loading branch information
albertisfu committed Dec 30, 2024
1 parent d327f12 commit b40a747
Show file tree
Hide file tree
Showing 6 changed files with 38 additions and 17 deletions.
17 changes: 14 additions & 3 deletions cl/lib/elasticsearch_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -947,6 +947,7 @@ def build_decay_relevance_score(
decay: float,
default_missing_date: str = "1600-01-01T00:00:00Z",
boost_mode: str = "multiply",
min_score: float = 0.0,
) -> QueryString:
"""
Build a decay relevance score query for Elasticsearch that adjusts the
Expand All @@ -960,6 +961,7 @@ def build_decay_relevance_score(
is null.
:param boost_mode: The mode to combine the decay score with the query's
original relevance score.
:param min_score: The floor that the final score approaches as the
document's date gets farther from now.
:return: The modified QueryString object with applied function score.
"""

Expand All @@ -972,9 +974,9 @@ def build_decay_relevance_score(
def default_missing_date = Instant.parse(params.default_missing_date).toEpochMilli();
def decay = (double)params.decay;
def now = new Date().getTime();
def min_score = (double)params.min_score;
// Convert scale parameter into milliseconds.
def scaleStr = params.scale;
double years = (double)params.scale;
// Convert years to milliseconds 1 year = 365 days
long scaleMillis = (long)(years * 365 * 24 * 60 * 60 * 1000);
Expand All @@ -989,12 +991,15 @@ def lambda = Math.log(decay) / scaleMillis;
// Absolute distance from now
def diff = Math.abs(docDate - now);
// Score: exp( λ * max(0, |docDate - now|) )
return Math.exp(lambda * diff);
def decay_score = Math.exp(lambda * diff);
// Adjust the decay score to have a minimum value
return min_score + ((1 - min_score) * decay_score);
""",
"params": {
"default_missing_date": default_missing_date,
"scale": scale, # Years
"decay": decay,
"min_score": min_score,
},
},
},
Expand Down Expand Up @@ -2605,8 +2610,14 @@ def apply_custom_score_to_main_query(
date_field = str(valid_decay_relevance_types[cd["type"]]["field"])
scale = int(valid_decay_relevance_types[cd["type"]]["scale"])
decay = float(valid_decay_relevance_types[cd["type"]]["decay"])
min_score = float(valid_decay_relevance_types[cd["type"]]["min_score"])
query = build_decay_relevance_score(
query, date_field, scale=scale, decay=decay, boost_mode=boost_mode
query,
date_field,
scale=scale,
decay=decay,
boost_mode=boost_mode,
min_score=min_score,
)
return query

Expand Down
23 changes: 16 additions & 7 deletions cl/search/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -281,22 +281,31 @@
SEARCH_TYPES.OPINION: {
"field": "dateFiled",
"scale": 50,
"decay": 0.5,
"decay": 0.2,
"min_score": 0.1,
},
SEARCH_TYPES.RECAP: {
"field": "dateFiled",
"scale": 20,
"decay": 0.2,
"min_score": 0.1,
},
SEARCH_TYPES.RECAP: {"field": "dateFiled", "scale": 50, "decay": 0.5},
SEARCH_TYPES.DOCKETS: {
"field": "dateFiled",
"scale": 50,
"decay": 0.5,
"scale": 20,
"decay": 0.2,
"min_score": 0.1,
},
SEARCH_TYPES.RECAP_DOCUMENT: {
"field": "dateFiled",
"scale": 50,
"decay": 0.5,
"scale": 20,
"decay": 0.2,
"min_score": 0.1,
},
SEARCH_TYPES.ORAL_ARGUMENT: {
"field": "dateArgued",
"scale": 50,
"decay": 0.5,
"decay": 0.2,
"min_score": 0.1,
},
}
4 changes: 2 additions & 2 deletions cl/search/tests/tests_es_opinion.py
Original file line number Diff line number Diff line change
Expand Up @@ -2286,7 +2286,7 @@ def setUpTestData(cls):
case_name="Keyword Match",
case_name_full="",
case_name_short="",
date_filed=datetime.date(1732, 2, 23),
date_filed=datetime.date(1832, 2, 23),
procedural_history="",
source="C",
attorneys="",
Expand Down Expand Up @@ -2372,7 +2372,7 @@ def setUpTestData(cls):
case_name="Ipsum Dolor Terms",
case_name_full="",
case_name_short="",
date_filed=datetime.date(1800, 2, 23),
date_filed=datetime.date(1900, 2, 23),
procedural_history="More Ipsum Dolor Terms",
source="C",
attorneys="More Ipsum Dolor Terms",
Expand Down
4 changes: 2 additions & 2 deletions cl/search/tests/tests_es_oral_arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -2504,7 +2504,7 @@ def setUpTestData(cls):
with cls.captureOnCommitCallbacks(execute=True):
cls.docket_old = DocketFactory.create(
docket_number="1:21-bk-1235",
date_argued=datetime.date(1732, 2, 23),
date_argued=datetime.date(1832, 2, 23),
)
cls.audio_old = AudioFactory.create(
case_name="Keyword Match",
Expand Down Expand Up @@ -2585,7 +2585,7 @@ def setUpTestData(cls):
cls.docket_high_relevance_old_date = DocketFactory.create(
case_name="Ipsum Dolor Terms",
docket_number="1:21-bk-1239",
date_argued=datetime.date(1800, 2, 23),
date_argued=datetime.date(1900, 2, 23),
)
cls.audio_high_relevance_old_date = AudioFactory.create(
case_name="Ipsum Dolor Terms",
Expand Down
4 changes: 2 additions & 2 deletions cl/search/tests/tests_es_recap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2872,7 +2872,7 @@ def setUpTestData(cls):
case_name_short="",
docket_number="1:21-bk-1235",
source=Docket.RECAP,
date_filed=datetime.date(1732, 2, 23),
date_filed=datetime.date(1832, 2, 23),
)
cls.rd_old = RECAPDocumentFactory(
docket_entry=DocketEntryWithParentsFactory(
Expand Down Expand Up @@ -2955,7 +2955,7 @@ def setUpTestData(cls):
source=Docket.RECAP,
nature_of_suit="More Ipsum Dolor Terms",
cause="More Ipsum Dolor Terms",
date_filed=datetime.date(1800, 2, 23),
date_filed=datetime.date(1900, 2, 23),
)
cls.rd_high_relevance_old_date = RECAPDocumentFactory(
docket_entry=DocketEntryWithParentsFactory(
Expand Down
3 changes: 2 additions & 1 deletion cl/tests/cases.py
Original file line number Diff line number Diff line change
Expand Up @@ -372,7 +372,8 @@ def _test_results_ordering(self, test, field, version="v4"):
self.assertEqual(
actual_order,
test[expected_order_key],
msg=f"Expected order {test[expected_order_key]}, but got {actual_order}",
msg=f"Expected order {test[expected_order_key]}, but got {actual_order} for "
f"Search type: {test["search_params"]["type"]}",
)

def _assert_order_in_html(
Expand Down

0 comments on commit b40a747

Please sign in to comment.