From b40a747dd0d3cdd2657d13a62f619f1823b0b089 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Mon, 30 Dec 2024 15:09:28 -0600 Subject: [PATCH] fix(search): Use proposed decay values and add a min_score value --- cl/lib/elasticsearch_utils.py | 17 +++++++++++++--- cl/search/constants.py | 23 +++++++++++++++------- cl/search/tests/tests_es_opinion.py | 4 ++-- cl/search/tests/tests_es_oral_arguments.py | 4 ++-- cl/search/tests/tests_es_recap.py | 4 ++-- cl/tests/cases.py | 3 ++- 6 files changed, 38 insertions(+), 17 deletions(-) diff --git a/cl/lib/elasticsearch_utils.py b/cl/lib/elasticsearch_utils.py index af76c5e220..d08d3e675d 100644 --- a/cl/lib/elasticsearch_utils.py +++ b/cl/lib/elasticsearch_utils.py @@ -947,6 +947,7 @@ def build_decay_relevance_score( decay: float, default_missing_date: str = "1600-01-01T00:00:00Z", boost_mode: str = "multiply", + min_score: float = 0.0, ) -> QueryString: """ Build a decay relevance score query for Elasticsearch that adjusts the @@ -960,6 +961,7 @@ def build_decay_relevance_score( is null. :param boost_mode: The mode to combine the decay score with the query's original relevance score. + :param min_score: The minimum score where the decay function stabilizes. :return: The modified QueryString object with applied function score. """ @@ -972,9 +974,9 @@ def build_decay_relevance_score( def default_missing_date = Instant.parse(params.default_missing_date).toEpochMilli(); def decay = (double)params.decay; def now = new Date().getTime(); + def min_score = (double)params.min_score; // Convert scale parameter into milliseconds. 
- def scaleStr = params.scale; double years = (double)params.scale; // Convert years to milliseconds 1 year = 365 days long scaleMillis = (long)(years * 365 * 24 * 60 * 60 * 1000); @@ -989,12 +991,15 @@ def lambda = Math.log(decay) / scaleMillis; // Absolute distance from now def diff = Math.abs(docDate - now); // Score: exp( λ * max(0, |docDate - now|) ) - return Math.exp(lambda * diff); + def decay_score = Math.exp(lambda * diff); + // Adjust the decay score to have a minimum value + return min_score + ((1 - min_score) * decay_score); """, "params": { "default_missing_date": default_missing_date, "scale": scale, # Years "decay": decay, + "min_score": min_score, }, }, }, @@ -2605,8 +2610,14 @@ def apply_custom_score_to_main_query( date_field = str(valid_decay_relevance_types[cd["type"]]["field"]) scale = int(valid_decay_relevance_types[cd["type"]]["scale"]) decay = float(valid_decay_relevance_types[cd["type"]]["decay"]) + min_score = float(valid_decay_relevance_types[cd["type"]]["min_score"]) query = build_decay_relevance_score( - query, date_field, scale=scale, decay=decay, boost_mode=boost_mode + query, + date_field, + scale=scale, + decay=decay, + boost_mode=boost_mode, + min_score=min_score, ) return query diff --git a/cl/search/constants.py b/cl/search/constants.py index 78b02bcf3c..adb2f697d2 100644 --- a/cl/search/constants.py +++ b/cl/search/constants.py @@ -281,22 +281,31 @@ SEARCH_TYPES.OPINION: { "field": "dateFiled", "scale": 50, - "decay": 0.5, + "decay": 0.2, + "min_score": 0.1, + }, + SEARCH_TYPES.RECAP: { + "field": "dateFiled", + "scale": 20, + "decay": 0.2, + "min_score": 0.1, }, - SEARCH_TYPES.RECAP: {"field": "dateFiled", "scale": 50, "decay": 0.5}, SEARCH_TYPES.DOCKETS: { "field": "dateFiled", - "scale": 50, - "decay": 0.5, + "scale": 20, + "decay": 0.2, + "min_score": 0.1, }, SEARCH_TYPES.RECAP_DOCUMENT: { "field": "dateFiled", - "scale": 50, - "decay": 0.5, + "scale": 20, + "decay": 0.2, + "min_score": 0.1, }, SEARCH_TYPES.ORAL_ARGUMENT: { 
"field": "dateArgued", "scale": 50, - "decay": 0.5, + "decay": 0.2, + "min_score": 0.1, }, } diff --git a/cl/search/tests/tests_es_opinion.py b/cl/search/tests/tests_es_opinion.py index 936ded9b6d..358f7c2725 100644 --- a/cl/search/tests/tests_es_opinion.py +++ b/cl/search/tests/tests_es_opinion.py @@ -2286,7 +2286,7 @@ def setUpTestData(cls): case_name="Keyword Match", case_name_full="", case_name_short="", - date_filed=datetime.date(1732, 2, 23), + date_filed=datetime.date(1832, 2, 23), procedural_history="", source="C", attorneys="", @@ -2372,7 +2372,7 @@ def setUpTestData(cls): case_name="Ipsum Dolor Terms", case_name_full="", case_name_short="", - date_filed=datetime.date(1800, 2, 23), + date_filed=datetime.date(1900, 2, 23), procedural_history="More Ipsum Dolor Terms", source="C", attorneys="More Ipsum Dolor Terms", diff --git a/cl/search/tests/tests_es_oral_arguments.py b/cl/search/tests/tests_es_oral_arguments.py index 7dd4b4647f..1147fa92e7 100644 --- a/cl/search/tests/tests_es_oral_arguments.py +++ b/cl/search/tests/tests_es_oral_arguments.py @@ -2504,7 +2504,7 @@ def setUpTestData(cls): with cls.captureOnCommitCallbacks(execute=True): cls.docket_old = DocketFactory.create( docket_number="1:21-bk-1235", - date_argued=datetime.date(1732, 2, 23), + date_argued=datetime.date(1832, 2, 23), ) cls.audio_old = AudioFactory.create( case_name="Keyword Match", @@ -2585,7 +2585,7 @@ def setUpTestData(cls): cls.docket_high_relevance_old_date = DocketFactory.create( case_name="Ipsum Dolor Terms", docket_number="1:21-bk-1239", - date_argued=datetime.date(1800, 2, 23), + date_argued=datetime.date(1900, 2, 23), ) cls.audio_high_relevance_old_date = AudioFactory.create( case_name="Ipsum Dolor Terms", diff --git a/cl/search/tests/tests_es_recap.py b/cl/search/tests/tests_es_recap.py index 3cd5a008dd..91981feb77 100644 --- a/cl/search/tests/tests_es_recap.py +++ b/cl/search/tests/tests_es_recap.py @@ -2872,7 +2872,7 @@ def setUpTestData(cls): case_name_short="", 
docket_number="1:21-bk-1235", source=Docket.RECAP, - date_filed=datetime.date(1732, 2, 23), + date_filed=datetime.date(1832, 2, 23), ) cls.rd_old = RECAPDocumentFactory( docket_entry=DocketEntryWithParentsFactory( @@ -2955,7 +2955,7 @@ def setUpTestData(cls): source=Docket.RECAP, nature_of_suit="More Ipsum Dolor Terms", cause="More Ipsum Dolor Terms", - date_filed=datetime.date(1800, 2, 23), + date_filed=datetime.date(1900, 2, 23), ) cls.rd_high_relevance_old_date = RECAPDocumentFactory( docket_entry=DocketEntryWithParentsFactory( diff --git a/cl/tests/cases.py b/cl/tests/cases.py index 184555abb5..cdd93358db 100644 --- a/cl/tests/cases.py +++ b/cl/tests/cases.py @@ -372,7 +372,8 @@ def _test_results_ordering(self, test, field, version="v4"): self.assertEqual( actual_order, test[expected_order_key], - msg=f"Expected order {test[expected_order_key]}, but got {actual_order}", + msg=f"Expected order {test[expected_order_key]}, but got {actual_order} for " + f"Search type: {test["search_params"]["type"]}", ) def _assert_order_in_html(