From 0bc21e21b1e123c25baea5b1394e4083e4733c6f Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 15 Aug 2024 17:11:14 -0500 Subject: [PATCH 1/3] fix(elasticsearch): Tweaked Opinions index to improve MLT queries --- cl/search/documents.py | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/cl/search/documents.py b/cl/search/documents.py index d7b18f9472..c78ca59824 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -1279,6 +1279,7 @@ class OpinionBaseDocument(Document): docketNumber = fields.TextField( analyzer="text_en_splitting_cl", term_vector="with_positions_offsets", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1290,6 +1291,7 @@ class OpinionBaseDocument(Document): ) caseName = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", term_vector="with_positions_offsets", fields={ "exact": fields.TextField( @@ -1302,6 +1304,7 @@ class OpinionBaseDocument(Document): ) caseNameFull = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1317,6 +1320,7 @@ class OpinionBaseDocument(Document): court_id = fields.TextField( analyzer="text_en_splitting_cl", search_analyzer="search_analyzer", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1327,6 +1331,7 @@ class OpinionBaseDocument(Document): ) court = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1343,6 +1348,7 @@ class OpinionBaseDocument(Document): ) judge = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1360,6 +1366,7 @@ class OpinionBaseDocument(Document): ) attorney = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1370,6 +1377,7 @@ class OpinionBaseDocument(Document): ) suitNature = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", term_vector="with_positions_offsets", fields={ "exact": fields.TextField( @@ -1384,6 +1392,7 @@ class OpinionBaseDocument(Document): fields.TextField( analyzer="text_en_splitting_cl", term_vector="with_positions_offsets", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1399,6 +1408,7 @@ class OpinionBaseDocument(Document): status = fields.TextField( analyzer="text_en_splitting_cl", search_analyzer="search_analyzer", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1409,6 +1419,7 @@ class OpinionBaseDocument(Document): ) procedural_history = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1419,6 +1430,7 @@ class OpinionBaseDocument(Document): ) posture = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1429,6 +1441,7 @@ class OpinionBaseDocument(Document): ) syllabus = fields.TextField( analyzer="text_en_splitting_cl", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1596,6 +1609,7 @@ class OpinionDocument(OpinionBaseDocument): text = fields.TextField( analyzer="text_en_splitting_cl", term_vector="with_positions_offsets", + copy_to="combined_fields", fields={ "exact": fields.TextField( analyzer="english_exact", @@ -1612,6 +1626,11 @@ class OpinionDocument(OpinionBaseDocument): joined_by_ids = fields.ListField( fields.IntegerField(multi=True), ) + combined_fields = fields.TextField( + analyzer="english_exact", + search_analyzer="search_analyzer_exact", + term_vector="yes", + ) class Django: model = Opinion From e5f28743220fb3577b8a127b658bb3ffce336665 Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 15 Aug 2024 17:35:20 -0500 Subject: [PATCH 2/3] fix(elasticsearch): Exclude combined_fields from Opinions Search API responses --- cl/search/api_serializers.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index 1f9cbb7d75..cd61517f2e 100644 --- a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -432,6 +432,7 @@ class Meta: "procedural_history", "panel_names", "sha1", + "combined_fields", ) @@ -576,6 +577,7 @@ class Meta: "local_path", "sha1", "cites", + "combined_fields", ) @@ -606,6 +608,7 @@ class Meta: "cluster_child", "date_created", "timestamp", + "combined_fields", ) From 6641d2957e986b27930739ed794ea65910dc993f Mon Sep 17 00:00:00 2001 From: Alberto Islas Date: Thu, 15 Aug 2024 17:55:32 -0500 Subject: [PATCH 3/3] fix(elasticsearch): Removed combined_fields in V4 Opinions Search API --- cl/search/api_serializers.py | 1 - 1 file changed, 1 deletion(-) diff --git a/cl/search/api_serializers.py b/cl/search/api_serializers.py index cd61517f2e..c4ccb4fec0 100644 --- a/cl/search/api_serializers.py +++ b/cl/search/api_serializers.py @@ -577,7 +577,6 @@ class Meta: "local_path", "sha1", "cites", - "combined_fields", )