From d78609196510eaa4149f6478db0a6f21614647a3 Mon Sep 17 00:00:00 2001 From: Panagiotis Bailis Date: Thu, 31 Oct 2024 15:38:14 +0200 Subject: [PATCH] Adding text_similarity_reranker retriever specification (#3057) --- output/openapi/elasticsearch-openapi.json | 45 +++++- .../elasticsearch-serverless-openapi.json | 45 +++++- output/schema/schema-serverless.json | 129 +++++++++++++++--- output/schema/schema.json | 129 +++++++++++++++--- output/typescript/types.ts | 11 +- specification/_global/search/SearchRequest.ts | 4 +- specification/_types/Retriever.ts | 19 ++- 7 files changed, 331 insertions(+), 51 deletions(-) diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json index 2d5e78cca7..d10d323b03 100644 --- a/output/openapi/elasticsearch-openapi.json +++ b/output/openapi/elasticsearch-openapi.json @@ -80833,6 +80833,9 @@ }, "rrf": { "$ref": "#/components/schemas/_types:RRFRetriever" + }, + "text_similarity_reranker": { + "$ref": "#/components/schemas/_types:TextSimilarityReranker" } }, "minProperties": 1, @@ -80859,10 +80862,6 @@ "sort": { "$ref": "#/components/schemas/_types:Sort" }, - "min_score": { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "type": "number" - }, "collapse": { "$ref": "#/components/schemas/_global.search._types:FieldCollapse" } @@ -80886,6 +80885,10 @@ } } ] + }, + "min_score": { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "type": "number" } } }, @@ -80958,6 +80961,40 @@ } ] }, + "_types:TextSimilarityReranker": { + "allOf": [ + { + "$ref": "#/components/schemas/_types:RetrieverBase" + }, + { + "type": "object", + "properties": { + "retriever": { + "$ref": "#/components/schemas/_types:RetrieverContainer" + }, + "rank_window_size": { + "description": "This value determines how many documents we will consider from the nested retriever.", + "type": "number" + }, + "inference_id": { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "type": "string" + }, + "inference_text": { + "description": "The text snippet used as the basis for similarity comparison", + "type": "string" + }, + "field": { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "type": "string" + } + }, + "required": [ + "retriever" + ] + } + ] + }, "search_application._types:SearchApplication": { "allOf": [ { diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json index 04e86fac42..88e0db7e4d 100644 --- a/output/openapi/elasticsearch-serverless-openapi.json +++ b/output/openapi/elasticsearch-serverless-openapi.json @@ -53178,6 +53178,9 @@ }, "rrf": { "$ref": "#/components/schemas/_types:RRFRetriever" + }, + "text_similarity_reranker": { + "$ref": "#/components/schemas/_types:TextSimilarityReranker" } }, "minProperties": 1, @@ -53204,10 +53207,6 @@ "sort": { "$ref": "#/components/schemas/_types:Sort" }, - "min_score": { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "type": "number" - }, "collapse": { "$ref": "#/components/schemas/_global.search._types:FieldCollapse" } @@ -53231,6 +53230,10 @@ } } ] + }, + "min_score": { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "type": "number" } } }, @@ -53303,6 +53306,40 @@ } ] }, + "_types:TextSimilarityReranker": { + "allOf": [ + { + "$ref": "#/components/schemas/_types:RetrieverBase" + }, + { + "type": "object", + "properties": { + "retriever": { + "$ref": "#/components/schemas/_types:RetrieverContainer" + }, + "rank_window_size": { + "description": "This value determines how many documents we will consider from the nested retriever.", + "type": "number" + }, + "inference_id": { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "type": "string" + }, + "inference_text": { + "description": "The text snippet used as the basis for similarity comparison", + "type": "string" + }, + "field": { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "type": "string" + } + }, + "required": [ + "retriever" + ] + } + ] + }, "search_application._types:SearchApplication": { "allOf": [ { diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json index 6cdd3373f5..80d3d4b86c 100644 --- a/output/schema/schema-serverless.json +++ b/output/schema/schema-serverless.json @@ -36176,9 +36176,12 @@ }, { "availability": { - "serverless": {}, + "serverless": { + "stability": "stable" + }, "stack": { - "since": "8.14.0" + "since": "8.14.0", + "stability": "stable" } }, "description": "A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn.", @@ -138597,9 +138600,21 @@ "namespace": "_types" } } + }, + { + "description": "A retriever that reranks the top documents based on a reranking model using the InferenceAPI", + "name": "text_similarity_reranker", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TextSimilarityReranker", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L26-L36", + "specLocation": "_types/Retriever.ts#L26-L38", "variants": { "kind": "container" } @@ -138665,18 +138680,6 @@ } } }, - { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "name": "min_score", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, { "description": "Collapses the top documents by a specified key into a single top document per key.", "name": "collapse", @@ -138690,7 +138693,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L43-L56" + "specLocation": "_types/Retriever.ts#L47-L58" }, { "kind": "interface", @@ -138725,9 +138728,21 @@ ], "kind": "union_of" } + }, + { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "name": "min_score", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L38-L41" + "specLocation": "_types/Retriever.ts#L40-L45" }, { "inherits": { @@ -138815,7 +138830,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L58-L71" + "specLocation": "_types/Retriever.ts#L60-L73" }, { "inherits": { @@ -138870,7 +138885,83 @@ } } ], - "specLocation": "_types/Retriever.ts#L73-L80" + "specLocation": "_types/Retriever.ts#L75-L82" + }, + { + "inherits": { + "type": { + "name": "RetrieverBase", + "namespace": "_types" + } + }, + "kind": "interface", + "name": { + "name": "TextSimilarityReranker", + "namespace": "_types" + }, + "properties": [ + { + "description": "The nested retriever which will produce the first-level results, that will later be used for reranking.", + "name": "retriever", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "RetrieverContainer", + "namespace": "_types" + } + } + }, + { + "description": "This value determines how many documents we will consider from the nested retriever.", + "name": "rank_window_size", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The text snippet used as the basis for similarity comparison", + "name": "inference_text", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "name": "field", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "_types/Retriever.ts#L84-L95" }, { "inherits": { diff --git a/output/schema/schema.json b/output/schema/schema.json index ffcf25fed7..c08f9db232 100644 --- a/output/schema/schema.json +++ b/output/schema/schema.json @@ -32227,9 +32227,12 @@ }, { "availability": { - "serverless": {}, + "serverless": { + "stability": "stable" + }, "stack": { - "since": "8.14.0" + "since": "8.14.0", + "stability": "stable" } }, "description": "A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn.", @@ -45906,7 +45909,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L58-L71" + "specLocation": "_types/Retriever.ts#L60-L73" }, { "kind": "interface", @@ -47413,7 +47416,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L73-L80" + "specLocation": "_types/Retriever.ts#L75-L82" }, { "kind": "interface", @@ -47838,9 +47841,21 @@ } ] } + }, + { + "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", + "name": "min_score", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "float", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L38-L41" + "specLocation": "_types/Retriever.ts#L40-L45" }, { "kind": "interface", @@ -47884,9 +47899,21 @@ "namespace": "_types" } } + }, + { + "description": "A retriever that reranks the top documents based on a reranking model using the InferenceAPI", + "name": "text_similarity_reranker", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "TextSimilarityReranker", + "namespace": "_types" + } + } } ], - "specLocation": "_types/Retriever.ts#L26-L36", + "specLocation": "_types/Retriever.ts#L26-L38", "variants": { "kind": "container" } @@ -49522,18 +49549,6 @@ } } }, - { - "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.", - "name": "min_score", - "required": false, - "type": { - "kind": "instance_of", - "type": { - "name": "float", - "namespace": "_types" - } - } - }, { "description": "Collapses the top documents by a specified key into a single top document per key.", "name": "collapse", @@ -49547,7 +49562,7 @@ } } ], - "specLocation": "_types/Retriever.ts#L43-L56" + "specLocation": "_types/Retriever.ts#L47-L58" }, { "kind": "interface", @@ -49838,6 +49853,82 @@ ], "specLocation": "_types/Knn.ts#L79-L82" }, + { + "kind": "interface", + "inherits": { + "type": { + "name": "RetrieverBase", + "namespace": "_types" + } + }, + "name": { + "name": "TextSimilarityReranker", + "namespace": "_types" + }, + "properties": [ + { + "description": "The nested retriever which will produce the first-level results, that will later be used for reranking.", + "name": "retriever", + "required": true, + "type": { + "kind": "instance_of", + "type": { + "name": "RetrieverContainer", + "namespace": "_types" + } + } + }, + { + "description": "This value determines how many documents we will consider from the nested retriever.", + "name": "rank_window_size", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "integer", + "namespace": "_types" + } + } + }, + { + "description": "Unique identifier of the inference endpoint created using the inference API.", + "name": "inference_id", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The text snippet used as the basis for similarity comparison", + "name": "inference_text", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + }, + { + "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text", + "name": "field", + "required": false, + "type": { + "kind": "instance_of", + "type": { + "name": "string", + "namespace": "_builtins" + } + } + } + ], + "specLocation": "_types/Retriever.ts#L84-L95" + }, { "kind": "enum", "members": [ diff --git a/output/typescript/types.ts b/output/typescript/types.ts index 5864413f9a..4e41886814 100644 --- a/output/typescript/types.ts +++ b/output/typescript/types.ts @@ -2697,12 +2697,14 @@ export interface Retries { export interface RetrieverBase { filter?: QueryDslQueryContainer | QueryDslQueryContainer[] + min_score?: float } export interface RetrieverContainer { standard?: StandardRetriever knn?: KnnRetriever rrf?: RRFRetriever + text_similarity_reranker?: TextSimilarityReranker } export type Routing = string @@ -2867,7 +2869,6 @@ export interface StandardRetriever extends RetrieverBase { search_after?: SortResults terminate_after?: integer sort?: Sort - min_score?: float collapse?: SearchFieldCollapse } @@ -2904,6 +2905,14 @@ export interface TextEmbedding { model_text: string } +export interface TextSimilarityReranker extends RetrieverBase { + retriever: RetrieverContainer + rank_window_size?: integer + inference_id?: string + inference_text?: string + field?: string +} + export type ThreadType = 'cpu' | 'wait' | 'block' | 'gpu' | 'mem' export type TimeOfDay = string diff --git a/specification/_global/search/SearchRequest.ts b/specification/_global/search/SearchRequest.ts index 9fec2b244d..b2719a6b29 100644 --- a/specification/_global/search/SearchRequest.ts +++ b/specification/_global/search/SearchRequest.ts @@ -428,8 +428,8 @@ export interface Request extends RequestBase { rescore?: Rescore | Rescore[] /** * A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn. - * @availability stack since=8.14.0 - * @availability serverless + * @availability stack since=8.14.0 stability=stable + * @availability serverless stability=stable */ retriever?: RetrieverContainer /** diff --git a/specification/_types/Retriever.ts b/specification/_types/Retriever.ts index 5118033924..42d5c5f232 100644 --- a/specification/_types/Retriever.ts +++ b/specification/_types/Retriever.ts @@ -33,11 +33,15 @@ export class RetrieverContainer { knn?: KnnRetriever /** A retriever that produces top documents from reciprocal rank fusion (RRF). */ rrf?: RRFRetriever + /** A retriever that reranks the top documents based on a reranking model using the InferenceAPI */ + text_similarity_reranker?: TextSimilarityReranker } export class RetrieverBase { /** Query to filter the documents that can match. */ filter?: QueryContainer | QueryContainer[] + /** Minimum _score for matching documents. Documents with a lower _score are not included in the top documents. */ + min_score?: float } export class StandardRetriever extends RetrieverBase { @@ -49,8 +53,6 @@ export class StandardRetriever extends RetrieverBase { terminate_after?: integer /** A sort object that that specifies the order of matching documents. */ sort?: Sort - /** Minimum _score for matching documents. Documents with a lower _score are not included in the top documents. */ - min_score?: float /** Collapses the top documents by a specified key into a single top document per key. */ collapse?: FieldCollapse } @@ -78,3 +80,16 @@ export class RRFRetriever extends RetrieverBase { /** This value determines the size of the individual result sets per query. */ rank_window_size?: integer } + +export class TextSimilarityReranker extends RetrieverBase { + /** The nested retriever which will produce the first-level results, that will later be used for reranking. */ + retriever: RetrieverContainer + /** This value determines how many documents we will consider from the nested retriever. */ + rank_window_size?: integer + /** Unique identifier of the inference endpoint created using the inference API. */ + inference_id?: string + /** The text snippet used as the basis for similarity comparison */ + inference_text?: string + /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text */ + field?: string +}