From d78609196510eaa4149f6478db0a6f21614647a3 Mon Sep 17 00:00:00 2001
From: Panagiotis Bailis <pmpailis@gmail.com>
Date: Thu, 31 Oct 2024 15:38:14 +0200
Subject: [PATCH] Adding text_similarity_reranker retriever specification
 (#3057)

---
 output/openapi/elasticsearch-openapi.json     |  45 +++++-
 .../elasticsearch-serverless-openapi.json     |  45 +++++-
 output/schema/schema-serverless.json          | 129 +++++++++++++++---
 output/schema/schema.json                     | 129 +++++++++++++++---
 output/typescript/types.ts                    |  11 +-
 specification/_global/search/SearchRequest.ts |   4 +-
 specification/_types/Retriever.ts             |  19 ++-
 7 files changed, 331 insertions(+), 51 deletions(-)

diff --git a/output/openapi/elasticsearch-openapi.json b/output/openapi/elasticsearch-openapi.json
index 2d5e78cca7..d10d323b03 100644
--- a/output/openapi/elasticsearch-openapi.json
+++ b/output/openapi/elasticsearch-openapi.json
@@ -80833,6 +80833,9 @@
           },
           "rrf": {
             "$ref": "#/components/schemas/_types:RRFRetriever"
+          },
+          "text_similarity_reranker": {
+            "$ref": "#/components/schemas/_types:TextSimilarityReranker"
           }
         },
         "minProperties": 1,
@@ -80859,10 +80862,6 @@
               "sort": {
                 "$ref": "#/components/schemas/_types:Sort"
               },
-              "min_score": {
-                "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
-                "type": "number"
-              },
               "collapse": {
                 "$ref": "#/components/schemas/_global.search._types:FieldCollapse"
               }
@@ -80886,6 +80885,10 @@
                 }
               }
             ]
+          },
+          "min_score": {
+            "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
+            "type": "number"
           }
         }
       },
@@ -80958,6 +80961,40 @@
           }
         ]
       },
+      "_types:TextSimilarityReranker": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/_types:RetrieverBase"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "retriever": {
+                "$ref": "#/components/schemas/_types:RetrieverContainer"
+              },
+              "rank_window_size": {
+                "description": "This value determines how many documents we will consider from the nested retriever.",
+                "type": "number"
+              },
+              "inference_id": {
+                "description": "Unique identifier of the inference endpoint created using the inference API.",
+                "type": "string"
+              },
+              "inference_text": {
+                "description": "The text snippet used as the basis for similarity comparison",
+                "type": "string"
+              },
+              "field": {
+                "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text",
+                "type": "string"
+              }
+            },
+            "required": [
+              "retriever"
+            ]
+          }
+        ]
+      },
       "search_application._types:SearchApplication": {
         "allOf": [
           {
diff --git a/output/openapi/elasticsearch-serverless-openapi.json b/output/openapi/elasticsearch-serverless-openapi.json
index 04e86fac42..88e0db7e4d 100644
--- a/output/openapi/elasticsearch-serverless-openapi.json
+++ b/output/openapi/elasticsearch-serverless-openapi.json
@@ -53178,6 +53178,9 @@
           },
           "rrf": {
             "$ref": "#/components/schemas/_types:RRFRetriever"
+          },
+          "text_similarity_reranker": {
+            "$ref": "#/components/schemas/_types:TextSimilarityReranker"
           }
         },
         "minProperties": 1,
@@ -53204,10 +53207,6 @@
               "sort": {
                 "$ref": "#/components/schemas/_types:Sort"
               },
-              "min_score": {
-                "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
-                "type": "number"
-              },
               "collapse": {
                 "$ref": "#/components/schemas/_global.search._types:FieldCollapse"
               }
@@ -53231,6 +53230,10 @@
                 }
               }
             ]
+          },
+          "min_score": {
+            "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
+            "type": "number"
           }
         }
       },
@@ -53303,6 +53306,40 @@
           }
         ]
       },
+      "_types:TextSimilarityReranker": {
+        "allOf": [
+          {
+            "$ref": "#/components/schemas/_types:RetrieverBase"
+          },
+          {
+            "type": "object",
+            "properties": {
+              "retriever": {
+                "$ref": "#/components/schemas/_types:RetrieverContainer"
+              },
+              "rank_window_size": {
+                "description": "This value determines how many documents we will consider from the nested retriever.",
+                "type": "number"
+              },
+              "inference_id": {
+                "description": "Unique identifier of the inference endpoint created using the inference API.",
+                "type": "string"
+              },
+              "inference_text": {
+                "description": "The text snippet used as the basis for similarity comparison",
+                "type": "string"
+              },
+              "field": {
+                "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text",
+                "type": "string"
+              }
+            },
+            "required": [
+              "retriever"
+            ]
+          }
+        ]
+      },
       "search_application._types:SearchApplication": {
         "allOf": [
           {
diff --git a/output/schema/schema-serverless.json b/output/schema/schema-serverless.json
index 6cdd3373f5..80d3d4b86c 100644
--- a/output/schema/schema-serverless.json
+++ b/output/schema/schema-serverless.json
@@ -36176,9 +36176,12 @@
           },
           {
             "availability": {
-              "serverless": {},
+              "serverless": {
+                "stability": "stable"
+              },
               "stack": {
-                "since": "8.14.0"
+                "since": "8.14.0",
+                "stability": "stable"
               }
             },
             "description": "A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn.",
@@ -138597,9 +138600,21 @@
               "namespace": "_types"
             }
           }
+        },
+        {
+          "description": "A retriever that reranks the top documents based on a reranking model using the InferenceAPI",
+          "name": "text_similarity_reranker",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "TextSimilarityReranker",
+              "namespace": "_types"
+            }
+          }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L26-L36",
+      "specLocation": "_types/Retriever.ts#L26-L38",
       "variants": {
         "kind": "container"
       }
@@ -138665,18 +138680,6 @@
             }
           }
         },
-        {
-          "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
-          "name": "min_score",
-          "required": false,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "float",
-              "namespace": "_types"
-            }
-          }
-        },
         {
           "description": "Collapses the top documents by a specified key into a single top document per key.",
           "name": "collapse",
@@ -138690,7 +138693,7 @@
           }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L43-L56"
+      "specLocation": "_types/Retriever.ts#L47-L58"
     },
     {
       "kind": "interface",
@@ -138725,9 +138728,21 @@
             ],
             "kind": "union_of"
           }
+        },
+        {
+          "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
+          "name": "min_score",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "float",
+              "namespace": "_types"
+            }
+          }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L38-L41"
+      "specLocation": "_types/Retriever.ts#L40-L45"
     },
     {
       "inherits": {
@@ -138815,7 +138830,7 @@
           }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L58-L71"
+      "specLocation": "_types/Retriever.ts#L60-L73"
     },
     {
       "inherits": {
@@ -138870,7 +138885,83 @@
           }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L73-L80"
+      "specLocation": "_types/Retriever.ts#L75-L82"
+    },
+    {
+      "inherits": {
+        "type": {
+          "name": "RetrieverBase",
+          "namespace": "_types"
+        }
+      },
+      "kind": "interface",
+      "name": {
+        "name": "TextSimilarityReranker",
+        "namespace": "_types"
+      },
+      "properties": [
+        {
+          "description": "The nested retriever which will produce the first-level results, that will later be used for reranking.",
+          "name": "retriever",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "RetrieverContainer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "This value determines how many documents we will consider from the nested retriever.",
+          "name": "rank_window_size",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Unique identifier of the inference endpoint created using the inference API.",
+          "name": "inference_id",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The text snippet used as the basis for similarity comparison",
+          "name": "inference_text",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text",
+          "name": "field",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        }
+      ],
+      "specLocation": "_types/Retriever.ts#L84-L95"
     },
     {
       "inherits": {
diff --git a/output/schema/schema.json b/output/schema/schema.json
index ffcf25fed7..c08f9db232 100644
--- a/output/schema/schema.json
+++ b/output/schema/schema.json
@@ -32227,9 +32227,12 @@
           },
           {
             "availability": {
-              "serverless": {},
+              "serverless": {
+                "stability": "stable"
+              },
               "stack": {
-                "since": "8.14.0"
+                "since": "8.14.0",
+                "stability": "stable"
               }
             },
             "description": "A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn.",
@@ -45906,7 +45909,7 @@
           }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L58-L71"
+      "specLocation": "_types/Retriever.ts#L60-L73"
     },
     {
       "kind": "interface",
@@ -47413,7 +47416,7 @@
           }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L73-L80"
+      "specLocation": "_types/Retriever.ts#L75-L82"
     },
     {
       "kind": "interface",
@@ -47838,9 +47841,21 @@
               }
             ]
           }
+        },
+        {
+          "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
+          "name": "min_score",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "float",
+              "namespace": "_types"
+            }
+          }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L38-L41"
+      "specLocation": "_types/Retriever.ts#L40-L45"
     },
     {
       "kind": "interface",
@@ -47884,9 +47899,21 @@
               "namespace": "_types"
             }
           }
+        },
+        {
+          "description": "A retriever that reranks the top documents based on a reranking model using the InferenceAPI",
+          "name": "text_similarity_reranker",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "TextSimilarityReranker",
+              "namespace": "_types"
+            }
+          }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L26-L36",
+      "specLocation": "_types/Retriever.ts#L26-L38",
       "variants": {
         "kind": "container"
       }
@@ -49522,18 +49549,6 @@
             }
           }
         },
-        {
-          "description": "Minimum _score for matching documents. Documents with a lower _score are not included in the top documents.",
-          "name": "min_score",
-          "required": false,
-          "type": {
-            "kind": "instance_of",
-            "type": {
-              "name": "float",
-              "namespace": "_types"
-            }
-          }
-        },
         {
           "description": "Collapses the top documents by a specified key into a single top document per key.",
           "name": "collapse",
@@ -49547,7 +49562,7 @@
           }
         }
       ],
-      "specLocation": "_types/Retriever.ts#L43-L56"
+      "specLocation": "_types/Retriever.ts#L47-L58"
     },
     {
       "kind": "interface",
@@ -49838,6 +49853,82 @@
       ],
       "specLocation": "_types/Knn.ts#L79-L82"
     },
+    {
+      "kind": "interface",
+      "inherits": {
+        "type": {
+          "name": "RetrieverBase",
+          "namespace": "_types"
+        }
+      },
+      "name": {
+        "name": "TextSimilarityReranker",
+        "namespace": "_types"
+      },
+      "properties": [
+        {
+          "description": "The nested retriever which will produce the first-level results, that will later be used for reranking.",
+          "name": "retriever",
+          "required": true,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "RetrieverContainer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "This value determines how many documents we will consider from the nested retriever.",
+          "name": "rank_window_size",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "integer",
+              "namespace": "_types"
+            }
+          }
+        },
+        {
+          "description": "Unique identifier of the inference endpoint created using the inference API.",
+          "name": "inference_id",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The text snippet used as the basis for similarity comparison",
+          "name": "inference_text",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        },
+        {
+          "description": "The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text",
+          "name": "field",
+          "required": false,
+          "type": {
+            "kind": "instance_of",
+            "type": {
+              "name": "string",
+              "namespace": "_builtins"
+            }
+          }
+        }
+      ],
+      "specLocation": "_types/Retriever.ts#L84-L95"
+    },
     {
       "kind": "enum",
       "members": [
diff --git a/output/typescript/types.ts b/output/typescript/types.ts
index 5864413f9a..4e41886814 100644
--- a/output/typescript/types.ts
+++ b/output/typescript/types.ts
@@ -2697,12 +2697,14 @@ export interface Retries {
 
 export interface RetrieverBase {
   filter?: QueryDslQueryContainer | QueryDslQueryContainer[]
+  min_score?: float
 }
 
 export interface RetrieverContainer {
   standard?: StandardRetriever
   knn?: KnnRetriever
   rrf?: RRFRetriever
+  text_similarity_reranker?: TextSimilarityReranker
 }
 
 export type Routing = string
@@ -2867,7 +2869,6 @@ export interface StandardRetriever extends RetrieverBase {
   search_after?: SortResults
   terminate_after?: integer
   sort?: Sort
-  min_score?: float
   collapse?: SearchFieldCollapse
 }
 
@@ -2904,6 +2905,14 @@ export interface TextEmbedding {
   model_text: string
 }
 
+export interface TextSimilarityReranker extends RetrieverBase {
+  retriever: RetrieverContainer
+  rank_window_size?: integer
+  inference_id?: string
+  inference_text?: string
+  field?: string
+}
+
 export type ThreadType = 'cpu' | 'wait' | 'block' | 'gpu' | 'mem'
 
 export type TimeOfDay = string
diff --git a/specification/_global/search/SearchRequest.ts b/specification/_global/search/SearchRequest.ts
index 9fec2b244d..b2719a6b29 100644
--- a/specification/_global/search/SearchRequest.ts
+++ b/specification/_global/search/SearchRequest.ts
@@ -428,8 +428,8 @@ export interface Request extends RequestBase {
     rescore?: Rescore | Rescore[]
     /**
      * A retriever is a specification to describe top documents returned from a search. A retriever replaces other elements of the search API that also return top documents such as query and knn.
-     * @availability stack since=8.14.0
-     * @availability serverless
+     * @availability stack since=8.14.0 stability=stable
+     * @availability serverless stability=stable
      */
     retriever?: RetrieverContainer
     /**
diff --git a/specification/_types/Retriever.ts b/specification/_types/Retriever.ts
index 5118033924..42d5c5f232 100644
--- a/specification/_types/Retriever.ts
+++ b/specification/_types/Retriever.ts
@@ -33,11 +33,15 @@ export class RetrieverContainer {
   knn?: KnnRetriever
   /** A retriever that produces top documents from reciprocal rank fusion (RRF). */
   rrf?: RRFRetriever
+  /** A retriever that reranks the top documents based on a reranking model using the InferenceAPI */
+  text_similarity_reranker?: TextSimilarityReranker
 }
 
 export class RetrieverBase {
   /** Query to filter the documents that can match. */
   filter?: QueryContainer | QueryContainer[]
+  /** Minimum _score for matching documents. Documents with a lower _score are not included in the top documents. */
+  min_score?: float
 }
 
 export class StandardRetriever extends RetrieverBase {
@@ -49,8 +53,6 @@ export class StandardRetriever extends RetrieverBase {
   terminate_after?: integer
   /** A sort object that that specifies the order of matching documents. */
   sort?: Sort
-  /** Minimum _score for matching documents. Documents with a lower _score are not included in the top documents. */
-  min_score?: float
   /** Collapses the top documents by a specified key into a single top document per key. */
   collapse?: FieldCollapse
 }
@@ -78,3 +80,16 @@ export class RRFRetriever extends RetrieverBase {
   /** This value determines the size of the individual result sets per query.  */
   rank_window_size?: integer
 }
+
+export class TextSimilarityReranker extends RetrieverBase {
+  /** The nested retriever which will produce the first-level results, that will later be used for reranking. */
+  retriever: RetrieverContainer
+  /** This value determines how many documents we will consider from the nested retriever. */
+  rank_window_size?: integer
+  /** Unique identifier of the inference endpoint created using the inference API. */
+  inference_id?: string
+  /** The text snippet used as the basis for similarity comparison */
+  inference_text?: string
+  /** The document field to be used for text similarity comparisons. This field should contain the text that will be evaluated against the inference_text */
+  field?: string
+}