From c677939e09df8cc5e5e86fc3bce5ab9821caf0be Mon Sep 17 00:00:00 2001 From: dphuang2 Date: Tue, 26 Mar 2024 01:06:04 -0700 Subject: [PATCH] publish jina.ai --- .../from-custom-request_jina.ai.yaml | 152 ++++ sdks/db/category-cache.yaml | 1 + sdks/db/custom-request-last-fetched.yaml | 1 + sdks/db/custom-request-specs/jina.ai.yaml | 780 +++++++++++++++++ .../fixed-specs-cache/jina-ai-fixed-spec.yaml | 813 ++++++++++++++++++ sdks/db/fixed-specs/jina-ai-fixed-spec.yaml | 790 +++++++++++++++++ .../jina-ai.json | 3 + .../jina-ai/openapi.yaml | 780 +++++++++++++++++ .../jina.ai.yaml | 19 + sdks/db/progress/jina-ai-progress.yaml | 31 + .../from-custom-request_jina.ai.json | 280 ++++++ .../from-custom-request_jina.ai.json | 24 + sdks/publish.yaml | 31 + sdks/src/collect-from-custom-requests.ts | 5 + 14 files changed, 3710 insertions(+) create mode 100644 sdks/db/cached-method-objects/from-custom-request_jina.ai.yaml create mode 100644 sdks/db/custom-request-specs/jina.ai.yaml create mode 100644 sdks/db/fixed-specs-cache/jina-ai-fixed-spec.yaml create mode 100644 sdks/db/fixed-specs/jina-ai-fixed-spec.yaml create mode 100644 sdks/db/generate-repository-description-cache/jina-ai.json create mode 100644 sdks/db/intermediate-fixed-specs/jina-ai/openapi.yaml create mode 100644 sdks/db/processed-custom-request-cache/jina.ai.yaml create mode 100644 sdks/db/progress/jina-ai-progress.yaml create mode 100644 sdks/db/published/from-custom-request_jina.ai.json create mode 100644 sdks/db/spec-data/from-custom-request_jina.ai.json diff --git a/sdks/db/cached-method-objects/from-custom-request_jina.ai.yaml b/sdks/db/cached-method-objects/from-custom-request_jina.ai.yaml new file mode 100644 index 000000000..320448e62 --- /dev/null +++ b/sdks/db/cached-method-objects/from-custom-request_jina.ai.yaml @@ -0,0 +1,152 @@ +hash: 96399bc7fcb32c5e85bb6e35881574193359371381eb5417a0a29dca5d4a2f0b +methodObjects: + - url: /v1/embeddings + method: createRepresentation + httpMethod: post + tag: embeddings + typeScriptTag: embeddings + description: Create Embedding + parameters: [] + responses: + - statusCode: '200' + description: Output of the embedding service + - statusCode: '422' + description: '' + - url: /v1/bulk-embeddings + method: uploadFileAndGetEmbeddings + httpMethod: post + tag: bulk-embeddings + typeScriptTag: bulkEmbeddings + description: Start Bulk Embedding + parameters: + - name: file + schema: string + required: true + description: '' + example: FILE + - name: email + schema: string + required: false + description: '' + - name: model + schema: string + required: true + description: '' + example: MODEL + responses: + - statusCode: '200' + description: '' + - statusCode: '422' + description: '' + - url: /v1/bulk-embeddings/{job_id} + method: getJob + httpMethod: get + tag: bulk-embeddings + typeScriptTag: bulkEmbeddings + description: Retrieve Job + parameters: + - name: jobId + schema: string + required: true + description: '' + example: JOB_ID + responses: + - statusCode: '200' + description: '' + - statusCode: '422' + description: '' + - url: /v1/bulk-embeddings/{job_id}/download-result + method: downloadResultPost + httpMethod: post + tag: bulk-embeddings + typeScriptTag: bulkEmbeddings + description: Download Result + parameters: + - name: jobId + schema: string + required: true + description: '' + example: JOB_ID + responses: + - statusCode: '200' + description: '' + - statusCode: '422' + description: '' + - url: /v1/rerank + method: pairRanking + httpMethod: post + tag: rerank + typeScriptTag: rerank + description: Rank + parameters: + - name: model + schema: string + required: true + description: '' + example: MODEL + - name: query + schema: undefined + required: true + description: '' + - name: documents + schema: undefined + required: true + description: '' + - name: top_n + schema: integer + required: false + description: '' + - name: return_documents + schema: boolean + required: false + description: '' + default: true + responses: + - statusCode: '200' + description: Output of the embedding service + - statusCode: '422' + description: '' + - url: /v1/multi-embeddings + method: generateEmbeddings + httpMethod: post + tag: multi-embeddings + typeScriptTag: multiEmbeddings + description: Create Multi Embeddings + parameters: + - name: model + schema: string + required: true + description: '' + example: MODEL + - name: input + schema: undefined + required: true + description: '' + - name: input_type + schema: string + required: false + description: '' + default: document + - name: encoding_format + schema: string + required: false + description: '' + responses: + - statusCode: '200' + description: Output of the embedding service + - statusCode: '422' + description: '' + - url: / + method: checkStatus + httpMethod: get + tag: health + typeScriptTag: health + description: Get the health of Universal API service + parameters: [] + responses: + - statusCode: '200' + description: >- + Pydantic BaseModel for Jina health check, used as the response model + in REST app. +numberOfSchemas: 20 +apiDescription: This is the UniversalAPI to access all the Jina embedding models diff --git a/sdks/db/category-cache.yaml b/sdks/db/category-cache.yaml index 5e20404da..032ffe859 100644 --- a/sdks/db/category-cache.yaml +++ b/sdks/db/category-cache.yaml @@ -144,3 +144,4 @@ apis: HiBob-undefined: HR Talent & Recruitment intelliHR-undefined: HR Talent & Recruitment InducedAI-undefined: AI Tools + Jina AI-undefined: AI Tools diff --git a/sdks/db/custom-request-last-fetched.yaml b/sdks/db/custom-request-last-fetched.yaml index 1f4bc3998..fd8a52f09 100644 --- a/sdks/db/custom-request-last-fetched.yaml +++ b/sdks/db/custom-request-last-fetched.yaml @@ -127,3 +127,4 @@ lastUpdated: hibob.com: 2024-03-26T06:54:43.640Z intellihr.com: 2024-03-26T07:08:24.035Z induced.ai: 2024-03-26T07:54:57.246Z + jina.ai: 2024-03-26T08:04:32.057Z diff --git a/sdks/db/custom-request-specs/jina.ai.yaml b/sdks/db/custom-request-specs/jina.ai.yaml new file mode 100644 index 000000000..170053aaf --- /dev/null +++ b/sdks/db/custom-request-specs/jina.ai.yaml @@ -0,0 +1,780 @@ +openapi: 3.1.0 +info: + title: The Jina Embedding Serving API + description: This is the UniversalAPI to access all the Jina embedding models + version: 0.0.86 +paths: + /v1/embeddings: + post: + tags: + - embeddings + summary: Create Embedding + description: Create embedding representations of the given input texts. + operationId: create_embedding_v1_embeddings_post + requestBody: + content: + application/json: + schema: + anyOf: + - $ref: >- + #/components/schemas/api_schemas__embedding__TextEmbeddingInput + - $ref: '#/components/schemas/ImageEmbeddingInput' + title: Body + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ModelEmbeddingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/bulk-embeddings: + post: + tags: + - bulk-embeddings + summary: Start Bulk Embedding + description: Upload a file and get embeddings for each row + operationId: start_bulk_embedding_v1_bulk_embeddings_post + requestBody: + content: + multipart/form-data: + schema: + $ref: >- + #/components/schemas/Body_start_bulk_embedding_v1_bulk_embeddings_post + required: true + responses: + '200': + description: Start a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/bulk-embeddings/{job_id}: + get: + tags: + - bulk-embeddings + summary: Retrieve Job + operationId: retrieve_job_v1_bulk_embeddings__job_id__get + parameters: + - required: true + schema: + type: string + title: Job Id + name: job_id + in: path + responses: + '200': + description: Get information about a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/bulk-embeddings/{job_id}/download-result: + post: + tags: + - bulk-embeddings + summary: Download Result + operationId: download_result_v1_bulk_embeddings__job_id__download_result_post + parameters: + - required: true + schema: + type: string + title: Job Id + name: job_id + in: path + responses: + '200': + description: Download the result of a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/DownloadResultResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/rerank: + post: + tags: + - rerank + summary: Rank + description: Rank pairs. + operationId: rank_v1_rerank_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/TextRankInput' + required: true + responses: + '200': + description: Rank output + content: + application/json: + schema: + $ref: '#/components/schemas/RankingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/multi-embeddings: + post: + tags: + - multi-embeddings + summary: Create Multi Embeddings + description: Create embedding representations of the given input texts. + operationId: create_multi_embeddings_v1_multi_embeddings_post + requestBody: + content: + application/json: + schema: + $ref: >- + #/components/schemas/api_schemas__multi_embeddings__TextEmbeddingInput + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ColbertModelEmbeddingsOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /: + get: + summary: Get the health of Universal API service + description: |- + Get the health of this Gateway service. + .. # noqa: DAR201 + operationId: _gateway_health__get + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/HealthModel' +components: + schemas: + Body_start_bulk_embedding_v1_bulk_embeddings_post: + properties: + file: + type: string + format: binary + title: File + email: + type: string + format: email + title: Email + model: + type: string + title: Model + type: object + required: + - file + - model + title: Body_start_bulk_embedding_v1_bulk_embeddings_post + BulkEmbeddingJobResponse: + properties: + user_id: + type: string + title: User Id + description: The user ID of the user who created the job + model_name: + type: string + title: Model Name + description: The name of the model to use + model_package_arn: + type: string + title: Model Package Arn + description: The model package ARN + status: + allOf: + - $ref: '#/components/schemas/BulkEmbeddingJobStatus' + description: The status of the job + file_name: + type: string + title: File Name + description: The name of the input file + user_email: + type: string + format: email + title: User Email + description: The email of the user who created the job + created_at: + type: string + format: date-time + title: Created At + description: Time of creation of the job. + nullable: false + completed_at: + type: string + format: date-time + title: Completed At + description: Time of completion of the job. + error: + type: string + title: Error + description: The error message of the job + _id: + type: string + title: ' Id' + description: The ID of the job + used_token_count: + type: integer + title: Used Token Count + description: The number of tokens used for the job + type: object + required: + - user_id + - model_name + - status + - file_name + - _id + title: BulkEmbeddingJobResponse + example: + id: '000000000000000000000000' + model_name: model_1 + status: in-progress + file_name: input.csv + used_token_count: 1000 + BulkEmbeddingJobStatus: + type: string + enum: + - waiting + - in-progress + - failed + - completed + title: BulkEmbeddingJobStatus + description: An enumeration. + ColbertModelEmbeddingsOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + object: + type: string + title: Object + default: list + data: + items: {} + type: array + title: Data + description: A list of Embedding Objects returned by the embedding service + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__embedding__Usage' + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + type: object + required: + - model + - data + - usage + title: ColbertModelEmbeddingsOutput + description: Output of the embedding service + example: + data: + - index: 0 + embeddings: + - - 0.1 + - 0.2 + - 0.3 + - - 0.4 + - 0.5 + - 0.6 + object: embeddings + - index: 1 + embeddings: + - - 0.6 + - 0.5 + - 0.4 + - - 0.3 + - 0.2 + - 0.1 + object: embeddings + usage: + total_tokens: 15 + prompt_tokens: 15 + DownloadResultResponse: + properties: + id: + type: string + title: Id + description: The ID of the job + download_url: + type: string + title: Download Url + description: The URL to download the result file + type: object + required: + - id + - download_url + title: DownloadResultResponse + example: + id: '000000000000000000000000' + download_url: https://example.com + HTTPValidationError: + properties: + detail: + items: + $ref: '#/components/schemas/ValidationError' + type: array + title: Detail + type: object + title: HTTPValidationError + HealthModel: + properties: {} + type: object + title: HealthModel + description: >- + Pydantic BaseModel for Jina health check, used as the response model in + REST app. + ImageDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + url: + type: string + maxLength: 65536 + minLength: 1 + format: uri + title: Url + description: URL of an image file + bytes: + type: string + format: binary + title: Bytes + description: Bytes representation of the Image. + type: object + title: ImageDoc + description: >- + BaseDoc is the base class for all Documents. This class should be + subclassed + + to create new Document types with a specific schema. + + + The schema of a Document is defined by the fields of the class. + + + Example: + + ```python + + from docarray import BaseDoc + + from docarray.typing import NdArray, ImageUrl + + import numpy as np + + + + class MyDoc(BaseDoc): + embedding: NdArray[512] + image: ImageUrl + + + doc = MyDoc(embedding=np.zeros(512), + image='https://example.com/image.jpg') + + ``` + + + + BaseDoc is a subclass of [pydantic.BaseModel]( + + https://docs.pydantic.dev/usage/models/) and can be used in a similar + way. + ImageEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + $ref: '#/components/schemas/ImageDoc' + type: array + - $ref: '#/components/schemas/ImageDoc' + title: Input + description: List of images to embed + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: ImageEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: clip + input: + - bytes or URL + ModelEmbeddingOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + object: + type: string + title: Object + default: list + data: + items: {} + type: array + title: Data + description: A list of Embedding Objects returned by the embedding service + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__embedding__Usage' + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + type: object + required: + - model + - data + - usage + title: ModelEmbeddingOutput + description: Output of the embedding service + example: + data: + - index: 0 + embedding: + - 0.1 + - 0.2 + - 0.3 + object: embedding + - index: 1 + embedding: + - 0.3 + - 0.2 + - 0.1 + object: embedding + usage: + total_tokens: 15 + prompt_tokens: 15 + RankingOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + results: + items: {} + type: array + title: Results + description: An ordered list of ranked documents + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__rank__Usage' + title: Usage + description: Total usage of the request. + type: object + required: + - model + - results + - usage + title: RankingOutput + description: Output of the embedding service + example: + results: + - index: 0 + document: + text: Document to rank 1 + relevance_score: 0.9 + - index: 1 + document: + text: Document to rank 2 + relevance_score: 0.8 + usage: + total_tokens: 15 + prompt_tokens: 15 + TextRankInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + query: + anyOf: + - type: string + - $ref: '#/components/schemas/api_schemas__rank__TextDoc' + title: Query + description: The search query + documents: + anyOf: + - items: + type: string + type: array + - items: + $ref: '#/components/schemas/api_schemas__rank__TextDoc' + type: array + title: Documents + description: >- + A list of text documents or strings to rerank. If a document is + provided the text fields is required and all other fields will be + preserved in the response. + top_n: + type: integer + title: Top N + description: >- + The number of most relevant documents or indices to return, defaults + to the length of `documents` + return_documents: + type: boolean + title: Return Documents + description: >- + If false, returns results without the doc text - the api will return + a list of {index, relevance score} where index is inferred from the + list passed into the request. If true, returns results with the doc + text passed in - the api will return an ordered list of {index, + text, relevance score} where index + text refers to the list passed + into the request. Defaults to true + default: true + additionalProperties: false + type: object + required: + - model + - query + - documents + title: TextRankInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-reranker-v1-base-en + query: Search query + documents: + - Document to rank 1 + - Document to rank 2 + ValidationError: + properties: + loc: + items: + anyOf: + - type: string + - type: integer + type: array + title: Location + msg: + type: string + title: Message + type: + type: string + title: Error Type + type: object + required: + - loc + - msg + - type + title: ValidationError + api_schemas__embedding__TextDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + text: + type: string + title: Text + type: object + required: + - text + title: TextDoc + description: Document containing a text field + api_schemas__embedding__TextEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + title: Input + description: List of texts to embed + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-embeddings-v2-base-en + input: + - Hello, world! + api_schemas__embedding__Usage: + properties: + total_tokens: + type: integer + title: Total Tokens + description: The number of tokens used by all the texts in the input + prompt_tokens: + type: integer + title: Prompt Tokens + description: Same as total_tokens + type: object + required: + - total_tokens + - prompt_tokens + title: Usage + api_schemas__multi_embeddings__TextEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + title: Input + description: List of texts to embed + input_type: + type: string + enum: + - query + - document + title: Input Type + description: Type of the embedding to compute, query or document + default: document + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-colbert-v1-en + input: + - Hello, world! + api_schemas__rank__TextDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + text: + type: string + title: Text + type: object + required: + - text + title: TextDoc + description: Document containing a text field + api_schemas__rank__Usage: + properties: + total_tokens: + type: integer + title: Total Tokens + description: The number of tokens used by all the texts in the input + prompt_tokens: + type: integer + title: Prompt Tokens + description: Same as total_tokens + type: object + required: + - total_tokens + - prompt_tokens + title: Usage + example: + total_tokens: 15 + prompt_tokens: 15 + securitySchemes: + HTTPBearer: + type: http + scheme: bearer +servers: + - url: https://api.jina.ai diff --git a/sdks/db/fixed-specs-cache/jina-ai-fixed-spec.yaml b/sdks/db/fixed-specs-cache/jina-ai-fixed-spec.yaml new file mode 100644 index 000000000..3c5d7680a --- /dev/null +++ b/sdks/db/fixed-specs-cache/jina-ai-fixed-spec.yaml @@ -0,0 +1,813 @@ +publishJson: + company: Jina AI + serviceName: false + sdkName: jina-ai-{language}-sdk + clientName: JinaAi + metaDescription: >- + Founded in February 2020, Jina AI has swiftly emerged as a global pioneer in + multimodal AI technology. Within an impressive timeframe of 20 months, we + have successfully raised $37.5M, marking our strong position in the AI + industry. Our ground-breaking technology, open-sourced on GitHub, has + empowered over 40,000 developers around the globe to seamlessly build and + deploy sophisticated multimodal applications. + + + In 2023, we've made significant strides in advancing AI generation tools + grounded on multimodal technology. This innovation has benefited over + 250,000 users worldwide, catering to a plethora of unique business + requirements. From facilitating business growth and enhancing operational + efficiency to optimizing costs, Jina AI is dedicated to empowering + businesses to excel in the multimodal era. + apiStatusUrls: inherit + homepage: jina.ai + developerDocumentation: api.jina.ai/redoc + categories: + - artificial_intelligence + - open_source + - developer_tools + - machine_learning + - ai + - vector_search + - embedding +rawSpecString: | + openapi: 3.1.0 + info: + title: The Jina Embedding Serving API + description: This is the UniversalAPI to access all the Jina embedding models + version: 0.0.86 + paths: + /v1/embeddings: + post: + tags: + - embeddings + summary: Create Embedding + description: Create embedding representations of the given input texts. + operationId: create_embedding_v1_embeddings_post + requestBody: + content: + application/json: + schema: + anyOf: + - $ref: >- + #/components/schemas/api_schemas__embedding__TextEmbeddingInput + - $ref: '#/components/schemas/ImageEmbeddingInput' + title: Body + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ModelEmbeddingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/bulk-embeddings: + post: + tags: + - bulk-embeddings + summary: Start Bulk Embedding + description: Upload a file and get embeddings for each row + operationId: start_bulk_embedding_v1_bulk_embeddings_post + requestBody: + content: + multipart/form-data: + schema: + $ref: >- + #/components/schemas/Body_start_bulk_embedding_v1_bulk_embeddings_post + required: true + responses: + '200': + description: Start a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/bulk-embeddings/{job_id}: + get: + tags: + - bulk-embeddings + summary: Retrieve Job + operationId: retrieve_job_v1_bulk_embeddings__job_id__get + parameters: + - required: true + schema: + type: string + title: Job Id + name: job_id + in: path + responses: + '200': + description: Get information about a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/bulk-embeddings/{job_id}/download-result: + post: + tags: + - bulk-embeddings + summary: Download Result + operationId: download_result_v1_bulk_embeddings__job_id__download_result_post + parameters: + - required: true + schema: + type: string + title: Job Id + name: job_id + in: path + responses: + '200': + description: Download the result of a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/DownloadResultResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/rerank: + post: + tags: + - rerank + summary: Rank + description: Rank pairs. + operationId: rank_v1_rerank_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/TextRankInput' + required: true + responses: + '200': + description: Rank output + content: + application/json: + schema: + $ref: '#/components/schemas/RankingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/multi-embeddings: + post: + tags: + - multi-embeddings + summary: Create Multi Embeddings + description: Create embedding representations of the given input texts. + operationId: create_multi_embeddings_v1_multi_embeddings_post + requestBody: + content: + application/json: + schema: + $ref: >- + #/components/schemas/api_schemas__multi_embeddings__TextEmbeddingInput + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ColbertModelEmbeddingsOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /: + get: + summary: Get the health of Universal API service + description: |- + Get the health of this Gateway service. + .. # noqa: DAR201 + operationId: _gateway_health__get + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/HealthModel' + components: + schemas: + Body_start_bulk_embedding_v1_bulk_embeddings_post: + properties: + file: + type: string + format: binary + title: File + email: + type: string + format: email + title: Email + model: + type: string + title: Model + type: object + required: + - file + - model + title: Body_start_bulk_embedding_v1_bulk_embeddings_post + BulkEmbeddingJobResponse: + properties: + user_id: + type: string + title: User Id + description: The user ID of the user who created the job + model_name: + type: string + title: Model Name + description: The name of the model to use + model_package_arn: + type: string + title: Model Package Arn + description: The model package ARN + status: + allOf: + - $ref: '#/components/schemas/BulkEmbeddingJobStatus' + description: The status of the job + file_name: + type: string + title: File Name + description: The name of the input file + user_email: + type: string + format: email + title: User Email + description: The email of the user who created the job + created_at: + type: string + format: date-time + title: Created At + description: Time of creation of the job. + nullable: false + completed_at: + type: string + format: date-time + title: Completed At + description: Time of completion of the job. + error: + type: string + title: Error + description: The error message of the job + _id: + type: string + title: ' Id' + description: The ID of the job + used_token_count: + type: integer + title: Used Token Count + description: The number of tokens used for the job + type: object + required: + - user_id + - model_name + - status + - file_name + - _id + title: BulkEmbeddingJobResponse + example: + id: '000000000000000000000000' + model_name: model_1 + status: in-progress + file_name: input.csv + used_token_count: 1000 + BulkEmbeddingJobStatus: + type: string + enum: + - waiting + - in-progress + - failed + - completed + title: BulkEmbeddingJobStatus + description: An enumeration. + ColbertModelEmbeddingsOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + object: + type: string + title: Object + default: list + data: + items: {} + type: array + title: Data + description: A list of Embedding Objects returned by the embedding service + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__embedding__Usage' + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + type: object + required: + - model + - data + - usage + title: ColbertModelEmbeddingsOutput + description: Output of the embedding service + example: + data: + - index: 0 + embeddings: + - - 0.1 + - 0.2 + - 0.3 + - - 0.4 + - 0.5 + - 0.6 + object: embeddings + - index: 1 + embeddings: + - - 0.6 + - 0.5 + - 0.4 + - - 0.3 + - 0.2 + - 0.1 + object: embeddings + usage: + total_tokens: 15 + prompt_tokens: 15 + DownloadResultResponse: + properties: + id: + type: string + title: Id + description: The ID of the job + download_url: + type: string + title: Download Url + description: The URL to download the result file + type: object + required: + - id + - download_url + title: DownloadResultResponse + example: + id: '000000000000000000000000' + download_url: https://example.com + HTTPValidationError: + properties: + detail: + items: + $ref: '#/components/schemas/ValidationError' + type: array + title: Detail + type: object + title: HTTPValidationError + HealthModel: + properties: {} + type: object + title: HealthModel + description: >- + Pydantic BaseModel for Jina health check, used as the response model in + REST app. + ImageDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + url: + type: string + maxLength: 65536 + minLength: 1 + format: uri + title: Url + description: URL of an image file + bytes: + type: string + format: binary + title: Bytes + description: Bytes representation of the Image. + type: object + title: ImageDoc + description: >- + BaseDoc is the base class for all Documents. This class should be + subclassed + + to create new Document types with a specific schema. + + + The schema of a Document is defined by the fields of the class. + + + Example: + + ```python + + from docarray import BaseDoc + + from docarray.typing import NdArray, ImageUrl + + import numpy as np + + + + class MyDoc(BaseDoc): + embedding: NdArray[512] + image: ImageUrl + + + doc = MyDoc(embedding=np.zeros(512), + image='https://example.com/image.jpg') + + ``` + + + + BaseDoc is a subclass of [pydantic.BaseModel]( + + https://docs.pydantic.dev/usage/models/) and can be used in a similar + way. + ImageEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + $ref: '#/components/schemas/ImageDoc' + type: array + - $ref: '#/components/schemas/ImageDoc' + title: Input + description: List of images to embed + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: ImageEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: clip + input: + - bytes or URL + ModelEmbeddingOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + object: + type: string + title: Object + default: list + data: + items: {} + type: array + title: Data + description: A list of Embedding Objects returned by the embedding service + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__embedding__Usage' + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + type: object + required: + - model + - data + - usage + title: ModelEmbeddingOutput + description: Output of the embedding service + example: + data: + - index: 0 + embedding: + - 0.1 + - 0.2 + - 0.3 + object: embedding + - index: 1 + embedding: + - 0.3 + - 0.2 + - 0.1 + object: embedding + usage: + total_tokens: 15 + prompt_tokens: 15 + RankingOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + results: + items: {} + type: array + title: Results + description: An ordered list of ranked documents + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__rank__Usage' + title: Usage + description: Total usage of the request. + type: object + required: + - model + - results + - usage + title: RankingOutput + description: Output of the embedding service + example: + results: + - index: 0 + document: + text: Document to rank 1 + relevance_score: 0.9 + - index: 1 + document: + text: Document to rank 2 + relevance_score: 0.8 + usage: + total_tokens: 15 + prompt_tokens: 15 + TextRankInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + query: + anyOf: + - type: string + - $ref: '#/components/schemas/api_schemas__rank__TextDoc' + title: Query + description: The search query + documents: + anyOf: + - items: + type: string + type: array + - items: + $ref: '#/components/schemas/api_schemas__rank__TextDoc' + type: array + title: Documents + description: >- + A list of text documents or strings to rerank. If a document is + provided the text fields is required and all other fields will be + preserved in the response. + top_n: + type: integer + title: Top N + description: >- + The number of most relevant documents or indices to return, defaults + to the length of `documents` + return_documents: + type: boolean + title: Return Documents + description: >- + If false, returns results without the doc text - the api will return + a list of {index, relevance score} where index is inferred from the + list passed into the request. If true, returns results with the doc + text passed in - the api will return an ordered list of {index, + text, relevance score} where index + text refers to the list passed + into the request. Defaults to true + default: true + additionalProperties: false + type: object + required: + - model + - query + - documents + title: TextRankInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-reranker-v1-base-en + query: Search query + documents: + - Document to rank 1 + - Document to rank 2 + ValidationError: + properties: + loc: + items: + anyOf: + - type: string + - type: integer + type: array + title: Location + msg: + type: string + title: Message + type: + type: string + title: Error Type + type: object + required: + - loc + - msg + - type + title: ValidationError + api_schemas__embedding__TextDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + text: + type: string + title: Text + type: object + required: + - text + title: TextDoc + description: Document containing a text field + api_schemas__embedding__TextEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + title: Input + description: List of texts to embed + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-embeddings-v2-base-en + input: + - Hello, world! + api_schemas__embedding__Usage: + properties: + total_tokens: + type: integer + title: Total Tokens + description: The number of tokens used by all the texts in the input + prompt_tokens: + type: integer + title: Prompt Tokens + description: Same as total_tokens + type: object + required: + - total_tokens + - prompt_tokens + title: Usage + api_schemas__multi_embeddings__TextEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + title: Input + description: List of texts to embed + input_type: + type: string + enum: + - query + - document + title: Input Type + description: Type of the embedding to compute, query or document + default: document + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-colbert-v1-en + input: + - Hello, world! + api_schemas__rank__TextDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + text: + type: string + title: Text + type: object + required: + - text + title: TextDoc + description: Document containing a text field + api_schemas__rank__Usage: + properties: + total_tokens: + type: integer + title: Total Tokens + description: The number of tokens used by all the texts in the input + prompt_tokens: + type: integer + title: Prompt Tokens + description: Same as total_tokens + type: object + required: + - total_tokens + - prompt_tokens + title: Usage + example: + total_tokens: 15 + prompt_tokens: 15 + securitySchemes: + HTTPBearer: + type: http + scheme: bearer + servers: + - url: https://api.jina.ai +konfigCliVersion: 1.38.34 diff --git a/sdks/db/fixed-specs/jina-ai-fixed-spec.yaml b/sdks/db/fixed-specs/jina-ai-fixed-spec.yaml new file mode 100644 index 000000000..f38044f65 --- /dev/null +++ b/sdks/db/fixed-specs/jina-ai-fixed-spec.yaml @@ -0,0 +1,790 @@ +openapi: 3.0.3 +info: + title: The Jina Embedding Serving API + description: This is the UniversalAPI to access all the Jina embedding models + version: 0.0.86 + x-konfig-ignore: + object-with-no-properties: true +servers: + - url: https://api.jina.ai +tags: + - name: bulk-embeddings + - name: embeddings + - name: rerank + - name: multi-embeddings + - name: health +paths: + /v1/embeddings: + post: + tags: + - embeddings + summary: Create Embedding + operationId: Embeddings_createRepresentation + security: + - HTTPBearer: [] + description: Create embedding representations of the given input texts. + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/EmbeddingsCreateRepresentationRequest' + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ModelEmbeddingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/bulk-embeddings: + post: + tags: + - bulk-embeddings + summary: Start Bulk Embedding + operationId: Bulkembeddings_uploadFileAndGetEmbeddings + security: + - HTTPBearer: [] + description: Upload a file and get embeddings for each row + requestBody: + content: + multipart/form-data: + schema: + $ref: >- + #/components/schemas/Body_start_bulk_embedding_v1_bulk_embeddings_post + required: true + responses: + '200': + description: Start a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/bulk-embeddings/{job_id}: + get: + tags: + - bulk-embeddings + summary: Retrieve Job + operationId: Bulkembeddings_getJob + parameters: + - required: true + schema: + title: Job Id + type: string + name: job_id + in: path + responses: + '200': + description: Get information about a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/bulk-embeddings/{job_id}/download-result: + post: + tags: + - bulk-embeddings + summary: Download Result + operationId: Bulkembeddings_downloadResultPost + parameters: + - required: true + schema: + title: Job Id + type: string + name: job_id + in: path + responses: + '200': + description: Download the result of a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/DownloadResultResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/rerank: + post: + tags: + - rerank + summary: Rank + operationId: Rerank_pairRanking + security: + - HTTPBearer: [] + description: Rank pairs. + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/TextRankInput' + required: true + responses: + '200': + description: Rank output + content: + application/json: + schema: + $ref: '#/components/schemas/RankingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/multi-embeddings: + post: + tags: + - multi-embeddings + summary: Create Multi Embeddings + operationId: Multiembeddings_generateEmbeddings + security: + - HTTPBearer: [] + description: Create embedding representations of the given input texts. + requestBody: + content: + application/json: + schema: + $ref: >- + #/components/schemas/api_schemas__multi_embeddings__TextEmbeddingInput + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ColbertModelEmbeddingsOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /: + get: + tags: + - health + summary: Get the health of Universal API service + operationId: Health_checkStatus + description: |- + Get the health of this Gateway service. + .. # noqa: DAR201 + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/HealthModel' +components: + schemas: + Body_start_bulk_embedding_v1_bulk_embeddings_post: + title: Body_start_bulk_embedding_v1_bulk_embeddings_post + properties: + file: + title: File + type: string + format: binary + email: + title: Email + type: string + format: email + model: + title: Model + type: string + type: object + required: + - file + - model + BulkEmbeddingJobResponse: + title: BulkEmbeddingJobResponse + properties: + user_id: + title: User Id + description: The user ID of the user who created the job + type: string + model_name: + title: Model Name + description: The name of the model to use + type: string + model_package_arn: + title: Model Package Arn + description: The model package ARN + type: string + status: + $ref: '#/components/schemas/BulkEmbeddingJobStatus' + file_name: + title: File Name + description: The name of the input file + type: string + user_email: + title: User Email + description: The email of the user who created the job + type: string + format: email + created_at: + title: Created At + description: Time of creation of the job. + type: string + format: date-time + nullable: false + completed_at: + title: Completed At + description: Time of completion of the job. + type: string + format: date-time + error: + title: Error + description: The error message of the job + type: string + _id: + title: ' Id' + description: The ID of the job + type: string + used_token_count: + title: Used Token Count + description: The number of tokens used for the job + type: integer + type: object + required: + - user_id + - model_name + - status + - file_name + - _id + example: + id: '000000000000000000000000' + model_name: model_1 + status: in-progress + file_name: input.csv + used_token_count: 1000 + x-konfig-properties: + status: + description: The status of the job + BulkEmbeddingJobStatus: + title: BulkEmbeddingJobStatus + description: An enumeration. + type: string + enum: + - waiting + - in-progress + - failed + - completed + ColbertModelEmbeddingsOutput: + title: ColbertModelEmbeddingsOutput + description: Output of the embedding service + properties: + model: + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + type: string + object: + title: Object + type: string + default: list + data: + title: Data + description: A list of Embedding Objects returned by the embedding service + items: {} + type: array + usage: + $ref: '#/components/schemas/api_schemas__embedding__Usage' + type: object + required: + - model + - data + - usage + example: + data: + - index: 0 + embeddings: + - - 0.1 + - 0.2 + - 0.3 + - - 0.4 + - 0.5 + - 0.6 + object: embeddings + - index: 1 + embeddings: + - - 0.6 + - 0.5 + - 0.4 + - - 0.3 + - 0.2 + - 0.1 + object: embeddings + usage: + total_tokens: 15 + prompt_tokens: 15 + x-konfig-properties: + usage: + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + DownloadResultResponse: + title: DownloadResultResponse + properties: + id: + title: Id + description: The ID of the job + type: string + download_url: + title: Download Url + description: The URL to download the result file + type: string + type: object + required: + - id + - download_url + example: + id: 000000000000000000000000X + download_url: https://example.com + HTTPValidationError: + title: HTTPValidationError + properties: + detail: + title: Detail + items: + $ref: '#/components/schemas/ValidationError' + type: array + type: object + HealthModel: + title: HealthModel + description: >- + Pydantic BaseModel for Jina health check, used as the response model in + REST app. + properties: {} + type: object + ImageDoc: + title: ImageDoc + description: >- + BaseDoc is the base class for all Documents. This class should be + subclassed + + to create new Document types with a specific schema. + + + The schema of a Document is defined by the fields of the class. + + + Example: + + ```python + + from docarray import BaseDoc + + from docarray.typing import NdArray, ImageUrl + + import numpy as np + + + + class MyDoc(BaseDoc): + embedding: NdArray[512] + image: ImageUrl + + + doc = MyDoc(embedding=np.zeros(512), + image='https://example.com/image.jpg') + + ``` + + + + BaseDoc is a subclass of + [pydantic.BaseModel](https://docs.pydantic.dev/usage/models/) and can be + used in a similar way. + properties: + id: + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + type: string + example: c801ec96945569130923f081d9dd5e8e + url: + title: Url + description: URL of an image file + type: string + maxLength: 65536 + minLength: 1 + format: uri + bytes: + title: Bytes + description: Bytes representation of the Image. + type: string + format: binary + type: object + ImageEmbeddingInput: + title: ImageEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + properties: + model: + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + type: string + input: + title: Input + description: List of images to embed + anyOf: + - items: + $ref: '#/components/schemas/ImageDoc' + type: array + - $ref: '#/components/schemas/ImageDoc' + encoding_format: + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + type: string + enum: + - float + - base64 + type: object + required: + - model + - input + example: + model: clip + input: + - bytes or URL + ModelEmbeddingOutput: + title: ModelEmbeddingOutput + description: Output of the embedding service + properties: + model: + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + type: string + object: + title: Object + type: string + default: list + data: + title: Data + description: A list of Embedding Objects returned by the embedding service + items: {} + type: array + usage: + $ref: '#/components/schemas/api_schemas__embedding__Usage' + type: object + required: + - model + - data + - usage + example: + data: + - index: 0 + embedding: + - 0.1 + - 0.2 + - 0.3 + object: embedding + - index: 1 + embedding: + - 0.3 + - 0.2 + - 0.1 + object: embedding + usage: + total_tokens: 15 + prompt_tokens: 15 + x-konfig-properties: + usage: + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + RankingOutput: + title: RankingOutput + description: Output of the embedding service + properties: + model: + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + type: string + results: + title: Results + description: An ordered list of ranked documents + items: {} + type: array + usage: + $ref: '#/components/schemas/api_schemas__rank__Usage' + type: object + required: + - model + - results + - usage + example: + results: + - index: 0 + document: + text: Document to rank 1 + relevance_score: 0.9 + - index: 1 + document: + text: Document to rank 2 + relevance_score: 0.8 + usage: + total_tokens: 15 + prompt_tokens: 15 + x-konfig-properties: + usage: + title: Usage + description: Total usage of the request. + TextRankInput: + title: TextRankInput + description: The input to the API for text embedding. OpenAI compatible + properties: + model: + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + type: string + query: + title: Query + description: The search query + anyOf: + - type: string + - $ref: '#/components/schemas/api_schemas__rank__TextDoc' + documents: + title: Documents + description: >- + A list of text documents or strings to rerank. If a document is + provided the text fields is required and all other fields will be + preserved in the response. + anyOf: + - items: + type: string + type: array + - items: + $ref: '#/components/schemas/api_schemas__rank__TextDoc' + type: array + top_n: + title: Top N + description: >- + The number of most relevant documents or indices to return, defaults + to the length of `documents` + type: integer + return_documents: + title: Return Documents + description: >- + If false, returns results without the doc text - the api will return + a list of {index, relevance score} where index is inferred from the + list passed into the request. If true, returns results with the doc + text passed in - the api will return an ordered list of {index, + text, relevance score} where index + text refers to the list passed + into the request. Defaults to true + type: boolean + default: true + type: object + required: + - model + - query + - documents + example: + model: jina-reranker-v1-base-en + query: Search query + documents: + - Document to rank 1 + - Document to rank 2 + ValidationError: + title: ValidationError + properties: + loc: + title: Location + items: + anyOf: + - type: string + - type: integer + type: array + msg: + title: Message + type: string + type: + title: Error Type + type: string + type: object + required: + - loc + - msg + - type + api_schemas__embedding__TextDoc: + title: TextDoc + description: Document containing a text field + properties: + id: + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + type: string + example: c801ec96945569130923f081d9dd5e8e + text: + title: Text + type: string + type: object + required: + - text + api_schemas__embedding__TextEmbeddingInput: + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + properties: + model: + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + type: string + input: + title: Input + description: List of texts to embed + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + encoding_format: + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + type: string + enum: + - float + - base64 + type: object + required: + - model + - input + example: + model: jina-embeddings-v2-base-en + input: + - Hello, world! + api_schemas__embedding__Usage: + title: Usage + properties: + total_tokens: + title: Total Tokens + description: The number of tokens used by all the texts in the input + type: integer + prompt_tokens: + title: Prompt Tokens + description: Same as total_tokens + type: integer + type: object + required: + - total_tokens + - prompt_tokens + api_schemas__multi_embeddings__TextEmbeddingInput: + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + properties: + model: + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + type: string + input: + title: Input + description: List of texts to embed + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + input_type: + title: Input Type + description: Type of the embedding to compute, query or document + type: string + enum: + - query + - document + default: document + encoding_format: + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + type: string + enum: + - float + - base64 + type: object + required: + - model + - input + example: + model: jina-colbert-v1-en + input: + - Hello, world! + api_schemas__rank__TextDoc: + title: TextDoc + description: Document containing a text field + properties: + id: + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + type: string + example: c801ec96945569130923f081d9dd5e8e + text: + title: Text + type: string + type: object + required: + - text + api_schemas__rank__Usage: + title: Usage + properties: + total_tokens: + title: Total Tokens + description: The number of tokens used by all the texts in the input + type: integer + prompt_tokens: + title: Prompt Tokens + description: Same as total_tokens + type: integer + type: object + required: + - total_tokens + - prompt_tokens + example: + total_tokens: 15 + prompt_tokens: 15 + EmbeddingsCreateRepresentationRequest: + title: Body + anyOf: + - $ref: '#/components/schemas/api_schemas__embedding__TextEmbeddingInput' + - $ref: '#/components/schemas/ImageEmbeddingInput' + securitySchemes: + HTTPBearer: + type: http + scheme: bearer diff --git a/sdks/db/generate-repository-description-cache/jina-ai.json b/sdks/db/generate-repository-description-cache/jina-ai.json new file mode 100644 index 000000000..fc2077b1f --- /dev/null +++ b/sdks/db/generate-repository-description-cache/jina-ai.json @@ -0,0 +1,3 @@ +{ + "Founded in February 2020, Jina AI has swiftly emerged as a global pioneer in multimodal AI technology. Within an impressive timeframe of 20 months, we have successfully raised $37.5M, marking our strong position in the AI industry. Our ground-breaking technology, open-sourced on GitHub, has empowered over 40,000 developers around the globe to seamlessly build and deploy sophisticated multimodal applications.\n\nIn 2023, we've made significant strides in advancing AI generation tools grounded on multimodal technology. This innovation has benefited over 250,000 users worldwide, catering to a plethora of unique business requirements. From facilitating business growth and enhancing operational efficiency to optimizing costs, Jina AI is dedicated to empowering businesses to excel in the multimodal era.": "Jina AI is a leading global pioneer in multimodal AI technology, founded in Feb 2020. Raised $37.5M in 20 months, open-sourced tech on GitHub for 40,000+ developers. Advancing AI generation tools benefiting 250,000+ users globally. Jina AI's {language} SDK generated by Konfig (https://konfigthis.com/)." +} \ No newline at end of file diff --git a/sdks/db/intermediate-fixed-specs/jina-ai/openapi.yaml b/sdks/db/intermediate-fixed-specs/jina-ai/openapi.yaml new file mode 100644 index 000000000..170053aaf --- /dev/null +++ b/sdks/db/intermediate-fixed-specs/jina-ai/openapi.yaml @@ -0,0 +1,780 @@ +openapi: 3.1.0 +info: + title: The Jina Embedding Serving API + description: This is the UniversalAPI to access all the Jina embedding models + version: 0.0.86 +paths: + /v1/embeddings: + post: + tags: + - embeddings + summary: Create Embedding + description: Create embedding representations of the given input texts. + operationId: create_embedding_v1_embeddings_post + requestBody: + content: + application/json: + schema: + anyOf: + - $ref: >- + #/components/schemas/api_schemas__embedding__TextEmbeddingInput + - $ref: '#/components/schemas/ImageEmbeddingInput' + title: Body + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ModelEmbeddingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/bulk-embeddings: + post: + tags: + - bulk-embeddings + summary: Start Bulk Embedding + description: Upload a file and get embeddings for each row + operationId: start_bulk_embedding_v1_bulk_embeddings_post + requestBody: + content: + multipart/form-data: + schema: + $ref: >- + #/components/schemas/Body_start_bulk_embedding_v1_bulk_embeddings_post + required: true + responses: + '200': + description: Start a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/bulk-embeddings/{job_id}: + get: + tags: + - bulk-embeddings + summary: Retrieve Job + operationId: retrieve_job_v1_bulk_embeddings__job_id__get + parameters: + - required: true + schema: + type: string + title: Job Id + name: job_id + in: path + responses: + '200': + description: Get information about a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/BulkEmbeddingJobResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/bulk-embeddings/{job_id}/download-result: + post: + tags: + - bulk-embeddings + summary: Download Result + operationId: download_result_v1_bulk_embeddings__job_id__download_result_post + parameters: + - required: true + schema: + type: string + title: Job Id + name: job_id + in: path + responses: + '200': + description: Download the result of a bulk embedding job + content: + application/json: + schema: + $ref: '#/components/schemas/DownloadResultResponse' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + /v1/rerank: + post: + tags: + - rerank + summary: Rank + description: Rank pairs. + operationId: rank_v1_rerank_post + requestBody: + content: + application/json: + schema: + $ref: '#/components/schemas/TextRankInput' + required: true + responses: + '200': + description: Rank output + content: + application/json: + schema: + $ref: '#/components/schemas/RankingOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /v1/multi-embeddings: + post: + tags: + - multi-embeddings + summary: Create Multi Embeddings + description: Create embedding representations of the given input texts. + operationId: create_multi_embeddings_v1_multi_embeddings_post + requestBody: + content: + application/json: + schema: + $ref: >- + #/components/schemas/api_schemas__multi_embeddings__TextEmbeddingInput + required: true + responses: + '200': + description: Create embeddings + content: + application/json: + schema: + $ref: '#/components/schemas/ColbertModelEmbeddingsOutput' + '422': + description: Validation Error + content: + application/json: + schema: + $ref: '#/components/schemas/HTTPValidationError' + security: + - HTTPBearer: [] + /: + get: + summary: Get the health of Universal API service + description: |- + Get the health of this Gateway service. + .. # noqa: DAR201 + operationId: _gateway_health__get + responses: + '200': + description: Successful Response + content: + application/json: + schema: + $ref: '#/components/schemas/HealthModel' +components: + schemas: + Body_start_bulk_embedding_v1_bulk_embeddings_post: + properties: + file: + type: string + format: binary + title: File + email: + type: string + format: email + title: Email + model: + type: string + title: Model + type: object + required: + - file + - model + title: Body_start_bulk_embedding_v1_bulk_embeddings_post + BulkEmbeddingJobResponse: + properties: + user_id: + type: string + title: User Id + description: The user ID of the user who created the job + model_name: + type: string + title: Model Name + description: The name of the model to use + model_package_arn: + type: string + title: Model Package Arn + description: The model package ARN + status: + allOf: + - $ref: '#/components/schemas/BulkEmbeddingJobStatus' + description: The status of the job + file_name: + type: string + title: File Name + description: The name of the input file + user_email: + type: string + format: email + title: User Email + description: The email of the user who created the job + created_at: + type: string + format: date-time + title: Created At + description: Time of creation of the job. + nullable: false + completed_at: + type: string + format: date-time + title: Completed At + description: Time of completion of the job. + error: + type: string + title: Error + description: The error message of the job + _id: + type: string + title: ' Id' + description: The ID of the job + used_token_count: + type: integer + title: Used Token Count + description: The number of tokens used for the job + type: object + required: + - user_id + - model_name + - status + - file_name + - _id + title: BulkEmbeddingJobResponse + example: + id: '000000000000000000000000' + model_name: model_1 + status: in-progress + file_name: input.csv + used_token_count: 1000 + BulkEmbeddingJobStatus: + type: string + enum: + - waiting + - in-progress + - failed + - completed + title: BulkEmbeddingJobStatus + description: An enumeration. + ColbertModelEmbeddingsOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + object: + type: string + title: Object + default: list + data: + items: {} + type: array + title: Data + description: A list of Embedding Objects returned by the embedding service + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__embedding__Usage' + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + type: object + required: + - model + - data + - usage + title: ColbertModelEmbeddingsOutput + description: Output of the embedding service + example: + data: + - index: 0 + embeddings: + - - 0.1 + - 0.2 + - 0.3 + - - 0.4 + - 0.5 + - 0.6 + object: embeddings + - index: 1 + embeddings: + - - 0.6 + - 0.5 + - 0.4 + - - 0.3 + - 0.2 + - 0.1 + object: embeddings + usage: + total_tokens: 15 + prompt_tokens: 15 + DownloadResultResponse: + properties: + id: + type: string + title: Id + description: The ID of the job + download_url: + type: string + title: Download Url + description: The URL to download the result file + type: object + required: + - id + - download_url + title: DownloadResultResponse + example: + id: '000000000000000000000000' + download_url: https://example.com + HTTPValidationError: + properties: + detail: + items: + $ref: '#/components/schemas/ValidationError' + type: array + title: Detail + type: object + title: HTTPValidationError + HealthModel: + properties: {} + type: object + title: HealthModel + description: >- + Pydantic BaseModel for Jina health check, used as the response model in + REST app. + ImageDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + url: + type: string + maxLength: 65536 + minLength: 1 + format: uri + title: Url + description: URL of an image file + bytes: + type: string + format: binary + title: Bytes + description: Bytes representation of the Image. + type: object + title: ImageDoc + description: >- + BaseDoc is the base class for all Documents. This class should be + subclassed + + to create new Document types with a specific schema. + + + The schema of a Document is defined by the fields of the class. + + + Example: + + ```python + + from docarray import BaseDoc + + from docarray.typing import NdArray, ImageUrl + + import numpy as np + + + + class MyDoc(BaseDoc): + embedding: NdArray[512] + image: ImageUrl + + + doc = MyDoc(embedding=np.zeros(512), + image='https://example.com/image.jpg') + + ``` + + + + BaseDoc is a subclass of [pydantic.BaseModel]( + + https://docs.pydantic.dev/usage/models/) and can be used in a similar + way. + ImageEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + $ref: '#/components/schemas/ImageDoc' + type: array + - $ref: '#/components/schemas/ImageDoc' + title: Input + description: List of images to embed + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: ImageEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: clip + input: + - bytes or URL + ModelEmbeddingOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + object: + type: string + title: Object + default: list + data: + items: {} + type: array + title: Data + description: A list of Embedding Objects returned by the embedding service + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__embedding__Usage' + title: Usage + description: >- + Total usage of the request. Sums up the usage from each individual + input + type: object + required: + - model + - data + - usage + title: ModelEmbeddingOutput + description: Output of the embedding service + example: + data: + - index: 0 + embedding: + - 0.1 + - 0.2 + - 0.3 + object: embedding + - index: 1 + embedding: + - 0.3 + - 0.2 + - 0.1 + object: embedding + usage: + total_tokens: 15 + prompt_tokens: 15 + RankingOutput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + results: + items: {} + type: array + title: Results + description: An ordered list of ranked documents + usage: + allOf: + - $ref: '#/components/schemas/api_schemas__rank__Usage' + title: Usage + description: Total usage of the request. + type: object + required: + - model + - results + - usage + title: RankingOutput + description: Output of the embedding service + example: + results: + - index: 0 + document: + text: Document to rank 1 + relevance_score: 0.9 + - index: 1 + document: + text: Document to rank 2 + relevance_score: 0.8 + usage: + total_tokens: 15 + prompt_tokens: 15 + TextRankInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + query: + anyOf: + - type: string + - $ref: '#/components/schemas/api_schemas__rank__TextDoc' + title: Query + description: The search query + documents: + anyOf: + - items: + type: string + type: array + - items: + $ref: '#/components/schemas/api_schemas__rank__TextDoc' + type: array + title: Documents + description: >- + A list of text documents or strings to rerank. If a document is + provided the text fields is required and all other fields will be + preserved in the response. + top_n: + type: integer + title: Top N + description: >- + The number of most relevant documents or indices to return, defaults + to the length of `documents` + return_documents: + type: boolean + title: Return Documents + description: >- + If false, returns results without the doc text - the api will return + a list of {index, relevance score} where index is inferred from the + list passed into the request. If true, returns results with the doc + text passed in - the api will return an ordered list of {index, + text, relevance score} where index + text refers to the list passed + into the request. Defaults to true + default: true + additionalProperties: false + type: object + required: + - model + - query + - documents + title: TextRankInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-reranker-v1-base-en + query: Search query + documents: + - Document to rank 1 + - Document to rank 2 + ValidationError: + properties: + loc: + items: + anyOf: + - type: string + - type: integer + type: array + title: Location + msg: + type: string + title: Message + type: + type: string + title: Error Type + type: object + required: + - loc + - msg + - type + title: ValidationError + api_schemas__embedding__TextDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + text: + type: string + title: Text + type: object + required: + - text + title: TextDoc + description: Document containing a text field + api_schemas__embedding__TextEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + title: Input + description: List of texts to embed + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-embeddings-v2-base-en + input: + - Hello, world! + api_schemas__embedding__Usage: + properties: + total_tokens: + type: integer + title: Total Tokens + description: The number of tokens used by all the texts in the input + prompt_tokens: + type: integer + title: Prompt Tokens + description: Same as total_tokens + type: object + required: + - total_tokens + - prompt_tokens + title: Usage + api_schemas__multi_embeddings__TextEmbeddingInput: + properties: + model: + type: string + title: Model + description: "The identifier of the model.\n\nAvailable models and corresponding param size and dimension:\n- `jina-embedding-t-en-v1`,\t14m,\t312\n- `jina-embedding-s-en-v1`,\t35m,\t512 (default)\n- `jina-embedding-b-en-v1`,\t110m,\t768\n- `jina-embedding-l-en-v1`,\t330,\t1024\n\nFor more information, please checkout our [technical blog](https://arxiv.org/abs/2307.11224).\n" + input: + anyOf: + - items: + type: string + type: array + - type: string + - items: + $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + type: array + - $ref: '#/components/schemas/api_schemas__embedding__TextDoc' + title: Input + description: List of texts to embed + input_type: + type: string + enum: + - query + - document + title: Input Type + description: Type of the embedding to compute, query or document + default: document + encoding_format: + type: string + enum: + - float + - base64 + title: Encoding Format + description: >- + The format in which you want the embeddings to be returned.Possible + value are `float` and `base64`. Defaults to `float` + additionalProperties: false + type: object + required: + - model + - input + title: TextEmbeddingInput + description: The input to the API for text embedding. OpenAI compatible + example: + model: jina-colbert-v1-en + input: + - Hello, world! + api_schemas__rank__TextDoc: + properties: + id: + type: string + title: Id + description: >- + The ID of the BaseDoc. This is useful for indexing in vector stores. + If not set by user, it will automatically be assigned a random value + example: c801ec96945569130923f081d9dd5e8e + text: + type: string + title: Text + type: object + required: + - text + title: TextDoc + description: Document containing a text field + api_schemas__rank__Usage: + properties: + total_tokens: + type: integer + title: Total Tokens + description: The number of tokens used by all the texts in the input + prompt_tokens: + type: integer + title: Prompt Tokens + description: Same as total_tokens + type: object + required: + - total_tokens + - prompt_tokens + title: Usage + example: + total_tokens: 15 + prompt_tokens: 15 + securitySchemes: + HTTPBearer: + type: http + scheme: bearer +servers: + - url: https://api.jina.ai diff --git a/sdks/db/processed-custom-request-cache/jina.ai.yaml b/sdks/db/processed-custom-request-cache/jina.ai.yaml new file mode 100644 index 000000000..0655b0334 --- /dev/null +++ b/sdks/db/processed-custom-request-cache/jina.ai.yaml @@ -0,0 +1,19 @@ +processed: + securitySchemes: + HTTPBearer: + type: http + scheme: bearer + apiBaseUrl: https://api.jina.ai + apiVersion: 0.0.86 + apiDescription: This is the UniversalAPI to access all the Jina embedding models + apiTitle: The Jina Embedding Serving API + endpoints: 7 + sdkMethods: 7 + schemas: 19 + parameters: 14 + originalCustomRequest: + type: GET + url: https://api.jina.ai/openapi.json + apiBaseUrl: https://api.jina.ai + customRequestSpecFilename: jina.ai.yaml + difficultyScore: 20 diff --git a/sdks/db/progress/jina-ai-progress.yaml b/sdks/db/progress/jina-ai-progress.yaml new file mode 100644 index 000000000..54966e995 --- /dev/null +++ b/sdks/db/progress/jina-ai-progress.yaml @@ -0,0 +1,31 @@ +examples: {} +examples_2: {} +examples_3: {} +ignoreObjectsWithNoProperties: true +operationIds: + /: + get: Health_checkStatus + /v1/bulk-embeddings: + post: Bulkembeddings_uploadFileAndGetEmbeddings + /v1/bulk-embeddings/{job_id}: + get: Bulkembeddings_getJob + /v1/bulk-embeddings/{job_id}/download-result: + post: Bulkembeddings_downloadResultPost + /v1/embeddings: + post: Embeddings_createRepresentation + /v1/multi-embeddings: + post: Multiembeddings_generateEmbeddings + /v1/rerank: + post: Rerank_pairRanking +operationTags: + /: + get: health +renameTags: {} +requestSchemaNames: + /v1/embeddings: + post: + application/json: EmbeddingsCreateRepresentationRequest +responseDescriptions: {} +responseSchemaNames: {} +securityParameters: {} +validServerUrls: {} diff --git a/sdks/db/published/from-custom-request_jina.ai.json b/sdks/db/published/from-custom-request_jina.ai.json new file mode 100644 index 000000000..e7ef04522 --- /dev/null +++ b/sdks/db/published/from-custom-request_jina.ai.json @@ -0,0 +1,280 @@ +{ + "securitySchemes": { + "HTTPBearer": { + "type": "http", + "scheme": "bearer" + } + }, + "apiBaseUrl": "https://api.jina.ai", + "apiVersion": "0.0.86", + "apiDescription": "This is the UniversalAPI to access all the Jina embedding models", + "apiTitle": "The Jina Embedding Serving API", + "endpoints": 7, + "sdkMethods": 7, + "schemas": 20, + "parameters": 14, + "originalCustomRequest": { + "type": "GET", + "url": "https://api.jina.ai/openapi.json", + "apiBaseUrl": "https://api.jina.ai" + }, + "customRequestSpecFilename": "jina.ai.yaml", + "difficultyScore": 20, + "difficulty": "Very Easy", + "company": "Jina AI", + "sdkName": "jina-ai-{language}-sdk", + "clientName": "JinaAi", + "metaDescription": "Founded in February 2020, Jina AI has swiftly emerged as a global pioneer in multimodal AI technology. Within an impressive timeframe of 20 months, we have successfully raised $37.5M, marking our strong position in the AI industry. Our ground-breaking technology, open-sourced on GitHub, has empowered over 40,000 developers around the globe to seamlessly build and deploy sophisticated multimodal applications.\n\nIn 2023, we've made significant strides in advancing AI generation tools grounded on multimodal technology. This innovation has benefited over 250,000 users worldwide, catering to a plethora of unique business requirements. From facilitating business growth and enhancing operational efficiency to optimizing costs, Jina AI is dedicated to empowering businesses to excel in the multimodal era.", + "apiStatusUrls": "inherit", + "homepage": "jina.ai", + "developerDocumentation": "api.jina.ai/redoc", + "categories": [ + "artificial_intelligence", + "open_source", + "developer_tools", + "machine_learning", + "ai", + "vector_search", + "embedding" + ], + "category": "AI Tools", + "methods": [ + { + "url": "/v1/embeddings", + "method": "createRepresentation", + "httpMethod": "post", + "tag": "embeddings", + "typeScriptTag": "embeddings", + "description": "Create Embedding", + "parameters": [], + "responses": [ + { + "statusCode": "200", + "description": "Output of the embedding service" + }, + { + "statusCode": "422", + "description": "" + } + ] + }, + { + "url": "/v1/bulk-embeddings", + "method": "uploadFileAndGetEmbeddings", + "httpMethod": "post", + "tag": "bulk-embeddings", + "typeScriptTag": "bulkEmbeddings", + "description": "Start Bulk Embedding", + "parameters": [ + { + "name": "file", + "schema": "string", + "required": true, + "description": "", + "example": "FILE" + }, + { + "name": "email", + "schema": "string", + "required": false, + "description": "" + }, + { + "name": "model", + "schema": "string", + "required": true, + "description": "", + "example": "MODEL" + } + ], + "responses": [ + { + "statusCode": "200", + "description": "" + }, + { + "statusCode": "422", + "description": "" + } + ] + }, + { + "url": "/v1/bulk-embeddings/{job_id}", + "method": "getJob", + "httpMethod": "get", + "tag": "bulk-embeddings", + "typeScriptTag": "bulkEmbeddings", + "description": "Retrieve Job", + "parameters": [ + { + "name": "jobId", + "schema": "string", + "required": true, + "description": "", + "example": "JOB_ID" + } + ], + "responses": [ + { + "statusCode": "200", + "description": "" + }, + { + "statusCode": "422", + "description": "" + } + ] + }, + { + "url": "/v1/bulk-embeddings/{job_id}/download-result", + "method": "downloadResultPost", + "httpMethod": "post", + "tag": "bulk-embeddings", + "typeScriptTag": "bulkEmbeddings", + "description": "Download Result", + "parameters": [ + { + "name": "jobId", + "schema": "string", + "required": true, + "description": "", + "example": "JOB_ID" + } + ], + "responses": [ + { + "statusCode": "200", + "description": "" + }, + { + "statusCode": "422", + "description": "" + } + ] + }, + { + "url": "/v1/rerank", + "method": "pairRanking", + "httpMethod": "post", + "tag": "rerank", + "typeScriptTag": "rerank", + "description": "Rank", + "parameters": [ + { + "name": "model", + "schema": "string", + "required": true, + "description": "", + "example": "MODEL" + }, + { + "name": "query", + "schema": "undefined", + "required": true, + "description": "" + }, + { + "name": "documents", + "schema": "undefined", + "required": true, + "description": "" + }, + { + "name": "top_n", + "schema": "integer", + "required": false, + "description": "" + }, + { + "name": "return_documents", + "schema": "boolean", + "required": false, + "description": "", + "default": true + } + ], + "responses": [ + { + "statusCode": "200", + "description": "Output of the embedding service" + }, + { + "statusCode": "422", + "description": "" + } + ] + }, + { + "url": "/v1/multi-embeddings", + "method": "generateEmbeddings", + "httpMethod": "post", + "tag": "multi-embeddings", + "typeScriptTag": "multiEmbeddings", + "description": "Create Multi Embeddings", + "parameters": [ + { + "name": "model", + "schema": "string", + "required": true, + "description": "", + "example": "MODEL" + }, + { + "name": "input", + "schema": "undefined", + "required": true, + "description": "" + }, + { + "name": "input_type", + "schema": "string", + "required": false, + "description": "", + "default": "document" + }, + { + "name": "encoding_format", + "schema": "string", + "required": false, + "description": "" + } + ], + "responses": [ + { + "statusCode": "200", + "description": "Output of the embedding service" + }, + { + "statusCode": "422", + "description": "" + } + ] + }, + { + "url": "/", + "method": "checkStatus", + "httpMethod": "get", + "tag": "health", + "typeScriptTag": "health", + "description": "Get the health of Universal API service", + "parameters": [], + "responses": [ + { + "statusCode": "200", + "description": "Pydantic BaseModel for Jina health check, used as the response model in REST app." + } + ] + } + ], + "repositoryDescription": "Jina AI is a leading global pioneer in multimodal AI technology, founded in Feb 2020. Raised $37.5M in 20 months, open-sourced tech on GitHub for 40,000+ developers. Advancing AI generation tools benefiting 250,000+ users globally. Jina AI's {language} SDK generated by Konfig (https://konfigthis.com/).", + "logo": "https://raw.githubusercontent.com/konfig-sdks/openapi-examples/HEAD/jina-ai/logo.png", + "openApiRaw": "https://raw.githubusercontent.com/konfig-sdks/openapi-examples/HEAD/jina-ai/openapi.yaml", + "openApiGitHubUi": "https://github.com/konfig-sdks/openapi-examples/tree/HEAD/jina-ai/openapi.yaml", + "previewLinkImage": "https://raw.githubusercontent.com/konfig-sdks/openapi-examples/HEAD/jina-ai/imagePreview.png", + "faviconUrl": "https://raw.githubusercontent.com/konfig-sdks/openapi-examples/HEAD/jina-ai/favicon.png", + "clientNameCamelCase": "jinaAi", + "lastUpdated": "2024-03-26T08:05:47.803Z", + "typescriptSdkUsageCode": "import { JinaAi } from 'jina-ai-typescript-sdk';\n\nconst jinaAi = new JinaAi({\n httpBearer: \"HTTP_BEARER\"\n})", + "typescriptSdkFirstRequestCode": "// Create Embedding\nconst createRepresentationResponse = jinaAi.embeddings.createRepresentation()", + "fixedSpecFileName": "jina-ai-fixed-spec.yaml" +} \ No newline at end of file diff --git a/sdks/db/spec-data/from-custom-request_jina.ai.json b/sdks/db/spec-data/from-custom-request_jina.ai.json new file mode 100644 index 000000000..f6ff29257 --- /dev/null +++ b/sdks/db/spec-data/from-custom-request_jina.ai.json @@ -0,0 +1,24 @@ +{ + "securitySchemes": { + "HTTPBearer": { + "type": "http", + "scheme": "bearer" + } + }, + "apiBaseUrl": "https://api.jina.ai", + "apiVersion": "0.0.86", + "apiDescription": "This is the UniversalAPI to access all the Jina embedding models", + "apiTitle": "The Jina Embedding Serving API", + "endpoints": 7, + "sdkMethods": 7, + "schemas": 19, + "parameters": 14, + "originalCustomRequest": { + "type": "GET", + "url": "https://api.jina.ai/openapi.json", + "apiBaseUrl": "https://api.jina.ai" + }, + "customRequestSpecFilename": "jina.ai.yaml", + "difficultyScore": 20, + "difficulty": "Very Easy" +} \ No newline at end of file diff --git a/sdks/publish.yaml b/sdks/publish.yaml index fb7bc405c..44288d319 100644 --- a/sdks/publish.yaml +++ b/sdks/publish.yaml @@ -3630,3 +3630,34 @@ publish: serviceName: false sdkName: induced-ai-{language}-sdk clientName: InducedAi + from-custom-request_jina.ai: + homepage: jina.ai + company: Jina AI + developerDocumentation: api.jina.ai/redoc + apiStatusUrls: inherit + metaDescription: >- + Founded in February 2020, Jina AI has swiftly emerged as a global pioneer + in multimodal AI technology. Within an impressive timeframe of 20 months, + we have successfully raised $37.5M, marking our strong position in the AI + industry. Our ground-breaking technology, open-sourced on GitHub, has + empowered over 40,000 developers around the globe to seamlessly build and + deploy sophisticated multimodal applications. + + + In 2023, we've made significant strides in advancing AI generation tools + grounded on multimodal technology. This innovation has benefited over + 250,000 users worldwide, catering to a plethora of unique business + requirements. From facilitating business growth and enhancing operational + efficiency to optimizing costs, Jina AI is dedicated to empowering + businesses to excel in the multimodal era. + categories: + - artificial_intelligence + - open_source + - developer_tools + - machine_learning + - ai + - vector_search + - embedding + serviceName: false + sdkName: jina-ai-{language}-sdk + clientName: JinaAi diff --git a/sdks/src/collect-from-custom-requests.ts b/sdks/src/collect-from-custom-requests.ts index c0919ba47..18c57967a 100644 --- a/sdks/src/collect-from-custom-requests.ts +++ b/sdks/src/collect-from-custom-requests.ts @@ -1014,6 +1014,11 @@ const customRequests: Record = { return rawSpecString; }, }, + "jina.ai": { + type: "GET", + url: "https://api.jina.ai/openapi.json", + apiBaseUrl: "https://api.jina.ai", + }, "induced.ai": { type: "GET", url: "https://raw.githubusercontent.com/inducedai/docs/main/openapi.json",