diff --git a/_aggregations/bucket/geohex-grid.md b/_aggregations/bucket/geohex-grid.md index de11061055..03fd45e369 100644 --- a/_aggregations/bucket/geohex-grid.md +++ b/_aggregations/bucket/geohex-grid.md @@ -5,6 +5,8 @@ parent: Bucket aggregations grand_parent: Aggregations nav_order: 85 redirect_from: + - /opensearch/geohexgrid-agg/ + - /query-dsl/aggregations/geohexgrid-agg/ - /aggregations/geohexgrid/ - /query-dsl/aggregations/geohexgrid/ - /query-dsl/aggregations/bucket/geohex-grid/ diff --git a/_aggregations/bucket/multi-terms.md b/_aggregations/bucket/multi-terms.md index 6fca528b38..62a4d264e0 100644 --- a/_aggregations/bucket/multi-terms.md +++ b/_aggregations/bucket/multi-terms.md @@ -5,6 +5,7 @@ parent: Bucket aggregations grand_parent: Aggregations nav_order: 130 redirect_from: + - /query-dsl/aggregations/bucket/multi-terms/ - /query-dsl/aggregations/multi-terms/ --- diff --git a/_aggregations/bucket/range.md b/_aggregations/bucket/range.md index 61ec2f6276..f4e19f188d 100644 --- a/_aggregations/bucket/range.md +++ b/_aggregations/bucket/range.md @@ -5,6 +5,7 @@ parent: Bucket aggregations grand_parent: Aggregations nav_order: 150 redirect_from: + - /query-dsl/aggregations/bucket/date-range/ - /query-dsl/aggregations/bucket/range/ --- diff --git a/_aggregations/bucket/sampler.md b/_aggregations/bucket/sampler.md index 28bae47b6d..5411052d45 100644 --- a/_aggregations/bucket/sampler.md +++ b/_aggregations/bucket/sampler.md @@ -4,6 +4,8 @@ title: Sampler parent: Bucket aggregations grand_parent: Aggregations nav_order: 170 +redirect_from: + - /query-dsl/aggregations/bucket/diversified-sampler/ --- # Sampler aggregations diff --git a/_aggregations/bucket/significant-terms.md b/_aggregations/bucket/significant-terms.md index 017e3b7dd8..34a4354a73 100644 --- a/_aggregations/bucket/significant-terms.md +++ b/_aggregations/bucket/significant-terms.md @@ -4,6 +4,8 @@ title: Significant terms parent: Bucket aggregations grand_parent: Aggregations nav_order: 180 +redirect_from: + - /query-dsl/aggregations/bucket/significant-terms/ --- # Significant terms aggregations diff --git a/_aggregations/bucket/significant-text.md b/_aggregations/bucket/significant-text.md index 1c136603d6..6f1c7ebeca 100644 --- a/_aggregations/bucket/significant-text.md +++ b/_aggregations/bucket/significant-text.md @@ -4,6 +4,8 @@ title: Significant text parent: Bucket aggregations grand_parent: Aggregations nav_order: 190 +redirect_from: + - /query-dsl/aggregations/bucket/significant-text/ --- # Significant text aggregations diff --git a/_aggregations/bucket/terms.md b/_aggregations/bucket/terms.md index 072ad42cc2..5d05c328d4 100644 --- a/_aggregations/bucket/terms.md +++ b/_aggregations/bucket/terms.md @@ -4,6 +4,8 @@ title: Terms parent: Bucket aggregations grand_parent: Aggregations nav_order: 200 +redirect_from: + - /query-dsl/aggregations/bucket/terms/ --- # Terms aggregations diff --git a/_aggregations/index.md b/_aggregations/index.md index 1c43799a10..385c7a09d8 100644 --- a/_aggregations/index.md +++ b/_aggregations/index.md @@ -6,6 +6,7 @@ nav_order: 5 nav_exclude: true permalink: /aggregations/ redirect_from: + - /query-dsl/aggregations/aggregations/ - /opensearch/aggregations/ - /query-dsl/aggregations/ - /aggregations/index/ diff --git a/_api-reference/analyze-apis.md b/_api-reference/analyze-apis.md index 7c61a6fdcb..10af71c1ad 100644 --- a/_api-reference/analyze-apis.md +++ b/_api-reference/analyze-apis.md @@ -4,6 +4,7 @@ title: Analyze API has_children: true nav_order: 7 redirect_from: + - 
/api-reference/analyze-apis/perform-text-analysis/ - /opensearch/rest-api/analyze-apis/ - /api-reference/analyze-apis/ --- diff --git a/_api-reference/document-apis/reindex.md b/_api-reference/document-apis/reindex.md index 4a0346ede3..2bc3646e68 100644 --- a/_api-reference/document-apis/reindex.md +++ b/_api-reference/document-apis/reindex.md @@ -57,7 +57,7 @@ Your request body must contain the names of the source index and destination ind Field | Description :--- | :--- -conflicts | Indicates to OpenSearch what should happen if the delete by query operation runs into a version conflict. Valid options are `abort` and `proceed`. Default is abort. +conflicts | Indicates to OpenSearch what should happen if the Reindex operation runs into a version conflict. Valid options are `abort` and `proceed`. Default is `abort`. source | Information about the source index to include. Valid fields are `index`, `max_docs`, `query`, `remote`, `size`, `slice`, and `_source`. index | The name of the source index to copy data from. max_docs | The maximum number of documents to reindex. diff --git a/_api-reference/index-apis/get-settings.md b/_api-reference/index-apis/get-settings.md index 37ac291a77..41eb4ea113 100644 --- a/_api-reference/index-apis/get-settings.md +++ b/_api-reference/index-apis/get-settings.md @@ -4,6 +4,7 @@ title: Get settings parent: Index APIs nav_order: 45 redirect_from: + - /opensearch/rest-api/index-apis/get-settings/ - /opensearch/rest-api/index-apis/get-index/ --- diff --git a/_api-reference/index-apis/index.md b/_api-reference/index-apis/index.md index 4c059b3e48..6e1fdbcfa6 100644 --- a/_api-reference/index-apis/index.md +++ b/_api-reference/index-apis/index.md @@ -4,6 +4,7 @@ title: Index APIs has_children: true nav_order: 35 redirect_from: + - /opensearch/rest-api/index-apis/index/ - /opensearch/rest-api/index-apis/ --- diff --git a/_api-reference/index-apis/put-mapping.md b/_api-reference/index-apis/put-mapping.md index 5f6be9f138..47c47fa125 100644 --- a/_api-reference/index-apis/put-mapping.md +++ b/_api-reference/index-apis/put-mapping.md @@ -4,6 +4,7 @@ title: Create or update mappings parent: Index APIs nav_order: 27 redirect_from: + - /opensearch/rest-api/index-apis/put-mapping/ - /opensearch/rest-api/index-apis/update-mapping/ - /opensearch/rest-api/update-mapping/ --- diff --git a/_api-reference/msearch-template.md b/_api-reference/msearch-template.md new file mode 100644 index 0000000000..316cc134ff --- /dev/null +++ b/_api-reference/msearch-template.md @@ -0,0 +1,132 @@ +--- +layout: default +title: Multi-search Template +nav_order: 47 +--- + +# Multi-search Template + +**Introduced 1.0** +{: .label .label-purple } + +The Multi-search Template API runs multiple search template requests in a single API request. + +## Path and HTTP methods + +The Multi-search Template API uses the following paths: + +``` +GET /_msearch/template +POST /_msearch/template +GET /{index}/_msearch/template +POST /{index}/_msearch/template +``` + +## Request body + +The multi-search template request body follows this pattern, similar to the [Multi-search API]({{site.url}}{{site.baseurl}}/api-reference/multi-search/) pattern: + +``` +Metadata\n +Query\n +Metadata\n +Query\n + +``` + +- Metadata lines include options, such as which indexes to search and the type of search. +- Query lines use [query DSL]({{site.url}}{{site.baseurl}}/opensearch/query-dsl/). 
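+
+For example, a request body that runs two template searches might look like the following. The index names, template IDs, and parameters shown here are placeholders; a complete request appears in the example later on this page.
+
+```json
+{"index":"my-index"}
+{"id":"my_first_template","params":{"field_value":"some text"}}
+{"index":"my-other-index"}
+{"id":"my_second_template","params":{"field_value":"other text"}}
+
+```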
+ +Like the [bulk]({{site.url}}{{site.baseurl}}/api-reference/document-apis/bulk/) operation, the JSON doesn't need to be minified---spaces are fine---but it does need to be on a single line. OpenSearch uses newline characters to parse multi-search requests and requires that the request body end with a newline character. + +## URL parameters and metadata options + +All multi-search template URL parameters are optional. Some can also be applied per search as part of each metadata line. + +Parameter | Type | Description | Supported in metadata +:--- | :--- | :--- | :--- +allow_no_indices | Boolean | Specifies whether to ignore wildcards that don't match any indexes. Default is `true`. | Yes +cancel_after_time_interval | Time | The interval of time after which the search request will be canceled. Supported at both parent and child request levels. The order of precedence is child-level parameter, parent-level parameter, and [cluster setting]({{site.url}}{{site.baseurl}}/api-reference/cluster-settings/). Default is `-1`. | Yes +ccs_minimize_roundtrips | Boolean | Specifies whether OpenSearch should try to minimize the number of network round trips between the coordinating node and remote clusters (only applicable to cross-cluster search requests). Default is `true`. | No +expand_wildcards | Enum | Expands wildcard expressions to concrete indexes. Combine multiple values with commas. Supported values are `all`, `open`, `closed`, `hidden`, and `none`. Default is `open`. | Yes +ignore_unavailable | Boolean | If an index or shard from the index list doesn’t exist, then this setting specifies whether to ignore the missing index or shard rather than fail the query. Default is `false`. | Yes +max_concurrent_searches | Integer | The maximum number of concurrent searches. The default depends on your node count and search thread pool size. Higher values can improve performance, but there may be a risk of overloading the cluster. | No +max_concurrent_shard_requests | Integer | The maximum number of concurrent shard requests that each search executes per node. Default is `5`. Higher values can improve performance, but there may be a risk of overloading the cluster. | No +pre_filter_shard_size | Integer | A threshold for the number of shards that the request targets. When the number of targeted shards exceeds this threshold, OpenSearch prefilters the shards based on query rewriting and skips shards that cannot contain any matches. Default is `128`. | No +rest_total_hits_as_int | String | Specifies whether the `hits.total` property is returned as an integer (`true`) or an object (`false`). Default is `false`. | No +search_type | String | Affects the relevance score. Valid options are `query_then_fetch` and `dfs_query_then_fetch`. `query_then_fetch` scores documents using term and document frequencies for a single shard (faster, less accurate), whereas `dfs_query_then_fetch` uses term and document frequencies across all shards (slower, more accurate). Default is `query_then_fetch`. | Yes +typed_keys | Boolean | Specifies whether to prefix aggregation names with their internal types in the response. Default is `false`. | No + +## Metadata-only options + +Some options can't be applied as URL parameters to the entire request. Instead, you can apply them per search as part of each metadata line. All are optional. + +Option | Type | Description +:--- | :--- | :--- +index | String, string array | If you don't specify an index or multiple indexes as part of the URL (or want to override the URL value for an individual search), you can include it under this option. Examples include `"logs-*"` and `["my-store", "sample_data_ecommerce"]`. +preference | String | Specifies the nodes or shards on which you want to perform the search.
This setting can be useful for testing, but in most situations, the default behavior provides the best search latencies. Options include `_local`, `_only_local`, `_prefer_nodes`, `_only_nodes`, and `_shards`. These last three options accept a list of nodes or shards. Examples include `"_only_nodes:data-node1,data-node2"` and `"_shards:0,1`. +request_cache | Boolean | Specifies whether to cache results, which can improve latency for repeated searches. Default is to use the `index.requests.cache.enable` setting for the index (which defaults to `true` for new indexes). +routing | String | Comma-separated custom routing values, for example, `"routing": "value1,value2,value3"`. + +## Example + +The following example `msearch/template` API request runs queries against a single index using multiple templates named `line_search_template` and `play_search_template`: + +### Request + +```json +GET _msearch/template +{"index":"shakespeare"} +{"id":"line_search_template","params":{"text_entry":"All the world's a stage","limit":false,"size":2}} +{"index":"shakespeare"} +{"id":"play_search_template","params":{"play_name":"Henry IV"}} +``` +{% include copy-curl.html %} + +### Response + +OpenSearch returns an array with the results of each search in the same order as in the multi-search template request: + +```json +{ + "took": 5, + "responses": [ + { + "took": 5, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 0, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + }, + { + "took": 3, + "timed_out": false, + "_shards": { + "total": 1, + "successful": 1, + "skipped": 0, + "failed": 0 + }, + "hits": { + "total": { + "value": 0, + "relation": "eq" + }, + "max_score": null, + "hits": [] + } + } + ] +} +``` diff --git a/_api-reference/multi-search.md b/_api-reference/multi-search.md index 19e6fc34fc..ff04b2d075 100644 --- a/_api-reference/multi-search.md +++ b/_api-reference/multi-search.md @@ -6,12 +6,13 @@ redirect_from: - /opensearch/rest-api/multi-search/ --- -# Multi-search +# Multi-search **Introduced 1.0** {: .label .label-purple } As the name suggests, the multi-search operation lets you bundle multiple search requests into a single request. OpenSearch then executes the searches in parallel, so you get back the response more quickly compared to sending one request per search. OpenSearch executes each search independently, so the failure of one doesn't affect the others. + ## Example ```json @@ -27,6 +28,8 @@ GET _msearch ## Path and HTTP methods +The Multi-search API uses the following paths: + ``` GET _msearch GET /_msearch @@ -34,7 +37,6 @@ POST _msearch POST /_msearch ``` - ## Request body The multi-search request body follows this pattern: diff --git a/_api-reference/security-apis.md b/_api-reference/security-apis.md new file mode 100644 index 0000000000..db3334fb0e --- /dev/null +++ b/_api-reference/security-apis.md @@ -0,0 +1,20 @@ +--- +layout: default +title: Security APIs +nav_order: 84 +--- + +# Security APIs + +Security APIs provide information that can be very useful in troubleshooting connection and configuration issues. + +API | Method | Description +:--- | :--- | :--- +`/_plugins/_security/whoami` | GET/POST | Returns basic details about the logged-in user. +`/_opendistro/_security/sslinfo` | GET | Returns details about the SSL connection when using certificate authentication. +`/_plugins/_security/api/permissionsinfo` | GET | Returns permission details for the logged-in user. 
+`/_plugins/_security/authinfo` | GET/POST | Returns the backend roles and OpenSearch roles mapped to the logged-in user. +`/_plugins/_security/api/ssl/certs` | GET | Displays the details and expiration dates of the certificates used on the OpenSearch HTTP and transport communication layers. Can only be called by users with the `superadmin` certificate. +`/_plugins/_security/api/ssl/transport/reloadcerts` | PUT | Reloads the certificates on the `transport` layer. For more information, see [Reload TLS certificates on the transport layer]({{site.url}}{{site.baseurl}}/security/configuration/tls/#reload-tls-certificates-on-the-transport-layer). +`/_plugins/_security/api/ssl/http/reloadcerts` | PUT | Reloads the certificates on the `http` layer. For more information, see [Reload TLS certificates on the http layer]({{site.url}}{{site.baseurl}}/security/configuration/tls/#reload-tls-certificates-on-the-http-layer). + diff --git a/_automating-configurations/api/delete-workflow.md b/_automating-configurations/api/delete-workflow.md index db3a340cee..13cd5ae5dc 100644 --- a/_automating-configurations/api/delete-workflow.md +++ b/_automating-configurations/api/delete-workflow.md @@ -7,9 +7,11 @@ nav_order: 80 # Delete a workflow -When you no longer need a workflow template, you can delete it by calling the Delete Workflow API. +When you no longer need a workflow template, you can delete it by calling the Delete Workflow API. -Note that deleting a workflow only deletes the stored template but does not deprovision its resources. +Note that deleting a workflow only deletes the stored template---it does not deprovision its resources. + +When a workflow is deleted, its corresponding status (returned by the [Workflow State API]({{site.url}}{{site.baseurl}}/automating-configurations/api/get-workflow-status/)) is also deleted unless either the provisioning status is `IN_PROGRESS` or resources have been provisioned. ## Path and HTTP methods @@ -25,13 +27,26 @@ The following table lists the available path parameters. | :--- | :--- | :--- | | `workflow_id` | String | The ID of the workflow to be retrieved. Required. | +## Query parameters + +The following table lists the available query parameters. All query parameters are optional. + +| Parameter | Data type | Description | +| :--- | :--- | :--- | +| `clear_status` | Boolean | Determines whether to delete the workflow state (without deprovisioning resources) after deleting the template. OpenSearch deletes the workflow state only if the provisioning status is not `IN_PROGRESS`. Default is `false`. 
| + #### Example request -``` +```json DELETE /_plugins/_flow_framework/workflow/8xL8bowB8y25Tqfenm50 ``` {% include copy-curl.html %} +```json +DELETE /_plugins/_flow_framework/workflow/8xL8bowB8y25Tqfenm50?clear_status=true +``` +{% include copy-curl.html %} + #### Example response If the workflow exists, a delete response contains the status of the deletion, where the `result` field is set to `deleted` on success or `not_found` if the workflow does not exist (it may have already been deleted): @@ -50,4 +65,4 @@ If the workflow exists, a delete response contains the status of the deletion, w "_seq_no": 2, "_primary_term": 1 } -``` \ No newline at end of file +``` diff --git a/_automating-configurations/workflow-steps.md b/_automating-configurations/workflow-steps.md index 2fba435ec7..43685a957a 100644 --- a/_automating-configurations/workflow-steps.md +++ b/_automating-configurations/workflow-steps.md @@ -42,6 +42,25 @@ The following table lists the workflow step types. The `user_inputs` fields for |`create_index`|[Create Index]({{site.url}}{{site.baseurl}}/api-reference/index-apis/create-index/) | Creates a new OpenSearch index. The inputs include `index_name`, which should be the name of the index to be created, and `configurations`, which contains the payload body of a regular REST request for creating an index. |`create_ingest_pipeline`|[Create Ingest Pipeline]({{site.url}}{{site.baseurl}}/ingest-pipelines/create-ingest/) | Creates or updates an ingest pipeline. The inputs include `pipeline_id`, which should be the ID of the pipeline, and `configurations`, which contains the payload body of a regular REST request for creating an ingest pipeline. |`create_search_pipeline`|[Create Search Pipeline]({{site.url}}{{site.baseurl}}/search-plugins/search-pipelines/creating-search-pipeline/) | Creates or updates a search pipeline. The inputs include `pipeline_id`, which should be the ID of the pipeline, and `configurations`, which contains the payload body of a regular REST request for creating a search pipeline. +|`reindex`|[Reindex]({{site.url}}{{site.baseurl}}/api-reference/document-apis/reindex/) | The reindex document API operation lets you copy all or a subset of your data from a source index into a destination index. The input includes source_index, destination_index, and the following optional parameters from the document reindex API: `refresh`, `requests_per_second`, `require_alias`, `slices`, and `max_docs`. For more information, see [Reindexing considerations](#reindexing-considerations). + +## Reindexing considerations + +Reindexing can be a resource-intensive operation, and if not managed properly, it can potentially destabilize your cluster. + +When using a `reindex` step, follow these best practices to ensure a smooth reindexing process and prevent cluster instability: + +- **Cluster scaling**: Before initiating a reindexing operation, ensure that your OpenSearch cluster is properly scaled to handle the additional workload. Increase the number of nodes and adjust resource allocation (CPU, memory, and disk) as needed to accommodate the reindexing process without impacting other operations. + +- **Request rate control**: Use the `requests_per_second` parameter to control the rate at which the reindexing requests are sent to the cluster. This helps to regulate the load on the cluster and prevent resource exhaustion. Start with a lower value and gradually increase it based on your cluster's capacity and performance. 
+ +- **Slicing and parallelization**: The `slices` parameter allows you to divide the reindexing process into smaller, parallel tasks. This can help distribute the workload across multiple nodes and improve overall performance. However, be cautious when increasing the number of slices because adding slices can increase resource consumption. + +- **Monitoring and adjustments**: Closely monitor your cluster performance metrics (such as CPU, memory, disk usage, and thread pools) during the reindexing process. If you notice any signs of resource contention or performance degradation, adjust the reindexing parameters accordingly or consider pausing the operation until the cluster stabilizes. + +- **Prioritization and scheduling**: If possible, schedule reindexing operations during off-peak hours or periods of lower cluster utilization to minimize the impact on other operations and user traffic. + +By following these best practices and carefully managing the reindexing process, you can ensure that your OpenSearch cluster remains stable and performant while efficiently copying data between indexes. ## Additional fields diff --git a/_benchmark/reference/commands/command-flags.md b/_benchmark/reference/commands/command-flags.md index ca0606f07f..6520f80803 100644 --- a/_benchmark/reference/commands/command-flags.md +++ b/_benchmark/reference/commands/command-flags.md @@ -3,7 +3,8 @@ layout: default title: Command flags nav_order: 51 parent: Command reference -redirect_from: /benchmark/commands/command-flags/ +redirect_from: + - /benchmark/commands/command-flags/ grand_parent: OpenSearch Benchmark Reference --- diff --git a/_benchmark/reference/commands/compare.md b/_benchmark/reference/commands/compare.md index dc7ed2f558..a9eb8bcc10 100644 --- a/_benchmark/reference/commands/compare.md +++ b/_benchmark/reference/commands/compare.md @@ -4,7 +4,8 @@ title: compare nav_order: 55 parent: Command reference grand_parent: OpenSearch Benchmark Reference -redirect_from: /benchmark/commands/compare/ +redirect_from: + - /benchmark/commands/compare/ --- diff --git a/_benchmark/reference/commands/download.md b/_benchmark/reference/commands/download.md index 580e7ef84d..bc29dfd5f7 100644 --- a/_benchmark/reference/commands/download.md +++ b/_benchmark/reference/commands/download.md @@ -4,7 +4,8 @@ title: download nav_order: 60 parent: Command reference grand_parent: OpenSearch Benchmark Reference -redirect_from: /benchmark/commands/download/ +redirect_from: + - /benchmark/commands/download/ --- diff --git a/_benchmark/reference/commands/execute-test.md b/_benchmark/reference/commands/execute-test.md index d8a3e14dbd..82b677d900 100644 --- a/_benchmark/reference/commands/execute-test.md +++ b/_benchmark/reference/commands/execute-test.md @@ -4,7 +4,8 @@ title: execute-test nav_order: 65 parent: Command reference grand_parent: OpenSearch Benchmark Reference -redirect_from: /benchmark/commands/execute-test/ +redirect_from: + - /benchmark/commands/execute-test/ --- diff --git a/_benchmark/reference/commands/info.md b/_benchmark/reference/commands/info.md index c8c20ad110..3bfefabe99 100644 --- a/_benchmark/reference/commands/info.md +++ b/_benchmark/reference/commands/info.md @@ -4,7 +4,8 @@ title: info nav_order: 75 parent: Command reference grand_parent: OpenSearch Benchmark Reference -redirect_from: /benchmark/commands/info/ +redirect_from: + - /benchmark/commands/info/ --- diff --git a/_benchmark/reference/commands/list.md b/_benchmark/reference/commands/list.md index ed10f02a3b..1c51cfa27e 100644 --- 
a/_benchmark/reference/commands/list.md +++ b/_benchmark/reference/commands/list.md @@ -4,7 +4,8 @@ title: list nav_order: 80 parent: Command reference grand_parent: OpenSearch Benchmark Reference -redirect_from: /benchmark/commands/list/ +redirect_from: + - /benchmark/commands/list/ --- diff --git a/_benchmark/reference/index.md b/_benchmark/reference/index.md index bc7a423b49..4c71443259 100644 --- a/_benchmark/reference/index.md +++ b/_benchmark/reference/index.md @@ -3,6 +3,8 @@ layout: default title: OpenSearch Benchmark Reference nav_order: 25 has_children: true +redirect_from: + - /benchmark/commands/index/ --- # OpenSearch Benchmark Reference diff --git a/_benchmark/reference/metrics/metric-keys.md b/_benchmark/reference/metrics/metric-keys.md index aa45c9d93a..63ad9b6140 100644 --- a/_benchmark/reference/metrics/metric-keys.md +++ b/_benchmark/reference/metrics/metric-keys.md @@ -4,7 +4,8 @@ title: Metric keys nav_order: 35 parent: Metrics reference grand_parent: OpenSearch Benchmark Reference -redirect_from: /benchmark/metrics/metric-keys/ +redirect_from: + - /benchmark/metrics/metric-keys/ --- # Metric keys diff --git a/_benchmark/reference/metrics/metric-records.md b/_benchmark/reference/metrics/metric-records.md index 659f8b82fe..1809401783 100644 --- a/_benchmark/reference/metrics/metric-records.md +++ b/_benchmark/reference/metrics/metric-records.md @@ -4,7 +4,8 @@ title: Metric records nav_order: 30 parent: Metrics reference grand_parent: OpenSearch Benchmark Reference -redirect_from: /benchmark/metrics/metric-records/ +redirect_from: + - /benchmark/metrics/metric-records/ --- # Metric records diff --git a/_benchmark/reference/workloads/corpora.md b/_benchmark/reference/workloads/corpora.md index daa2ae6570..0e8d408e9a 100644 --- a/_benchmark/reference/workloads/corpora.md +++ b/_benchmark/reference/workloads/corpora.md @@ -4,7 +4,8 @@ title: corpora parent: Workload reference grand_parent: OpenSearch Benchmark Reference nav_order: 70 -redirect_from: /benchmark/workloads/corpora/ +redirect_from: + - /benchmark/workloads/corpora/ --- diff --git a/_benchmark/reference/workloads/indices.md b/_benchmark/reference/workloads/indices.md index daaf6101f3..1478179bae 100644 --- a/_benchmark/reference/workloads/indices.md +++ b/_benchmark/reference/workloads/indices.md @@ -4,7 +4,8 @@ title: indices parent: Workload reference grand_parent: OpenSearch Benchmark Reference nav_order: 65 -redirect_from: /benchmark/workloads/indices/ +redirect_from: + - /benchmark/workloads/indices/ --- diff --git a/_benchmark/user-guide/configuring-benchmark.md b/_benchmark/user-guide/configuring-benchmark.md index 4cbf223b39..2be467d587 100644 --- a/_benchmark/user-guide/configuring-benchmark.md +++ b/_benchmark/user-guide/configuring-benchmark.md @@ -3,7 +3,8 @@ layout: default title: Configuring OpenSearch Benchmark nav_order: 7 parent: User guide -redirect_from: /benchmark/configuring-benchmark/ +redirect_from: + - /benchmark/configuring-benchmark/ --- # Configuring OpenSearch Benchmark diff --git a/_benchmark/user-guide/creating-custom-workloads.md b/_benchmark/user-guide/creating-custom-workloads.md index d06610467f..ee0dca1ce9 100644 --- a/_benchmark/user-guide/creating-custom-workloads.md +++ b/_benchmark/user-guide/creating-custom-workloads.md @@ -4,6 +4,7 @@ title: Creating custom workloads nav_order: 10 parent: User guide redirect_from: + - /benchmark/user-guide/creating-custom-workloads/ - /benchmark/creating-custom-workloads/ - /benchmark/user-guide/creating-osb-workloads/ --- diff --git 
a/_benchmark/user-guide/installing-benchmark.md b/_benchmark/user-guide/installing-benchmark.md index d3387deefb..8383cfb2f9 100644 --- a/_benchmark/user-guide/installing-benchmark.md +++ b/_benchmark/user-guide/installing-benchmark.md @@ -3,7 +3,8 @@ layout: default title: Installing OpenSearch Benchmark nav_order: 5 parent: User guide -redirect_from: /benchmark/installing-benchmark/ +redirect_from: + - /benchmark/installing-benchmark/ --- # Installing OpenSearch Benchmark diff --git a/_dashboards/management/multi-data-sources.md b/_dashboards/management/multi-data-sources.md index cffc9e02f5..dc3096c251 100644 --- a/_dashboards/management/multi-data-sources.md +++ b/_dashboards/management/multi-data-sources.md @@ -7,15 +7,18 @@ redirect_from: - /dashboards/discover/multi-data-sources/ --- -# Configuring and using multiple data sources +# Configuring and using multiple data sources in OpenSearch Dashboards -You can ingest, process, and analyze data from multiple data sources in OpenSearch Dashboards. You configure the data sources in the **Dashboards Management** > **Data sources** app, as shown in the following image. +You can ingest, process, and analyze data from multiple data sources in OpenSearch Dashboards. You configure the data sources under **Dashboards Management** > **Data sources**. This interface is shown in the following image. -Dashboards Management Data sources main screen +Dashboards Management data sources main screen ## Getting started -The following tutorial guides you through configuring and using multiple data sources. +The following tutorial guides you through configuring and using multiple data sources in OpenSearch Dashboards. + +The following features are not supported when using multiple data sources: timeline visualization types and the `gantt-chart` plugin. +{: .note} ### Step 1: Modify the YAML file settings @@ -35,7 +38,7 @@ A data source connection specifies the parameters needed to connect to a data so To create a new data source connection: 1. From the OpenSearch Dashboards main menu, select **Dashboards Management** > **Data sources** > **Create data source connection**. - + 2. Add the required information to each field to configure the **Connection Details** and **Authentication Method**. - Under **Connection Details**, enter a title and endpoint URL. For this tutorial, use the URL `https://localhost:9200/`. Entering a description is optional. @@ -51,22 +54,22 @@ To create a new data source connection: - After you have entered the appropriate details in all of the required fields, the **Test connection** and **Create data source** buttons become active. You can select **Test connection** to confirm that the connection is valid. -3. Select **Create data source** to save your settings. The connection is created, and the new data source appears in the list on the **Data Sources** main page. The first data source you create is marked as your default. +3. Select **Create data source** to save your settings. The connection is created, and the new data source appears in the list on the **Data Sources** main page. The first data source you create is marked as your default. 4. Edit or update a data source connection. - On the **Data Sources** main page, select the connection you want to modify. The **Connection Details** window opens. - - To mark the selected data source as the default, select the **Set as default** option. + - To mark the selected data source as the default, select the **Set as default** option. 
- To make changes to **Connection Details**, edit one or both of the **Title** and **Description** fields and select **Save changes** in the lower-right corner of the screen. You can also cancel changes here. To change the **Authentication Method**, choose a different authentication method, enter your credentials (if applicable), and then select **Save changes** in the lower-right corner of the screen. The changes are saved. - + - When **Username & Password** is the selected authentication method, you can update the password by choosing **Update stored password** next to the **Password** field. In the pop-up window, enter a new password in the first field and then enter it again in the second field to confirm. Select **Update stored password** in the pop-up window. The new password is saved. Select **Test connection** to confirm that the connection is valid. - When **AWS SigV4** is the selected authentication method, you can update the credentials by selecting **Update stored AWS credential**. In the pop-up window, enter a new access key in the first field and a new secret key in the second field. Select **Update stored AWS credential** in the pop-up window. The new credentials are saved. Select **Test connection** in the upper-right corner of the screen to confirm that the connection is valid. 5. Delete the data source connection by selecting the check box to the left of the title and then choosing **Delete 1 connection**. Selecting multiple check boxes for multiple connections is supported. Alternatively, select the {::nomarkdown}trash can icon{:/} icon. -An example data source connection screen is shown in the following image. +A data source connection interface is shown in the following image. Data source connection screen @@ -93,13 +96,15 @@ To select a data source through the Dev Tools console, follow these steps: 5. From the **Data source** dropdown menu, select a data source and then query the source. 6. Repeat the preceding steps for each data source you want to select. -### Upload saved objects to a dashboard from connected data sources +--- + +## Uploading saved objects to a dashboard from connected data sources To upload saved objects from connected data sources to a dashboard with multiple data sources, export them as an NDJSON file from the data source's **Saved object management** page. Then upload the file to the dashboard's **Saved object management** page. This method can simplify the transfer of saved objects between dashboards. The following 20-second video shows this feature in action. Multiple data sources in Saved object management{: .img-fluid} -#### Import saved objects from a connected data source +### Importing saved objects from a connected data source Follow these steps to import saved objects from a connected data source: @@ -109,11 +114,13 @@ Follow these steps to import saved objects from a connected data source: 4. Select **Import** > **Select file** and upload the file acquired from the connected data source. 5. Choose the appropriate **Data source** from the dropdown menu, set your **Conflict management** option, and then select the **Import** button. -### Show or hide authentication methods for multiple data sources +--- + +## Showing or hiding authentication methods Introduced 2.13 {: .label .label-purple } -A feature flag in your `opensearch_dashboards.yml` file allows you to show or hide authentication methods within the `data_source` plugin. The following example setting, shown in a 10-second demo, hides the authentication method for `AWSSigV4`. 
+A feature flag in your `opensearch_dashboards.yml` file allows you to show or hide authentication methods within the `data_source` plugin. The following setting hides the authentication method for `AWSSigV4`. ```` # Set enabled to false to hide the authentication method from multiple data source in OpenSearch Dashboards. @@ -128,89 +135,212 @@ data_source.authTypes: enabled: false ```` +The following demo shows this process. + Multiple data sources hide and show authentication{: .img-fluid} -### Hide the local cluster option for multiple data sources +## Showing or hiding the local cluster Introduced 2.13 {: .label .label-purple } -A feature flag in your `opensearch_dashboards.yml` file allows you to hide the local cluster option within the `data_source` plugin. This option hides the local cluster from the data source dropdown menu and index creation page, which is ideal for environments with or without a local OpenSearch cluster. The following example setting, shown in a 20-second demo, hides the local cluster. +A feature flag in your `opensearch_dashboards.yml` file allows you to hide the local cluster option within the `data_source` plugin. This option hides the local cluster from the data source dropdown menu and index creation page, which is ideal for environments with or without a local OpenSearch cluster. The following example setting, shown in a 20-second demo, hides the local cluster: ```` -# hide local cluster in the data source dropdown and index pattern creation page. +# hide local cluster in the data source dropdown and index pattern creation page. data_source.hideLocalCluster: true ```` +The following demo shows this process. + Multiple data sources hide local cluster{: .img-fluid} +--- + ## Using multiple data sources with external dashboards plugins Introduced 2.14 -{: .label .label-purple } +{: .label .label-purple} -The following plugins now support multiple data sources +The following plugins now support multiple data sources. ### Index management -When the data source feature is enabled, you can navigate to **Index Management** under the **Management** menu. Using indexes as an example, you can view all connected data sources and select a specific one from the navigation bar on the upper right. By default, the indexes from the designated default data source are displayed. However, you can select any connected data source to view its corresponding indexes. The following GIF illustrates these steps. +When you set `data_source.enabled:true`, you can view and select data sources and their associated indexes directly from the interface: + +1. Navigate to **Management** > **Index Management** under the main menu. +2. Select **Indexes** from the sidebar menu and then select the {::nomarkdown}database icon{:/} icon on the upper-right menu bar. +3. Choose the appropriate data source from the dropdown menu and then choose the appropriate index from the list. By default, the indexes from your default data source are displayed. You can choose any connected data source to view its corresponding indexes. + +The following GIF illustrates these steps. Multiple data sources in ISM list page -To perform operations on a specific index within a data source, select the individual index from the list. To create a new index, select the **Create Index** button, which opens a form. Fill in the required information and select the **Create** button. The index is created within the selected data source. The following GIF illustrates these steps. 
+To perform operations on a specific index within a data source, select the individual index from the list. To create a new index, select the **Create Index** button, which opens a form. Enter the required information and select the **Create** button. The index is created within the selected data source. The following GIF illustrates these steps. Multiple data sources in ISM create page ### Anomaly detection -When the data source feature is enabled, you can navigate to **Anomaly Detection** under the **OpenSearch Plugins** menu. On the navigation bar on the upper right, you can view all connected data sources and select a specific data source to view the dashboard from that source if it has detectors. If the selected data source does not have any detectors, the page prompts you to **Create detector**. The following GIF illustrates these steps. +When you set `data_source.enabled:true`, you can create or view detectors associated with a data source: + +1. Navigate to **OpenSearch Plugins** > **Anomaly Detection** under the main menu. +2. Select the database icon on the upper-right menu bar to view a list of connected data sources. +3. Select a data source to view a list of associated detectors. If the selected data source does not have detectors, then the **Create detector** button appears under the upper-right menu bar. See [Creating anomaly detectors]({{site.url}}{{site.baseurl}}/observing-your-data/ad/dashboards-anomaly-detection/#creating-anomaly-detectors) for instructions on creating detectors through the interface. + +The following GIF illustrates these steps. Multiple data sources in Anomaly Detection dashboard page -When you select **Detectors** from the side bar, the page displays the detectors currently configured for the selected data source. You can view and configure individual detectors by selecting them from the list. The following GIF illustrates these steps. +You can edit the data source's associated detectors on the **Detectors** tab under the left side bar. + +1. Select **Detectors** and then select the {::nomarkdown}database icon{:/} icon on the upper-right menu bar. +2. From the dropdown menu, select the appropriate data source. A list of associated detectors appears. +3. Choose a detector from the list, select **Actions**, and then choose the appropriate edit option from the dropdown menu. +4. Enter the applicable settings and configuration details. + +The following GIF illustrates these steps. Multiple data sources in Anomaly Detection detector page ### Security -When the data source feature is enabled, you can navigate to **Security** under the **Management** menu. Using role management as an example, you can view all connected data sources in the navigation bar on the upper right and select a specific data source to view its existing roles. To create a new role, select the **Create role** button, which takes you to a new page. Enter the required information and select **Create** to add the new role to the selected data source. The following GIF illustrates these steps. +When you set `data_source.enabled:true`, you can view and manage roles for each connected data source: + +1. Navigate to **Management** > **Security** under the main menu. +2. Select **Roles** from the left sidebar menu and then select the {::nomarkdown}database icon{:/} icon on the upper-right menu bar. +3. From the dropdown menu, select the appropriate data source and then select the **Create role** button to add a new role. +4. 
Enter the required configuration information and select the **Create** button to save. + +The following GIF illustrates these steps. Multiple data sources in Security plugin ### Maps -When the data source feature is enabled, you can navigate to **Maps** under the **OpenSearch Plugins** menu. To edit an existing map, select it from the maps list page, which opens the edit page. On the edit page, you can view all available data sources and the ones currently used in the map. To add a new layer, select **Add layer**, and then select **Documents** from the prompt, which opens a flyout. In the flyout, select the index pattern and geospatial field. Note that the data source name is prefixed to the index pattern name. After selecting **Update**, the new layer is added. Select the {::nomarkdown}database icon{:/} icon to verify that a new data source is now being used in the map. The following GIF illustrates these steps. +When you set `data_source.enabled:true`, you can view all available data sources, including the ones currently used as layers, in a map: + +1. Navigate to **OpenSearch Plugins** > **Maps** under the main menu. +2. From the dropdown menu, select the appropriate data source to edit or create an associated map layer: + - Edit a map layer by selecting one from the **Layers** dropdown menu. In the pop-up window, view the settings and edit them as needed. + - Add a new layer by selecting the **Add layer** button from the dropdown menu and then selecting **Documents** in the pop-up window. Another pop-up window appears on the right. Enter the required information on the **Data** tab. Note that the data source name is prefixed to the index pattern name. The **Style** and **Settings** tabs include optional information. + - Select **Update** to save the settings. +3. Select the **Save** button on the menu bar to save the edited or new layer. +4. Select the {::nomarkdown}database icon{:/} icon on the upper-right menu bar to verify that the new data source is listed in the dropdown menu. + +The following GIF illustrates these steps. Multiple data sources in Maps plugin ### Machine learning -When the data source feature is enabled, you can navigate to **Machine Learning** under the **OpenSearch Plugins** menu. Initially, the models within the default data source are displayed. To view models from a different data source, switch to that data source from the navigation bar. To inspect the details of a specific model, select the {::nomarkdown}inspect icon{:/} icon to the right of the model entry. The following GIF illustrates these steps. +When you set `data_source.enabled:true`, you can view and manage machine learning models from different connected data sources: + +1. Navigate to **OpenSearch Plugins** > **Machine Learning** under the main menu. +2. Select the {::nomarkdown}database icon{:/} icon and choose a data source from the dropdown menu. A list of models associated with the selected data source is displayed. +3. Select the {::nomarkdown}inspect icon{:/} icon to the right of a listed model to view the model's configuration details for the selected data source. + +The following GIF illustrates these steps. Multiple data sources in Machine Learning Plugin ### Notifications -When the data source feature is enabled, you can navigate to **Notifications** under the **Management** menu. The page displays the notification channels configured for the currently selected data source. To view channels from a different data source, select the desired data source from the menu. 
To view or edit the details of an existing channel, select it from the list, which opens the channel details page. The following GIF illustrates these steps. +When you set `data_source.enabled:true`, you can view and manage notification channels for different data sources: + +1. Navigate to **Management** > **Notifications** under the main menu. +2. Select the {::nomarkdown}database icon{:/} icon and choose a data source from the dropdown menu. A list of channels associated with the selected data source is displayed. +3. Choose a channel from the list to view or manage its settings. + - Edit the channel's settings by selecting the **Actions** button and choosing the **Edit** option. Enter the required information in the **Edit channel** panel and then choose **Save**. + - Send a test message to the channel by selecting the **Send test message** button in the **Edit channel** window. Alternatively, you can select the **Actions** button in the channel details window and then choose the **Send test message** option from the dropdown menu. + +The following GIF illustrates these steps. Multiple data sources in Notification plugin ### Search relevance -When the data source feature is enabled, you can navigate to **Search Relevance** under the **OpenSearch Plugins** menu. On the navigation bar on the upper right, you can view all available data sources. To compare search results between indexes from different data sources, first select a data source and an index for **Query 1**, and then select a data source and an index for **Query 2**. Select **Search** to run the queries. The following GIF illustrates these steps. +When you set `data_source.enabled:true`, you can compare search results across indexes from different data sources: + +1. Navigate to **OpenSearch Plugins** > **Search Relevance** under the main menu. +2. Select the {::nomarkdown}database icon{:/} icon and choose a data source from the dropdown menu. A list of available data sources is displayed. +3. Under both **Query 1** and **Query 2**, select a data source and an index. +4. Select the **Search** button to run the queries. The query results are displayed in their respective results panels. + +The following GIF illustrates these steps. Multiple data sources in Search Relevance plugin -## Next steps +### Security analytics +Introduced 2.15 +{: .label .label-purple} + +When you set `data_source.enabled:true`, you can view and manage security analytics resources, such as detection rules, across multiple connected data sources: + +1. Navigate to **OpenSearch Plugins** > **Security analytics** under the main menu. +2. Select the {::nomarkdown}database icon{:/} icon and choose a data source from the dropdown menu. +3. Select **Detectors** > **Detection rules** from the navigation menu on the left. A list of detection rules is displayed. +4. Select a rule to open a pop-up window containing more information about that rule. + +The following GIF illustrates these steps. + +Multiple data sources in Security analytics list page + +1. Navigate to **OpenSearch Plugins** > **Security analytics** under the main menu. +2. Select the {::nomarkdown}database icon{:/} icon and choose a data source from the dropdown menu. +3. Select **Detectors** > **Detection rules** from the navigation menu on the left. +4. Select the **Create detection rule** button on the upper right and then enter the required configuration details in the **Create detection rule** window. +5. Select the **Create detection rule** button on the lower right to save the rule.
The rule is now associated with the data source. + +The following GIF illustrates these steps. + +Multiple data sources in Security analytics create page + +### Alerting +Introduced 2.15 +{: .label .label-purple } + +When you set `data_source.enabled:true`, you can view and manage alerting monitors across multiple connected data sources: + +1. Navigate to **OpenSearch Plugins** > **Alerting** under the main menu. +2. Select the {::nomarkdown}database icon{:/} icon and choose a data source from the dropdown menu. A list of associated monitors is displayed. +3. Select a monitor to view its details. -After configuring multiple data sources, you can analyze the data from each source. Refer to the following resources for more information: +The following GIF illustrates these steps. -- Learn about [managing index patterns]({{site.url}}{{site.baseurl}}/dashboards/management/index-patterns/) through OpenSearch Dashboards. -- Learn about [indexing data using Index Management]({{site.url}}{{site.baseurl}}/dashboards/im-dashboards/index/) through OpenSearch Dashboards. -- Learn about how to [connect OpenSearch and Amazon S3 through OpenSearch Dashboards]({{site.url}}{{site.baseurl}}/dashboards/management/S3-data-source/). -- Learn about the [Integrations tool]({{site.url}}{{site.baseurl}}/integrations/index/), which gives you the flexibility to use various data ingestion methods and connect data from the Dashboards UI. +Multiple data sources in Alerting list page + +To create a new monitor, select **Create monitor**. Fill out the form and select **Create**. The monitor is created within the selected data source. + +#### Managing alerting monitors from within the Dashboards application + +To manage data source monitors from within **Dashboards**: + +1. Navigate to the **Dashboards** application under the main menu and then select a dashboard from the list. +2. From the dashboard, select the {::nomarkdown}ellipsis icon{:/} icon to open the **Options** dropdown menu and then choose **Alerting**. +3. From the **Alerting** dropdown menu, choose **Associated monitors** to open the configuration window. +4. Select a monitor from the list to view or edit its details. + +The following GIF illustrates these steps. + +Multiple data sources with Feature anywhere associated monitor + +To associate a monitor with a data source: + +1. Navigate to the **Dashboards** application under the main menu and then select a dashboard from the list. +2. From the dashboard, select the {::nomarkdown}ellipsis icon{:/} icon to open the **Options** dropdown menu and then choose **Alerting**. +3. From the **Alerting** dropdown menu, choose **Add alerting monitor** to open the configuration window. +4. Enter the configuration information and then select the **Create monitor** button. The monitor is now associated with the data source. + +The following GIF illustrates these steps. + +Multiple data sources with Feature anywhere add associated monitor + +--- -## Limitations +## Next steps -The following features are not supported when using multiple data sources: +After configuring multiple data sources, you can analyze the data from each source.
See the following resources for more information: -* Timeline visualization types -* Some external plugins, such as the `gantt-chart` plugin +- [Index patterns]({{site.url}}{{site.baseurl}}/dashboards/management/index-patterns/) +- [Index Management]({{site.url}}{{site.baseurl}}/dashboards/im-dashboards/index/) +- [Connecting OpenSearch and Amazon S3 through OpenSearch Dashboards]({{site.url}}{{site.baseurl}}/dashboards/management/S3-data-source/) +- [OpenSearch Integrations]({{site.url}}{{site.baseurl}}/integrations/index/) diff --git a/_dashboards/quickstart.md b/_dashboards/quickstart.md index 358efe4003..82a2b446a5 100644 --- a/_dashboards/quickstart.md +++ b/_dashboards/quickstart.md @@ -4,7 +4,8 @@ title: OpenSearch Dashboards quickstart guide nav_order: 2 has_children: false redirect_from: - - /dashboards/quickstart-dashboards/ + - /dashboards/get-started/quickstart-dashboards/ + - /dashboards/quickstart-dashboards/ --- # OpenSearch Dashboards quickstart guide diff --git a/_dashboards/visualize/maps.md b/_dashboards/visualize/maps.md index 8a4196e483..23e14d41c3 100644 --- a/_dashboards/visualize/maps.md +++ b/_dashboards/visualize/maps.md @@ -5,6 +5,8 @@ grand_parent: Building data visualizations parent: Using coordinate and region maps nav_order: 10 redirect_from: + - /dashboards/maps-plugin/ + - /dashboards/visualize/maps/ - /dashboards/maps/ --- diff --git a/_data-prepper/migrate-open-distro.md b/_data-prepper/migrate-open-distro.md index e7fdacbd8f..8b3e7a7198 100644 --- a/_data-prepper/migrate-open-distro.md +++ b/_data-prepper/migrate-open-distro.md @@ -2,6 +2,8 @@ layout: default title: Migrating from Open Distro nav_order: 30 +redirect_from: + - /clients/data-prepper/migrate-open-distro/ --- # Migrating from Open Distro diff --git a/_data-prepper/migrating-from-logstash-data-prepper.md b/_data-prepper/migrating-from-logstash-data-prepper.md index f87ca8d6be..3d87f29517 100644 --- a/_data-prepper/migrating-from-logstash-data-prepper.md +++ b/_data-prepper/migrating-from-logstash-data-prepper.md @@ -3,6 +3,7 @@ layout: default title: Migrating from Logstash nav_order: 25 redirect_from: + - /clients/data-prepper/configure-logstash-data-prepper/ - /data-prepper/configure-logstash-data-prepper/ --- diff --git a/_data-prepper/pipelines/configuration/processors/obfuscate.md b/_data-prepper/pipelines/configuration/processors/obfuscate.md index 13d906acb3..8d6bf901da 100644 --- a/_data-prepper/pipelines/configuration/processors/obfuscate.md +++ b/_data-prepper/pipelines/configuration/processors/obfuscate.md @@ -67,6 +67,7 @@ Use the following configuration options with the `obfuscate` processor. | `source` | Yes | The source field to obfuscate. | | `target` | No | The new field in which to store the obfuscated value. This leaves the original source field unchanged. When no `target` is provided, the source field updates with the obfuscated value. | | `patterns` | No | A list of regex patterns that allow you to obfuscate specific parts of a field. Only parts that match the regex pattern will obfuscate. When not provided, the processor obfuscates the whole field. | +| `single_word_only` | No | When set to `true`, a word boundary `\b` is added to the pattern, which causes obfuscation to be applied only to words that are standalone in the input text. By default, it is `false`, meaning obfuscation patterns are applied to all occurrences. Can be used for Data Prepper 2.8 or greater. | `obfuscate_when` | No | Specifies under what condition the Obfuscate processor should perform matching. 
Default is no condition. | | `tags_on_match_failure` | No | The tag to add to an event if the obfuscate processor fails to match the pattern. | | `action` | No | The obfuscation action. As of Data Prepper 2.3, only the `mask` action is supported. | diff --git a/_data-prepper/pipelines/configuration/sinks/opensearch.md b/_data-prepper/pipelines/configuration/sinks/opensearch.md index c93f4708d1..b1c32f0005 100644 --- a/_data-prepper/pipelines/configuration/sinks/opensearch.md +++ b/_data-prepper/pipelines/configuration/sinks/opensearch.md @@ -65,7 +65,7 @@ Option | Required | Type | Description `connect_timeout` | No | Integer| The timeout value, in milliseconds, when requesting a connection from the connection manager. A timeout value of `0` is interpreted as an infinite timeout. If this timeout value is negative or not set, the underlying Apache HttpClient will rely on operating system settings to manage connection timeouts. `insecure` | No | Boolean | Whether or not to verify SSL certificates. If set to `true`, then certificate authority (CA) certificate verification is disabled and insecure HTTP requests are sent instead. Default is `false`. `proxy` | No | String | The address of the [forward HTTP proxy server](https://en.wikipedia.org/wiki/Proxy_server). The format is `"<hostname or IP>:<port>"` (for example, `"example.com:8100"`, `"http://example.com:8100"`, `"112.112.112.112:8100"`). The port number cannot be omitted. -`index` | Conditionally | String | The name of the export index. Only required when the `index_type` is `custom`. The index can be a plain string, such as `my-index-name`, contain [Java date-time patterns](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html), such as `my-index-${yyyy.MM.dd}` or `my-${yyyy-MM-dd-HH}-index`, be formatted using field values, such as `my-index-${/my_field}`, or use [Data Prepper expressions](https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/), such as `my-index-${getMetadata(\"my_metadata_field\"}`. All formatting options can be combined to provide flexibility when creating static, dynamic, and rolling indexes. +`index` | Conditionally | String | The name of the export index. Only required when the `index_type` is `custom`. The index can be a plain string, such as `my-index-name`, contain [Java date-time patterns](https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html), such as `my-index-%{yyyy.MM.dd}` or `my-%{yyyy-MM-dd-HH}-index`, be formatted using field values, such as `my-index-${/my_field}`, or use [Data Prepper expressions](https://opensearch.org/docs/latest/data-prepper/pipelines/expression-syntax/), such as `my-index-${getMetadata(\"my_metadata_field\"}`. All formatting options can be combined to provide flexibility when creating static, dynamic, and rolling indexes. `index_type` | No | String | Tells the sink plugin what type of data it is handling. Valid values are `custom`, `trace-analytics-raw`, `trace-analytics-service-map`, or `management-disabled`. Default is `custom`. `template_type` | No | String | Defines what type of OpenSearch template to use. Available options are `v1` and `index-template`. The default value is `v1`, which uses the original OpenSearch templates available at the `_template` API endpoints. The `index-template` option uses composable [index templates]({{site.url}}{{site.baseurl}}/opensearch/index-templates/), which are available through the OpenSearch `_index_template` API. 
Composable index types offer more flexibility than the default and are necessary when an OpenSearch cluster contains existing index templates. Composable templates are available for all versions of OpenSearch and some later versions of Elasticsearch. When `distribution_version` is set to `es6`, Data Prepper enforces the `template_type` as `v1`. `template_file` | No | String | The path to a JSON [index template]({{site.url}}{{site.baseurl}}/opensearch/index-templates/) file, such as `/your/local/template-file.json`, when `index_type` is set to `custom`. For an example template file, see [otel-v1-apm-span-index-template.json](https://github.com/opensearch-project/data-prepper/blob/main/data-prepper-plugins/opensearch/src/main/resources/otel-v1-apm-span-index-template.json). If you supply a template file, then it must match the template format specified by the `template_type` parameter. diff --git a/_data-prepper/pipelines/configuration/sources/documentdb.md b/_data-prepper/pipelines/configuration/sources/documentdb.md index 4074680ab5..c453b60a39 100644 --- a/_data-prepper/pipelines/configuration/sources/documentdb.md +++ b/_data-prepper/pipelines/configuration/sources/documentdb.md @@ -25,8 +25,8 @@ documentdb-pipeline: host: "docdb-mycluster.cluster-random.us-west-2.docdb.amazonaws.com" port: 27017 authentication: - username: ${{aws_secrets:secret:username}} - password: ${{aws_secrets:secret:password}} + {% raw %}username: ${{aws_secrets:secret:username}} + password: ${{aws_secrets:secret:password}}{% endraw %} aws: sts_role_arn: "arn:aws:iam::123456789012:role/MyRole" s3_bucket: my-bucket diff --git a/_getting-started/quickstart.md b/_getting-started/quickstart.md index 62fe92be34..78104b1913 100644 --- a/_getting-started/quickstart.md +++ b/_getting-started/quickstart.md @@ -3,6 +3,7 @@ layout: default title: Installation quickstart nav_order: 3 redirect_from: + - /about/quickstart/ - /opensearch/install/quickstart/ - /quickstart/ --- diff --git a/_im-plugin/data-streams.md b/_im-plugin/data-streams.md index d59526b7b8..c2b1449afc 100644 --- a/_im-plugin/data-streams.md +++ b/_im-plugin/data-streams.md @@ -2,6 +2,8 @@ layout: default title: Data streams nav_order: 13 +redirect_from: + - /opensearch/data-streams/ --- # Data streams diff --git a/_ingest-pipelines/processors/set.md b/_ingest-pipelines/processors/set.md new file mode 100644 index 0000000000..1abf9775b9 --- /dev/null +++ b/_ingest-pipelines/processors/set.md @@ -0,0 +1,155 @@ +--- +layout: default +title: Set +parent: Ingest processors +nav_order: 240 +--- + +# Set processor + +The `set` processor adds or updates fields in a document. It sets one field and associates it with the specified value. If the field already exists, then its value is replaced with the provided one unless the `override` parameter is set to `false`. When `override` is `false` and the specified field exists, the value of the field remains unchanged. + +The following is the syntax for the `set` processor: + +```json +{ + "description": "...", + "processors": [ + { + "set": { + "field": "new_field", + "value": "some_value" + } + } + ] +} +``` +{% include copy-curl.html %} + +## Configuration parameters + +The following table lists the required and optional parameters for the `set` processor. + +Parameter | Required/Optional | Description | +|-----------|-----------|-----------| +`field` | Required | The name of the field to be set or updated. Supports [template snippets]({{site.url}}{{site.baseurl}}/ingest-pipelines/create-ingest/#template-snippets). 
+`value` | Required | The value assigned to the field. Supports [template snippets]({{site.url}}{{site.baseurl}}/ingest-pipelines/create-ingest/#template-snippets). +`override` | Optional | A Boolean flag that determines whether the processor should override the existing value of the field. +`ignore_empty_value` | Optional | A Boolean flag that determines whether the processor should ignore `null` values or empty strings. Default is `false`. +`description` | Optional | A description of the processor's purpose or configuration. +`if` | Optional | Specifies to conditionally execute the processor. +`ignore_failure` | Optional | Specifies to ignore processor failures. See [Handling pipeline failures]({{site.url}}{{site.baseurl}}/ingest-pipelines/pipeline-failures/). +`on_failure` | Optional | Specifies a list of processors to run if the processor fails during execution. These processors are executed in the order they are specified. +`tag` | Optional | An identifier tag for the processor. Useful for debugging in order to distinguish between processors of the same type. + +## Using the processor + +Follow these steps to use the processor in a pipeline. + +### Step 1: Create a pipeline + +The following query creates a pipeline named `set-pipeline` that uses the `set` processor to add a new field `new_field` with the value `some_value` to the document: + +```json +PUT _ingest/pipeline/set-pipeline +{ + "description": "Adds a new field 'new_field' with the value 'some_value'", + "processors": [ + { + "set": { + "field": "new_field", + "value": "some_value" + } + } + ] +} +``` +{% include copy-curl.html %} + +### Step 2 (Optional): Test the pipeline + +It is recommended that you test your pipeline before you ingest documents. +{: .tip} + +To test the pipeline, run the following query: + +```json +POST _ingest/pipeline/set-pipeline/_simulate +{ + "docs": [ + { + "_source": { + "existing_field": "value" + } + } + ] +} +``` +{% include copy-curl.html %} + +#### Response + +The following example response confirms that the pipeline is working as expected: + +```json +{ + "docs": [ + { + "doc": { + "_index": "_index", + "_id": "_id", + "_source": { + "existing_field": "value", + "new_field": "some_value" + }, + "_ingest": { + "timestamp": "2024-05-30T21:56:15.066180712Z" + } + } + } + ] +} +``` +{% include copy-curl.html %} + +### Step 3: Ingest a document + +The following query ingests a document into an index named `testindex1`: + +```json +POST testindex1/_doc?pipeline=set-pipeline +{ + "existing_field": "value" +} +``` +{% include copy-curl.html %} + +#### Response + +The request indexes the document into the index `testindex1` and then indexes all documents with the `new_field` set to `some_value`, as shown in the following response: + +```json +{ + "_index": "testindex1", + "_id": "1", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 1, + "failed": 0 + }, + "_seq_no": 0, + "_primary_term": 1 +} +``` +{% include copy-curl.html %} + +### Step 4 (Optional): Retrieve the document + +To retrieve the document, run the following query: + +```json +GET testindex1/_doc/1 +``` +{% include copy-curl.html %} diff --git a/_ingest-pipelines/processors/sort.md b/_ingest-pipelines/processors/sort.md new file mode 100644 index 0000000000..7f1377bc32 --- /dev/null +++ b/_ingest-pipelines/processors/sort.md @@ -0,0 +1,176 @@ +--- +layout: default +title: Sort +parent: Ingest processors +nav_order: 250 +--- + +# Sort processor + +The `sort` processor sorts an array of items in either 
ascending or descending order. Numeric arrays are sorted numerically, while string or mixed arrays (strings and numbers) are sorted lexicographically. The processor throws an error if the input is not an array. + +The following is the syntax for the `sort` processor: + +```json +{ + "description": "Sort an array of items", + "processors": [ + { + "sort": { + "field": "my_array_field", + "order": "desc" + } + } + ] +} +``` +{% include copy-curl.html %} + +## Configuration parameters + +The following table lists the required and optional parameters for the `sort` processor. + +| Parameter | Required/Optional | Description | +|---|---|---| +`field` | Required | The field to be sorted. Must be an array. +`order` | Optional | The sort order to apply. Accepts `asc` for ascending or `desc` for descending. Default is `asc`. +`target_field` | Optional | The name of the field in which the sorted array is stored. If not specified, then the sorted array is stored in the same field as the original array (the `field` variable). +`description` | Optional | A description of the processor's purpose or configuration. +`if` | Optional | Specifies to conditionally execute the processor. +`ignore_failure` | Optional | Specifies to ignore processor failures. See [Handling pipeline failures]({{site.url}}{{site.baseurl}}/ingest-pipelines/pipeline-failures/). +`on_failure` | Optional | Specifies a list of processors to run if the processor fails during execution. These processors are executed in the order they are specified. +`tag` | Optional | An identifier tag for the processor. Useful for debugging in order to distinguish between processors of the same type. + +## Using the processor + +Follow these steps to use the processor in a pipeline. + +### Step 1: Create a pipeline + +The following query creates a pipeline named `sort-pipeline` that uses the `sort` processor to sort the `my_field` in descending order and store the sorted values in the `sorted_field`: + +```json +PUT _ingest/pipeline/sort-pipeline +{ + "description": "Sort an array of items in descending order", + "processors": [ + { + "sort": { + "field": "my_array_field", + "order": "desc", + "target_field": "sorted_array" + } + } + ] +} +``` +{% include copy-curl.html %} + +### Step 2 (Optional): Test the pipeline + +It is recommended that you test your pipeline before you ingest documents. 
+{: .tip} + +To test the pipeline, run the following query: + +```json +POST _ingest/pipeline/sort-pipeline/_simulate +{ + "docs": [ + { + "_source": { + "my_array_field": [3, 1, 4, 1, 5, 9, 2, 6, 5] + } + } + ] +} +``` +{% include copy-curl.html %} + +#### Response + +The following example response confirms that the pipeline is working as expected: + +```json +{ + "docs": [ + { + "doc": { + "_index": "_index", + "_id": "_id", + "_source": { + "sorted_array": [ + 9, + 6, + 5, + 5, + 4, + 3, + 2, + 1, + 1 + ], + "my_array_field": [ + 3, + 1, + 4, + 1, + 5, + 9, + 2, + 6, + 5 + ] + }, + "_ingest": { + "timestamp": "2024-05-30T22:10:13.405692128Z" + } + } + } + ] +} +``` +{% include copy-curl.html %} + +### Step 3: Ingest a document + +The following query ingests a document into an index named `testindex1`: + +```json +POST testindex1/_doc?pipeline=sort-pipeline +{ + "my_array_field": [3, 1, 4, 1, 5, 9, 2, 6, 5] +} +``` +{% include copy-curl.html %} + +#### Response + +The request indexes the document into the index `testindex1` and then indexes all documents with the `my_array_field` sorted in descending order, as shown in the following response: + +```json +{ + "_index": "testindex1", + "_id": "no-Py48BwFahnwl9KZzf", + "_version": 1, + "result": "created", + "_shards": { + "total": 2, + "successful": 1, + "failed": 0 + }, + "_seq_no": 9, + "_primary_term": 2 +} +``` +{% include copy-curl.html %} + +### Step 4 (Optional): Retrieve the document + +To retrieve the document, run the following query: + +```json +GET testindex1/_doc/no-Py48BwFahnwl9KZzf +``` +{% include copy-curl.html %} + diff --git a/_ingest-pipelines/processors/split.md b/_ingest-pipelines/processors/split.md new file mode 100644 index 0000000000..2052c3def1 --- /dev/null +++ b/_ingest-pipelines/processors/split.md @@ -0,0 +1,180 @@ +--- +layout: default +title: Split +parent: Ingest processors +nav_order: 270 +--- + +# Split processor + +The `split` processor is used to split a string field into an array of substrings based on a specified delimiter. + +The following is the syntax for the `split` processor: + +```json +{ + "split": { + "field": "field_to_split", + "separator": "", + "target_field": "split_field" + } +} +``` +{% include copy-curl.html %} + +## Configuration parameters + +The following table lists the required and optional parameters for the `split` processor. + +Parameter | Required/Optional | Description | +|-----------|-----------|-----------| +`field` | Required | The field containing the string to be split. +`separator` | Required | The delimiter used to split the string. This can be a regular expression pattern. +`preserve_field` | Optional | If set to `true`, preserves empty trailing fields (for example, `''`) in the resulting array. If set to `false`, empty trailing fields are removed from the resulting array. Default is `false`. +`target_field` | Optional | The field where the array of substrings is stored. If not specified, then the field is updated in-place. +`ignore_missing` | Optional | Specifies whether the processor should ignore documents that do not contain the specified +field. If set to `true`, then the processor ignores missing values in the field and leaves the `target_field` unchanged. Default is `false`. +`description` | Optional | A brief description of the processor. +`if` | Optional | A condition for running the processor. +`ignore_failure` | Optional | Specifies whether the processor continues execution even if it encounters an error. If set to `true`, then failures are ignored. 
Default is `false`. +`on_failure` | Optional | A list of processors to run if the processor fails. +`tag` | Optional | An identifier tag for the processor. Useful for debugging in order to distinguish between processors of the same type. + +## Using the processor + +Follow these steps to use the processor in a pipeline. + +### Step 1: Create a pipeline + +The following query creates a pipeline named `split_pipeline` that uses the `split` processor to split the `log_message` field on the comma character and store the resulting array in the `log_parts` field: + +```json +PUT _ingest/pipeline/split_pipeline +{ + "description": "Split log messages by comma", + "processors": [ + { + "split": { + "field": "log_message", + "separator": ",", + "target_field": "log_parts" + } + } + ] +} +``` +{% include copy-curl.html %} + +### Step 2 (Optional): Test the pipeline + +It is recommended that you test your pipeline before you ingest documents. +{: .tip} + +To test the pipeline, run the following query: + +```json +POST _ingest/pipeline/split_pipeline/_simulate +{ + "docs": [ + { + "_source": { + "log_message": "error,warning,info" + } + } + ] +} +``` +{% include copy-curl.html %} + +#### Response + +The following example response confirms that the pipeline is working as expected: + +```json +{ + "docs": [ + { + "doc": { + "_index": "_index", + "_id": "_id", + "_source": { + "log_message": "error,warning,info", + "log_parts": [ + "error", + "warning", + "info" + ] + }, + "_ingest": { + "timestamp": "2024-04-26T22:29:23.207849376Z" + } + } + } + ] +} +``` +{% include copy-curl.html %} + +### Step 3: Ingest a document + +The following query ingests a document into an index named `testindex1`: + +```json +PUT testindex1/_doc/1?pipeline=split_pipeline +{ + "log_message": "error,warning,info" +} +``` +{% include copy-curl.html %} + +#### Response + +The request indexes the document into the index `testindex1` and splits the `log_message` field on the comma delimiter before indexing, as shown in the following response: + +```json +{ + "_index": "testindex1", + "_id": "1", + "_version": 70, + "result": "updated", + "_shards": { + "total": 2, + "successful": 1, + "failed": 0 + }, + "_seq_no": 72, + "_primary_term": 47 +} +``` + +### Step 4 (Optional): Retrieve the document + +To retrieve the document, run the following query: + +```json +GET testindex1/_doc/1 +``` +{% include copy-curl.html %} + +#### Response + +The response shows the `log_message` field as an array of values split on the comma delimiter: + +```json +{ + "_index": "testindex1", + "_id": "1", + "_version": 70, + "_seq_no": 72, + "_primary_term": 47, + "found": true, + "_source": { + "log_message": "error,warning,info", + "log_parts": [ + "error", + "warning", + "info" + ] + } +} +``` diff --git a/_ingest-pipelines/processors/trim.md b/_ingest-pipelines/processors/trim.md new file mode 100644 index 0000000000..9c1999aeb2 --- /dev/null +++ b/_ingest-pipelines/processors/trim.md @@ -0,0 +1,165 @@ +--- +layout: default +title: Trim +parent: Ingest processors +nav_order: 300 +--- + +# Trim processor + +The `trim` processor is used to remove leading and trailing white space characters from a specified field. + +The following is the syntax for the `trim` processor: + +```json +{ + "trim": { + "field": "field_to_trim", + "target_field": "trimmed_field" + } +} +``` +{% include copy-curl.html %} + +## Configuration parameters + +The following table lists the required and optional parameters for the `trim` processor. 
+ +Parameter | Required/Optional | Description | +|-----------|-----------|-----------| +`field` | Required | The field containing the text to be trimmed. +`target_field` | Required | The field in which the trimmed text is stored. If not specified, then the field is updated in-place. +`ignore_missing` | Optional | Specifies whether the processor should ignore documents that do not contain the specified +field. If set to `true`, then the processor ignores missing values in the field and leaves the `target_field` unchanged. Default is `false`. +`description` | Optional | A brief description of the processor. +`if` | Optional | A condition for running the processor. +`ignore_failure` | Optional | Specifies whether the processor continues execution even if it encounters an error. If set to `true`, then failures are ignored. Default is `false`. +`on_failure` | Optional | A list of processors to run if the processor fails. +`tag` | Optional | An identifier tag for the processor. Useful for debugging in order to distinguish between processors of the same type. + +## Using the processor + +Follow these steps to use the processor in a pipeline. + +### Step 1: Create a pipeline + +The following query creates a pipeline named `trim_pipeline` that uses the `trim` processor to remove leading and trailing white space from the `raw_text` field and store the trimmed text in the `trimmed_text` field: + +```json +PUT _ingest/pipeline/trim_pipeline +{ + "description": "Trim leading and trailing white space", + "processors": [ + { + "trim": { + "field": "raw_text", + "target_field": "trimmed_text" + } + } + ] +} +``` +{% include copy-curl.html %} + +### Step 2 (Optional): Test the pipeline + +It is recommended that you test your pipeline before you ingest documents. +{: .tip} + +To test the pipeline, run the following query: + +```json +POST _ingest/pipeline/trim_pipeline/_simulate +{ + "docs": [ + { + "_source": { + "raw_text": " Hello, world! " + } + } + ] +} +``` +{% include copy-curl.html %} + +#### Response + +The following example response confirms that the pipeline is working as expected: + +```json +{ + "docs": [ + { + "doc": { + "_index": "_index", + "_id": "_id", + "_source": { + "raw_text": " Hello, world! ", + "trimmed_text": "Hello, world!" + }, + "_ingest": { + "timestamp": "2024-04-26T20:58:17.418006805Z" + } + } + } + ] +} +``` + +### Step 3: Ingest a document + +The following query ingests a document into an index named `testindex1`: + +```json +PUT testindex1/_doc/1?pipeline=trim_pipeline +{ + "message": " This is a test document. 
" +} +``` +{% include copy-curl.html %} + +#### Response + +The request indexes the document into the index `testindex1` and indexes all documents with the `raw_text` field, which is processed by the `trim_pipeline`, to populate the `trimmed_text` field, as shown in the following response: + +```json + "_index": "testindex1", + "_id": "1", + "_version": 68, + "result": "updated", + "_shards": { + "total": 2, + "successful": 1, + "failed": 0 + }, + "_seq_no": 70, + "_primary_term": 47 +} +``` +{% include copy-curl.html %} + +### Step 4 (Optional): Retrieve the document + +To retrieve the document, run the following query: + +```json +GET testindex1/_doc/1 +``` +{% include copy-curl.html %} + +The response includes the `trimmed_text` field with the leading and trailing white space removed: + +```json +{ + "_index": "testindex1", + "_id": "1", + "_version": 69, + "_seq_no": 71, + "_primary_term": 47, + "found": true, + "_source": { + "raw_text": " This is a test document. ", + "trimmed_text": "This is a test document." + } +} +``` diff --git a/_install-and-configure/configuring-opensearch/index-settings.md b/_install-and-configure/configuring-opensearch/index-settings.md index 4f37be728f..543fa92b0d 100644 --- a/_install-and-configure/configuring-opensearch/index-settings.md +++ b/_install-and-configure/configuring-opensearch/index-settings.md @@ -54,6 +54,7 @@ OpenSearch supports the following cluster-level index settings. All settings in - `cluster.remote_store.index.segment_metadata.retention.max_count` (Integer): Controls the minimum number of metadata files to keep in the segment repository on a remote store. A value below `1` disables the deletion of stale segment metadata files. Default is `10`. +- `cluster.remote_store.segment.transfer_timeout` (Time unit): Controls the maximum amount of time to wait for all new segments to update after refresh to the remote store. If the upload does not complete within a specified amount of time, it throws a `SegmentUploadFailedException` error. Default is `30m`. It has a minimum constraint of `10m`. ## Index-level index settings diff --git a/_ml-commons-plugin/cluster-settings.md b/_ml-commons-plugin/cluster-settings.md index c473af81a1..ebc9b92531 100644 --- a/_ml-commons-plugin/cluster-settings.md +++ b/_ml-commons-plugin/cluster-settings.md @@ -303,12 +303,12 @@ This setting automatically redeploys deployed or partially deployed models upon ### Setting ``` -plugins.ml_commons.model_auto_redeploy.enable: false +plugins.ml_commons.model_auto_redeploy.enable: true ``` ### Values -- Default value: false +- Default value: true - Valid values: `false`, `true` ## Set retires for auto redeploy diff --git a/_ml-commons-plugin/remote-models/blueprints.md b/_ml-commons-plugin/remote-models/blueprints.md index 0f65aee9da..254a21b068 100644 --- a/_ml-commons-plugin/remote-models/blueprints.md +++ b/_ml-commons-plugin/remote-models/blueprints.md @@ -55,41 +55,45 @@ As an ML developer, you can build connector blueprints for other platforms. 
Usin ## Configuration parameters -| Field | Data type | Is required | Description | -|:------------------------|:------------|:------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `name` | String | Yes | The name of the connector. | -| `description` | String | Yes | A description of the connector. | -| `version` | Integer | Yes | The version of the connector. | -| `protocol` | String | Yes | The protocol for the connection. For AWS services such as Amazon SageMaker and Amazon Bedrock, use `aws_sigv4`. For all other services, use `http`. | -| `parameters` | JSON object | Yes | The default connector parameters, including `endpoint` and `model`. Any parameters indicated in this field can be overridden by parameters specified in a predict request. | -| `credential` | JSON object | Yes | Defines any credential variables required to connect to your chosen endpoint. ML Commons uses **AES/GCM/NoPadding** symmetric encryption to encrypt your credentials. When the connection to the cluster first starts, OpenSearch creates a random 32-byte encryption key that persists in OpenSearch's system index. Therefore, you do not need to manually set the encryption key. | -| `actions` | JSON array | Yes | Defines what actions can run within the connector. If you're an administrator creating a connection, add the [blueprint]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/blueprints/) for your desired connection. | -| `backend_roles` | JSON array | Yes | A list of OpenSearch backend roles. For more information about setting up backend roles, see [Assigning backend roles to users]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-access-control#assigning-backend-roles-to-users). | -| `access_mode` | String | Yes | Sets the access mode for the model, either `public`, `restricted`, or `private`. Default is `private`. For more information about `access_mode`, see [Model groups]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-access-control#model-groups). | -| `add_all_backend_roles` | Boolean | Yes | When set to `true`, adds all `backend_roles` to the access list, which only a user with admin permissions can adjust. When set to `false`, non-admins can add `backend_roles`. | -| `client_config` | JSON object | No | The client configuration object, which provides settings that control the behavior of the client connections used by the connector. These settings allow you to manage connection limits and timeouts, ensuring efficient and reliable communication. | +| Field | Data type | Is required | Description | +|:---|:---|:---|:---| +| `name` | String | Yes | The name of the connector. | +| `description` | String | Yes | A description of the connector. | +| `version` | Integer | Yes | The version of the connector. | +| `protocol` | String | Yes | The protocol for the connection. For AWS services such as Amazon SageMaker and Amazon Bedrock, use `aws_sigv4`. For all other services, use `http`. | +| `parameters` | JSON object | Yes | The default connector parameters, including `endpoint` and `model`. Any parameters indicated in this field can be overridden by parameters specified in a predict request. 
| +| `credential` | JSON object | Yes | Defines any credential variables required to connect to your chosen endpoint. ML Commons uses **AES/GCM/NoPadding** symmetric encryption to encrypt your credentials. When the connection to the cluster first starts, OpenSearch creates a random 32-byte encryption key that persists in OpenSearch's system index. Therefore, you do not need to manually set the encryption key. | +| `actions` | JSON array | Yes | Defines what actions can run within the connector. If you're an administrator creating a connection, add the [blueprint]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/blueprints/) for your desired connection. | +| `backend_roles` | JSON array | Yes | A list of OpenSearch backend roles. For more information about setting up backend roles, see [Assigning backend roles to users]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-access-control#assigning-backend-roles-to-users). | +| `access_mode` | String | Yes | Sets the access mode for the model, either `public`, `restricted`, or `private`. Default is `private`. For more information about `access_mode`, see [Model groups]({{site.url}}{{site.baseurl}}/ml-commons-plugin/model-access-control#model-groups). | +| `add_all_backend_roles` | Boolean | Yes | When set to `true`, adds all `backend_roles` to the access list, which only a user with admin permissions can adjust. When set to `false`, non-admins can add `backend_roles`. | +| `client_config` | JSON object | No | The client configuration object, which provides settings that control the behavior of the client connections used by the connector. These settings allow you to manage connection limits and timeouts, ensuring efficient and reliable communication. | The `actions` parameter supports the following options. -| Field | Data type | Description | -|:------------------------|:------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `action_type` | String | Required. Sets the ML Commons API operation to use upon connection. As of OpenSearch 2.9, only `predict` is supported. | -| `method` | String | Required. Defines the HTTP method for the API call. Supports `POST` and `GET`. | -| `url` | String | Required. Sets the connection endpoint at which the action occurs. This must match the regex expression for the connection used when [adding trusted endpoints]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/index#adding-trusted-endpoints). | -| `headers` | JSON object | Sets the headers used inside the request or response body. Default is `ContentType: application/json`. If your third-party ML tool requires access control, define the required `credential` parameters in the `headers` parameter. | -| `request_body` | String | Required. Sets the parameters contained in the request body of the action. 
The parameters must include `\"inputText\`, which specifies how users of the connector should construct the request payload for the `action_type`. | -| `pre_process_function` | String | Optional. A built-in or custom Painless script used to preprocess the input data. OpenSearch provides the following built-in preprocess functions that you can call directly:
- `connector.pre_process.cohere.embedding` for [Cohere](https://cohere.com/) embedding models
- `connector.pre_process.openai.embedding` for [OpenAI](https://platform.openai.com/docs/guides/embeddings) embedding models
- `connector.pre_process.default.embedding`, which you can use to preprocess documents in neural search requests so that they are in the format that ML Commons can process with the default preprocessor (OpenSearch 2.11 or later). For more information, see [Built-in functions](#built-in-pre--and-post-processing-functions). | -| `post_process_function` | String | Optional. A built-in or custom Painless script used to post-process the model output data. OpenSearch provides the following built-in post-process functions that you can call directly:
- `connector.pre_process.cohere.embedding` for [Cohere text embedding models](https://docs.cohere.com/reference/embed)
- `connector.pre_process.openai.embedding` for [OpenAI text embedding models](https://platform.openai.com/docs/api-reference/embeddings)
- `connector.post_process.default.embedding`, which you can use to post-process documents in the model response so that they are in the format that neural search expects (OpenSearch 2.11 or later). For more information, see [Built-in functions](#built-in-pre--and-post-processing-functions). | +| Field | Data type | Description | +|:---|:---|:---| +| `action_type` | String | Required. Sets the ML Commons API operation to use upon connection. As of OpenSearch 2.9, only `predict` is supported. | +| `method` | String | Required. Defines the HTTP method for the API call. Supports `POST` and `GET`. | +| `url` | String | Required. Sets the connection endpoint at which the action occurs. This must match the regex expression for the connection used when [adding trusted endpoints]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/index#adding-trusted-endpoints). | +| `headers` | JSON object | Sets the headers used inside the request or response body. Default is `ContentType: application/json`. If your third-party ML tool requires access control, define the required `credential` parameters in the `headers` parameter. | +| `request_body` | String | Required. Sets the parameters contained in the request body of the action. The parameters must include `\"inputText\`, which specifies how users of the connector should construct the request payload for the `action_type`. | +| `pre_process_function` | String | Optional. A built-in or custom Painless script used to preprocess the input data. OpenSearch provides the following built-in preprocess functions that you can call directly:
- `connector.pre_process.cohere.embedding` for [Cohere](https://cohere.com/) embedding models
- `connector.pre_process.openai.embedding` for [OpenAI](https://platform.openai.com/docs/guides/embeddings) embedding models
- `connector.pre_process.default.embedding`, which you can use to preprocess documents in neural search requests so that they are in the format that ML Commons can process with the default preprocessor (OpenSearch 2.11 or later). For more information, see [Built-in functions](#built-in-pre--and-post-processing-functions). | +| `post_process_function` | String | Optional. A built-in or custom Painless script used to post-process the model output data. OpenSearch provides the following built-in post-process functions that you can call directly:
- `connector.post_process.cohere.embedding` for [Cohere text embedding models](https://docs.cohere.com/reference/embed)
- `connector.post_process.openai.embedding` for [OpenAI text embedding models](https://platform.openai.com/docs/api-reference/embeddings)
- `connector.post_process.default.embedding`, which you can use to post-process documents in the model response so that they are in the format that neural search expects (OpenSearch 2.11 or later). For more information, see [Built-in functions](#built-in-pre--and-post-processing-functions). | The `client_config` parameter supports the following options. -| Field | Data type | Description | -|:---------------------|:----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| `max_connection` | Integer | The maximum number of concurrent connections that the client can establish with the server. | -| `connection_timeout` | Integer | The maximum amount of time (in seconds) that the client will wait while trying to establish a connection to the server. A timeout prevents the client from waiting indefinitely and allows it to recover from unreachable network endpoints. | -| `read_timeout` | Integer | The maximum amount of time (in seconds) that the client will wait for a response from the server after sending a request. Useful when the server is slow to respond or encounters issues while processing a request. | +| Field | Data type | Description | +|:---|:---|:---| +| `max_connection` | Integer | The maximum number of concurrent connections that the client can establish to the server. Some remote services, like SageMaker, constrain the maximum number of concurrent connections and throw a throttling exception if the number of concurrent connections exceeds the threshold. The maximum number of concurrent OpenSearch connections is `max_connection`*`node_number_for_connector`. To mitigate this issue, try to decrease the value of this parameter and modify the retry settings in `client_config`. Default is `30`. | +| `connection_timeout` | Integer | The maximum amount of time (in seconds) that the client will wait while trying to establish a connection to the server. A timeout prevents the client from waiting indefinitely and allows the client to recover when it encounters unreachable network endpoints. | +| `read_timeout` | Integer | The maximum amount of time (in seconds) that the client will wait for a response from the server after sending a request. This is useful when the server is slow to respond or encounters an issue while processing a request. | +| `retry_backoff_policy` | String | The backoff policy for retries to the remote connector. This is useful when there is spike in traffic causing throttling exceptions. Supported policies are `constant`, `exponential_equal_jitter`, and `exponential_full_jitter`. Default is `constant`. | +| `max_retry_times` | Integer | The maximum number of times that a single remote inference request will be retried. This is useful when there is a spike in traffic causing throttling exceptions. When set to `0`, retrying is disabled. When set to `-1`, OpenSearch does not limit the number of `retry_times`. Setting this to a positive integer specifies the maximum number of retry attempts. Default is `0`. | +| `retry_backoff_millis` | Integer | The base backoff time in milliseconds for retry policy. The suspend time during two retries is determined by this parameter and `retry_backoff_policy`. Default is `200`. | +| `retry_timeout_seconds` | Integer | The timeout value, in seconds, for the retry. 
If the retry can not succeed within the specified amount of time, the connector will stop retrying and throw an exception. Default is `30`. | ## Built-in pre- and post-processing functions diff --git a/_observing-your-data/event-analytics.md b/_observing-your-data/event-analytics.md index b8fe72964c..a41d55c2ff 100644 --- a/_observing-your-data/event-analytics.md +++ b/_observing-your-data/event-analytics.md @@ -3,7 +3,7 @@ layout: default title: Event analytics nav_order: 20 redirect_from: - - /observing-your-data/event-analytics/ + - /observability-plugin/event-analytics/ --- # Event analytics diff --git a/_observing-your-data/log-analytics.md b/_observing-your-data/log-analytics.md deleted file mode 100644 index e69de29bb2..0000000000 diff --git a/_query-dsl/compound/index.md b/_query-dsl/compound/index.md index d1ad1e26ef..d1051bafa3 100644 --- a/_query-dsl/compound/index.md +++ b/_query-dsl/compound/index.md @@ -5,8 +5,10 @@ has_children: true has_toc: false nav_order: 40 redirect_from: + - /opensearch/query-dsl/compound/index/ - /query-dsl/compound/index/ - /query-dsl/query-dsl/compound/ + - /query-dsl/compound/ --- # Compound queries diff --git a/_query-dsl/geo-and-xy/index.md b/_query-dsl/geo-and-xy/index.md index a1d76f0477..44e2df9b49 100644 --- a/_query-dsl/geo-and-xy/index.md +++ b/_query-dsl/geo-and-xy/index.md @@ -7,6 +7,7 @@ redirect_from: - /opensearch/query-dsl/geo-and-xy/index/ - /query-dsl/query-dsl/geo-and-xy/ - /query-dsl/query-dsl/geo-and-xy/index/ + - /query-dsl/geo-and-xy/ --- # Geographic and xy queries diff --git a/_query-dsl/query-filter-context.md b/_query-dsl/query-filter-context.md index 6985822e03..64d45b8d7b 100644 --- a/_query-dsl/query-filter-context.md +++ b/_query-dsl/query-filter-context.md @@ -3,7 +3,8 @@ layout: default title: Query and filter context nav_order: 5 redirect_from: -- /query-dsl/query-dsl/query-filter-context/ + - /opensearch/query-dsl/query-filter-context/ + - /query-dsl/query-dsl/query-filter-context/ --- # Query and filter context diff --git a/_query-dsl/term-vs-full-text.md b/_query-dsl/term-vs-full-text.md index e5019c4eb2..61d57c67f8 100644 --- a/_query-dsl/term-vs-full-text.md +++ b/_query-dsl/term-vs-full-text.md @@ -3,7 +3,8 @@ layout: default title: Term-level and full-text queries compared nav_order: 10 redirect_from: -- /query-dsl/query-dsl/term-vs-full-text + - /query-dsl/query-dsl/term-vs-full-text/ + - /opensearch/query-dsl/term-vs-full-text/ --- # Term-level and full-text queries compared diff --git a/_query-dsl/term/index.md b/_query-dsl/term/index.md index 594ece3fb5..4a789b0b72 100644 --- a/_query-dsl/term/index.md +++ b/_query-dsl/term/index.md @@ -4,6 +4,9 @@ title: Term-level queries has_children: true has_toc: false nav_order: 20 +redirect_from: + - /opensearch/query-dsl/term/ + - /query-dsl/term/ --- # Term-level queries diff --git a/_search-plugins/neural-sparse-search.md b/_search-plugins/neural-sparse-search.md index fd86b3f6b0..e22c74596f 100644 --- a/_search-plugins/neural-sparse-search.md +++ b/_search-plugins/neural-sparse-search.md @@ -4,6 +4,7 @@ title: Neural sparse search nav_order: 50 has_children: false redirect_from: + - /search-plugins/neural-sparse-search/ - /search-plugins/sparse-search/ --- @@ -390,4 +391,28 @@ The response contains both documents: ## Next steps -- To learn more about splitting long text into passages for neural search, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/). 
\ No newline at end of file +- To learn more about splitting long text into passages for neural search, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/). + +## FAQ + +Refer to the following frequently asked questions for more information about neural sparse search. + +### How do I mitigate remote connector throttling exceptions? + +When using connectors to call a remote service like SageMaker, ingestion and search calls sometimes fail due to remote connector throttling exceptions. + +To mitigate throttling exceptions, modify the connector's [`client_config`]({{site.url}}{{site.baseurl}}/ml-commons-plugin/remote-models/blueprints/#configuration-parameters) parameter to decrease the number of maximum connections, using the `max_connection` setting to prevent the maximum number of concurrent connections from exceeding the threshold of the remote service. You can also modify the retry settings to flatten the request spike during ingestion. + +For versions earlier than OpenSearch 2.15, the SageMaker throttling exception will be thrown as the following "error": + +``` + { + "type": "status_exception", + "reason": "Error from remote service: {\"message\":null}" + } +``` + + +## Next steps + +- To learn more about splitting long text into passages for neural search, see [Text chunking]({{site.url}}{{site.baseurl}}/search-plugins/text-chunking/). diff --git a/_search-plugins/sql/identifiers.md b/_search-plugins/sql/identifiers.md index dc9c487c0f..214b2e53cb 100644 --- a/_search-plugins/sql/identifiers.md +++ b/_search-plugins/sql/identifiers.md @@ -4,6 +4,7 @@ title: Identifiers parent: SQL and PPL nav_order: 6 redirect_from: + - /observability-plugin/ppl/identifiers/ - /search-plugins/ppl/identifiers/ --- diff --git a/_search-plugins/sql/ppl/functions.md b/_search-plugins/sql/ppl/functions.md index 7ba18c29a9..275030f723 100644 --- a/_search-plugins/sql/ppl/functions.md +++ b/_search-plugins/sql/ppl/functions.md @@ -5,7 +5,8 @@ parent: PPL grand_parent: SQL and PPL nav_order: 2 redirect_from: - - /search-plugins/ppl/commands/ + - /observability-plugin/ppl/commands/ + - /search-plugins/ppl/commands/ --- # Commands diff --git a/_search-plugins/sql/ppl/syntax.md b/_search-plugins/sql/ppl/syntax.md index 3042902eb4..45eeb3aed2 100644 --- a/_search-plugins/sql/ppl/syntax.md +++ b/_search-plugins/sql/ppl/syntax.md @@ -1,71 +1,71 @@ ---- -layout: default -title: Syntax -parent: PPL -grand_parent: SQL and PPL -nav_order: 1 ---- - -# PPL syntax - -Every PPL query starts with the `search` command. It specifies the index to search and retrieve documents from. Subsequent commands can follow in any order. - -Currently, `PPL` supports only one `search` command, which can be omitted to simplify the query. -{ : .note} - -## Syntax - -```sql -search source= [boolean-expression] -source= [boolean-expression] -``` - -Field | Description | Required -:--- | :--- |:--- -`search` | Specifies search keywords. | Yes -`index` | Specifies which index to query from. | No -`bool-expression` | Specifies an expression that evaluates to a Boolean value. | No - -## Examples - -**Example 1: Search through accounts index** - -In the following example, the `search` command refers to an `accounts` index as the source and uses `fields` and `where` commands for the conditions: - -```sql -search source=accounts -| where age > 18 -| fields firstname, lastname -``` - -In the following examples, angle brackets `< >` enclose required arguments and square brackets `[ ]` enclose optional arguments. 
-{: .note } - - -**Example 2: Get all documents** - -To get all documents from the `accounts` index, specify it as the `source`: - -```sql -search source=accounts; -``` - -| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | -:--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- -| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke -| 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond -| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates -| 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams - -**Example 3: Get documents that match a condition** - -To get all documents from the `accounts` index that either have `account_number` equal to 1 or have `gender` as `F`, use the following query: - -```sql -search source=accounts account_number=1 or gender=\"F\"; -``` - -| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | -:--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- -| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | -| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | +--- +layout: default +title: Syntax +parent: PPL +grand_parent: SQL and PPL +nav_order: 1 +--- + +# PPL syntax + +Every PPL query starts with the `search` command. It specifies the index to search and retrieve documents from. Subsequent commands can follow in any order. + +Currently, `PPL` supports only one `search` command, which can be omitted to simplify the query. +{ : .note} + +## Syntax + +```sql +search source= [boolean-expression] +source= [boolean-expression] +``` + +Field | Description | Required +:--- | :--- |:--- +`search` | Specifies search keywords. | Yes +`index` | Specifies which index to query from. | No +`bool-expression` | Specifies an expression that evaluates to a Boolean value. | No + +## Examples + +**Example 1: Search through accounts index** + +In the following example, the `search` command refers to an `accounts` index as the source and uses `fields` and `where` commands for the conditions: + +```sql +search source=accounts +| where age > 18 +| fields firstname, lastname +``` + +In the following examples, angle brackets `< >` enclose required arguments and square brackets `[ ]` enclose optional arguments. 
+{: .note } + + +**Example 2: Get all documents** + +To get all documents from the `accounts` index, specify it as the `source`: + +```sql +search source=accounts; +``` + +| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | +:--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- +| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke +| 6 | Hattie | 671 Bristol Street | 5686 | M | Dante | Netagy | TN | 36 | hattiebond@netagy.com | Bond +| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates +| 18 | Dale | 467 Hutchinson Court | 4180 | M | Orick | null | MD | 33 | daleadams@boink.com | Adams + +**Example 3: Get documents that match a condition** + +To get all documents from the `accounts` index that either have `account_number` equal to 1 or have `gender` as `F`, use the following query: + +```sql +search source=accounts account_number=1 or gender=\"F\"; +``` + +| account_number | firstname | address | balance | gender | city | employer | state | age | email | lastname | +:--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- | :--- +| 1 | Amber | 880 Holmes Lane | 39225 | M | Brogan | Pyrami | IL | 32 | amberduke@pyrami.com | Duke | +| 13 | Nanette | 789 Madison Street | 32838 | F | Nogal | Quility | VA | 28 | null | Bates | diff --git a/_search-plugins/sql/sql/basic.md b/_search-plugins/sql/sql/basic.md index 8878f3bb27..fca14ae870 100644 --- a/_search-plugins/sql/sql/basic.md +++ b/_search-plugins/sql/sql/basic.md @@ -4,7 +4,7 @@ title: Basic Queries parent: SQL grand_parent: SQL and PPL nav_order: 5 -Redirect_from: +redirect_from: - /search-plugins/sql/basic/ --- diff --git a/_search-plugins/sql/sql/complex.md b/_search-plugins/sql/sql/complex.md index c70b965870..ecd52d19c2 100644 --- a/_search-plugins/sql/sql/complex.md +++ b/_search-plugins/sql/sql/complex.md @@ -4,7 +4,7 @@ title: Complex Queries parent: SQL grand_parent: SQL and PPL nav_order: 6 -Redirect_from: +redirect_from: - /search-plugins/sql/complex/ --- diff --git a/_search-plugins/sql/sql/delete.md b/_search-plugins/sql/sql/delete.md index 41cb6705af..eef810e7f5 100644 --- a/_search-plugins/sql/sql/delete.md +++ b/_search-plugins/sql/sql/delete.md @@ -4,7 +4,7 @@ title: Delete parent: SQL grand_parent: SQL and PPL nav_order: 12 -Redirect_from: +redirect_from: - /search-plugins/sql/delete/ --- diff --git a/_search-plugins/sql/sql/functions.md b/_search-plugins/sql/sql/functions.md index 03b808243c..19d29a9143 100755 --- a/_search-plugins/sql/sql/functions.md +++ b/_search-plugins/sql/sql/functions.md @@ -4,8 +4,9 @@ title: Functions parent: SQL grand_parent: SQL and PPL nav_order: 7 -Redirect_from: - - /search-plugins/sql/functions/ +redirect_from: + - /search-plugins/ppl/commands/ + - /observability-plugin/ppl/commands/ --- # Functions diff --git a/_search-plugins/sql/sql/index.md b/_search-plugins/sql/sql/index.md index 7035b6d664..6b2cee24da 100644 --- a/_search-plugins/sql/sql/index.md +++ b/_search-plugins/sql/sql/index.md @@ -7,7 +7,6 @@ has_children: true has_toc: false redirect_from: - /search-plugins/sql/sql/index/ - --- # SQL diff --git a/_security/access-control/api.md b/_security/access-control/api.md index 9473c16c20..63717d621a 100644 --- a/_security/access-control/api.md +++ b/_security/access-control/api.md @@ -1011,6 +1011,98 @@ PATCH _plugins/_security/api/rolesmapping } ``` +--- + +## Allowlist + +### Get allowlist + +Retrieves the 
current `allowlist` configuration. + +#### Request + +```json +GET _plugins/_security/api/allowlist +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "config" : { + "enabled" : true, + "requests" : { + "/_cat/nodes" : [ + "GET" + ], + "/_cat/indices" : [ + "GET" + ], + "/_plugins/_security/whoami" : [ + "GET" + ] + } + } +} +``` + +### Create allowlist + +Creates an `allowlist` configuration. + +#### Request + +```json +PUT _plugins/_security/api/allowlist +{ + "enabled": true, + "requests": { + "/_cat/nodes": ["GET"], + "/_cat/indices": ["GET"], + "/_plugins/_security/whoami": ["GET"] + } +} +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "status":"OK", + "message":"'config' updated." +} +``` + +### Update allowlist + +Updates an `allowlist` configuration. + +#### Request + +```json +PATCH _plugins/_security/api/allowlist +[ + { + "op": "add", + "path": "/config/requests", + "value": { + "/_cat/nodes": ["POST"] + } + } +] +``` +{% include copy-curl.html %} + +#### Example response + +```json +{ + "status":"OK", + "message":"Resource updated." +} +``` --- diff --git a/_security/multi-tenancy/tenant-index.md b/_security/multi-tenancy/tenant-index.md index d4e13ad193..0ed9123ce0 100644 --- a/_security/multi-tenancy/tenant-index.md +++ b/_security/multi-tenancy/tenant-index.md @@ -6,6 +6,7 @@ has_children: true has_toc: false redirect_from: - /security/multi-tenancy/ + - /security-plugin/access-control/multi-tenancy/ --- # OpenSearch Dashboards multi-tenancy diff --git a/_tools/grafana.md b/_tools/grafana.md index 16a899d82e..8f38054303 100644 --- a/_tools/grafana.md +++ b/_tools/grafana.md @@ -3,6 +3,8 @@ layout: default title: Grafana nav_order: 200 has_children: false +redirect_from: + - /clients/grafana/ --- # Grafana support diff --git a/_tools/k8s-operator.md b/_tools/k8s-operator.md index 403aabd631..7ee1c1adee 100644 --- a/_tools/k8s-operator.md +++ b/_tools/k8s-operator.md @@ -3,6 +3,8 @@ layout: default title: OpenSearch Kubernetes Operator nav_order: 80 has_children: false +redirect_from: + - /clients/k8s-operator/ --- The OpenSearch Kubernetes Operator is an open-source kubernetes operator that helps automate the deployment and provisioning of OpenSearch and OpenSearch Dashboards in a containerized environment. The operator can manage multiple OpenSearch clusters that can be scaled up and down depending on your needs. diff --git a/_tools/logstash/read-from-opensearch.md b/_tools/logstash/read-from-opensearch.md index 84d44951e5..53024c233b 100644 --- a/_tools/logstash/read-from-opensearch.md +++ b/_tools/logstash/read-from-opensearch.md @@ -4,7 +4,8 @@ title: Read from OpenSearch parent: Logstash nav_order: 220 redirect_from: - - /clients/logstash/ship-to-opensearch/ + - /clients/logstash/read-from-opensearch/ + - /clients/logstash/ship-to-opensearch/ --- # Read from OpenSearch diff --git a/_tuning-your-cluster/availability-and-recovery/snapshots/snapshot-restore.md b/_tuning-your-cluster/availability-and-recovery/snapshots/snapshot-restore.md index 91d0baca92..f35115c95f 100644 --- a/_tuning-your-cluster/availability-and-recovery/snapshots/snapshot-restore.md +++ b/_tuning-your-cluster/availability-and-recovery/snapshots/snapshot-restore.md @@ -207,7 +207,7 @@ You will most likely not need to specify any parameters except for `location`. F You will most likely not need to specify any parameters except for `bucket` and `base_path`. 
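For example, a minimal registration request for an S3-backed repository might look like the following sketch. The repository name, bucket, and base path are placeholder values, and the request assumes that the `repository-s3` plugin is installed on every node:

```json
PUT _snapshot/my-s3-repository
{
  "type": "s3",
  "settings": {
    "bucket": "my-snapshot-bucket",
    "base_path": "snapshots/cluster-1"
  }
}
```
{% include copy-curl.html %}

Replace the bucket and base path with values from your own environment.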
For allowed request parameters, see [Register or update snapshot repository API](https://opensearch.org/docs/latest/api-reference/snapshots/create-repository/). -### Registering an Azure storage account +### Registering a Microsoft Azure storage account using Helm Use the following steps to register a snapshot repository backed by an Azure storage account for an OpenSearch cluster deployed using Helm. @@ -296,6 +296,56 @@ Use the following steps to register a snapshot repository backed by an Azure sto } ``` +### Set up Microsoft Azure Blob Storage + +To use Azure Blob Storage as a snapshot repository, follow these steps: +1. Install the `repository-azure` plugin on all nodes with the following command: + + ```bash + ./bin/opensearch-plugin install repository-azure + ``` + +1. After the `repository-azure` plugin is installed, define your Azure Blob Storage settings before initializing the node. Start by defining your Azure Storage account name using the following secure setting: + + ```bash + ./bin/opensearch-keystore add azure.client.default.account + ``` + +Choose one of the following options for setting up your Azure Blob Storage authentication credentials. + +#### Using an Azure Storage account key + +Use the following setting to specify your Azure Storage account key: + +```bash +./bin/opensearch-keystore add azure.client.default.key +``` + +#### Shared access signature + +Use the following setting when accessing Azure with a shared access signature (SAS): + +```bash +./bin/opensearch-keystore add azure.client.default.sas_token +``` + +#### Azure token credential + +Starting in OpenSearch 2.15, you have the option to configure a token credential authentication flow in `opensearch.yml`. This method is distinct from connection string authentication, which requires a SAS or an account key. + +If you choose to use token credential authentication, you will need to choose a token credential type. Although Azure offers multiple token credential types, as of OpenSearch version 2.15, only [managed identity](https://learn.microsoft.com/en-us/entra/identity/managed-identities-azure-resources/overview) is supported. + +To use managed identity, add your token credential type to `opensearch.yml` using either the `managed` or `managed_identity` value. This indicates that managed identity is being used to perform token credential authentication: + +```yml +azure.client.default.token_credential_type: "managed_identity" +``` + +Note the following when using Azure token credentials: + +- Token credential support is disabled in `opensearch.yml` by default. +- A token credential takes precedence over an Azure Storage account key or a SAS when multiple options are configured. + ## Take snapshots You specify two pieces of information when you create a snapshot: @@ -447,7 +497,9 @@ We recommend ceasing write requests to a cluster before restoring from a snapsho 1. A write request to the now-deleted alias creates a new index with the same name as the alias. 1. The alias from the snapshot fails to restore due to a naming conflict with the new index. -Snapshots are only forward-compatible by one major version. If you have an old snapshot, you can sometimes restore it into an intermediate cluster, reindex all indexes, take a new snapshot, and repeat until you arrive at your desired version, but you might find it easier to just manually index your data in the new cluster. +Snapshots are only forward compatible by one major version. 
Snapshots taken by earlier OpenSearch versions can continue to be restored by the version of OpenSearch that originally took the snapshot, even after a version upgrade. For example, a snapshot taken by OpenSearch 2.11 or earlier can continue to be restored by a 2.11 cluster even after upgrading to 2.12. + +If you have an old snapshot taken from an earlier major OpenSearch version, you can restore it to an intermediate cluster one major version newer than the snapshot's version, reindex all indexes, take a new snapshot, and repeat until you arrive at your desired major version, as sketched below. Alternatively, you may find it easier to manually index your data in the new cluster. ## Security considerations diff --git a/images/dashboards/mds_feature_anywhere_create_alerting.gif b/images/dashboards/mds_feature_anywhere_create_alerting.gif new file mode 100644 index 0000000000..712cace8bf Binary files /dev/null and b/images/dashboards/mds_feature_anywhere_create_alerting.gif differ diff --git a/images/dashboards/mds_feature_anywhere_view_alerting.gif b/images/dashboards/mds_feature_anywhere_view_alerting.gif new file mode 100644 index 0000000000..ff840cfad4 Binary files /dev/null and b/images/dashboards/mds_feature_anywhere_view_alerting.gif differ diff --git a/images/dashboards/mds_monitor_view.gif b/images/dashboards/mds_monitor_view.gif new file mode 100644 index 0000000000..9ada1147f5 Binary files /dev/null and b/images/dashboards/mds_monitor_view.gif differ diff --git a/images/dashboards/mds_sa_detection_rules_create.gif b/images/dashboards/mds_sa_detection_rules_create.gif new file mode 100644 index 0000000000..50fc77b8d6 Binary files /dev/null and b/images/dashboards/mds_sa_detection_rules_create.gif differ diff --git a/images/dashboards/mds_sa_detection_rules_view.gif b/images/dashboards/mds_sa_detection_rules_view.gif new file mode 100644 index 0000000000..31508f10de Binary files /dev/null and b/images/dashboards/mds_sa_detection_rules_view.gif differ
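The following is a minimal sketch of the restore-and-reindex migration path described in the snapshot compatibility note above. The repository, snapshot, and index names are placeholders, and the requests assume that a file system repository at `/mnt/snapshots` is listed in `path.repo` on the intermediate cluster:

```json
PUT _snapshot/migration-repo
{
  "type": "fs",
  "settings": {
    "location": "/mnt/snapshots"
  }
}

POST _snapshot/migration-repo/old-snapshot/_restore
{
  "indices": "legacy-index"
}

POST _reindex
{
  "source": { "index": "legacy-index" },
  "dest": { "index": "legacy-index-migrated" }
}

PUT _snapshot/migration-repo/migrated-snapshot?wait_for_completion=true
```

After the new snapshot completes, repeat the same steps on the next intermediate cluster until you reach your target version.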