From a62f180659730d4693d2ed3f9c7bf5bcda5dc5b0 Mon Sep 17 00:00:00 2001
From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com>
Date: Thu, 26 Sep 2024 07:18:55 +0200
Subject: [PATCH 1/2] Sync docs from Discourse (#451)

Sync charm docs from https://discourse.charmhub.io

Co-authored-by: a-velasco
---
 docs/how-to/h-attached-storage.md       | 234 +++++++++++++++++++++
 docs/how-to/h-create-backup.md          |   4 +-
 docs/how-to/h-deploy-lxd.md             |   4 +-
 docs/how-to/h-horizontal-scaling.md     |  22 +-
 docs/how-to/h-large-deployment.md       | 267 ++++++++++++++++++++++++
 docs/how-to/h-load-testing.md           |   2 +-
 docs/overview.md                        |  75 ++++---
 docs/reference/r-system-requirements.md |   4 +-
 docs/revision-168.md                    | 105 ++++++++++
 docs/tutorial/t-deploy-opensearch.md    |  27 ++-
 docs/tutorial/t-enable-tls.md           |  40 ++--
 docs/tutorial/t-horizontal-scaling.md   | 199 ++++++++----------
 docs/tutorial/t-integrate.md            | 134 ++++++------
 docs/tutorial/t-set-up.md               |   2 +-
 14 files changed, 872 insertions(+), 247 deletions(-)
 create mode 100644 docs/how-to/h-attached-storage.md
 create mode 100644 docs/how-to/h-large-deployment.md
 create mode 100644 docs/revision-168.md

diff --git a/docs/how-to/h-attached-storage.md b/docs/how-to/h-attached-storage.md
new file mode 100644
index 000000000..f5f41ac9c
--- /dev/null
+++ b/docs/how-to/h-attached-storage.md
@@ -0,0 +1,234 @@
+# How to recover from attached storage
+
+This document describes the steps needed to reuse disks that contain the data and metadata of an OpenSearch cluster.
+
+[note] This document's steps can only be applied to disks under Juju management. It is not currently supported to bring external disks or volumes into Juju. [/note]
+
+## Summary
+  - [Introduction](#introduction)
+    - [Pre-requisites](#pre-requisites)
+  - [Re-using Disks Use-Cases](#re-using-disks-use-cases)
+    - [Same Cluster Scenario](#same-cluster-scenario)
+    - [Different Cluster Scenarios](#different-cluster-scenarios)
+      - [Reusing a disk in a different cluster](#reusing-a-disk-in-a-different-cluster)
+      - [Bootstrapping from a *used disk*](#bootstrapping-from-a-used-disk)
+  - [Dangling Indices](#dangling-indices)
+
+---
+
+[note type="caution"] **Make sure you have safely backed up your data: the steps described here can cause data loss.** [/note]
+
+## Introduction
+
+This document describes the steps needed to bring back disks that have previously been used by OpenSearch, and hence still hold data and metadata of that cluster, and how to reuse those disks. Such disks are referred to throughout this document as *used disks*.
+
+The document is intended for cases where a quick recovery is needed. However, it is important to understand that reusing disks may cause older data to overwrite existing or newer data. Make sure the disks and their contents are known before proceeding with any of the steps described below.
+
+### Pre-requisites
+
+Before starting, make sure that the disks are visible within Juju.
+For the remainder of this document, the following deployment will be used as an example:
+
+```
+$ juju status
+Model       Controller           Cloud/Region         Version  SLA          Timestamp
+opensearch  localhost-localhost  localhost/localhost  3.5.3    unsupported  16:46:04Z
+
+App                       Version  Status  Scale  Charm                     Channel        Rev  Exposed  Message
+opensearch                         active      3  opensearch                2/edge         164  no
+self-signed-certificates           active      1  self-signed-certificates  latest/stable  155  no
+
+Unit                         Workload  Agent  Machine  Public address  Ports     Message
+opensearch/0*                active    idle   1        10.81.173.18    9200/tcp
+opensearch/1                 active    idle   2        10.81.173.167   9200/tcp
+opensearch/2                 active    idle   3        10.81.173.48    9200/tcp
+self-signed-certificates/0*  active    idle   0        10.81.173.30
+
+Machine  State    Address        Inst id        Base          AZ  Message
+0        started  10.81.173.30   juju-e7601c-0  ubuntu@22.04      Running
+1        started  10.81.173.18   juju-e7601c-1  ubuntu@22.04      Running
+2        started  10.81.173.167  juju-e7601c-2  ubuntu@22.04      Running
+3        started  10.81.173.48   juju-e7601c-3  ubuntu@22.04      Running
+```
+
+Volumes can be listed with:
+
+```
+$ juju storage
+Unit          Storage ID         Type        Pool             Size     Status    Message
+opensearch/0  opensearch-data/0  filesystem  opensearch-pool  2.0 GiB  attached
+opensearch/1  opensearch-data/1  filesystem  opensearch-pool  2.0 GiB  attached
+opensearch/2  opensearch-data/2  filesystem  opensearch-pool  2.0 GiB  attached
+```
+
+For more details, [refer to the Juju storage management documentation](https://juju.is/docs/juju/manage-storage).
+
+
+## Re-using Disks Use-Cases
+
+OpenSearch has a set of APIs and mechanisms to detect the existence of previous data on a given node and to control how that data is handled. The most notable mechanisms are: (i) the `/_dangling` API, [as described in the upstream docs](https://opensearch.org/docs/latest/api-reference/index-apis/dangling-index/); and (ii) the `opensearch-node` CLI, which allows operators to clean up portions of the metadata on the *used disk* before re-attaching it to the cluster.
+
+The cases can be broken down into two groups: reusing disks from older nodes of the **same cluster**, or reusing disks from **another cluster**. They will be referred to as the *same cluster* and *other cluster* scenarios.
+
+The following scenarios will be considered:
+
+1) Same cluster: reusing disks from another node
+2) Other cluster: bootstrapping a new cluster with a used disk
+3) Other cluster: attaching a used disk from another cluster to an existing cluster
+
+The main concern in these cases is the management of the cluster metadata and the status of the previous indices.
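+
+Because metadata conflicts revolve around the cluster UUID, it can also help to record the UUID of the cluster a disk came from before reusing it. The following is a minimal sketch, assuming `$PASSWORD` holds the `admin` password and `$IP` is the address of any reachable unit (both are placeholders, not values provided by the charm):
+
+```
+# The root endpoint reports the name and UUID of the cluster this node belongs to
+$ curl -sk -u admin:$PASSWORD https://$IP:9200/ | grep cluster_uuid
+```
+
+Comparing this value before and after the recovery steps below makes it easy to tell whether a node joined the cluster you expected.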
+ +### Same Cluster Scenario + +We can check which volumes are currently available to be reattached: +``` +$ juju storage +Unit Storage ID Type Pool Size Status Message + opensearch-data/0 filesystem opensearch-pool 2.0 GiB detached +opensearch/1 opensearch-data/1 filesystem opensearch-pool 2.0 GiB attached +opensearch/2 opensearch-data/2 filesystem opensearch-pool 2.0 GiB attached +``` + +To reuse a given disk within the same cluster, it is enough to spin up a new node and attach that volume: + +``` +$ juju add-unit opensearch -n 1 --attach-storage opensearch-data/0 +``` + +The node will eventually come up: +``` +$ juju status +Model Controller Cloud/Region Version SLA Timestamp +opensearch localhost-localhost localhost/localhost 3.5.3 unsupported 16:51:39Z + +App Version Status Scale Charm Channel Rev Exposed Message +opensearch active 3 opensearch 2/edge 164 no +self-signed-certificates active 1 self-signed-certificates latest/stable 155 no + +Unit Workload Agent Machine Public address Ports Message +opensearch/1* active idle 2 10.81.173.167 9200/tcp +opensearch/2 active idle 3 10.81.173.48 9200/tcp +opensearch/3 active idle 4 10.81.173.102 9200/tcp +self-signed-certificates/0* active idle 0 10.81.173.30 + +Machine State Address Inst id Base AZ Message +0 started 10.81.173.30 juju-e7601c-0 ubuntu@22.04 Running +2 started 10.81.173.167 juju-e7601c-2 ubuntu@22.04 Running +3 started 10.81.173.48 juju-e7601c-3 ubuntu@22.04 Running +4 started 10.81.173.102 juju-e7601c-4 ubuntu@22.04 Running +``` + +The new node will have `opensearch-data/0` successfully attached: +``` +$ juju storage +Unit Storage ID Type Pool Size Status Message +opensearch/1 opensearch-data/1 filesystem opensearch-pool 2.0 GiB attached +opensearch/2 opensearch-data/2 filesystem opensearch-pool 2.0 GiB attached +opensearch/3 opensearch-data/0 filesystem opensearch-pool 2.0 GiB attached +``` + +Finally, the node will show up on the cluster status: +``` +$ curl -sk -u admin:$PASSWORD https://$IP:9200/_cat/nodes +10.81.173.102 15 98 16 3.75 5.59 4.99 dim cluster_manager,data,ingest,ml - opensearch-3.f1a +10.81.173.48 20 98 16 3.75 5.59 4.99 dim cluster_manager,data,ingest,ml - opensearch-2.f1a +10.81.173.167 29 98 16 3.75 5.59 4.99 dim cluster_manager,data,ingest,ml * opensearch-1.f1a +``` + +### Different Cluster Scenarios + +In these cases, the cluster has been removed and the application will be redeployed reusing these disks in part or in total. In all of the following cases, the `opensearch-node` CLI will be needed to clean up portions of the metadata. + +#### Reusing a disk in a *different cluster* + +To reuse a disk from another cluster, add a new unit with the *used disk*: +``` +$ juju add-unit opensearch --attach-storage opensearch-data/0 +``` + +The deployment of this node will eventually stop its normal process and will be unable to proceed. That happens because the new unit holds old metadata, with reference to the *old cluster UUID*. To resolve that, access the unit: +``` +$ juju ssh opensearch/0 +``` + +Checking the logs, it is possible to see the unit is waiting for the cluster to become available and intermittently listing its last-known peers that are now unreachable. The following message on the logs will show up: +``` +$ sudo journalctl -u snap.opensearch.daemon -f + +... 
+
+Caused by: org.opensearch.cluster.coordination.CoordinationStateRejectedException: join validation on cluster state with a different cluster uuid K-LFo5AqQ--lamWCNc6ZsA than local cluster uuid gRaym5GmSUebyPO5o3Ay4w, rejecting
+```
+
+To remove the stale metadata, first stop the service:
+```
+$ sudo systemctl stop snap.opensearch.daemon
+```
+
+Then, detach the node from its old cluster references:
+```
+$ sudo -u snap_daemon \
+    OPENSEARCH_JAVA_HOME=/snap/opensearch/current/usr/lib/jvm/java-21-openjdk-amd64 \
+    OPENSEARCH_PATH_CONF=/var/snap/opensearch/current/etc/opensearch \
+    OPENSEARCH_HOME=/var/snap/opensearch/current/usr/share/opensearch \
+    OPENSEARCH_LIB=/var/snap/opensearch/current/usr/share/opensearch/lib \
+    OPENSEARCH_PATH_CERTS=/var/snap/opensearch/current/etc/opensearch/certificates \
+    /snap/opensearch/current/usr/share/opensearch/bin/opensearch-node detach-cluster
+```
+
+Restart the service:
+```
+$ sudo systemctl start snap.opensearch.daemon
+```
+
+The cluster will eventually add the new node.
+
+#### Bootstrapping from a *used disk*
+
+To create a new cluster reusing one of the disks, first deploy a new OpenSearch cluster with one of the attached volumes:
+```
+$ juju deploy opensearch -n1 --attach-storage opensearch-data/XXX
+```
+
+The deployment will eventually stall and be unable to proceed. That happens because the cluster is loading its original metadata and cannot reach any of its former peers. To resolve that, access the unit:
+```
+$ juju ssh opensearch/0
+```
+Checking the logs, it is possible to see that the unit is waiting for the cluster to become available, intermittently listing its last-known peers that are now unreachable. The following message will show up in the logs:
+```
+$ sudo journalctl -u snap.opensearch.daemon -f
+
+...
+
+Sep 09 10:33:55 juju-05dbd1-4 opensearch.daemon[8573]: [2024-09-09T10:33:55,415][INFO ][o.o.s.c.ConfigurationRepository] [opensearch-3.bf4] Wait for cluster to be available ...
+```
+
+To remove the stale metadata, first stop the service:
+```
+$ sudo systemctl stop snap.opensearch.daemon
+```
+
+Then, run the `unsafe-bootstrap` command to remove the stale metadata:
+```
+$ sudo -u snap_daemon \
+    OPENSEARCH_JAVA_HOME=/snap/opensearch/current/usr/lib/jvm/java-21-openjdk-amd64 \
+    OPENSEARCH_PATH_CONF=/var/snap/opensearch/current/etc/opensearch \
+    OPENSEARCH_HOME=/var/snap/opensearch/current/usr/share/opensearch \
+    OPENSEARCH_LIB=/var/snap/opensearch/current/usr/share/opensearch/lib \
+    OPENSEARCH_PATH_CERTS=/var/snap/opensearch/current/etc/opensearch/certificates \
+    /snap/opensearch/current/usr/share/opensearch/bin/opensearch-node unsafe-bootstrap
+```
+
+Restart the service:
+```
+$ sudo systemctl start snap.opensearch.daemon
+```
+
+The cluster will correctly form with a new UUID. It is also possible to add more units, either fresh ones or units detached from another cluster, as explained in the previous section.
+
+
+
+## Dangling Indices
+
+At this point, the *used disk* is successfully attached to the cluster. The next step is to check for indices that exist on the disk but not in the cluster. That can be done using the `/_dangling` API. For more details on how to list and recover dangling indices, refer to the [OpenSearch documentation on this API](https://opensearch.org/docs/latest/api-reference/index-apis/dangling-index/).
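+
+As a sketch, listing and then importing a dangling index looks like the following, assuming `$PASSWORD` and `$IP` as before, and with `<index-uuid>` being a placeholder for a UUID taken from the list output:
+
+```
+# List indices that exist on disk but are absent from the cluster metadata
+$ curl -sk -u admin:$PASSWORD https://$IP:9200/_dangling
+
+# Import one of them; the accept_data_loss flag is mandatory and is exactly
+# what the caution note below is about
+$ curl -sk -u admin:$PASSWORD -XPOST "https://$IP:9200/_dangling/<index-uuid>?accept_data_loss=true"
+```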
+ +[note type="caution"] **This API cannot offer any guarantees as to whether the imported data truly represents the latest state of the data when the index was still part of the cluster.** [/note] \ No newline at end of file diff --git a/docs/how-to/h-create-backup.md b/docs/how-to/h-create-backup.md index f7697972e..051954b8b 100644 --- a/docs/how-to/h-create-backup.md +++ b/docs/how-to/h-create-backup.md @@ -37,11 +37,9 @@ password: username: admin ``` -For more context about passwords during a restore, check How to restore an external backup. - ## Create a backup -Once you have a three-nodes cluster with configurations set for S3 storage, check that Charmed OpenSearch is active and idle with juju status. +Once you have a three-node cluster with configurations set for S3 storage, check that Charmed OpenSearch is active and idle with juju status. Once Charmed OpenSearch is `active` and `idle`, you can create your first backup with the `create-backup` command: diff --git a/docs/how-to/h-deploy-lxd.md b/docs/how-to/h-deploy-lxd.md index 65bb14b5d..228991cd3 100644 --- a/docs/how-to/h-deploy-lxd.md +++ b/docs/how-to/h-deploy-lxd.md @@ -5,9 +5,7 @@ This guide goes shows you how to deploy Charmed OpenSearch on [LXD](https://ubun ## Prerequisites * Charmed OpenSearch VM Revision 108+ -* Canonical LXD 5.21 or higher -* Ubuntu 20.04 LTS or higher -* Fulfil the general [system requirements](/t/14565) +* Fulfil the [system requirements](/t/14565) ## Summary * [Configure LXD](#configure-lxd) diff --git a/docs/how-to/h-horizontal-scaling.md b/docs/how-to/h-horizontal-scaling.md index 67a512869..3b553da19 100644 --- a/docs/how-to/h-horizontal-scaling.md +++ b/docs/how-to/h-horizontal-scaling.md @@ -32,12 +32,22 @@ Below is a sample output of the command `juju status --watch 1s` when the cluste ```shell Model Controller Cloud/Region Version SLA Timestamp -tutorial opensearch-demo localhost/localhost 2.9.42 unsupported 15:46:15Z - -App Version Status Scale Charm Channel Rev Exposed Message -data-integrator active 1 data-integrator edge 11 no -opensearch blocked 2 opensearch edge 22 no 1 or more 'replica' shards are not assigned, please scale your application up. -tls-certificates-operator active 1 tls-certificates-operator stable 22 no +tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 14:29:04Z + +App Version Status Scale Charm Channel Rev Exposed Message +data-integrator active 1 data-integrator latest/edge 59 no +opensearch blocked 1 opensearch 2/beta 117 no 1 or more 'replica' shards are not assigned, please scale your application up. +self-signed-certificates active 1 self-signed-certificates latest/stable 155 no + +Unit Workload Agent Machine Public address Ports Message +data-integrator/0* active idle 2 10.95.38.174 +opensearch/0* active idle 1 10.95.38.230 9200/tcp +self-signed-certificates/0* active idle 0 10.95.38.94 + +Machine State Address Inst id Base AZ Message +0 started 10.95.38.94 juju-4dad5c-0 ubuntu@22.04 Running +1 started 10.95.38.230 juju-4dad5c-1 ubuntu@22.04 Running +2 started 10.95.38.174 juju-4dad5c-2 ubuntu@22.04 Running ``` In this case, the cluster is not in good health because the status is `blocked`, and the message says `1 or more 'replica' shards are not assigned, please scale your application up`. 
diff --git a/docs/how-to/h-large-deployment.md b/docs/how-to/h-large-deployment.md
new file mode 100644
index 000000000..b3eaee692
--- /dev/null
+++ b/docs/how-to/h-large-deployment.md
@@ -0,0 +1,267 @@
+# How to launch a large deployment
+
+The Charmed OpenSearch operator can be deployed at scale to support large deployments. This guide explains how to launch a large deployment of OpenSearch using Juju.
+
+## Summary
+  - [OpenSearch node roles](#opensearch-node-roles)
+    - [Set roles](#set-roles)
+      - [Auto-generated roles](#auto-generated-roles)
+      - [User set roles](#user-set-roles)
+  - [Deploy a large OpenSearch cluster](#deploy-a-large-opensearch-cluster)
+    - [Deploy the clusters](#deploy-the-clusters)
+    - [Add the required relations](#add-the-required-relations)
+      - [Configure TLS encryption](#configure-tls-encryption)
+    - [Form the OpenSearch cluster](#form-the-opensearch-cluster)
+
+---
+
+## OpenSearch node roles
+When deploying OpenSearch at scale, it is important to understand the `roles` that nodes can assume in a cluster.
+
+Amongst the [multiple roles](https://opensearch.org/docs/latest/tuning-your-cluster/) supported by OpenSearch, two are especially crucial for a successful cluster formation:
+
+- `cluster_manager`: assigned to nodes responsible for handling cluster-wide operations such as creating and deleting indices, managing shards, and rebalancing data across the cluster. Every cluster has a single active cluster manager, elected from among the `cluster_manager`-eligible nodes.
+- `data`: assigned to nodes that store data and perform data-related operations such as indexing and searching. Data nodes hold the shards that contain the indexed data. Data nodes can also be configured to perform ingest and transform operations.
+In Charmed OpenSearch, data nodes can optionally be further classified into tiers, to allow for defining [index lifecycle management policies](https://opensearch.org/docs/latest/im-plugin/ism/index/):
+  - `data.hot`
+  - `data.warm`
+  - `data.cold`
+
+There are also other roles that nodes can take on in an OpenSearch cluster, such as `ingest` and `coordinating` nodes.
+
+Roles in Charmed OpenSearch are applied at the application level; in other words, all nodes are assigned the same set of roles defined for their application.
+
+### Set roles
+Roles can either be set by the user or automatically generated by the charm.
+
+#### Auto-generated roles
+When no roles are set in the `roles` config option of the OpenSearch application, the charm automatically assigns the following roles to all nodes:
+```
+["data", "ingest", "ml", "cluster_manager"]
+```
+
+#### User set roles
+There are currently two ways for users to set roles on an application: at deploy time, or via a config change. Note that a role change will trigger a rolling restart of the OpenSearch application.
+
+To set roles at deploy time, run:
+```none
+juju deploy opensearch -n 3 --config roles="cluster_manager,data,ml"
+```
+
+To set roles later on through a config change, run:
+```none
+juju config opensearch roles="cluster_manager,data,ml"
+```
+
+> **Note:** We currently do not allow the removal of either the `cluster_manager` or `data` role.
+
+## Deploy a large OpenSearch cluster
+The OpenSearch charm manages large deployments and diversity in the topology of its nodes through Juju integrations.
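+
+Before diving into the deployment steps, note that you can double-check at any time which roles an application's nodes actually carry. A minimal sketch, assuming `$PASSWORD` holds the `admin` password and `$IP` is the address of any started unit (both placeholders; substitute the application name for your own):
+
+```shell
+# Roles configured on the application; empty output means the
+# auto-generated role set shown above is in use
+juju config opensearch roles
+
+# Roles each node actually reports, listed per node in the output
+curl -sk -u admin:$PASSWORD "https://$IP:9200/_cat/nodes?v"
+```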
+
+The cluster will consist of multiple integrated Juju applications (clusters), with each application configured to have a mix of `cluster_manager` and `data` roles defined for its nodes.
+
+### Deploy the clusters
+
+1. First, deploy the main orchestrator app.
+   ```shell
+   juju deploy -n 3 \
+     opensearch main \
+     --config cluster_name="app" \
+     --channel 2/edge
+   ```
+
+   As a reminder, since we did not set any roles on this application, the charm will automatically assign each node the `data,ingest,ml,cluster_manager` roles.
+
+2. (Optional, but recommended) Next, deploy a failover application with `cluster_manager` nodes to ensure high availability and fault tolerance.
+The failover app will take over the orchestration of the fleet in the event that the `main` app fails or gets removed. It is therefore important that this application includes `cluster_manager` among its roles, to ensure the continuity of the cluster.
+   ```shell
+   juju deploy -n 3 \
+     opensearch failover \
+     --config cluster_name="app" \
+     --config init_hold="true" \
+     --config roles="cluster_manager" \
+     --channel 2/edge
+   ```
+
+   The failover nodes are not required for a basic deployment of OpenSearch. They are, however, highly recommended for production deployments to ensure high availability and fault tolerance.
+
+   > **Note 1:** It is imperative that the `cluster_name` config values match between applications in large deployments. A `cluster_name` mismatch will effectively prevent two applications from forming a cluster.
+
+   > **Note 2:** Only the main orchestrator app may leave the `init_hold` config option at `false` (the default). Non-main orchestrator apps must set it to `true` to prevent the application from starting before being integrated with the main app.
+
+3. After deploying the nodes of the `main` app and additional `cluster_manager` nodes on the `failover` app, we will deploy a new app with `data.hot` node roles.
+
+   ```shell
+   juju deploy -n 3 \
+     opensearch data-hot \
+     --config cluster_name="app" \
+     --config roles="data.hot" \
+     --config init_hold="true" \
+     --channel 2/edge
+   ```
+
+4. We also need a TLS operator to enable TLS encryption for the cluster. We will deploy the `self-signed-certificates` charm to provide self-signed certificates.
+   ```shell
+   juju deploy self-signed-certificates
+   ```
+
+5. We can now track the progress of the deployment by running:
+   ```shell
+   juju status --watch 1s
+   ```
+
+   Once the deployment is complete, you should see the following output:
+
+   ```shell
+   Model  Controller   Cloud/Region         Version  SLA          Timestamp
+   dev    development  localhost/localhost  3.5.3    unsupported  06:01:06Z
+
+   App                       Version  Status   Scale  Charm                     Channel        Rev  Exposed  Message
+   data-hot                           blocked      3  opensearch                2/edge         159  no       Cannot start. Waiting for peer cluster relation...
+   failover                           blocked      3  opensearch                2/edge         159  no       Cannot start. Waiting for peer cluster relation...
+   main                               blocked      3  opensearch                2/edge         159  no       Missing TLS relation with this cluster.
+   self-signed-certificates           active       1  self-signed-certificates  latest/stable  155  no
+
+   Unit                         Workload  Agent  Machine  Public address  Ports  Message
+   data-hot/0                   active    idle   6        10.214.176.165
+   data-hot/1*                  active    idle   7        10.214.176.7
+   data-hot/2                   active    idle   8        10.214.176.161
+   failover/0*                  active    idle   3        10.214.176.194
+   failover/1                   active    idle   4        10.214.176.152
+   failover/2                   active    idle   5        10.214.176.221
+   main/0                       blocked   idle   0        10.214.176.231  Missing TLS relation with this cluster.
+   main/1                       blocked   idle   1        10.214.176.57   Missing TLS relation with this cluster.
+   main/2*                      blocked   idle   2        10.214.176.140  Missing TLS relation with this cluster.
+   self-signed-certificates/0*  active    idle   9        10.214.176.201
+
+   Machine  State    Address         Inst id        Base          AZ  Message
+   0        started  10.214.176.231  juju-d6b263-0  ubuntu@22.04      Running
+   1        started  10.214.176.57   juju-d6b263-1  ubuntu@22.04      Running
+   2        started  10.214.176.140  juju-d6b263-2  ubuntu@22.04      Running
+   3        started  10.214.176.194  juju-d6b263-3  ubuntu@22.04      Running
+   4        started  10.214.176.152  juju-d6b263-4  ubuntu@22.04      Running
+   5        started  10.214.176.221  juju-d6b263-5  ubuntu@22.04      Running
+   6        started  10.214.176.165  juju-d6b263-6  ubuntu@22.04      Running
+   7        started  10.214.176.7    juju-d6b263-7  ubuntu@22.04      Running
+   8        started  10.214.176.161  juju-d6b263-8  ubuntu@22.04      Running
+   9        started  10.214.176.201  juju-d6b263-9  ubuntu@22.04      Running
+   ```
+
+### Add the required relations
+
+
+#### Configure TLS encryption
+
+The Charmed OpenSearch operator does not function without TLS enabled. To enable TLS, integrate `self-signed-certificates` with all OpenSearch applications:
+
+```shell
+juju integrate self-signed-certificates main
+juju integrate self-signed-certificates failover
+juju integrate self-signed-certificates data-hot
+```
+
+Once the integrations are established, the `self-signed-certificates` charm will provide the required certificates for the OpenSearch clusters.
+
+Once TLS is fully configured in the `main` app, it will start immediately, whereas the other apps will keep waiting for the `admin` certificates to be shared with them by the `main` orchestrator.
+
+When the `main` app is ready, `juju status` will show something similar to the sample output below:
+
+```shell
+Model  Controller   Cloud/Region         Version  SLA          Timestamp
+dev    development  localhost/localhost  3.5.3    unsupported  06:03:49Z
+
+App                       Version  Status   Scale  Charm                     Channel        Rev  Exposed  Message
+data-hot                           blocked      3  opensearch                2/edge         159  no       Cannot start. Waiting for peer cluster relation...
+failover                           blocked      3  opensearch                2/edge         159  no       Cannot start. Waiting for peer cluster relation...
+main                               active       3  opensearch                2/edge         159  no
+self-signed-certificates           active       1  self-signed-certificates  latest/stable  155  no
+
+Unit                         Workload  Agent  Machine  Public address  Ports     Message
+data-hot/0                   active    idle   6        10.214.176.165
+data-hot/1*                  active    idle   7        10.214.176.7
+data-hot/2                   active    idle   8        10.214.176.161
+failover/0*                  active    idle   3        10.214.176.194
+failover/1                   active    idle   4        10.214.176.152
+failover/2                   active    idle   5        10.214.176.221
+main/0                       active    idle   0        10.214.176.231  9200/tcp
+main/1                       active    idle   1        10.214.176.57   9200/tcp
+main/2*                      active    idle   2        10.214.176.140  9200/tcp
+self-signed-certificates/0*  active    idle   9        10.214.176.201
+
+Machine  State    Address         Inst id        Base          AZ  Message
+0        started  10.214.176.231  juju-d6b263-0  ubuntu@22.04      Running
+1        started  10.214.176.57   juju-d6b263-1  ubuntu@22.04      Running
+2        started  10.214.176.140  juju-d6b263-2  ubuntu@22.04      Running
+3        started  10.214.176.194  juju-d6b263-3  ubuntu@22.04      Running
+4        started  10.214.176.152  juju-d6b263-4  ubuntu@22.04      Running
+5        started  10.214.176.221  juju-d6b263-5  ubuntu@22.04      Running
+6        started  10.214.176.165  juju-d6b263-6  ubuntu@22.04      Running
+7        started  10.214.176.7    juju-d6b263-7  ubuntu@22.04      Running
+8        started  10.214.176.161  juju-d6b263-8  ubuntu@22.04      Running
+9        started  10.214.176.201  juju-d6b263-9  ubuntu@22.04      Running
+```
+
+### Form the OpenSearch cluster
+
+Now, in order to form the large OpenSearch cluster (consisting of all three previous OpenSearch apps), integrate the `main` app with the `failover` and `data-hot` apps:
+
+```shell
+juju integrate main:peer-cluster-orchestrator failover:peer-cluster
+juju integrate main:peer-cluster-orchestrator data-hot:peer-cluster
+juju integrate failover:peer-cluster-orchestrator data-hot:peer-cluster
+```
+
+Once the relations are added, the `main` application will orchestrate the formation of the OpenSearch cluster. This will start the rest of the nodes in the cluster.
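+
+A quick way to confirm that the peer-cluster integrations are in place is to ask Juju to append the relation list to the regular status output (a small sketch; the exact layout of the relations table depends on your Juju version):
+
+```shell
+juju status --relations
+```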
+You can track the progress of the cluster formation by running: + +```shell +juju status --watch 1s +``` + +Once the cluster is formed and all nodes are up and ready, `juju status` will show something similar to the sample output below: + +```shell +Model Controller Cloud/Region Version SLA Timestamp +dev development localhost/localhost 3.5.3 unsupported 06:11:18Z + +App Version Status Scale Charm Channel Rev Exposed Message +data-hot active 3 opensearch 2/edge 159 no +failover active 3 opensearch 2/edge 159 no +main active 3 opensearch 2/edge 159 no +self-signed-certificates active 1 self-signed-certificates latest/stable 155 no + +Unit Workload Agent Machine Public address Ports Message +data-hot/0 active idle 6 10.214.176.165 9200/tcp +data-hot/1* active idle 7 10.214.176.7 9200/tcp +data-hot/2 active idle 8 10.214.176.161 9200/tcp +failover/0* active idle 3 10.214.176.194 9200/tcp +failover/1 active idle 4 10.214.176.152 9200/tcp +failover/2 active idle 5 10.214.176.221 9200/tcp +main/0 active idle 0 10.214.176.231 9200/tcp +main/1 active idle 1 10.214.176.57 9200/tcp +main/2* active idle 2 10.214.176.140 9200/tcp +self-signed-certificates/0* active idle 9 10.214.176.201 + +Machine State Address Inst id Base AZ Message +0 started 10.214.176.231 juju-d6b263-0 ubuntu@22.04 Running +1 started 10.214.176.57 juju-d6b263-1 ubuntu@22.04 Running +2 started 10.214.176.140 juju-d6b263-2 ubuntu@22.04 Running +3 started 10.214.176.194 juju-d6b263-3 ubuntu@22.04 Running +4 started 10.214.176.152 juju-d6b263-4 ubuntu@22.04 Running +5 started 10.214.176.221 juju-d6b263-5 ubuntu@22.04 Running +6 started 10.214.176.165 juju-d6b263-6 ubuntu@22.04 Running +7 started 10.214.176.7 juju-d6b263-7 ubuntu@22.04 Running +8 started 10.214.176.161 juju-d6b263-8 ubuntu@22.04 Running +9 started 10.214.176.201 juju-d6b263-9 ubuntu@22.04 Running +``` + +[note type="caution"] +**Caution**: The cluster will not come online if no `data` nodes are available. Ensure the `data` nodes are deployed and ready before forming the cluster. +[/note] + +[note type="reminder"] +**Reminder1**: In order to form a large deployment out of multiple juju apps, all applications must have the same `cluster_name` config option value or not set it at all, in which case it will be auto-generated in the main orchestrator and inherited by the other members. + +**Reminder2:** `init_hold` must be set to `true` for any subsequent (non main orchestrator) application. Otherwise the application may start and never be able to join the rest of the deployment fleet. +[/note] \ No newline at end of file diff --git a/docs/how-to/h-load-testing.md b/docs/how-to/h-load-testing.md index 43474b85b..46b6bac70 100644 --- a/docs/how-to/h-load-testing.md +++ b/docs/how-to/h-load-testing.md @@ -4,7 +4,7 @@ This guide will go over the steps for load testing your OpenSearch deployment wi ## Prerequisites * `juju v3.0+` - * This guide was written using `v3.4` + * This guide was written using `v3.5.3` * [`jq` command-line tool](https://jqlang.github.io/jq/) * If not already available, [a VPC set up on AWS](https://docs.aws.amazon.com/vpc/latest/userguide/vpc-getting-started.html) (or the equivalent environment in your cloud of choice) * `ACCESS_KEY` and `SECRET_KEY` for AWS. diff --git a/docs/overview.md b/docs/overview.md index 2248d54fa..26980c849 100644 --- a/docs/overview.md +++ b/docs/overview.md @@ -23,11 +23,10 @@ The upper portion of this page describes the Operating System (OS) where the cha | [**Reference**](/t/14109)
Technical information such as [system requirements](/t/14565) | |
 
 ## Project & community
-If you find a bug in this operator or want to request a specific feature, here are the useful links:
-- Raise the issue or feature request in the [Canonical Github repository](https://github.com/canonical/opensearch-operator/issues).
-- Meet the community and chat with us if there are issues and feature requests in our [Mattermost Channel](https://chat.charmhub.io/charmhub/channels/data-platform)
-and join the [Discourse Forum](https://discourse.charmhub.io/tag/opensearch).
-- To learn about contribution guidelines, check the Charmed OpenSearch [CONTRIBUTING.md](https://github.com/canonical/opensearch-operator/blob/main/CONTRIBUTING.md) on GitHub and read the Ubuntu Community's [Code of Conduct](https://ubuntu.com/community/code-of-conduct).
+Charmed OpenSearch is an official distribution of OpenSearch. It's an open-source project that welcomes community contributions, suggestions, fixes and constructive feedback.
+- Raise an issue or feature request in the [GitHub repository](https://github.com/canonical/opensearch-operator/issues).
+- Meet the community and chat with us in our [Matrix channel](https://matrix.to/#/#charmhub-data-platform:ubuntu.com) or [leave a comment](https://discourse.charmhub.io/t/charmed-opensearch-documentation/9729).
+- See the Charmed OpenSearch [contribution guidelines](https://github.com/canonical/opensearch-operator/blob/main/CONTRIBUTING.md) on GitHub and read the Ubuntu Community's [Code of Conduct](https://ubuntu.com/community/code-of-conduct).
 
 ## License & trademark
 The Charmed OpenSearch ROCK, Charmed OpenSearch Snap, and Charmed OpenSearch Operator are free software, distributed under the
@@ -45,37 +44,37 @@ This documentation follows the [Diataxis Framework](https://canonical.com/blog/d
 
 | Level | Path | Navlink |
 |----------|-------------------------|----------------------------------------------|
-| 1 | tutorial | [Tutorial]() |
-| 2 | t-overview | [Overview](/t/9722) |
-| 2 | t-set-up | [1. Set up the environment](/t/9724) |
-| 2 | t-deploy-opensearch | [2. Deploy OpenSearch](/t/9716) |
-| 2 | t-enable-tls | [3. Enable encryption](/t/9718) |
-| 2 | t-integrate | [4. Integrate with a client application](/t/9714) |
-| 2 | t-passwords | [5. Manage passwords](/t/9728) |
-| 2 | t-horizontal-scaling | [6. Scale horizontally](/t/9720) |
-| 2 | t-clean-up | [7.
Clean up the environment](/t/9726) | -| 1 | how-to | [How To]() | -| 2 | h-deploy-lxd | [Deploy on LXD](/t/14575) | -| 2 | h-horizontal-scaling | [Scale horizontally](/t/10994) | -| 2 | h-integrate | [Integrate with your charm](/t/15333) | -| 2 | h-enable-tls | [Enable TLS encryption](/t/14783) | -| 2 | h-rotate-tls-ca-certificates | [Rotate TLS/CA certificates](/t/15422) | -| 2 | h-enable-monitoring | [Enable monitoring](/t/14560) | -| 2 | h-load-testing | [Perform load testing](/t/13987) | -| 2 | h-backups | [Back up and restore]() | -| 3 | h-configure-s3 | [Configure S3](/t/14097) | -| 3 | h-create-backup | [Create a backup](/t/14098) | -| 3 | h-restore-backup | [Restore a local backup](/t/14099) | -| 3 | h-migrate-cluster | [Migrate a cluster](/t/14100) | -| 2 | h-upgrade | [Upgrade]() | -| 3 | h-minor-upgrade | [Perform a minor upgrade](/t/14141) | -| 3 | h-minor-rollback | [Perform a minor rollback](/t/14142) | -| 1 | reference | [Reference]() | -| 2 | r-system-requirements | [System requirements](/t/14565) | -| 2 | r-software-testing | [Charm testing](/t/14109) | +| 1 | tutorial | [Tutorial]() | +| 2 | t-overview | [Overview](/t/9722) | +| 2 | t-set-up | [1. Set up the environment](/t/9724) | +| 2 | t-deploy-opensearch | [2. Deploy OpenSearch](/t/9716) | +| 2 | t-enable-tls | [3. Enable encryption](/t/9718) | +| 2 | t-integrate | [4. Integrate with a client application](/t/9714) | +| 2 | t-passwords | [5. Manage passwords](/t/9728) | +| 2 | t-horizontal-scaling | [6. Scale horizontally](/t/9720) | +| 2 | t-clean-up | [7. Clean up the environment](/t/9726) | +| 1 | how-to | [How To]() | +| 2 | h-deploy-lxd | [Deploy on LXD](/t/14575) | +| 2 | h-large-deployment | [Launch a large deployment](/t/15573) | +| 2 | h-horizontal-scaling | [Scale horizontally](/t/10994) | +| 2 | h-integrate | [Integrate with your charm](/t/15333) | +| 2 | h-enable-tls | [Enable TLS encryption](/t/14783) | +| 2 | h-rotate-tls-ca-certificates | [Rotate TLS/CA certificates](/t/15422) | +| 2 | h-enable-monitoring | [Enable monitoring](/t/14560) | +| 2 | h-load-testing | [Perform load testing](/t/13987) | +| 2 | h-attached-storage| [Recover from attached storage](/t/15616) | +| 2 | h-backups | [Back up and restore]() | +| 3 | h-configure-s3 | [Configure S3](/t/14097) | +| 3 | h-create-backup | [Create a backup](/t/14098) | +| 3 | h-restore-backup | [Restore a local backup](/t/14099) | +| 3 | h-migrate-cluster | [Migrate a cluster](/t/14100) | +| 2 | h-upgrade | [Upgrade]() | +| 3 | h-minor-upgrade | [Perform a minor upgrade](/t/14141) | +| 3 | h-minor-rollback | [Perform a minor rollback](/t/14142) | +| 1 | reference | [Reference]() | +| 2 | release-notes| [Release notes]() | +| 3 | revision-168| [Revision 168](/t/14050) | +| 2 | r-system-requirements | [System requirements](/t/14565) | +| 2 | r-software-testing | [Charm testing](/t/14109) | -[/details] - - \ No newline at end of file +[/details] \ No newline at end of file diff --git a/docs/reference/r-system-requirements.md b/docs/reference/r-system-requirements.md index 61ef936db..2fb20e518 100644 --- a/docs/reference/r-system-requirements.md +++ b/docs/reference/r-system-requirements.md @@ -5,7 +5,9 @@ The following are the minimum software and hardware requirements to run Charmed ## Software * Ubuntu 22.04 LTS (Jammy) or later -* Juju `v.3.1.7+` +* Juju `v.3.5.3+` + * Older minor versions of Juju 3 may be compatible, but are not officially supported. Use at your own risk. 
+* LXD `6.1+`
 
 ## Hardware
 
diff --git a/docs/revision-168.md b/docs/revision-168.md
new file mode 100644
index 000000000..6d4e918f9
--- /dev/null
+++ b/docs/revision-168.md
@@ -0,0 +1,105 @@
+# Revision 168 release notes
+24 September 2024
+
+Charmed OpenSearch Revision 168 has been deployed to the [`2/stable` channel](https://charmhub.io/opensearch?channel=2/stable) on Charmhub.
+
+To upgrade from a previous revision of the OpenSearch charm, see [how to perform a minor upgrade](https://charmhub.io/opensearch/docs/h-minor-upgrade).
+
+## Summary
+* [Highlights and features](#highlights)
+* [Requirements and compatibility](#requirements-and-compatibility)
+* [Integrations](#integrations)
+* [Software contents](#software-contents)
+* [Known issues and limitations](#known-issues-and-limitations)
+* [Join the community](#join-the-community)
+
+---
+
+## Highlights
+This section goes over the features included in this release, starting with a description of the major highlights and finishing with a comprehensive list of [all other features](#other-features).
+
+### Large scale deployments
+
+Deploy a single OpenSearch cluster composed of multiple Juju applications. Each application can take on any of the available roles in OpenSearch. Large deployments support a diverse range of deployment constraints, roles, and regions.
+* [How to set up a large deployment](/t/15573)
+
+### Security automations
+
+Manage TLS certificates and passwords from a single point with Juju integrations, and rotate your TLS certificates without any downtime.
+
+* [How to enable TLS encryption](/t/14783)
+* [How to rotate TLS/CA certificates](/t/15422)
+
+### Monitoring
+
+Integrate with the Canonical Observability Stack (COS) and the OpenSearch Dashboards charm to monitor operational performance and visualize stored data across all clusters.
+
+* [How to enable monitoring](/t/14560)
+* [OpenSearch Dashboards: How to connect to OpenSearch](https://charmhub.io/opensearch-dashboards/docs/h-db-connect)
+
+### Backups
+
+Back up and restore with an Amazon S3-compatible storage backend.
+
+* [How to configure S3 storage](/t/14097)
+* [How to create a backup](/t/14098)
+
+### Other features
+* **Automated rolling restart**
+* **Automated minor upgrade** of OpenSearch version
+* **Automated deployment** for single and multiple clusters
+* **Backup and restore** for single and multiple clusters
+* **User management** and automated user and index setup with the [Data Integrator charm](https://charmhub.io/data-integrator)
+* **TLS encryption** (HTTP and transport layers) and certificate rotation
+* **Observability** of OpenSearch clusters and operational tooling via COS and the
+  [OpenSearch Dashboards charm](https://charmhub.io/opensearch-dashboards)
+* **Plugin management**: Index State Management, KNN and MLCommons
+* **OpenSearch security patching** and bug-fixing mechanisms
+
+For a detailed list of commits throughout all revisions, check our [GitHub Releases](https://github.com/canonical/opensearch-operator/releases).
+
+## Requirements and compatibility
+* Juju `v3.5.3+`
+  * Older minor versions of Juju 3 may be compatible, but are not officially supported.
+* LXD `v6.1`
+  * Older LXD versions may be compatible, but are not officially supported.
+* Integration with a TLS charm
+  * [`self-signed-certificates`](https://charmhub.io/self-signed-certificates) revision 155+ or [`manual-tls-certificates`](https://charmhub.io/manual-tls-certificates) revision 108+
+
+See the [system requirements page](/t/14565) for more information about software and hardware prerequisites.
+
+## Integrations
+
+Like all Juju charms, OpenSearch can easily integrate with other charms by implementing common interfaces/endpoints.
+
+OpenSearch can be seamlessly integrated out of the box with:
+
+* [TLS certificates charms](https://charmhub.io/topics/security-with-x-509-certificates#heading--understanding-your-x-509-certificates-requirements)
+  * **Note**: Charmed OpenSearch supports integration with the [tls-certificates library](https://charmhub.io/tls-certificates-interface/libraries/tls_certificates) `v2` or higher.
+* [COS Lite](https://charmhub.io/cos-lite)
+* [OpenSearch Dashboards](https://charmhub.io/opensearch-dashboards)
+* [Data Integrator](https://charmhub.io/data-integrator)
+* [S3 Integrator](https://charmhub.io/s3-integrator)
+
+See the [Integrations page](https://charmhub.io/opensearch/integrations) for a list of all interfaces and compatible charms.
+
+## Software contents
+
+This charm is based on the Canonical [opensearch-snap](https://github.com/canonical/opensearch-snap). It packages:
+* OpenSearch `v2.17.0`
+* OpenJDK `v21`
+
+## Known issues and limitations
+
+The following issues are known and scheduled to be fixed in the next maintenance release:
+
+* We currently do not support repurposing a node's role from cluster manager to non-cluster-manager.
+* Storage re-attachment from previous clusters is not currently automated. For manual instructions, see the how-to guide [How to recover from attached storage](/t/15616).
+
+## Join the community
+
+Charmed OpenSearch is an official distribution of OpenSearch. It's an open-source project that welcomes community contributions, suggestions, fixes and constructive feedback.
+
+* Raise an issue or feature request in the [GitHub repository](https://github.com/canonical/opensearch-operator/issues).
+* Meet the community and chat with us in our [Matrix channel](https://matrix.to/#/#charmhub-data-platform:ubuntu.com) or [leave a comment](https://discourse.charmhub.io/t/charmed-opensearch-documentation/9729).
+* See the Charmed OpenSearch [contribution guidelines](https://github.com/canonical/opensearch-operator/blob/main/CONTRIBUTING.md) on GitHub and read the Ubuntu Community's [Code of Conduct](https://ubuntu.com/community/code-of-conduct).
\ No newline at end of file
diff --git a/docs/tutorial/t-deploy-opensearch.md b/docs/tutorial/t-deploy-opensearch.md
index 3e91f9d89..88edc59e2 100644
--- a/docs/tutorial/t-deploy-opensearch.md
+++ b/docs/tutorial/t-deploy-opensearch.md
@@ -5,10 +5,14 @@
 To deploy Charmed OpenSearch, all you need to do is run the following command:
 
 ```shell
-juju deploy opensearch --channel 2/beta
+juju deploy opensearch -n 3 --channel 2/beta
 ```
 
-The command will fetch the charm from [Charmhub](https://charmhub.io/opensearch?channel=beta) and deploy it to the LXD cloud. This process can take several minutes depending on your machine.
+[note]
+**Note:** The `-n` flag is optional and specifies the number of units to deploy. In this case, we are deploying three units of Charmed OpenSearch. We recommend deploying at least three units for high availability.
+[/note] + +The command will fetch the charm from [Charmhub](https://charmhub.io/opensearch?channel=beta) and deploy 3 units to the LXD cloud. This process can take several minutes depending on your machine. You can track the progress by running: @@ -22,17 +26,20 @@ When the application is ready, `juju status` will show something similar to the ```shell Model Controller Cloud/Region Version SLA Timestamp -tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:20:34Z +tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 12:36:56Z -App Version Status Scale Charm Channel Rev Exposed Message -opensearch blocked 1 opensearch 2/beta 117 no Missing TLS relation with this cluster. -self-signed-certificates active 1 self-signed-certificates latest/stable 155 no +App Version Status Scale Charm Channel Rev Exposed Message +opensearch blocked 3 opensearch 2/beta 117 no Missing TLS relation with this cluster. -Unit Workload Agent Machine Public address Ports Message -opensearch/0* blocked idle 0 10.214.176.107 Missing TLS relation with this cluster. +Unit Workload Agent Machine Public address Ports Message +opensearch/0* blocked idle 0 10.95.38.94 Missing TLS relation with this cluster. +opensearch/1 blocked executing 1 10.95.38.139 Missing TLS relation with this cluster. +opensearch/2 blocked idle 2 10.95.38.212 Missing TLS relation with this cluster. -Machine State Address Inst id Base AZ Message -0 started 10.214.176.107 juju-b0826b-0 ubuntu@22.04 Running +Machine State Address Inst id Base AZ Message +0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running +1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running +2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running ``` To exit the `juju status` screen, enter `Ctrl + C`. diff --git a/docs/tutorial/t-enable-tls.md b/docs/tutorial/t-enable-tls.md index 7199f6b78..ebad2ed02 100644 --- a/docs/tutorial/t-enable-tls.md +++ b/docs/tutorial/t-enable-tls.md @@ -30,19 +30,23 @@ Wait until `self-signed-certificates` is active. Use `juju status --watch 1s` to ```shell Model Controller Cloud/Region Version SLA Timestamp -tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:22:05Z +tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 12:39:22Z App Version Status Scale Charm Channel Rev Exposed Message -opensearch blocked 1 opensearch 2/beta 117 no Missing TLS relation with this cluster. +opensearch blocked 3 opensearch 2/beta 117 no Missing TLS relation with this cluster. self-signed-certificates active 1 self-signed-certificates latest/stable 155 no Unit Workload Agent Machine Public address Ports Message -opensearch/0* blocked idle 0 10.214.176.107 Missing TLS relation with this cluster. -self-signed-certificates/0* active idle 1 10.214.176.116 - -Machine State Address Inst id Base AZ Message -0 started 10.214.176.107 juju-b0826b-0 ubuntu@22.04 Running -1 started 10.214.176.116 juju-b0826b-1 ubuntu@22.04 Running +opensearch/0* blocked idle 0 10.95.38.94 Missing TLS relation with this cluster. +opensearch/1 blocked idle 1 10.95.38.139 Missing TLS relation with this cluster. +opensearch/2 blocked idle 2 10.95.38.212 Missing TLS relation with this cluster. 
+self-signed-certificates/0* active idle 3 10.95.38.54 + +Machine State Address Inst id Base AZ Message +0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running +1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running +2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running +3 started 10.95.38.54 juju-be3883-3 ubuntu@22.04 Running ``` ## Integrate with OpenSearch @@ -59,19 +63,23 @@ The OpenSearch service will start. This might take some time. Once done, you can ```shell Model Controller Cloud/Region Version SLA Timestamp -tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:23:24Z +tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 12:41:22Z App Version Status Scale Charm Channel Rev Exposed Message -opensearch active 1 opensearch 2/beta 117 no +opensearch active 3 opensearch 2/beta 117 no self-signed-certificates active 1 self-signed-certificates latest/stable 155 no Unit Workload Agent Machine Public address Ports Message -opensearch/0* active idle 0 10.214.176.107 9200/tcp -self-signed-certificates/0* active idle 1 10.214.176.116 - -Machine State Address Inst id Base AZ Message -0 started 10.214.176.107 juju-b0826b-0 ubuntu@22.04 Running -1 started 10.214.176.116 juju-b0826b-1 ubuntu@22.04 Running +opensearch/0* active idle 0 10.95.38.94 9200/tcp +opensearch/1 active idle 1 10.95.38.139 9200/tcp +opensearch/2 active idle 2 10.95.38.212 9200/tcp +self-signed-certificates/0* active idle 3 10.95.38.54 + +Machine State Address Inst id Base AZ Message +0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running +1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running +2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running +3 started 10.95.38.54 juju-be3883-3 ubuntu@22.04 Running Integration provider Requirer Interface Type Message opensearch:node-lock-fallback opensearch:node-lock-fallback node_lock_fallback peer diff --git a/docs/tutorial/t-horizontal-scaling.md b/docs/tutorial/t-horizontal-scaling.md index 9fce056b3..a9ab7f098 100644 --- a/docs/tutorial/t-horizontal-scaling.md +++ b/docs/tutorial/t-horizontal-scaling.md @@ -5,115 +5,81 @@ After having indexed some data in our previous section, let's take a look at the status of our charm: ```shell -juju status --color +juju status ``` - -This should result in the following output (notice the `blocked` status and application message): +The output should look similar to the following: ```shell Model Controller Cloud/Region Version SLA Timestamp -tutorial opensearch-demo localhost/localhost 3.4.4 unsupported 17:16:43+02:00 +tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:57:38Z App Version Status Scale Charm Channel Rev Exposed Message -opensearch blocked 1 opensearch 2/edge 117 no 1 or more 'replica' shards are not assigned, please scale your application up. 
-self-signed-certificates active 1 self-signed-certificates latest/stable 155 no +data-integrator active 1 data-integrator latest/edge 59 no +opensearch active 3 opensearch 2/beta 117 no +self-signed-certificates active 1 self-signed-certificates latest/stable 155 no Unit Workload Agent Machine Public address Ports Message -opensearch/0* active idle 0 10.121.127.140 9200/tcp -self-signed-certificates/0* active idle 1 10.121.127.164 - -Machine State Address Inst id Base AZ Message -0 started 10.121.127.140 juju-454312-0 ubuntu@22.04 Running -1 started 10.121.127.164 juju-454312-1 ubuntu@22.04 Running -``` - -Out of curiosity, let's take a look at the health of the current 1 node OpenSearch cluster: - -```shell -curl --cacert demo-ca.pem -XGET https://username:password@opensearch_node_ip:9200/_cluster/health -``` - -You should get a similar output to the following: - -```json -{ - "cluster_name": "opensearch-tutorial", - "status": "yellow", - "timed_out": false, - "number_of_nodes": 1, - "number_of_data_nodes": 1, - "discovered_master": true, - "discovered_cluster_manager": true, - "active_primary_shards": 3, - "active_shards": 3, - "relocating_shards": 0, - "initializing_shards": 0, - "unassigned_shards": 1, - "delayed_unassigned_shards": 0, - "number_of_pending_tasks": 0, - "number_of_in_flight_fetch": 0, - "task_max_waiting_in_queue_millis": 0, - "active_shards_percent_as_number": 75 -} -``` - -You'll notice 2 things: -- The `status` of the cluster is `yellow` -- The `unassigned_shards` is `1` - -This means that one of our replica shards could not be assigned to a node, which is normal since we only have a single OpenSearch node. - -In order to have a healthy cluster `"status": "green"` we need to scale our cluster up (horizontally). - -You could also list the shards in your cluster and visualize which one is not assigned. 
- -```shell -curl --cacert demo-ca.pem -XGET https://username:password@opensearch_node_ip:9200/_cat/shards +data-integrator/0* active idle 4 10.95.38.22 +opensearch/0* active idle 0 10.95.38.94 9200/tcp +opensearch/1 active idle 1 10.95.38.139 9200/tcp +opensearch/2 active idle 2 10.95.38.212 9200/tcp +self-signed-certificates/0* active idle 3 10.95.38.54 + +Machine State Address Inst id Base AZ Message +0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running +1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running +2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running +3 started 10.95.38.54 juju-be3883-3 ubuntu@22.04 Running +4 started 10.95.38.22 juju-be3883-4 ubuntu@22.04 Running + +Integration provider Requirer Interface Type Message +data-integrator:data-integrator-peers data-integrator:data-integrator-peers data-integrator-peers peer +opensearch:node-lock-fallback opensearch:node-lock-fallback node_lock_fallback peer +opensearch:opensearch-client data-integrator:opensearch opensearch_client regular +opensearch:opensearch-peers opensearch:opensearch-peers opensearch_peers peer +opensearch:upgrade-version-a opensearch:upgrade-version-a upgrade peer +self-signed-certificates:certificates opensearch:certificates tls-certificates regular ``` -Which should result in the following output: - -```shell -.opensearch-observability 0 p STARTED 0 208b 10.111.61.68 opensearch-0 -albums 0 p STARTED 4 10.6kb 10.111.61.68 opensearch-0 -albums 0 r UNASSIGNED -.opendistro_security 0 p STARTED 10 68.4kb 10.111.61.68 opensearch-0 -``` ## Add node You can add two additional nodes to your deployed OpenSearch application with the following command: ```shell -juju add-unit opensearch -n 2 +juju add-unit opensearch -n 1 ``` -You can now watch the new units join the cluster with: `watch -c juju status --color`. It usually takes a few minutes for the new nodes to be added to the cluster formation. You’ll know that all three nodes are ready when `watch -c juju status --color` reports: +Where `-n 1` specifies the number of units to add. In this case, we are adding one unit to the OpenSearch application. You can add more units by changing the number after `-n`. + +You can now watch the new units join the cluster with: `juju status --watch 1s`. It usually takes a few minutes for the new nodes to be added to the cluster formation. 
You’ll know that all four nodes are ready when `juju status --watch 1s` reports:
+
+```shell
+Model     Controller       Cloud/Region         Version  SLA          Timestamp
+tutorial  opensearch-demo  localhost/localhost  3.5.3    unsupported  14:02:18Z
+
+App                       Version  Status  Scale  Charm                     Channel        Rev  Exposed  Message
+data-integrator                    active      1  data-integrator           latest/edge     59  no
+opensearch                         active      4  opensearch                2/beta         117  no
+self-signed-certificates           active      1  self-signed-certificates  latest/stable  155  no
+
+Unit                         Workload  Agent  Machine  Public address  Ports     Message
+data-integrator/0*           active    idle   4        10.95.38.22
+opensearch/0*                active    idle   0        10.95.38.94     9200/tcp
+opensearch/1                 active    idle   1        10.95.38.139    9200/tcp
+opensearch/2                 active    idle   2        10.95.38.212    9200/tcp
+opensearch/3                 active    idle   5        10.95.38.39     9200/tcp
+self-signed-certificates/0*  active    idle   3        10.95.38.54
+
+Machine  State    Address       Inst id        Base          AZ  Message
+0        started  10.95.38.94   juju-be3883-0  ubuntu@22.04      Running
+1        started  10.95.38.139  juju-be3883-1  ubuntu@22.04      Running
+2        started  10.95.38.212  juju-be3883-2  ubuntu@22.04      Running
+3        started  10.95.38.54   juju-be3883-3  ubuntu@22.04      Running
+4        started  10.95.38.22   juju-be3883-4  ubuntu@22.04      Running
+5        started  10.95.38.39   juju-be3883-5  ubuntu@22.04      Running
+```
+
+You can trust that Charmed OpenSearch added these nodes correctly, and that your replica shards are all assigned.
But if you want to verify that your data is correctly replicated, you can also query the shards with the following command: ```shell curl --cacert demo-ca.pem -XGET https://username:password@opensearch_node_ip:9200/_cat/shards @@ -122,16 +88,30 @@ curl --cacert demo-ca.pem -XGET https://username:password@opensearch_node_ip:920 Which should result in the following output: ```shell -.opensearch-observability 0 r STARTED 0 208b 10.111.61.76 opensearch-1 -.opensearch-observability 0 r STARTED 0 208b 10.111.61.79 opensearch-2 -.opensearch-observability 0 p STARTED 0 208b 10.111.61.68 opensearch-0 -albums 0 r STARTED 4 10.6kb 10.111.61.76 opensearch-1 -albums 0 p STARTED 4 10.6kb 10.111.61.68 opensearch-0 -.opendistro_security 0 r STARTED 10 68.4kb 10.111.61.76 opensearch-1 -.opendistro_security 0 r STARTED 10 68.4kb 10.111.61.79 opensearch-2 -.opendistro_security 0 p STARTED 10 68.4kb 10.111.61.68 opensearch-0 +test-index 0 r STARTED 0 208b 10.95.38.94 opensearch-0.0f3 +test-index 0 p STARTED 0 208b 10.95.38.139 opensearch-1.0f3 +.plugins-ml-config 0 r STARTED 1 3.9kb 10.95.38.94 opensearch-0.0f3 +.plugins-ml-config 0 r STARTED 1 3.9kb 10.95.38.139 opensearch-1.0f3 +.plugins-ml-config 0 p STARTED 1 3.9kb 10.95.38.212 opensearch-2.0f3 +.opensearch-observability 0 r STARTED 0 208b 10.95.38.94 opensearch-0.0f3 +.opensearch-observability 0 p STARTED 0 208b 10.95.38.139 opensearch-1.0f3 +.opensearch-observability 0 r STARTED 0 208b 10.95.38.212 opensearch-2.0f3 +albums 0 r STARTED 4 10.7kb 10.95.38.139 opensearch-1.0f3 +albums 0 p STARTED 4 10.7kb 10.95.38.212 opensearch-2.0f3 +.opensearch-sap-log-types-config 0 r STARTED 10.95.38.94 opensearch-0.0f3 +.opensearch-sap-log-types-config 0 r STARTED 10.95.38.139 opensearch-1.0f3 +.opensearch-sap-log-types-config 0 p STARTED 10.95.38.212 opensearch-2.0f3 +.opendistro_security 0 r STARTED 10 54.2kb 10.95.38.94 opensearch-0.0f3 +.opendistro_security 0 r STARTED 10 54.2kb 10.95.38.139 opensearch-1.0f3 +.opendistro_security 0 p STARTED 10 155.1kb 10.95.38.212 opensearch-2.0f3 +.charm_node_lock 0 r STARTED 1 3.8kb 10.95.38.94 opensearch-0.0f3 +.charm_node_lock 0 r STARTED 1 6.9kb 10.95.38.139 opensearch-1.0f3 +.charm_node_lock 0 p STARTED 1 11.8kb 10.95.38.212 opensearch-2.0f3 ``` +Notice that the shards are distributed across all nodes. + + ## Remove nodes [note type="caution"] **Note:** Refer to [safe-horizontal-scaling guide](/t/10994) to understand how to safely remove units in a production environment. @@ -141,31 +121,36 @@ albums 0 p STARTED 4 10.6kb 10.111.61.68 opensearch-0 **Warning:** In highly available deployment, only scaling down to 3 nodes is safe. If only 2 nodes are online, neither can be unavailable nor removed. The service will become **unavailable** and **data may be lost** if scaling below 2 nodes. [/note] -Removing a unit from the Juju application scales down your OpenSearch cluster by one node. Before we scale down the nodes we no longer need, list all the units with `juju status`. Here you will see three units / nodes: `opensearch/0`, `opensearch/1`, and `opensearch/2`. To remove the unit `opensearch/2` run: +Removing a unit from the Juju application scales down your OpenSearch cluster by one node. Before we scale down the nodes we no longer need, list all the units with `juju status`. Here you will see four units / nodes: `opensearch/0`, `opensearch/1`, `opensearch/2`, `opensearch/3`. 
+To remove the unit `opensearch/3`, run:

```shell
-juju remove-unit opensearch/2
+juju remove-unit opensearch/3
```

-You’ll know that the node was successfully removed when `watch -c juju status --color` reports:
+You’ll know that the node was successfully removed when `juju status --watch 1s` reports:

```shell
Model Controller Cloud/Region Version SLA Timestamp
-tutorial opensearch-demo localhost/localhost 3.4.4 unsupported 17:30:45+02:00
+tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 14:05:58Z

App Version Status Scale Charm Channel Rev Exposed Message
-opensearch active 2 opensearch 2/edge 117 no
-self-signed-certificates active 1 self-signed-certificates latest/stable 155 no
+data-integrator active 1 data-integrator latest/edge 59 no
+opensearch active 3 opensearch 2/beta 117 no
+self-signed-certificates active 1 self-signed-certificates latest/stable 155 no

Unit Workload Agent Machine Public address Ports Message
-opensearch/0* active idle 0 10.121.127.140 9200/tcp
-opensearch/1 active idle 3 10.121.127.126 9200/tcp
-self-signed-certificates/0* active idle 1 10.121.127.164
-
-Machine State Address Inst id Base AZ Message
-0 started 10.121.127.140 juju-454312-0 ubuntu@22.04 Running
-1 started 10.121.127.164 juju-454312-1 ubuntu@22.04 Running
-3 started 10.121.127.126 juju-454312-3 ubuntu@22.04 Running
+data-integrator/0* active idle 4 10.95.38.22
+opensearch/0* active idle 0 10.95.38.94 9200/tcp
+opensearch/1 active idle 1 10.95.38.139 9200/tcp
+opensearch/2 active idle 2 10.95.38.212 9200/tcp
+self-signed-certificates/0* active idle 3 10.95.38.54
+
+Machine State Address Inst id Base AZ Message
+0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running
+1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running
+2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running
+3 started 10.95.38.54 juju-be3883-3 ubuntu@22.04 Running
+4 started 10.95.38.22 juju-be3883-4 ubuntu@22.04 Running
```

>**Next step**: [7. Clean up the environment](/t/9726).
\ No newline at end of file
diff --git a/docs/tutorial/t-integrate.md b/docs/tutorial/t-integrate.md
index 6e110fa0b..78bfc7651 100644
--- a/docs/tutorial/t-integrate.md
+++ b/docs/tutorial/t-integrate.md
@@ -23,29 +23,40 @@ juju deploy data-integrator --channel=edge --config index-name=test-index --conf
The expected output:

```shell
-Deployed "data-integrator" from charm-hub charm "data-integrator", revision 43 in channel latest/edge on ubuntu@22.04/stable
+Deployed "data-integrator" from charm-hub charm "data-integrator", revision 59 in channel latest/edge on ubuntu@22.04/stable
```

Wait for `juju status --watch 1s` to show:

```shell
Model Controller Cloud/Region Version SLA Timestamp
-tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:27:49Z
+tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 12:43:22Z

App Version Status Scale Charm Channel Rev Exposed Message
-data-integrator blocked 1 data-integrator latest/edge 43 no Please relate the data-integrator with the desired product
-opensearch active 1 opensearch 2/beta 117 no
+data-integrator blocked 1 data-integrator latest/edge 59 no Please relate the data-integrator with the desired product
+opensearch active 3 opensearch 2/beta 117 no
self-signed-certificates active 1 self-signed-certificates latest/stable 155 no

Unit Workload Agent Machine Public address Ports Message
-data-integrator/0* blocked idle 2 10.214.176.202 Please relate the data-integrator with the desired product
-opensearch/0* active idle 0 10.214.176.107 9200/tcp
-self-signed-certificates/0* active idle 1 10.214.176.116
-
-Machine State Address Inst id Base AZ Message
-0 started 10.214.176.107 juju-b0826b-0 ubuntu@22.04 Running
-1 started 10.214.176.116 juju-b0826b-1 ubuntu@22.04 Running
-2 started 10.214.176.202 juju-b0826b-2 ubuntu@22.04 Running
+data-integrator/0* blocked idle 4 10.95.38.22 Please relate the data-integrator with the desired product
+opensearch/0* active idle 0 10.95.38.94 9200/tcp
+opensearch/1 active idle 1 10.95.38.139 9200/tcp
+opensearch/2 active idle 2 10.95.38.212 9200/tcp
+self-signed-certificates/0* active idle 3 10.95.38.54
+
+Machine State Address Inst id Base AZ Message
+0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running
+1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running
+2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running
+3 started 10.95.38.54 juju-be3883-3 ubuntu@22.04 Running
+4 started 10.95.38.22 juju-be3883-4 ubuntu@22.04 Running
+
+Integration provider Requirer Interface Type Message
+data-integrator:data-integrator-peers data-integrator:data-integrator-peers data-integrator-peers peer
+opensearch:node-lock-fallback opensearch:node-lock-fallback node_lock_fallback peer
+opensearch:opensearch-peers opensearch:opensearch-peers opensearch_peers peer
+opensearch:upgrade-version-a opensearch:upgrade-version-a upgrade peer
+self-signed-certificates:certificates opensearch:certificates tls-certificates regular
```

Notice that the status of the `data-integrator` application is `blocked`. This is because it is waiting for a relation to be established with another application, namely `opensearch`.
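+
+To unblock it, integrate the two applications, as described in the next step. A sketch of the command, which also appears verbatim later in this page:
+
+```shell
+juju integrate data-integrator opensearch
+```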
@@ -63,22 +74,26 @@ Wait for `juju status --relations --watch 1s` to show that the `data-integrator`

```bash
Model Controller Cloud/Region Version SLA Timestamp
-tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:28:43Z
+tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 12:44:43Z

App Version Status Scale Charm Channel Rev Exposed Message
-data-integrator active 1 data-integrator latest/edge 43 no
-opensearch active 1 opensearch 2/beta 117 no
+data-integrator active 1 data-integrator latest/edge 59 no
+opensearch active 3 opensearch 2/beta 117 no
self-signed-certificates active 1 self-signed-certificates latest/stable 155 no

Unit Workload Agent Machine Public address Ports Message
-data-integrator/0* active idle 2 10.214.176.202
-opensearch/0* active idle 0 10.214.176.107 9200/tcp
-self-signed-certificates/0* active idle 1 10.214.176.116
-
-Machine State Address Inst id Base AZ Message
-0 started 10.214.176.107 juju-b0826b-0 ubuntu@22.04 Running
-1 started 10.214.176.116 juju-b0826b-1 ubuntu@22.04 Running
-2 started 10.214.176.202 juju-b0826b-2 ubuntu@22.04 Running
+data-integrator/0* active idle 4 10.95.38.22
+opensearch/0* active idle 0 10.95.38.94 9200/tcp
+opensearch/1 active idle 1 10.95.38.139 9200/tcp
+opensearch/2 active idle 2 10.95.38.212 9200/tcp
+self-signed-certificates/0* active idle 3 10.95.38.54
+
+Machine State Address Inst id Base AZ Message
+0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running
+1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running
+2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running
+3 started 10.95.38.54 juju-be3883-3 ubuntu@22.04 Running
+4 started 10.95.38.22 juju-be3883-4 ubuntu@22.04 Running

Integration provider Requirer Interface Type Message
data-integrator:data-integrator-peers data-integrator:data-integrator-peers data-integrator-peers peer
@@ -109,18 +124,17 @@ ok: "True"
opensearch:
  data: '{"extra-user-roles": "admin", "index": "test-index", "requested-secrets": "[\"username\", \"password\", \"tls\", \"tls-ca\", \"uris\"]"}'
-  endpoints: 10.214.176.107:9200
+  endpoints: 10.95.38.139:9200,10.95.38.212:9200,10.95.38.94:9200
  index: test-index
-  password: BX4QD3GNYAQrFxFXtuXF5wz1ruxmU1iY
+  password: j3JWFnDkoumCxn0CtKZRCmdRMUlYTZFI
  tls-ca: |-
    -----BEGIN CERTIFICATE-----
-    MIIDPzCCAiegAwIB...
    -----END CERTIFICATE-----
    -----BEGIN CERTIFICATE-----
-    MIIDOzCCAiOgAwIB...
    -----END CERTIFICATE-----
  username: opensearch-client_5
  version: 2.14.0
+
```

Save the CA certificate (value of `tls-ca` in the previous response), username, and password, because you will need them in the next section.
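+
+For example, you can store the certificate in a local file named `demo-ca.pem`, which is the file name that the `curl` commands in the next section expect. A minimal sketch; the placeholder stands for the real `tls-ca` value from your own output:
+
+```shell
+cat > demo-ca.pem <<'EOF'
+<paste the full tls-ca value here, including the BEGIN/END CERTIFICATE lines>
+EOF
+```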
@@ -139,9 +153,9 @@ Sending a `GET` request to this `/` endpoint should return some basic informatio

```json
{
-  "name" : "opensearch-0.c6e",
-  "cluster_name" : "opensearch-qjae",
-  "cluster_uuid" : "Hs6XAFVVSkKjUSkuYPKCPA",
+  "name" : "opensearch-2.0f3",
+  "cluster_name" : "opensearch-x3y6",
+  "cluster_uuid" : "yFS58g6hTbS0VxzJi0u7_g",
  "version" : {
    "distribution" : "opensearch",
    "number" : "2.14.0",
@@ -199,20 +213,17 @@ This call should output something like the following:

```json
{
-    "_index": "albums",
-    "_id": "1",
-    "_version": 1,
-    "_seq_no": 0,
-    "_primary_term": 1,
-    "found": true,
-    "_source": {
-        "artist": "Vulfpeck",
-        "genre": [
-            "Funk",
-            "Jazz"
-        ],
-        "title": "Thrill of the Arts"
-    }
+  "_index": "albums",
+  "_id": "1",
+  "_version": 1,
+  "_seq_no": 0,
+  "_primary_term": 1,
+  "found": true,
+  "_source": {
+    "artist": "Vulfpeck",
+    "genre": ["Funk", "Jazz"],
+    "title": "Thrill of the Arts"
+  }
}
```

@@ -238,7 +249,7 @@ curl --cacert demo-ca.pem -XPOST https://username:password@opensearch_node_ip:92
This should return a JSON response with the results of the bulk indexing operation:
```
{
-  "took": 16,
+  "took": 17,
  "errors": false,
  "items": [ ... ]
}
```

@@ -315,38 +326,39 @@ To remove the user used in the previous calls, remove the relation. Removing the
juju remove-relation opensearch data-integrator
```

-if you run `juju status --relations` you will see that the relation has been removed and that the `data-integrator` application is now in a blocked state.
+If you run `juju status --relations`, you will see that the relation has been removed and that the `data-integrator` application is now in a `blocked` state.

```bash
Model Controller Cloud/Region Version SLA Timestamp
-tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:35:12Z
+tutorial opensearch-demo localhost/localhost 3.5.3 unsupported 13:48:08Z

App Version Status Scale Charm Channel Rev Exposed Message
-data-integrator blocked 1 data-integrator latest/edge 43 no Please relate the data-integrator with the desired product
-opensearch blocked 1 opensearch 2/beta 117 no 1 or more 'replica' shards are not assigned, please scale your application up.
+data-integrator blocked 1 data-integrator latest/edge 59 no Please relate the data-integrator with the desired product
+opensearch active 3 opensearch 2/beta 117 no
self-signed-certificates active 1 self-signed-certificates latest/stable 155 no

Unit Workload Agent Machine Public address Ports Message
-data-integrator/0* blocked idle 2 10.214.176.202 Please relate the data-integrator with the desired product
-opensearch/0* active idle 0 10.214.176.107 9200/tcp
-self-signed-certificates/0* active idle 1 10.214.176.116
-
-Machine State Address Inst id Base AZ Message
-0 started 10.214.176.107 juju-b0826b-0 ubuntu@22.04 Running
-1 started 10.214.176.116 juju-b0826b-1 ubuntu@22.04 Running
-2 started 10.214.176.202 juju-b0826b-2 ubuntu@22.04 Running
+data-integrator/0* blocked idle 4 10.95.38.22 Please relate the data-integrator with the desired product
+opensearch/0* active idle 0 10.95.38.94 9200/tcp
+opensearch/1 active idle 1 10.95.38.139 9200/tcp
+opensearch/2 active idle 2 10.95.38.212 9200/tcp
+self-signed-certificates/0* active idle 3 10.95.38.54
+
+Machine State Address Inst id Base AZ Message
+0 started 10.95.38.94 juju-be3883-0 ubuntu@22.04 Running
+1 started 10.95.38.139 juju-be3883-1 ubuntu@22.04 Running
+2 started 10.95.38.212 juju-be3883-2 ubuntu@22.04 Running
+3 started 10.95.38.54 juju-be3883-3 ubuntu@22.04 Running
+4 started 10.95.38.22 juju-be3883-4 ubuntu@22.04 Running

Integration provider Requirer Interface Type Message
data-integrator:data-integrator-peers data-integrator:data-integrator-peers data-integrator-peers peer
opensearch:node-lock-fallback opensearch:node-lock-fallback node_lock_fallback peer
-opensearch:opensearch-client data-integrator:opensearch opensearch_client regular
opensearch:opensearch-peers opensearch:opensearch-peers opensearch_peers peer
opensearch:upgrade-version-a opensearch:upgrade-version-a upgrade peer
self-signed-certificates:certificates opensearch:certificates tls-certificates regular
```

-Do not mind the `blocked` status of the `opensearch` application. We will fix it in a next tutorial.
-
Now try again to connect in the same way as in the previous section:

```shell
@@ -363,7 +375,7 @@ If you wanted to recreate this user all you would need to do is relate the two a

```shell
juju integrate data-integrator opensearch
-juju run-action data-integrator/leader get-credentials
+juju run data-integrator/leader get-credentials
```

You can now connect to the database with this new username and password:
diff --git a/docs/tutorial/t-set-up.md b/docs/tutorial/t-set-up.md
index 73acd6ba1..dcf762155 100644
--- a/docs/tutorial/t-set-up.md
+++ b/docs/tutorial/t-set-up.md
@@ -47,7 +47,7 @@ You can list all LXD containers by executing the command `lxc list`. At this poi
As with LXD, Juju is installed using a snap package:

```shell
-sudo snap install juju --channel 3.4/stable --classic
+sudo snap install juju --channel 3.5/stable --classic
```

Juju already has built-in knowledge of LXD and how it works, so no additional setup or configuration is needed. However, because Juju 3.x is a [strictly confined snap](https://snapcraft.io/docs/classic-confinement) and is not allowed to create a `~/.local/share` directory, we need to create it manually.

From c9edade318c94541fb5eb362242e03b04aac186d Mon Sep 17 00:00:00 2001
From: phvalguima
Date: Fri, 27 Sep 2024 08:12:56 +0200
Subject: [PATCH 2/2] [DPE-5558] Break CA rotation into integration test groups
 (#458)

Currently, we are seeing a lot of timeouts in CA rotation testing.
Splitting the tests between small and large deployments and running them on parallel runners will help reduce the overall duration.
---
 tests/integration/tls/test_ca_rotation.py | 147 ++++++++++------------
 1 file changed, 65 insertions(+), 82 deletions(-)

diff --git a/tests/integration/tls/test_ca_rotation.py b/tests/integration/tls/test_ca_rotation.py
index 19297ff66..d9b398be6 100644
--- a/tests/integration/tls/test_ca_rotation.py
+++ b/tests/integration/tls/test_ca_rotation.py
@@ -37,9 +37,32 @@
 APP_UNITS = {MAIN_APP: 3, FAILOVER_APP: 1, DATA_APP: 1}
 
+SMALL_DEPLOYMENT = "small"
+LARGE_DEPLOYMENT = "large"
+ALL_GROUPS = {
+    deploy_type: pytest.param(
+        deploy_type,
+        id=deploy_type,
+        marks=[
+            pytest.mark.group(deploy_type),
+            pytest.mark.runner(
+                [
+                    "self-hosted",
+                    "linux",
+                    "X64",
+                    "jammy",
+                    "xlarge" if deploy_type == LARGE_DEPLOYMENT else "large",
+                ]
+            ),
+        ],
+    )
+    for deploy_type in [LARGE_DEPLOYMENT, SMALL_DEPLOYMENT]
+}
+ALL_DEPLOYMENTS = list(ALL_GROUPS.values())
 
-@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
-@pytest.mark.group(1)
+
+@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "large"])
+@pytest.mark.group(SMALL_DEPLOYMENT)
 @pytest.mark.abort_on_fail
 @pytest.mark.skip_if_deployed
 async def test_build_and_deploy_active(ops_test: OpsTest) -> None:
@@ -71,69 +94,10 @@ async def test_build_and_deploy_active(ops_test: OpsTest) -> None:
 @pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
-@pytest.mark.group(1)
-@pytest.mark.abort_on_fail
-async def test_rollout_new_ca(ops_test: OpsTest) -> None:
-    """Test that the cluster restarted and functional after processing a new CA certificate"""
-    c_writes = ContinuousWrites(ops_test, APP_NAME)
-    await c_writes.start()
-
-    # trigger a rollout of the new CA by changing the config on TLS Provider side
-    new_config = {"ca-common-name": "NEW_CA"}
-    await ops_test.model.applications[TLS_CERTIFICATES_APP_NAME].set_config(new_config)
-
-    start_count = await c_writes.count()
-
-    await wait_until(
-        ops_test,
-        apps=[APP_NAME],
-        apps_statuses=["active"],
-        units_statuses=["active"],
-        timeout=1800,
-        idle_period=60,
-        wait_for_exact_units=len(UNIT_IDS),
-    )
-
-    # Check if the continuous-writes client works with the new certs as well
-    with open(ContinuousWrites.CERT_PATH, "r") as f:
-        orig_cert = f.read()
-    await c_writes.stop()
-
-    await c_writes.start()  # Forces the Cont. Writes to pick the new cert
-
-    with open(ContinuousWrites.CERT_PATH, "r") as f:
-        new_cert = f.read()
-
-    assert orig_cert != new_cert, "New cert was not picked up"
-    await asyncio.sleep(30)
-    final_count = await c_writes.count()
-    await c_writes.stop()
-    assert final_count > start_count, "Writes have not continued during CA rotation"
-
-    # using the SSL API requires authentication with app-admin cert and key
-    leader_unit_ip = await get_leader_unit_ip(ops_test)
-    url = f"https://{leader_unit_ip}:9200/_plugins/_security/api/ssl/certs"
-    admin_secret = await get_secret_by_label(ops_test, "opensearch:app:app-admin")
-
-    with open("admin.cert", "w") as cert:
-        cert.write(admin_secret["cert"])
-
-    with open("admin.key", "w") as key:
-        key.write(admin_secret["key"])
-
-    response = requests.get(url, cert=("admin.cert", "admin.key"), verify=False)
-    data = response.json()
-    assert new_config["ca-common-name"] in data["http_certificates_list"][0]["issuer_dn"]
-
-
-@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
-@pytest.mark.group(1)
+@pytest.mark.group(LARGE_DEPLOYMENT)
 @pytest.mark.abort_on_fail
 async def test_build_large_deployment(ops_test: OpsTest) -> None:
     """Setup a large deployments cluster."""
-    # remove the existing application
-    await ops_test.model.remove_application(APP_NAME, block_until_done=True)
-
     # deploy new cluster
     my_charm = await ops_test.build_charm(".")
     await asyncio.gather(
@@ -162,6 +126,11 @@ async def test_build_large_deployment(ops_test: OpsTest) -> None:
             series=SERIES,
             config={"cluster_name": CLUSTER_NAME, "init_hold": True, "roles": "data"},
         ),
+        ops_test.model.deploy(
+            TLS_CERTIFICATES_APP_NAME,
+            channel="stable",
+            config={"ca-common-name": "CN_CA"},
+        ),
     )
 
     # integrate TLS to all applications
@@ -188,33 +157,47 @@ async def test_build_large_deployment(ops_test: OpsTest) -> None:
     )
 
 
-@pytest.mark.runner(["self-hosted", "linux", "X64", "jammy", "xlarge"])
-@pytest.mark.group(1)
+@pytest.mark.parametrize("deploy_type", ALL_DEPLOYMENTS)
 @pytest.mark.abort_on_fail
-async def test_rollout_new_ca_large_deployment(ops_test: OpsTest) -> None:
+async def test_rollout_new_ca(ops_test: OpsTest, deploy_type: str) -> None:
-    """Repeat the CA rotation test for the large deployment."""
+    """Test CA rotation on both small and large deployments."""
-    c_writes = ContinuousWrites(ops_test, DATA_APP)
+    if deploy_type == SMALL_DEPLOYMENT:
+        app = APP_NAME
+    else:
+        app = DATA_APP
+    c_writes = ContinuousWrites(ops_test, app)
     await c_writes.start()
 
     # trigger a rollout of the new CA by changing the config on TLS Provider side
-    new_config = {"ca-common-name": "EVEN_NEWER_CA"}
+    new_config = {"ca-common-name": "NEW_CA"}
     await ops_test.model.applications[TLS_CERTIFICATES_APP_NAME].set_config(new_config)
 
     start_count = await c_writes.count()
 
-    await wait_until(
-        ops_test,
-        apps=[MAIN_APP, DATA_APP, FAILOVER_APP],
-        apps_full_statuses={
-            MAIN_APP: {"active": []},
-            DATA_APP: {"active": []},
-            FAILOVER_APP: {"active": []},
-        },
-        units_statuses=["active"],
-        wait_for_exact_units={app: units for app, units in APP_UNITS.items()},
-        timeout=2400,
-        idle_period=IDLE_PERIOD,
-    )
+    if deploy_type == SMALL_DEPLOYMENT:
+        await wait_until(
+            ops_test,
+            apps=[APP_NAME],
+            apps_statuses=["active"],
+            units_statuses=["active"],
+            wait_for_exact_units=len(UNIT_IDS),
+            timeout=2400,
+            idle_period=IDLE_PERIOD,
+        )
+    else:
+        await wait_until(
+            ops_test,
+            apps=[MAIN_APP, DATA_APP, FAILOVER_APP],
+            apps_full_statuses={
+                MAIN_APP: {"active": []},
+                DATA_APP: {"active": []},
+                FAILOVER_APP: {"active": []},
+            },
+            units_statuses=["active"],
+            wait_for_exact_units={app: units for app, units in APP_UNITS.items()},
+            timeout=2400,
+            idle_period=IDLE_PERIOD,
+        )
 
     # Check if the continuous-writes client works with the new certs as well
     with open(ContinuousWrites.CERT_PATH, "r") as f:
@@ -233,9 +216,9 @@ async def test_rollout_new_ca_large_deployment(ops_test: OpsTest) -> None:
     assert final_count > start_count, "Writes have not continued during CA rotation"
 
     # using the SSL API requires authentication with app-admin cert and key
-    leader_unit_ip = await get_leader_unit_ip(ops_test, DATA_APP)
+    leader_unit_ip = await get_leader_unit_ip(ops_test, app)
     url = f"https://{leader_unit_ip}:9200/_plugins/_security/api/ssl/certs"
-    admin_secret = await get_secret_by_label(ops_test, "opensearch-data:app:app-admin")
+    admin_secret = await get_secret_by_label(ops_test, f"{app}:app:app-admin")
 
     with open("admin.cert", "w") as cert:
         cert.write(admin_secret["cert"])