From 099cdcca83646d3c7d57207b7608f833434684d7 Mon Sep 17 00:00:00 2001 From: Njuguna Ndung'u Date: Wed, 17 Jan 2024 13:30:26 +0300 Subject: [PATCH] feat: remove superset container and update Readme --- .github/workflows/test.yml | 2 -- README.md | 19 ++++++------------- docker-compose.yml | 9 --------- env.template | 4 ---- scripts/config.ts | 5 ----- superset/Dockerfile | 20 -------------------- tests/e2e-test.spec.ts | 14 +------------- 7 files changed, 7 insertions(+), 66 deletions(-) delete mode 100644 superset/Dockerfile diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 85a01ca..d510ae5 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -23,8 +23,6 @@ jobs: COUCHDB_HOST: "couchdb" COUCHDB_PORT: 5984 COUCHDB_SECURE: false - SUPERSET_PASSWORD: "password" - SUPERSET_ADMIN_EMAIL: "user@superset.com" steps: - uses: actions/checkout@v2 - uses: actions/setup-node@v2 diff --git a/README.md b/README.md index 4e92c79..f9d085b 100644 --- a/README.md +++ b/README.md @@ -1,6 +1,6 @@ # CHT Sync -CHT Sync is a bundled solution consisting of [Logstash](https://www.elastic.co/logstash/), [CouchDB](https://couchdb.apache.org/), [PostgREST](https://postgrest.org/en/stable/), [DBT](https://www.getdbt.com/), and [Superset](https://superset.apache.org/). Its purpose is to synchronize data from CouchDB to PostgreSQL, facilitating analytics on a Superset dashboard. This synchronization occurs in real-time, ensuring that the data displayed on the dashboard is always up-to-date. CHT Sync copies data from CouchDB to PostgreSQL, enabling seamless integration and timely analytics. +CHT Sync is a bundled solution consisting of [Logstash](https://www.elastic.co/logstash/), [CouchDB](https://couchdb.apache.org/), [PostgREST](https://postgrest.org/en/stable/), and [DBT](https://www.getdbt.com/). Its purpose is to synchronize data from CouchDB to PostgreSQL, facilitating analytics on a dashboard. 
This synchronization occurs in real-time, ensuring that the data displayed on the dashboard is always up-to-date. CHT Sync copies data from CouchDB to PostgreSQL, enabling seamless integration and timely analytics. **WARNING!** The schema differs from couch2pg. See [`./postgres/init-dbt-resources.sh`](./postgres/init-dbt-resources.sh). @@ -16,9 +16,7 @@ At the core of the CHT Sync are Logstash, PostgREST, and DBT. Logstash plays a k Once the data is synchronized and stored in PostgreSQL, it undergoes transformation using predefined DBT models from the [cht-pipeline](https://github.com/medic/cht-pipeline). DBT plays a crucial role in preparing the data in a format that is optimized for querying and analysis, ensuring the data is readily available for analytics purposes. -CHT Sync also leverages Superset, an analytics and dashboarding platform, to provide intuitive visualizations and interactive analytics on the synchronized data stored in PostgreSQL. Superset empowers users to explore and gain valuable insights from the data, enabling informed decision-making and data-driven actions. - -The overall architecture of CHT-sync is driven by the seamless integration of these technologies. CouchDB serves as the source database, containing the original data to be synchronized. Logstash, PostgREST, and DBT facilitate the data flow from CouchDB to PostgreSQL, transforming it into a queriable format. PostgreSQL acts as the centralized repository for the synchronized and transformed data, while Superset provides the interface for users to explore and visualize the analytics. +The overall architecture of CHT-sync is driven by the seamless integration of these technologies. CouchDB serves as the source database, containing the original data to be synchronized. Logstash, PostgREST, and DBT facilitate the data flow from CouchDB to PostgreSQL, transforming it into a queryable format. PostgreSQL acts as the centralized repository for the synchronized and transformed data. 
## Getting Started @@ -34,7 +32,6 @@ There are four environment variable groups in the `.env.template` file. To succe 1. Postgresql and Postgres: Are used to establish the Postgres database to synchronize CouchDB data. They also define the schema and table names to store the CouchDB data. The main objective is to define the environment where the raw CouchDB data will be copied. 2. DBT: These environment variables are exclusive to the DBT configuration. The main objective is to define the environment where the tables and views for the models defined in `CHT_PIPELINE_BRANCH_URL` will be created. It is important to separate this environment from the previous group. `DBT_POSTGRES_USER` and `DBT_POSTGRES_SCHEMA` must be different from `POSTGRES_USER` and `POSTGRES_SCHEMA`. `DBT_POSTGRES_HOST` has to be the Postgres instance created with the environment variables set in the first group. 3. The following environment variables define the CouchDB instance we want to sync with. With `COUCHDB_DBS`, we can specify a list of databases to sync. -4. Superset: These environment variables are exclusive to the Superset configuration. ### Local Setup @@ -49,7 +46,7 @@ COUCHDB_DBS= # space separated list of databases you want to sync e 2. Install the dependencies and run the Docker containers locally: ```sh -# starts: logstash, superset, postgres, postgrest, data-generator, couchdb and dbt +# starts: logstash, postgres, postgrest, data-generator, couchdb and dbt npm install npm run local ``` @@ -82,10 +79,6 @@ COUCHDB_DBS= # space separated list of databases you want to sync e COUCHDB_HOST= COUCHDB_PORT= COUCHDB_SECURE=false - -# superset: required environment variables for 'gamma', 'prod' and 'local' -SUPERSET_PASSWORD= -SUPERSET_ADMIN_EMAIL= ``` If `CHT_PIPELINE_BRANCH_URL` is pointing to a private repo then you need to provide an access token in the url i.e. `https://@github.com/medic/cht-pipeline.git#main`. In this example you will replace `` with an access token from Github. 
Instruction on how to generate one can be found [here](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens). @@ -93,7 +86,7 @@ If `CHT_PIPELINE_BRANCH_URL` is pointing to a private repo then you need to prov 2. Install the dependencies and run the Docker containers locally: ```sh -# starts: logstash, superset, postgres, postgrest, data-generator, couchdb and dbt +# starts: logstash, postgres, postgrest, data-generator, couchdb and dbt npm install npm run local ``` @@ -125,7 +118,7 @@ COUCHDB_SECURE=false 2. Install the dependencies and start the Docker containers: ```sh -# starts: logstash, superset, postgres, postgrest, and dbt +# starts: logstash, postgres, postgrest, and dbt npm install npm run gamma ``` @@ -169,7 +162,7 @@ docker-compose -f docker-compose.postgres.yml -f docker-compose.yml up postgres 3. Install the dependencies and start the Docker containers: ```sh -# starts: logstash, superset, postgrest and dbt +# starts: logstash, postgrest and dbt npm install npm run prod ``` diff --git a/docker-compose.yml b/docker-compose.yml index 756c23b..66f42be 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,15 +14,6 @@ services: - COUCHDB_SECURE=${COUCHDB_SECURE:-true} - HTTP_ENDPOINT=postgrest:3000 - superset: - build: - context: ./superset/ - args: - SUPERSET_PASSWORD: ${SUPERSET_PASSWORD:-password} - SUPERSET_ADMIN_EMAIL: ${SUPERSET_ADMIN_EMAIL:-user@superset.com} - ports: - - 8080:8088 - dbt: platform: linux/amd64 image: medicmobile/dataemon:latest diff --git a/env.template b/env.template index d2f364e..e6e9807 100644 --- a/env.template +++ b/env.template @@ -22,7 +22,3 @@ COUCHDB_DBS="couchdb couchdb_sentinel" # space separated list of databases you w COUCHDB_HOST=couchdb COUCHDB_PORT=5984 COUCHDB_SECURE=false - -# superset: required environment variables for 'gamma', 'prod' and 'local' -SUPERSET_PASSWORD=password -SUPERSET_ADMIN_EMAIL=user@superset.com diff --git 
a/scripts/config.ts b/scripts/config.ts index ab09a3e..27adfd3 100644 --- a/scripts/config.ts +++ b/scripts/config.ts @@ -11,11 +11,6 @@ export const POSTGRES = { schema: process.env.POSTGRES_SCHEMA }; -export const SUPERSET = { - username: process.env.SUPERSET_ADMIN_EMAIL || 'user@superset.com', - password: process.env.SUPERSET_PASSWORD || 'password', -}; - export const DBT_POSTGRES = { schema: process.env.DBT_POSTGRES_SCHEMA || 'dbt' } diff --git a/superset/Dockerfile b/superset/Dockerfile deleted file mode 100644 index 844e7b7..0000000 --- a/superset/Dockerfile +++ /dev/null @@ -1,20 +0,0 @@ -FROM apache/superset:2.0.0 - -USER root - -ARG SUPERSET_PASSWORD=admin -ARG SUPERSET_ADMIN_EMAIL=admin@superset.com - -RUN pip install psycopg2-binary - -USER superset - -RUN superset superset fab create-admin \ - --username admin \ - --firstname Superset \ - --lastname Admin \ - --email $SUPERSET_ADMIN_EMAIL \ - --password $SUPERSET_PASSWORD - -RUN superset superset db upgrade -RUN superset superset init diff --git a/tests/e2e-test.spec.ts b/tests/e2e-test.spec.ts index bec58d1..f7a4dae 100644 --- a/tests/e2e-test.spec.ts +++ b/tests/e2e-test.spec.ts @@ -1,7 +1,7 @@ import { Client } from "ts-postgres"; import { rootConnect } from "./postgres-utils"; import request from 'supertest'; -import { POSTGRES, SUPERSET, DBT_POSTGRES } from "../scripts/config"; +import { POSTGRES, DBT_POSTGRES } from "../scripts/config"; describe("Main workflow Test Suite", () => { let client: Client; @@ -20,16 +20,4 @@ describe("Main workflow Test Suite", () => { let personTableResult = await client.query("SELECT * FROM " + DBT_POSTGRES.schema + ".person"); expect(personTableResult.rows.length).toBeGreaterThan(0); }); - - it("should be able to login to superset dashboard", async () => { - const supersetDashboardResponse = await request('http://localhost:8080') - .post('/api/v1/security/login') - .send({ - password: SUPERSET.password, - provider: "db", - refresh: true, - username: SUPERSET.username 
- }); - expect(supersetDashboardResponse.status).toBe(200); - }); });