Skip to content

Commit

Permalink
feat(#25): custom databases (#33)
Browse files Browse the repository at this point in the history
* updated database to be an env

* removed git modules add DBT_POSTGRES_HOST

* initial script setup

* began writing script for generating configuration files

* completed building scripts waiting on adding tests

* fixed script generation bug

* removed unused variable

* structuring docker bootstrap

* removed all docker settings

* fixed the docker startup bug in bootstrapper

* about to pull in bug fix

* production ready

* fixed ci build bug

* removed makefile because it's useless

* added default database and version listing

* updated documentation to give instructions on adding databases

* added typescript to the dependencies

* removed the wrong tsc compiler

* escaped characters in .env

* fixed dbt transformation issue

* added postgres table to env templates

* removed postgres table

* added postgres table for dbt

* added comments

* Update README.md

Co-authored-by: Lore <[email protected]>

* Update README.md

Co-authored-by: Lore <[email protected]>

* Update env.template

Co-authored-by: Lore <[email protected]>

* Update README.md

Co-authored-by: Lore <[email protected]>

---------

Co-authored-by: Lore <[email protected]>
  • Loading branch information
samuelimoisili and lorerod authored Jul 10, 2023
1 parent e563c35 commit cd10db0
Show file tree
Hide file tree
Showing 20 changed files with 4,484 additions and 83 deletions.
38 changes: 38 additions & 0 deletions .eslintrc.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
{
    "env": {
        "es2021": true,
        "node": true
    },
    "extends": [
        "eslint:recommended",
        "plugin:@typescript-eslint/recommended"
    ],
    "overrides": [
    ],
    "parser": "@typescript-eslint/parser",
    "parserOptions": {
        "ecmaVersion": "latest",
        "sourceType": "module"
    },
    "plugins": [
        "@typescript-eslint"
    ],
    "rules": {
        "indent": [
            "error",
            2
        ],
        "linebreak-style": [
            "error",
            "unix"
        ],
        "quotes": [
            "error",
            "double"
        ],
        "semi": [
            "error",
            "always"
        ]
    }
}
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
.env
node_modules
node_modules
25 changes: 0 additions & 25 deletions Makefile

This file was deleted.

20 changes: 16 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,15 +13,21 @@ CHT sync has been specifically designed to work in both local development enviro
### Prerequisites

- `Docker`
- `Make`

### Local Setup

The local environment setup involves starting Logstash, PostgreSQL, PostgREST, DBT, and CouchDB. This configuration facilitates data synchronization, transformation, and storage for local development and testing. Fake data is generated for CouchDB. The required environment variables can be found in the `env.template` file, which should be customized accordingly for the specific deployment needs.

1. Provide the databases you want to sync in the `.env` file:

```
COUCHDB_DBS=<dbs-to-sync> # space separated list of databases you want to sync, e.g. "medic medic_sentinel"
```

```sh
# starts: logstash, superset, postgres, postgrest, data-generator, couchdb and dbt
make local
npm install
npm run local
```

### Gamma Setup
Expand All @@ -34,6 +40,8 @@ The gamma environment setup involves starting Logstash, PostgreSQL, PostgREST, a
# project wide: optional
COMPOSE_PROJECT_NAME=pipeline
COUCHDB_DBS=<dbs-to-sync> # space separated list of databases you want to sync e.g "medic medic_sentinel"
# couchdb and logstash: required environment variables for 'gamma', 'prod' and 'local'
COUCHDB_PASSWORD=<your-couchdb-password>
COUCHDB_SECURE=false
Expand All @@ -42,7 +50,8 @@ COUCHDB_SECURE=false
1. Start docker containers
```sh
# starts: logstash, superset, postgres, postgrest, and dbt
make gamma
npm install
npm run gamma
```

### Production Setup
Expand All @@ -55,6 +64,8 @@ The production environment setup involves starting Logstash, PostgREST, and DBT.
# project wide: optional
COMPOSE_PROJECT_NAME=pipeline
COUCHDB_DBS=<dbs-to-sync> # space separated list of databases you want to sync e.g "medic medic_sentinel"
# postgrest and postgresql: required environment variables for 'gamma', 'prod' and 'local'
POSTGRES_USER=<your-postgres-user>
POSTGRES_PASSWORD=<your-postgres-password>
Expand Down Expand Up @@ -83,7 +94,8 @@ docker-compose -f docker-compose.postgres.yml -f docker-compose.yml up postgres
1. Start docker containers
```sh
# starts: logstash, superset, postgrest and dbt
make prod
npm install
npm run prod
```

## Environment Variables
Expand Down
31 changes: 0 additions & 31 deletions data-generator.py

This file was deleted.

7 changes: 7 additions & 0 deletions data/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Image for the CouchDB bootstrap step: it only needs curl to issue the
# PUT requests made by start.sh.
FROM curlimages/curl

WORKDIR /app

# start.sh sends the PUT requests for _users and each name in COUCHDB_DBS.
COPY ./start.sh ./start.sh

# The script is passed to /bin/sh explicitly, so the interpreter is chosen
# here rather than by the script's shebang line.
ENTRYPOINT ["/bin/sh", "/app/start.sh"]
33 changes: 33 additions & 0 deletions data/data-generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
from urllib import request
import os
import time
import json
import base64
import time
import glob

# Environment variables this script cannot run without.
REQUIRED_ENV_VARS = (
    "COUCHDB_USER",
    "COUCHDB_PASSWORD",
    "COUCHDB_URL",
    "COUCHDB_DBS",
    "DOCS_PATH",
)

# Pause after every upload attempt, in seconds.
UPLOAD_DELAY_SECONDS = 2


def basic_auth_header(user, password):
    """Return the value of an HTTP Basic ``Authorization`` header."""
    token = base64.b64encode(f"{user}:{password}".encode("utf-8"))
    return "Basic %s" % token.decode("ascii")


def database_names(raw):
    """Split the whitespace separated ``COUCHDB_DBS`` value into names.

    ``str.split()`` without an argument is used so repeated, leading or
    trailing spaces never produce an empty database name.
    """
    return raw.split()


def join_url(base, segment):
    """Join *segment* onto *base* with exactly one ``/`` between them.

    URLs are joined explicitly instead of with ``os.path.join``, which uses
    the platform path separator and discards *base* when *segment* starts
    with a slash.
    """
    return f"{base.rstrip('/')}/{segment}"


def build_put_request(db_url, doc, auth_header):
    """Build the PUT request that stores *doc* under its ``_id`` in *db_url*.

    Raises ``KeyError`` when *doc* has no ``_id`` field.
    """
    req = request.Request(
        join_url(db_url, doc["_id"]),
        data=json.dumps(doc).encode("utf-8"),
        method="PUT",
    )
    req.add_header("Authorization", auth_header)
    # Without this urllib labels the body as form-urlencoded.
    req.add_header("Content-Type", "application/json")
    return req


def upload_documents(db_url, doc_paths, auth_header, delay=UPLOAD_DELAY_SECONDS):
    """PUT every JSON document in *doc_paths* into the database at *db_url*.

    Upload failures are printed and skipped so one rejected document does
    not stop the remaining uploads.
    """
    for doc_path in doc_paths:
        with open(doc_path, "rb") as doc_file:
            doc = json.load(doc_file)

        req = build_put_request(db_url, doc, auth_header)
        try:
            # The context manager closes the HTTP response once it is printed.
            with request.urlopen(req) as res:
                print(doc_path, res.info())
        except Exception as err:  # best-effort seeding: report and carry on
            print(err)
        time.sleep(delay)


def main():
    """Upload every ``*.json`` file in DOCS_PATH to each database in COUCHDB_DBS."""
    missing = [name for name in REQUIRED_ENV_VARS if not os.getenv(name)]
    if missing:
        raise SystemExit(
            "Missing required environment variables: " + ", ".join(missing)
        )

    auth_header = basic_auth_header(
        os.environ["COUCHDB_USER"], os.environ["COUCHDB_PASSWORD"]
    )
    doc_pattern = os.path.join(os.environ["DOCS_PATH"], "*.json")

    for db in database_names(os.environ["COUCHDB_DBS"]):
        db_url = join_url(os.environ["COUCHDB_URL"], db)
        upload_documents(db_url, glob.glob(doc_pattern), auth_header)


if __name__ == "__main__":
    main()
7 changes: 7 additions & 0 deletions data/start.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
#!/bin/sh
# Bootstraps CouchDB: sends a PUT for the _users database and for every
# database named in COUCHDB_DBS (space separated).
#
# Reads COUCHDB_USER, COUCHDB_PASSWORD and COUCHDB_DBS from the environment.

COUCHDB_BASE_URL="http://couchdb:5984"

# Credentials are passed with -u instead of being embedded in the URL, so
# characters such as "@" or "/" in the password cannot corrupt the URL.
curl -X PUT -u "${COUCHDB_USER}:${COUCHDB_PASSWORD}" "${COUCHDB_BASE_URL}/_users"

# COUCHDB_DBS is deliberately unquoted: word splitting yields one name per
# iteration.
for DB in $COUCHDB_DBS; do
  curl -X PUT -u "${COUCHDB_USER}:${COUCHDB_PASSWORD}" "${COUCHDB_BASE_URL}/${DB}"
done
21 changes: 9 additions & 12 deletions docker-compose.couchdb.yml
Original file line number Diff line number Diff line change
Expand Up @@ -12,31 +12,28 @@ services:
- COUCHDB_USER=${COUCHDB_USER}
- COUCHDB_PASSWORD=${COUCHDB_PASSWORD}

_couchdb_bootstrap:
image: curlimages/curl
bootstrap:
build: ./data/
depends_on:
- couchdb
command: -X PUT http://${COUCHDB_USER}:${COUCHDB_PASSWORD}@couchdb:5984/${COUCHDB_DB}
restart: on-failure:5

_couchdb_bootstrap_users:
image: curlimages/curl
depends_on:
- couchdb
command: -X PUT http://${COUCHDB_USER}:${COUCHDB_PASSWORD}@couchdb:5984/_users
restart: on-failure:5
environment:
- COUCHDB_USER=${COUCHDB_USER}
- COUCHDB_PASSWORD=${COUCHDB_PASSWORD}
- COUCHDB_DBS=${COUCHDB_DBS}

generator:
image: python:3
depends_on:
- couchdb
command: python3 /code/data-generator.py
environment:
- URL=http://couchdb:5984/${COUCHDB_DB}/
- COUCHDB_URL=http://couchdb:5984/
- DOCS_PATH=/data/
- COUCHDB_USER=${COUCHDB_USER}
- COUCHDB_PASSWORD=${COUCHDB_PASSWORD}
- COUCHDB_DBS=${COUCHDB_DBS}
volumes:
- ./data-generator.py:/code/data-generator.py:z
- ./data/data-generator.py:/code/data-generator.py:z
- ./data/json_docs/:/data/:z
restart: always
2 changes: 1 addition & 1 deletion docker-compose.postgres.yml
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ services:
- DBT_POSTGRES_PASSWORD=${DBT_POSTGRES_PASSWORD}
- DBT_POSTGRES_SCHEMA=${DBT_POSTGRES_SCHEMA}
- POSTGRES_DB=${POSTGRES_DB}
- POSTGRES_TABLES=${POSTGRES_TABLE} ${POSTGRES_TABLE}_sentinel
- POSTGRES_TABLES=${COUCHDB_DBS}
- POSTGRES_SCHEMA=${POSTGRES_SCHEMA}

postgrest:
Expand Down
3 changes: 1 addition & 2 deletions docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,10 @@ services:
environment:
- COUCHDB_USER=${COUCHDB_USER}
- COUCHDB_PASSWORD=${COUCHDB_PASSWORD}
- COUCHDB_DB=${COUCHDB_DB}
- COUCHDB_HOST=${COUCHDB_HOST}
- COUCHDB_PORT=${COUCHDB_PORT}
- COUCHDB_SEQ=/tmp/couchdb/sequence_path.txt
- COUCHDB_SECURE=${COUCHDB_SECURE:-true}
- POSTGRES_TABLE=${POSTGRES_TABLE}
- HTTP_ENDPOINT=postgrest:3000

superset:
Expand All @@ -37,3 +35,4 @@ services:
- POSTGRES_SCHEMA=${DBT_POSTGRES_SCHEMA}
- ROOT_POSTGRES_SCHEMA=${POSTGRES_SCHEMA}
- DATAEMON_INITAL_PACKAGE=${CHT_PIPELINE_BRANCH_URL}

6 changes: 3 additions & 3 deletions env.template
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,20 @@ COMPOSE_PROJECT_NAME=pipeline
POSTGRES_USER=root
POSTGRES_PASSWORD=supercoolpassword
POSTGRES_DB=data
POSTGRES_TABLE=couchdb
POSTGRES_TABLE=couchdb # for dbt use only
POSTGRES_SCHEMA=v1

# dbt: required environment variables for 'gamma', 'prod' and 'local'
DBT_POSTGRES_USER=dbt_user
DBT_POSTGRES_PASSWORD=supercoolpassword
DBT_POSTGRES_SCHEMA=dbt
DBT_POSTGRES_HOST=postgres # Your postgres instance IP or endpoint in "prod".
CHT_PIPELINE_BRANCH_URL=https://github.com/medic/cht-pipeline.git#main
CHT_PIPELINE_BRANCH_URL="https://github.com/medic/cht-pipeline.git#main"

# couchdb and logstash: required environment variables for 'gamma', 'prod' and 'local'
COUCHDB_USER=test
COUCHDB_PASSWORD=test
COUCHDB_DB=medic
COUCHDB_DBS="couchdb couchdb_sentinel" # space separated list of databases you want to sync e.g "medic medic_sentinel"
COUCHDB_HOST=couchdb
COUCHDB_PORT=5984
COUCHDB_SECURE=false
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@

input {
couchdb_changes {
always_reconnect => true
db => "${COUCHDB_DB}"
db => "couchdb"
host => "${COUCHDB_HOST}"
username => "${COUCHDB_USER}"
password => "${COUCHDB_PASSWORD}"
Expand All @@ -13,7 +14,7 @@ input {
}
}

filter{
filter {
json{
source => "message"
}
Expand All @@ -28,6 +29,6 @@ output {
format => "json"
http_method => "post"
ignorable_codes => 409
url => "http://${HTTP_ENDPOINT}/${POSTGRES_TABLE}"
url => "http://${HTTP_ENDPOINT}/couchdb"
}
}
}
34 changes: 34 additions & 0 deletions logstash/pipeline/couchdb_sentinel.conf
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@

# Pipeline for the "couchdb_sentinel" database: reads it through the
# couchdb_changes input and POSTs each event as JSON to
# http://${HTTP_ENDPOINT}/couchdb_sentinel.
input {
  couchdb_changes {
    always_reconnect => true
    db => "couchdb_sentinel"
    host => "${COUCHDB_HOST}"
    username => "${COUCHDB_USER}"
    password => "${COUCHDB_PASSWORD}"
    keep_id => true
    keep_revision => true
    secure => "${COUCHDB_SECURE}"
    port => "${COUCHDB_PORT}"
    # The "couchdb" pipeline also stores its position at ${COUCHDB_SEQ}.
    # Suffix the path with the database name so the two inputs do not
    # overwrite each other's sequence file.
    sequence_path => "${COUCHDB_SEQ}.couchdb_sentinel"
  }
}

filter {
  json {
    source => "message"
  }
  # Copy the document id and revision from the nested doc to top-level fields.
  mutate {
    add_field => { "_id" => "%{[doc][_id]}" }
    add_field => { "_rev" => "%{[doc][_rev]}" }
  }
}

output {
  http {
    format => "json"
    http_method => "post"
    # 409 is listed as ignorable; presumably a conflict for a row that is
    # already stored (confirm against the receiving endpoint).
    ignorable_codes => 409
    url => "http://${HTTP_ENDPOINT}/couchdb_sentinel"
  }
}
Loading

0 comments on commit cd10db0

Please sign in to comment.