Skip to content

Datalake

Claiton edited this page Nov 10, 2024 · 44 revisions

Base de dados e domínios:

Passos recriação datalake

DATALAKE_DBNAME='dl06t_main'
DATALAKE_DOCKER='pgrestDL06'

PROD_DB='dl05s_main'
DUMP_FOLDER="${HOME}/dump_production"

# 00. Atualizar repositórios

# Define project mappings
declare -A projects=(
    ["dg"]="digital-guard"
    ["a4a"]="AddressForAll"
    ["osmc"]="osm-codes"
)

# List of repositories to update
repositories=(
    a4a/WS
    a4a/pg_pubLib-v1
    dg/licenses
    dg/preserv
    dg/preserv-AR
    dg/preserv-BO
    dg/preserv-BR
    dg/preserv-CL
    dg/preserv-CM
    dg/preserv-CO
    dg/preserv-EC
    dg/preserv-PE
    dg/preserv-PY
    dg/preserv-SR
    dg/preserv-UY
    dg/preserv-VE
    dg/preserv-MX
    osmc/BR_new
    osmc/CO_new
    osmc/CM
    osmc/EC_new
    osmc/GGeohash
    osmc/UY_new
    osmc/WS
    osmc/gridMap-draftPages
    osmc/NaturalCodes
)

# Base directory for repositories
base_dir="/var/gits"

# Loop through each repository
for repo in "${repositories[@]}"; do
    prefix=${repo%%/*}
    project=${projects[$prefix]}
    
    if [ -z "$project" ]; then
        echo "Unknown project prefix: $prefix. Skipping $repo."
        continue
    fi

    repo_dir="${base_dir}/_${repo}"
    echo "Processing ${repo_dir} for project ${project}"

    if [ -d "$repo_dir" ]; then
        # Update existing repository
        pushd "$repo_dir" > /dev/null
        git remote set-url origin [email protected]:${project}/${repo##*/}.git
        git pull
        popd > /dev/null
    else
        # Clone new repository
        mkdir -p "$repo_dir"
        git clone [email protected]:${project}/${repo##*/}.git "$repo_dir"
    fi
done

# 01. dump da produção
mkdir ${DUMP_FOLDER}
pg_dump --disable-triggers --column-inserts --data-only -t optim.jurisdiction    postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction.sql
pg_dump --column-inserts --data-only -t optim.jurisdiction_geom                  postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction_geom.sql
pg_dump --column-inserts --data-only -t optim.jurisdiction_eez                   postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction_eez.sql
pg_dump --column-inserts --data-only -t optim.donated_packcomponent              postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_donated_packcomponent.sql
pg_dump --column-inserts --data-only -t optim.donated_packcomponent_not_approved postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_donated_packcomponent_not_approved.sql
pg_dump --column-inserts --data-only -t optim.donated_packcomponent_cloudcontrol postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_donated_packcomponent_cloudcontrol.sql
pg_dump --column-inserts --data-only -t optim.consolidated_data_pre              postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_consolidated_data_pre.sql
pg_dump --column-inserts --data-only -t optim.consolidated_data                  postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_consolidated_data.sql

# 02. Parar docker
cd /var/gits/_a4a/WS/src/
sudo docker-compose stop ${DATALAKE_DOCKER}

# 03. Drop/create do datalake
cd /var/gits/_dg/preserv/src
make ini_datalake pg_datalake=${DATALAKE_DBNAME}
# Ignorar o erro:
# psql postgres://postgres@localhost/dl06t_main < /var/gits/_dg/preserv/src/optim-download-ini.sql
# ERROR:  null value in column "id" of relation "donor" violates not-null constraint

# 04. Carregar ggeohash
cd /var/gits/_osmc/GGeohash/src
make all pg_datalake=${DATALAKE_DBNAME}

# 05. Carregar dump de jurisdiction (FUTURO: carregar de stable)
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_jurisdiction.sql

# 06. Carregar tabelas de geometrias (FUTURO: carregar de stable)
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_jurisdiction_geom.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_jurisdiction_eez.sql

# 07. Carregar abreviações
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_ref    FROM '/var/gits/_osmc/BR_new/data/synonym_ref.csv' WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/BR_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/CM/data/synonym.csv'         WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/CO_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/EC_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/UY_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"

# 08. Carregar dados de donor e donatePack, corrigindo erro reportado no item 03.
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_dg/preserv/src/optim-download-ini.sql

# 09. Carregar dados de componentes de pacotes (que já foram publicados e disponiveis para publicar)
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_donated_packcomponent.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_donated_packcomponent_not_approved.sql

# 10. Carregar links dos arquivos filtrados
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_donated_packcomponent_cloudcontrol.sql

# 11. Carregar dump de dados consolidados
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_consolidated_data.sql

# 12. Coberturas nacionais (FUTURO: ler do conf.yaml):
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.L0cover_upsert('BR');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.L0cover_upsert('CM');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.L0cover_upsert('CO');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.L0cover_upsert('EC');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.L0cover_upsert('UY');"

# 13. Coberturas municipais:
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/BR_new/data/citycover.csv',        'tmp_orig.br_citycover',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/BR_new/data/citycover_dust.csv',   'tmp_orig.br_citycover_dust',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/BR_new/data/citycover_dustDEL.csv','tmp_orig.br_citycover_dustdel',',');"

psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CM/data/citycover.csv',        'tmp_orig.cm_citycover',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CM/data/citycover_dust.csv',   'tmp_orig.cm_citycover_dust',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CM/data/citycover_dustDEL.csv','tmp_orig.cm_citycover_dustdel',',');"

psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CO_new/data/coverage.csv','tmp_orig.coverageco',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/UY_new/data/coverage.csv','tmp_orig.coverageuy',',');"

psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.br_citycover;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.cm_citycover;"

psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coverageco;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coverageuy;"

# 14. Carregar api
## osm-codes/WS
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/lib-geoUriExt.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/step01geoUriExt-api.sql

## Recriar view materializada, após carregar tabelas de jurisdições e abreviações
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_dg/preserv/src/optim-step4-api.sql

## osm-codes/WS
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/step01ggeohash-api.sql

# 15. Rodar testes
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/GGeohash/src/step90assert-p1.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/assert.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/CO_new/src/assert/gridLib.sql

# 16. Subir docker
cd /var/gits/_a4a/WS/src/
sudo docker-compose  up -d  ${DATALAKE_DOCKER}

### Quando necessário:
# 17. Atualizar site afa.codes
rm -rf /var/www/osm.codes
cp -rf /var/gits/_osmc/gridMap-draftPages /var/www/osm.codes

cd /var/gits/_dg/preserv/src/
sudo bash fixaPermissoes.sh /var/www/osm.codes

#18. Atualizar nginx