Skip to content

Datalake

Claiton edited this page Jun 27, 2024 · 44 revisions

Correspondência entre base de dados e domínios:

Passos para recriar um datalake

DATALAKE_DBNAME='dl06t_main'
DATALAKE_DOCKER='pgrestDL06'

PROD_DB='dl05s_main'
DUMP_FOLDER="${HOME}/dump_production"

# 00. git pull em todos os repositórios e atualizar nginx

# 01. dump da produção
mkdir ${DUMP_FOLDER}
pg_dump --disable-triggers --column-inserts --data-only -t optim.jurisdiction    postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction.sql
pg_dump --column-inserts --data-only -t optim.jurisdiction_geom                  postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction_geom.sql
pg_dump --column-inserts --data-only -t optim.jurisdiction_eez                   postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction_eez.sql
pg_dump --column-inserts --data-only -t optim.donated_packcomponent              postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_donated_packcomponent.sql
pg_dump --column-inserts --data-only -t optim.donated_packcomponent_not_approved postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_donated_packcomponent_not_approved.sql
pg_dump --column-inserts --data-only -t optim.donated_packcomponent_cloudcontrol postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_donated_packcomponent_cloudcontrol.sql
pg_dump --column-inserts --data-only -t optim.consolidated_data_pre              postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_consolidated_data_pre.sql
pg_dump --column-inserts --data-only -t optim.consolidated_data                  postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_consolidated_data.sql
#pg_dump --column-inserts --data-only -t optim.jurisdiction_abbrev_ref            postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction_abbrev_ref.sql
#pg_dump --column-inserts --data-only -t optim.jurisdiction_abbrev_option         postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_jurisdiction_abbrev_option.sql
#pg_dump --column-inserts --data-only -t osmc.coverage                            postgres://postgres@localhost/${PROD_DB} > ${DUMP_FOLDER}/pg_dump_coverage.sql

# 02. Parar docker
cd /var/gits/_a4a/WS/src/
sudo docker-compose stop ${DATALAKE_DOCKER}

# 03. Drop/create do datalake
cd /var/gits/_dg/preserv/src
make ini_datalake pg_datalake=${DATALAKE_DBNAME}
# Ignorar o erro:
# psql postgres://postgres@localhost/dl06t_main < /var/gits/_dg/preserv/src/optim-download-ini.sql
# ERROR:  null value in column "id" of relation "donor" violates not-null constraint

# 04. Carregar ggeohash
cd /var/gits/_osmc/GGeohash/src
make all pg_datalake=${DATALAKE_DBNAME}

# 05. Carregar dump de jurisdiction (FUTURO: carregar de stable)
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_jurisdiction.sql

# 06. Carregar tabelas de geometrias (FUTURO: carregar de stable)
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_jurisdiction_geom.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_jurisdiction_eez.sql

# 07. Carregar abreviações
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_ref    FROM '/var/gits/_osmc/BR_new/data/synonym_ref.csv' WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/BR_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/CM/data/synonym.csv'         WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/CO_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"
#psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/EC_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c "COPY optim.jurisdiction_abbrev_option FROM '/var/gits/_osmc/UY_new/data/synonym.csv'     WITH DELIMITER ',' CSV HEADER;"

# 08. Carregar dados de donor e donatePack, corrigindo erro reportado no item 03.
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_dg/preserv/src/optim-download-ini.sql

# 09. Carregar dados de componentes de pacotes (que já foram publicados e disponiveis para publicar)
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_donated_packcomponent.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_donated_packcomponent_not_approved.sql

# 10. Carregar links dos arquivos filtrados
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_donated_packcomponent_cloudcontrol.sql

# 11. Carregar dump de dados consolidados
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < ${DUMP_FOLDER}/pg_dump_consolidated_data.sql

# 12. Coberturas nacionais (FUTURO: ler do conf.yaml):
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.L0cover_upsert('BR'); SELECT osmc.L0cover_upsert('CM'); SELECT osmc.L0cover_upsert('CO'); SELECT osmc.L0cover_upsert('EC'); SELECT osmc.L0cover_upsert('UY');"

# 13. Coberturas municipais:
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/BR_new/data/coverage.csv','tmp_orig.coveragebr',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CM/data/coverage.csv',    'tmp_orig.coveragecm',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CO_new/data/coverage.csv','tmp_orig.coverageco',',');"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/UY_new/data/coverage.csv','tmp_orig.coverageuy',',');"

psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3_161c(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coveragebr;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3(     isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coveragecm;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3(     isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coverageco;"
psql postgres://postgres@localhost/${DATALAKE_DBNAME} -c"SELECT osmc.update_coverage_isolevel3_161c(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coverageuy;"

# 14. Carregar api
## osm-codes/WS
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/lib-geoUriExt.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/step01geoUriExt-api.sql

## Recriar view materializada, após carregar tabelas de jurisdições e abreviações
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_dg/preserv/src/optim-step4-api.sql

## osm-codes/WS
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/step01ggeohash-api.sql

# 15. Rodar testes
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/GGeohash/src/step90assert-p1.sql
psql postgres://postgres@localhost/${DATALAKE_DBNAME} < /var/gits/_osmc/WS/src/assert.sql

# 16. Subir docker
cd /var/gits/_a4a/WS/src/
sudo docker-compose  up -d  ${DATALAKE_DOCKER}