-
Notifications
You must be signed in to change notification settings - Fork 0
Datalake
Claiton edited this page Aug 19, 2024
·
44 revisions
Servidores:
- dl03t_main: https://old.osm.codes
- dl06t_main (teste): https://test.afa.codes
- dl05s_main (produção): https://afa.codes
# Configuration constants for the datalake rebuild procedure.
readonly DATALAKE_DBNAME='dl06t_main'            # target datalake database to (re)build
readonly DATALAKE_DOCKER='pgrestDL06'            # docker-compose service name of the datalake API
readonly PROD_DB='dl05s_main'                    # production database used as dump source
readonly DUMP_FOLDER="${HOME}/dump_production"   # where production dumps are written
# 00. git pull in every repository and update nginx
# Map repository path prefixes to their GitHub organization names.
declare -A projects
projects[dg]='digital-guard'
projects[a4a]='AddressForAll'
projects[osmc]='osm-codes'
# Repositories to synchronize, as "<prefix>/<repo-name>" entries,
# where <prefix> is a key of the projects mapping above.
repositories=()
# AddressForAll tooling
repositories+=( a4a/WS a4a/pg_pubLib-v1 )
# digital-guard preservation repositories
repositories+=(
  dg/licenses
  dg/preserv
  dg/preserv-AR dg/preserv-BO dg/preserv-BR dg/preserv-CL
  dg/preserv-CM dg/preserv-CO dg/preserv-EC dg/preserv-PE
  dg/preserv-PY dg/preserv-SR dg/preserv-UY dg/preserv-VE
  dg/preserv-MX
)
# osm-codes grid and country repositories
repositories+=(
  osmc/BR_new osmc/CO_new osmc/CM osmc/EC_new osmc/GGeohash
  osmc/UY_new osmc/WS osmc/gridMap-draftPages osmc/NaturalCodes
)
# Base directory holding all git working copies.
base_dir="/var/gits"
# Clone or update every repository listed above.
# NOTE(review): the scraped source showed the remote as "[email protected]:";
# reconstructed here as the standard GitHub SSH URL "git@github.com:".
for repo in "${repositories[@]}"; do
  prefix=${repo%%/*}               # e.g. "dg" from "dg/preserv"
  repo_name=${repo##*/}            # e.g. "preserv"
  project=${projects[$prefix]:-}   # GitHub organization for this prefix
  if [ -z "$project" ]; then
    echo "Unknown project prefix: $prefix. Skipping $repo." >&2
    continue
  fi
  repo_dir="${base_dir}/_${repo}"
  repo_url="git@github.com:${project}/${repo_name}.git"
  echo "Processing ${repo_dir} for project ${project}"
  if [ -d "$repo_dir" ]; then
    # Update existing working copy; keep the remote URL canonical first.
    pushd "$repo_dir" > /dev/null || { echo "Cannot enter $repo_dir" >&2; continue; }
    git remote set-url origin "$repo_url"
    git pull || echo "git pull failed for $repo" >&2
    popd > /dev/null
  else
    # Fresh clone; git clone creates the final directory itself,
    # so only the parent needs to exist.
    mkdir -p "$(dirname "$repo_dir")"
    git clone "$repo_url" "$repo_dir" || echo "git clone failed for $repo" >&2
  fi
done
# 01. Dump the production tables (data only, as column INSERTs).
mkdir -p "${DUMP_FOLDER}"   # -p: do not fail when the folder already exists

# dump_table <schema.table> [extra pg_dump flags...]
# Dumps one production table to ${DUMP_FOLDER}/pg_dump_<table>.sql.
dump_table() {
  local table=$1
  shift   # remaining arguments are extra pg_dump flags
  pg_dump "$@" --column-inserts --data-only -t "$table" \
    "postgres://postgres@localhost/${PROD_DB}" \
    > "${DUMP_FOLDER}/pg_dump_${table#optim.}.sql"
}

dump_table optim.jurisdiction --disable-triggers
dump_table optim.jurisdiction_geom
dump_table optim.jurisdiction_eez
dump_table optim.donated_packcomponent
dump_table optim.donated_packcomponent_not_approved
dump_table optim.donated_packcomponent_cloudcontrol
dump_table optim.consolidated_data_pre
dump_table optim.consolidated_data
# 02. Stop the datalake API container before rebuilding the database.
cd /var/gits/_a4a/WS/src/ || exit 1
sudo docker-compose stop "${DATALAKE_DOCKER}"
# 03. Drop/create the datalake.
cd /var/gits/_dg/preserv/src || exit 1
make ini_datalake pg_datalake="${DATALAKE_DBNAME}"
# Ignore this known error (the data is reloaded later, in step 08):
#   psql postgres://postgres@localhost/dl06t_main < /var/gits/_dg/preserv/src/optim-download-ini.sql
#   ERROR: null value in column "id" of relation "donor" violates not-null constraint
# 04. Load ggeohash.
cd /var/gits/_osmc/GGeohash/src || exit 1
make all pg_datalake="${DATALAKE_DBNAME}"
# Helper: feed one SQL file into the datalake database.
run_sql_file() {
  psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < "$1"
}
# Helper: COPY one CSV file into a datalake table.
copy_csv() {
  psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" \
    -c "COPY $1 FROM '$2' WITH DELIMITER ',' CSV HEADER;"
}

# 05. Load the jurisdiction dump (FUTURE: load from stable)
run_sql_file "${DUMP_FOLDER}/pg_dump_jurisdiction.sql"
# 06. Load the geometry tables (FUTURE: load from stable)
run_sql_file "${DUMP_FOLDER}/pg_dump_jurisdiction_geom.sql"
run_sql_file "${DUMP_FOLDER}/pg_dump_jurisdiction_eez.sql"
# 07. Load abbreviations
copy_csv optim.jurisdiction_abbrev_ref    /var/gits/_osmc/BR_new/data/synonym_ref.csv
copy_csv optim.jurisdiction_abbrev_option /var/gits/_osmc/BR_new/data/synonym.csv
copy_csv optim.jurisdiction_abbrev_option /var/gits/_osmc/CM/data/synonym.csv
copy_csv optim.jurisdiction_abbrev_option /var/gits/_osmc/CO_new/data/synonym.csv
copy_csv optim.jurisdiction_abbrev_option /var/gits/_osmc/EC_new/data/synonym.csv
copy_csv optim.jurisdiction_abbrev_option /var/gits/_osmc/UY_new/data/synonym.csv
# 08. Load donor and donatedPack data, fixing the error reported in step 03.
run_sql_file /var/gits/_dg/preserv/src/optim-download-ini.sql
# 09. Load pack-component data (already published and available to publish)
run_sql_file "${DUMP_FOLDER}/pg_dump_donated_packcomponent.sql"
run_sql_file "${DUMP_FOLDER}/pg_dump_donated_packcomponent_not_approved.sql"
# 10. Load the links of the filtered files
run_sql_file "${DUMP_FOLDER}/pg_dump_donated_packcomponent_cloudcontrol.sql"
# 11. Load the consolidated-data dump
run_sql_file "${DUMP_FOLDER}/pg_dump_consolidated_data.sql"
# 12. National coverages (FUTURE: read from conf.yaml):
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT osmc.L0cover_upsert('BR'); SELECT osmc.L0cover_upsert('CM'); SELECT osmc.L0cover_upsert('CO'); SELECT osmc.L0cover_upsert('EC'); SELECT osmc.L0cover_upsert('UY');"
# 13. Municipal coverages:
# First stage each country's coverage.csv into a tmp_orig.* table...
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/BR_new/data/coverage.csv','tmp_orig.coveragebr',',');"
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CM/data/coverage.csv', 'tmp_orig.coveragecm',',');"
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/CO_new/data/coverage.csv','tmp_orig.coverageco',',');"
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT optim.fdw_generate_direct_csv('/var/gits/_osmc/UY_new/data/coverage.csv','tmp_orig.coverageuy',',');"
# ...then apply each staged table (note: BR and UY use the _161c variant).
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT osmc.update_coverage_isolevel3_161c(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coveragebr;"
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT osmc.update_coverage_isolevel3( isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coveragecm;"
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT osmc.update_coverage_isolevel3( isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coverageco;"
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" -c "SELECT osmc.update_coverage_isolevel3_161c(isolabel_ext,status::smallint,string_to_array(cover,' ',',')::text[],string_to_array(overlay,' ',',')::text[]) FROM tmp_orig.coverageuy;"
# 14. Load the API
## osm-codes/WS
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < /var/gits/_osmc/WS/src/lib-geoUriExt.sql
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < /var/gits/_osmc/WS/src/step01geoUriExt-api.sql
## Recreate the materialized view, after the jurisdiction and abbreviation tables are loaded
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < /var/gits/_dg/preserv/src/optim-step4-api.sql
## osm-codes/WS
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < /var/gits/_osmc/WS/src/step01ggeohash-api.sql
# 15. Run the assertion tests
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < /var/gits/_osmc/GGeohash/src/step90assert-p1.sql
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < /var/gits/_osmc/WS/src/assert.sql
psql "postgres://postgres@localhost/${DATALAKE_DBNAME}" < /var/gits/_osmc/CO_new/src/assert/gridLib.sql
# 16. Start the docker container again
cd /var/gits/_a4a/WS/src/ || exit 1
sudo docker-compose up -d "${DATALAKE_DOCKER}"
### When necessary:
# 17. Update the afa.codes site (replace the published pages with the
# current gridMap-draftPages working copy).
rm -rf -- /var/www/osm.codes
cp -rf /var/gits/_osmc/gridMap-draftPages /var/www/osm.codes
cd /var/gits/_dg/preserv/src/ || exit 1
sudo bash fixaPermissoes.sh /var/www/osm.codes
# 18. Update nginx