-
Notifications
You must be signed in to change notification settings - Fork 5
Do banco testing para o banco stable no PostgreSQL
Qual a melhor prática para trocar uma versão stable no banco de dados? Como os webservices apontam para um nome fixo, talvez não tenha problema em fazer DROP DATABASE por alguns minutos, ou durante a madrugada, a cada semestre.
-- após pg_dump osm_br_stable | gzip > /tmp/osm_br_stable-date.sql.gz
DROP DATABASE osm_br_stable;
CREATE DATABASE osm_br_stable WITH TEMPLATE osm_br_testing;
Mais convenções:
- prefixo "mvw" para MATERIALIZED VIEW em public e local
- prefixo "kx" para arquivos CSV, tabelas ou campos calculados. Analogo de MATERIALIZED porém gerado manualmente.
- contexto estadual e federal para vias.
CREATE or replace FUNCTION stable.geohash_pre(p_geom geometry, p_len int default 3) RETURNS text AS $f$
-- prefixo de geohash até p_len letras (entrequadrantes no nordeste pode ficar com 'a')
SELECT CASE WHEN x='' THEN 'a' ELSE 'x' END
FROM (SELECT substr(ST_Geohash(p_geom),1,p_len)) t(x)
$f$ LANGUAGE SQL IMMUTABLE;
CREATE or replace FUNCTION stable.road_prename(p_name text) RETURNS text AS $f$
-- para usar com tags->'ref', fornece o pre-nome, cortando demais subpartes.
SELECT replace( substring(p_name from '^[^;,/\-]+\-?[^;,/\-]+') , ' ', '')
$f$ LANGUAGE SQL IMMUTABLE;
CREATE or replace FUNCTION copy_csv(
p_filename text,
p_query text,
p_useheader boolean = true,
p_root text = '/tmp/' ) RETURNS text AS $f$
BEGIN
EXECUTE format(
'COPY (%s) TO %L CSV %s'
,CASE WHEN position(' ' in p_query)=0 THEN ('SELECT * FROM '||p_query) ELSE p_query END
,p_root||p_filename
,CASE WHEN p_useheader THEN 'HEADER' ELSE '' END
);
RETURN p_filename;
END;
$f$ LANGUAGE plpgsql STRICT;
COMMENT ON FUNCTION copy_csv
IS 'Easy transform query or view-name to COPY-to-CSV, with optional header';
CREATE MATERIALIZED VIEW mvw_osm_city_roads_inside AS
SELECT c.osm_id AS city_osm_id, p.osm_id road_osm_id
FROM vw_osm_city_polygon c INNER JOIN planet_osm_roads p
ON c.way && p.way AND ST_ContainsProperly(c.way,p.way)
WHERE not(p.tags?'boundary')
; -- 30 min
CREATE MATERIALIZED VIEW mvw_osm_city_points_inside AS
SELECT c.osm_id AS city_osm_id, p.osm_id point_osm_id
FROM vw_osm_city_polygon c INNER JOIN planet_osm_point p
ON ST_ContainsProperly(c.way,p.way)
WHERE p.tags?'wikidata' OR (p.tags?'addr:street' AND p.tags?'addr:housenumber')
; -- horas
CREATE MATERIALIZED VIEW mvw_osm_city_polygon_summary AS
SELECT -osm_id AS osm_rel_id, ibge_id, 'Q'||wikidata_id AS wikidata_id, name, uf
,round(st_area(way,true)/10000.0)/100 as area_km2
,(SELECT COUNT(*) FROM mvw_osm_city_roads_inside WHERE city_osm_id=v.osm_id) AS roads_inside
,(SELECT COUNT(*) FROM mvw_osm_city_points_inside WHERE city_osm_id=v.osm_id) AS points_inside
FROM vw_osm_city_polygon v
; -- minutos
----
CREATE VIEW stable.vw01_osm_roads_outside AS
SELECT p.*
FROM stable.mvw_osm_roads_outside m INNER JOIN planet_osm_roads p
ON m.road_osm_id=p.osm_id
;
CREATE VIEW stable.vw01_osm_roads_city_inside AS
SELECT m.city_osm_id, p.*
FROM mvw_osm_city_roads_inside m INNER JOIN planet_osm_roads p
ON m.road_osm_id=p.osm_id
;
-------
-- check sudo chown -R :postgres /opt/gits
SELECT uf, copy_csv(
'kx_sumario.csv', -- kx pois informação redundante
format('select * from mvw_osm_city_polygon_summary where uf=%L',uf),
true,
'/opt/gits/OSM/stable/data/'|| uf ||'/'
)
FROM vw_brcodes_state;
--- Checks -----
-- select tags from planet_osm_roads where not(tags?'boundary')
-- ver tambem https://github.com/OSMBrasil/stable/wiki/Teste123
-- federal e estadual = mvw_osm_notcity_roads_inside = uf ou BR. Decidir se DF=GO/Brasilia ou não.
SELECT count(*) as n_tot,
count(distinct m.road_osm_id::text ||city_osm_id::text) as n_inside,
count(distinct m.road_osm_id) as n_inside_distinct,
count(*) FILTER(where m.road_osm_id is null) n_outside
FROM planet_osm_roads p LEFT JOIN mvw_osm_city_roads_inside m ON m.road_osm_id=p.osm_id
; -- confere abrangencia de uso da tabela roads... mas precisa descontar tags?'boundary'.
-- talvez ideal até deletar previamente já que não faz sentido.
SELECT road_osm_id, COUNT(*) n, array_agg(DISTINCT city_osm_id) as cities
FROM mvw_osm_city_roads_inside GROUP BY 1 HAVING count(*)>1 ORDER BY 2 desc,1;
SELECT point_osm_id, COUNT(*) n, array_agg(DISTINCT city_osm_id) as cities
FROM mvw_osm_city_points_inside GROUP BY 1 HAVING count(*)>1 ORDER BY 2 desc,1;
Resultado estranho do check de duplicados:
road_osm_id | n | cities |
---|---|---|
-5320131 | 19 | {-242738,-242531,-242397} |
-6278289 | 7 | {-185554} |
-6278288 | 7 | {-185554} |
-4817687 | 7 | {-314973} |
-7483614 | 6 | {-332493} |
-6275043 | 5 | {-185668} |
-6251404 | 5 | {-332477} |
-5135366 | 5 | {-326286} |
... | ... | ... |
-1757114 | 2 | {-301091} |
-368819 | 2 | {-368786} |
28885785 | 2 | {-298463,-298442} |
28897407 | 2 | {-298463,-298442} |
28897410 | 2 | {-298463,-298442} |
... | ... | ... |
Valor de id negativo é relation e positivo é way. Pontos, supostamente de fronteira:
point_osm_id | n | cities |
---|---|---|
317503284 | 2 | {-298463,-298442} |
317642239 | 2 | {-298463,-298442} |
368179764 | 2 | {-298463,-298442} |
415522653 | 2 | {-2709162,-334457} |
416644922 | 2 | {-1832492,-1831586} |
416765466 | 2 | {-2709162,-334457} |
... | ... | ... |
100 tags mais utilizadas no Brasil (ou 10 mais usadas no estado)
tag_name IN ('highway', 'oneway', 'name', 'surface', 'ref', 'lanes', 'maxspeed', 'boundary',
'bridge', 'layer', 'railway', 'operator', 'lit', 'wikidata', 'gauge',
'electrified', 'foot', 'junction', 'alt_name', 'horse', 'usage', 'bicycle',
'destination', 'sidewalk', 'border_type', 'place', 'service', 'old_ref', 'shoulder',
'shoulder', 'smoothness', 'short_name', 'old_name', 'official_name'
'owner', 'access', 'loc_name', 'maxheight', 'toll')
SELECT tag, COUNT(*) n FROM (SELECT jsonb_object_keys(tags) FROM planet_osm_roads) t(x) ORDER BY 2 desc, 1;
No padrão LexML do Brasil, em uso desde 2008, nomes de jurisdição são representados sem acento, hifens e apóstrofes ou preposições. Nomes como Machadinho D'Oeste (RO) e Pingo-d'Água (MG) ficam normalizados para "br;ro;machadinho.oeste" e "br;mg;pingo.agua" respectivamente. Na convenção OSM-Stable esses nomes são mapeados de forma reversível para CamelCase e siglas em maiúsculas, em path Unix: "BR/RO/MachadinhoOeste" e "BR/MG/PingoAgua". A função responsável por esta conversão de nomes próprios é a stable.std_name2unix(). Exemplos:
uf | name | ibge_id | path |
---|---|---|---|
AM | Boca do Acre | 1300706 | AM/BocaAcre |
AC | Brasiléia | 1200104 | AC/Brasileia |
RO | Alta Floresta D'Oeste | 1100015 | RO/AltaFlorestaOeste |
RO | Colorado do Oeste | 1100064 | RO/ColoradoOeste |
RO | Espigão D'Oeste | 1100098 | RO/EspigaoOeste |
RO | Guajará-Mirim | 1100106 | RO/GuajaraMirim |
RO | Ji-Paraná | 1100122 | RO/JiParana |
RO | Alto Alegre dos Parecis | 1100379 | RO/AltoAlegreParecis |
# cria todas as pastas desejadas e dentro do padrão esperado.
psql "postgres://localhost/osm_stable_br" -c "select 'mv /tmp/pg_io/'|| path||' /opt/gits/OSM/stable/'|| path FROM (SELECT 'data/'||stable.std_name2unix(name,uf) ||'/municipio.geojson' path from brcodes_city) t" | tail -n +3 > gera_paths3.sh
psql "postgres://localhost/osm_stable_br" -c "select stable.save_city_polygons()" &
sudo sh gera_paths3.sh
cd /opt/gits/OSM/stable
git config core.fileMode false
git add .
git commit -m "Atualizando poligonos de municipios"
git push
-- ver função stable.audit_ibgecodes()
SELECT t1.name_path, t1.id,
file_put_contents('/tmp/final-'||t1.id||'.json', (
SELECT
trim(jsonb_pretty(ST_AsGeoJSONb(
( ( -- properties
jsonb_strip_nulls(stable.rel_properties(r1.id)
|| COALESCE(stable.rel_dup_properties(r1.id,'r',r1.members_md5_int,r1.members),'{}'::jsonb) )
) -'flag' -'name:' #-'{"members","n_md5"}' #-'{"members","w_md5"}' #-'{"members","n_size"}' #-'{"members","w_size"}'
)
::text)
FROM planet_osm_rels r1 where r1.id=t1.id
) ) -- /selct /file
FROM (
SELECT *, stable.getcity_rels_id(name_path) id from stable.city_test_names
) t1, LATERAL (
SELECT * FROM planet_osm_rels r WHERE r.id=t1.id
) t2;
CREATE or replace FUNCTION stable.rel_properties(
p_osm_id bigint
) RETURNS JSONb AS $f$
SELECT tags || jsonb_build_object('members',members)
-- bug || COALESCE(stable.rel_dup_properties(id,'r',members_md5_int,members),'{}'::jsonb)
FROM planet_osm_rels
WHERE id = abs(p_osm_id)
$f$ LANGUAGE SQL IMMUTABLE;
/**
* Enhances ST_AsGeoJSON() PostGIS function.
* Use ST_AsGeoJSONb( geom, 6, 1, osm_id::text, stable.element_properties(osm_id) - 'name:' ).
*/
CREATE or replace FUNCTION ST_AsGeoJSONb( -- ST_AsGeoJSON_complete
-- st_asgeojsonb(geometry, integer, integer, bigint, jsonb
p_geom geometry,
p_decimals int default 6,
p_options int default 1, -- 1=better (implicit WGS84) tham 5 (explicit)
p_id text default null,
p_properties jsonb default null,
p_name text default null,
p_title text default null,
p_id_as_int boolean default false
) RETURNS JSONb AS $f$
-- Do ST_AsGeoJSON() adding id, crs, properties, name and title
SELECT ST_AsGeoJSON(p_geom,p_decimals,p_options)::jsonb
|| CASE
WHEN p_properties IS NULL OR jsonb_typeof(p_properties)!='object' THEN '{}'::jsonb
ELSE jsonb_build_object('properties',p_properties)
END
|| CASE
WHEN p_id IS NULL THEN '{}'::jsonb
WHEN p_id_as_int THEN jsonb_build_object('id',p_id::bigint)
ELSE jsonb_build_object('id',p_id)
END
|| CASE WHEN p_name IS NULL THEN '{}'::jsonb ELSE jsonb_build_object('name',p_name) END
|| CASE WHEN p_title IS NULL THEN '{}'::jsonb ELSE jsonb_build_object('title',p_title) END
$f$ LANGUAGE SQL IMMUTABLE;
---
CREATE or replace FUNCTION stable.save_city_test_names(
p_root text DEFAULT '/tmp/'
) RETURNS TABLE(city_name text, osm_id bigint, filename text)
LANGUAGE sql IMMUTABLE
AS $f$
SELECT t1.name_path, t1.id,
file_put_contents(p_root||replace(t1.name_path,'/','-')||'.json', jsonb_pretty((
SELECT
ST_AsGeoJSONb( (SELECT ST_SimplifyPreserveTopology(way,0) FROM planet_osm_polygon WHERE osm_id=-r1.id), 6, 1, 'R'||r1.id::text,
(jsonb_strip_nulls(stable.rel_properties(r1.id)
|| COALESCE(stable.rel_dup_properties(r1.id,'r',r1.members_md5_int,r1.members),'{}'::jsonb) )
) -'flag' -'name:' #-'{"members","n_md5"}' #-'{"members","w_md5"}' #-'{"members","n_size"}' #-'{"members","w_size"}'
)
FROM planet_osm_rels r1 where r1.id=t1.id
)) ) -- /selct /pretty /file
FROM (
SELECT *, stable.getcity_rels_id(name_path) id from stable.city_test_names
) t1, LATERAL (
SELECT * FROM planet_osm_rels r WHERE r.id=t1.id
) t2;
$f$;
-------=====-----------
CREATE or replace FUNCTION stable.save_city_polygons(
p_root text DEFAULT '/tmp/pg_io/data/'
) RETURNS TABLE(city_name text, osm_id bigint, filename text)
LANGUAGE sql IMMUTABLE
AS $f$
SELECT t1.pathname2, t1.id,
file_put_contents(p_root||t1.pathname2 ||'/municipio.geojson', jsonb_pretty((
SELECT
ST_AsGeoJSONb( (SELECT ST_SimplifyPreserveTopology(way,0) FROM planet_osm_polygon WHERE osm_id=-r1.id), 6, 1, 'R'||r1.id::text,
(jsonb_strip_nulls(stable.rel_properties(r1.id)
|| COALESCE(stable.rel_dup_properties(r1.id,'r',r1.members_md5_int,r1.members),'{}'::jsonb) )
) -'flag' -'name:' #-'{"members","n_md5"}' #-'{"members","w_md5"}' #-'{"members","n_size"}' #-'{"members","w_size"}'
)
FROM planet_osm_rels r1 where r1.id=t1.id
)) ) -- /selct /pretty /file
FROM (
SELECT *, stable.getcity_rels_id(ibge_id) id, stable.std_name2unix(name,uf) pathname2
FROM brcodes_city WHERE ibge_id!=53
) t1, LATERAL (
SELECT * FROM planet_osm_rels r WHERE r.id=t1.id
) t2;
$f$;