Skip to content

Do banco testing para o banco stable no PostgreSQL

Peter edited this page Mar 5, 2020 · 18 revisions

Qual a melhor prática para trocar uma versão stable no banco de dados? Como os webservices apontam para um nome fixo, talvez não tenha problema em fazer DROP DATABASE por alguns minutos, ou durante a madrugada, a cada semestre.

-- após pg_dump osm_br_stable | gzip > /tmp/osm_br_stable-date.sql.gz
DROP DATABASE osm_br_stable;
CREATE DATABASE osm_br_stable WITH TEMPLATE osm_br_testing;

LIXO

Mais convenções:

  • prefixo "mvw" para MATERIALIZED VIEW em public e local
  • prefixo "kx" para arquivos CSV, tabelas ou campos calculados. Analogo de MATERIALIZED porém gerado manualmente.
  • contexto estadual e federal para vias.
CREATE or replace FUNCTION stable.geohash_pre(p_geom geometry, p_len int default 3) RETURNS text AS $f$
   -- prefixo de geohash até p_len letras (entrequadrantes no nordeste pode ficar com 'a')
   SELECT  CASE WHEN x='' THEN 'a' ELSE 'x' END
   FROM (SELECT substr(ST_Geohash(p_geom),1,p_len)) t(x)
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION stable.road_prename(p_name text) RETURNS text AS $f$
   -- para usar com tags->'ref', fornece o pre-nome, cortando demais subpartes.
   SELECT replace( substring(p_name from '^[^;,/\-]+\-?[^;,/\-]+') , ' ', '')
$f$ LANGUAGE SQL IMMUTABLE;

CREATE or replace FUNCTION copy_csv(
  p_filename text,
  p_query text,
  p_useheader boolean = true,
  p_root text = '/tmp/' ) RETURNS text AS $f$
 BEGIN
  EXECUTE format(
    'COPY (%s) TO %L CSV %s'
    ,CASE WHEN position(' ' in p_query)=0 THEN ('SELECT * FROM '||p_query) ELSE p_query END
    ,p_root||p_filename
    ,CASE WHEN p_useheader THEN 'HEADER' ELSE '' END
  );
  RETURN p_filename;
 END;
$f$ LANGUAGE plpgsql STRICT;
COMMENT ON FUNCTION copy_csv
 IS 'Easy transform query or view-name to COPY-to-CSV, with optional header';

CREATE MATERIALIZED VIEW mvw_osm_city_roads_inside AS 
  SELECT c.osm_id AS city_osm_id, p.osm_id road_osm_id
  FROM vw_osm_city_polygon c INNER JOIN planet_osm_roads p 
    ON c.way && p.way AND ST_ContainsProperly(c.way,p.way)
  WHERE not(p.tags?'boundary')
; -- 30 min
CREATE MATERIALIZED VIEW mvw_osm_city_points_inside AS
  SELECT c.osm_id AS city_osm_id, p.osm_id point_osm_id
  FROM vw_osm_city_polygon c INNER JOIN planet_osm_point p
    ON ST_ContainsProperly(c.way,p.way)
  WHERE p.tags?'wikidata' OR (p.tags?'addr:street' AND p.tags?'addr:housenumber')
; -- horas
CREATE MATERIALIZED VIEW mvw_osm_city_polygon_summary AS
  SELECT  -osm_id AS osm_rel_id,  ibge_id, 'Q'||wikidata_id AS wikidata_id, name, uf
         ,round(st_area(way,true)/10000.0)/100 as area_km2
         ,(SELECT COUNT(*) FROM mvw_osm_city_roads_inside  WHERE city_osm_id=v.osm_id) AS roads_inside
         ,(SELECT COUNT(*) FROM mvw_osm_city_points_inside WHERE city_osm_id=v.osm_id) AS points_inside
  FROM vw_osm_city_polygon v
; -- minutos

----
CREATE VIEW stable.vw01_osm_roads_outside AS
  SELECT p.*
  FROM stable.mvw_osm_roads_outside m INNER JOIN planet_osm_roads p
   ON  m.road_osm_id=p.osm_id
;

CREATE VIEW stable.vw01_osm_roads_city_inside AS
  SELECT  m.city_osm_id, p.* 
  FROM mvw_osm_city_roads_inside m INNER JOIN planet_osm_roads p
   ON  m.road_osm_id=p.osm_id
;
-------
-- check sudo chown -R :postgres /opt/gits
SELECT uf,  copy_csv(
    'kx_sumario.csv', -- kx pois informação redundante
    format('select * from mvw_osm_city_polygon_summary where uf=%L',uf), 
    true,
    '/opt/gits/OSM/stable/data/'|| uf ||'/'
  )
FROM vw_brcodes_state;

--- Checks -----

-- select tags from planet_osm_roads where not(tags?'boundary')

-- ver tambem https://github.com/OSMBrasil/stable/wiki/Teste123
-- federal e estadual = mvw_osm_notcity_roads_inside = uf ou BR.  Decidir se DF=GO/Brasilia ou não. 
SELECT count(*) as n_tot, 
       count(distinct m.road_osm_id::text ||city_osm_id::text) as n_inside,
       count(distinct m.road_osm_id) as n_inside_distinct,
       count(*) FILTER(where m.road_osm_id is null) n_outside
FROM planet_osm_roads p LEFT JOIN mvw_osm_city_roads_inside m ON m.road_osm_id=p.osm_id
; -- confere abrangencia de uso da tabela roads... mas precisa descontar tags?'boundary'.
-- talvez ideal até deletar previamente já que não faz sentido.

SELECT road_osm_id, COUNT(*) n, array_agg(DISTINCT city_osm_id) as cities 
FROM mvw_osm_city_roads_inside GROUP BY 1 HAVING count(*)>1 ORDER BY 2 desc,1;

SELECT point_osm_id, COUNT(*) n, array_agg(DISTINCT city_osm_id) as cities 
FROM mvw_osm_city_points_inside GROUP BY 1 HAVING count(*)>1 ORDER BY 2 desc,1;

Resultado estranho do check de duplicados:

road_osm_id n cities
-5320131 19 {-242738,-242531,-242397}
-6278289 7 {-185554}
-6278288 7 {-185554}
-4817687 7 {-314973}
-7483614 6 {-332493}
-6275043 5 {-185668}
-6251404 5 {-332477}
-5135366 5 {-326286}
... ... ...
-1757114 2 {-301091}
-368819 2 {-368786}
28885785 2 {-298463,-298442}
28897407 2 {-298463,-298442}
28897410 2 {-298463,-298442}
... ... ...

Valor de id negativo é relation e positivo é way. Pontos, supostamente de fronteira:

point_osm_id n cities
317503284 2 {-298463,-298442}
317642239 2 {-298463,-298442}
368179764 2 {-298463,-298442}
415522653 2 {-2709162,-334457}
416644922 2 {-1832492,-1831586}
416765466 2 {-2709162,-334457}
... ... ...

100 tags mais utilizadas no Brasil (ou 10 mais usadas no estado)

tag_name IN ('highway', 'oneway', 'name', 'surface', 'ref', 'lanes', 'maxspeed', 'boundary', 
 'bridge', 'layer', 'railway', 'operator', 'lit', 'wikidata', 'gauge', 
 'electrified', 'foot', 'junction', 'alt_name', 'horse', 'usage', 'bicycle', 
 'destination', 'sidewalk', 'border_type', 'place', 'service', 'old_ref', 'shoulder', 
 'shoulder', 'smoothness', 'short_name', 'old_name', 'official_name' 
 'owner', 'access', 'loc_name', 'maxheight', 'toll') 
SELECT tag, COUNT(*) n FROM (SELECT jsonb_object_keys(tags) FROM planet_osm_roads) t(x) ORDER BY 2 desc, 1;

Convenção

No padrão LexML do Brasil, em uso desde 2008, nomes de jurisdição são representados sem acento, hifens e apóstrofes ou preposições. Nomes como Machadinho D'Oeste (RO) e Pingo-d'Água (MG) ficam normalizados para "br;ro;machadinho.oeste" e "br;mg;pingo.agua" respectivamente. Na convenção OSM-Stable esses nomes são mapeados de forma reversível para CamelCase e siglas em maiúsculas, em path Unix: "BR/RO/MachadinhoOeste" e "BR/MG/PingoAgua". A função responsável por esta conversão de nomes próprios é a stable.std_name2unix(). Exemplos:

uf name ibge_id path
AM Boca do Acre 1300706 AM/BocaAcre
AC Brasiléia 1200104 AC/Brasileia
RO Alta Floresta D'Oeste 1100015 RO/AltaFlorestaOeste
RO Colorado do Oeste 1100064 RO/ColoradoOeste
RO Espigão D'Oeste 1100098 RO/EspigaoOeste
RO Guajará-Mirim 1100106 RO/GuajaraMirim
RO Ji-Paraná 1100122 RO/JiParana
RO Alto Alegre dos Parecis 1100379 RO/AltoAlegreParecis
# cria todas as pastas desejadas e dentro do padrão esperado.
psql "postgres://localhost/osm_stable_br" -c "select 'mv /tmp/pg_io/'|| path||'  /opt/gits/OSM/stable/'|| path FROM (SELECT 'data/'||stable.std_name2unix(name,uf) ||'/municipio.geojson' path from  brcodes_city) t" | tail -n +3  > gera_paths3.sh

psql "postgres://localhost/osm_stable_br" -c "select stable.save_city_polygons()" &
sudo sh gera_paths3.sh
cd /opt/gits/OSM/stable
git config core.fileMode false
git add .
git commit -m "Atualizando poligonos de municipios"
git push

-- ver função stable.audit_ibgecodes()

SELECT t1.name_path, t1.id,
 file_put_contents('/tmp/final-'||t1.id||'.json', (
  SELECT 
    trim(jsonb_pretty(ST_AsGeoJSONb(
     
    ( ( -- properties
       jsonb_strip_nulls(stable.rel_properties(r1.id)
       || COALESCE(stable.rel_dup_properties(r1.id,'r',r1.members_md5_int,r1.members),'{}'::jsonb) )
      ) -'flag' -'name:' #-'{"members","n_md5"}' #-'{"members","w_md5"}' #-'{"members","n_size"}' #-'{"members","w_size"}' 
    )

::text)
  FROM  planet_osm_rels r1 where r1.id=t1.id
 ) ) -- /selct /file
FROM (
 SELECT *, stable.getcity_rels_id(name_path) id  from stable.city_test_names
) t1, LATERAL (
 SELECT * FROM planet_osm_rels r WHERE  r.id=t1.id
) t2;

CREATE or replace FUNCTION stable.rel_properties(
  p_osm_id bigint
) RETURNS JSONb AS $f$
  SELECT tags || jsonb_build_object('members',members)
  -- bug  || COALESCE(stable.rel_dup_properties(id,'r',members_md5_int,members),'{}'::jsonb)
  FROM planet_osm_rels
  WHERE id = abs(p_osm_id)
$f$ LANGUAGE SQL IMMUTABLE;


/**
 * Enhances ST_AsGeoJSON() PostGIS function.
 * Use ST_AsGeoJSONb( geom, 6, 1, osm_id::text, stable.element_properties(osm_id) - 'name:' ).
 */
CREATE or replace FUNCTION ST_AsGeoJSONb( -- ST_AsGeoJSON_complete
  -- st_asgeojsonb(geometry, integer, integer, bigint, jsonb
  p_geom geometry,
  p_decimals int default 6,
  p_options int default 1,  -- 1=better (implicit WGS84) tham 5 (explicit)
  p_id text default null,
  p_properties jsonb default null,
  p_name text default null,
  p_title text default null,
  p_id_as_int boolean default false
) RETURNS JSONb AS $f$
  -- Do ST_AsGeoJSON() adding id, crs, properties, name and title
  SELECT ST_AsGeoJSON(p_geom,p_decimals,p_options)::jsonb
         || CASE
            WHEN p_properties IS NULL OR jsonb_typeof(p_properties)!='object' THEN '{}'::jsonb
            ELSE jsonb_build_object('properties',p_properties)
            END
         || CASE
            WHEN p_id IS NULL THEN '{}'::jsonb
            WHEN p_id_as_int THEN jsonb_build_object('id',p_id::bigint)
            ELSE jsonb_build_object('id',p_id)
            END
         || CASE WHEN p_name IS NULL THEN '{}'::jsonb ELSE jsonb_build_object('name',p_name) END
         || CASE WHEN p_title IS NULL THEN '{}'::jsonb ELSE jsonb_build_object('title',p_title) END
$f$ LANGUAGE SQL IMMUTABLE;

---

CREATE or replace FUNCTION stable.save_city_test_names(
  p_root text DEFAULT '/tmp/'
) RETURNS TABLE(city_name text, osm_id bigint, filename text)
    LANGUAGE sql IMMUTABLE
    AS $f$
  SELECT t1.name_path, t1.id,
   file_put_contents(p_root||replace(t1.name_path,'/','-')||'.json', jsonb_pretty((
    SELECT
       ST_AsGeoJSONb( (SELECT ST_SimplifyPreserveTopology(way,0) FROM planet_osm_polygon WHERE osm_id=-r1.id), 6, 1, 'R'||r1.id::text,
         (jsonb_strip_nulls(stable.rel_properties(r1.id)
         || COALESCE(stable.rel_dup_properties(r1.id,'r',r1.members_md5_int,r1.members),'{}'::jsonb) )
         ) -'flag' -'name:' #-'{"members","n_md5"}' #-'{"members","w_md5"}' #-'{"members","n_size"}' #-'{"members","w_size"}' 
      )
    FROM  planet_osm_rels r1 where r1.id=t1.id
   )) ) -- /selct /pretty /file
  FROM (
   SELECT *, stable.getcity_rels_id(name_path) id  from stable.city_test_names
  ) t1, LATERAL (
   SELECT * FROM planet_osm_rels r WHERE  r.id=t1.id
  ) t2;
$f$;
-------=====-----------

CREATE or replace FUNCTION stable.save_city_polygons(
  p_root text DEFAULT '/tmp/pg_io/data/'
) RETURNS TABLE(city_name text, osm_id bigint, filename text)
    LANGUAGE sql IMMUTABLE
    AS $f$
  SELECT t1.pathname2, t1.id,
   file_put_contents(p_root||t1.pathname2 ||'/municipio.geojson', jsonb_pretty((
    SELECT
       ST_AsGeoJSONb( (SELECT ST_SimplifyPreserveTopology(way,0) FROM planet_osm_polygon WHERE osm_id=-r1.id), 6, 1, 'R'||r1.id::text,
         (jsonb_strip_nulls(stable.rel_properties(r1.id)
         || COALESCE(stable.rel_dup_properties(r1.id,'r',r1.members_md5_int,r1.members),'{}'::jsonb) )
         ) -'flag' -'name:' #-'{"members","n_md5"}' #-'{"members","w_md5"}' #-'{"members","n_size"}' #-'{"members","w_size"}' 
      )
    FROM  planet_osm_rels r1 where r1.id=t1.id
   )) ) -- /selct /pretty /file
  FROM (
     SELECT *, stable.getcity_rels_id(ibge_id) id, stable.std_name2unix(name,uf)  pathname2
     FROM  brcodes_city  WHERE ibge_id!=53
  ) t1, LATERAL (
     SELECT * FROM planet_osm_rels r WHERE  r.id=t1.id
  ) t2;
$f$;