From 71a2067ea05c8bd334405e17cd8bbcdb0b9060b6 Mon Sep 17 00:00:00 2001 From: LDouziech Date: Thu, 11 Apr 2024 10:36:24 +0200 Subject: [PATCH] Update to INSEE/Sirene 3.11 (#637) * [UniteLegale] Remove / Set "CaractereEmployeur" to null * [Etablissement] Add new data & remove sync for null fields * [Insee] Update BASE URL to v3.11 * [SyncInsee] Minimum date for v3.11 * [SyncInsee] Remove min date --- .vscode/settings.json | 8 ++- .../2024-02-27-161209_siren_3_11/down.sql | 13 ++++ .../2024-02-27-161209_siren_3_11/up.sql | 13 ++++ src/connectors/insee/implementation.rs | 2 +- src/connectors/insee/types/etablissement.rs | 60 ++++++++----------- src/connectors/insee/types/unite_legale.rs | 3 +- src/models/etablissement/columns.rs | 5 ++ src/models/etablissement/common.rs | 7 ++- src/models/etablissement/mod.rs | 22 +++++-- src/models/schema.rs | 18 ++++-- src/models/unite_legale/columns.rs | 4 +- src/models/unite_legale/common.rs | 4 +- src/models/unite_legale/mod.rs | 8 +-- src/update/action/sync_insee.rs | 2 + 14 files changed, 109 insertions(+), 60 deletions(-) create mode 100644 migrations/2024-02-27-161209_siren_3_11/down.sql create mode 100644 migrations/2024-02-27-161209_siren_3_11/up.sql diff --git a/.vscode/settings.json b/.vscode/settings.json index 02a859c..247d232 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,6 +1,10 @@ { "editor.codeActionsOnSave": { - "source.fixAll": true, - "source.organizeImports": true + "source.fixAll": "explicit", + "source.organizeImports": "explicit" + }, + "[rust]": { + "editor.defaultFormatter": "rust-lang.rust-analyzer", + "editor.formatOnSave": true } } diff --git a/migrations/2024-02-27-161209_siren_3_11/down.sql b/migrations/2024-02-27-161209_siren_3_11/down.sql new file mode 100644 index 0000000..dcd06e5 --- /dev/null +++ b/migrations/2024-02-27-161209_siren_3_11/down.sql @@ -0,0 +1,13 @@ +ALTER TABLE "public"."etablissement" +DROP COLUMN IF EXISTS "dernier_numero_voie", +DROP COLUMN IF EXISTS "indice_repetition_dernier_numero_voie", +DROP COLUMN IF EXISTS "identifiant_adresse", +DROP COLUMN IF EXISTS "coordonnee_lambert_x", +DROP COLUMN IF EXISTS "coordonnee_lambert_y"; + +ALTER TABLE "public"."etablissement_staging" +DROP COLUMN IF EXISTS "dernier_numero_voie", +DROP COLUMN IF EXISTS "indice_repetition_dernier_numero_voie", +DROP COLUMN IF EXISTS "identifiant_adresse", +DROP COLUMN IF EXISTS "coordonnee_lambert_x", +DROP COLUMN IF EXISTS "coordonnee_lambert_y"; diff --git a/migrations/2024-02-27-161209_siren_3_11/up.sql b/migrations/2024-02-27-161209_siren_3_11/up.sql new file mode 100644 index 0000000..c601785 --- /dev/null +++ b/migrations/2024-02-27-161209_siren_3_11/up.sql @@ -0,0 +1,13 @@ +ALTER TABLE "public"."etablissement" +ADD COLUMN "dernier_numero_voie" text DEFAULT NULL, +ADD COLUMN "indice_repetition_dernier_numero_voie" text DEFAULT NULL, +ADD COLUMN "identifiant_adresse" text DEFAULT NULL, +ADD COLUMN "coordonnee_lambert_x" text DEFAULT NULL, +ADD COLUMN "coordonnee_lambert_y" text DEFAULT NULL; + +ALTER TABLE "public"."etablissement_staging" +ADD COLUMN "dernier_numero_voie" text DEFAULT NULL, +ADD COLUMN "indice_repetition_dernier_numero_voie" text DEFAULT NULL, +ADD COLUMN "identifiant_adresse" text DEFAULT NULL, +ADD COLUMN "coordonnee_lambert_x" text DEFAULT NULL, +ADD COLUMN "coordonnee_lambert_y" text DEFAULT NULL; diff --git a/src/connectors/insee/implementation.rs b/src/connectors/insee/implementation.rs index c34f258..7303e65 100644 --- a/src/connectors/insee/implementation.rs +++ b/src/connectors/insee/implementation.rs @@ -10,7 +10,7 @@ use chrono::NaiveDateTime; const MAX_CALL: u8 = 20; const MAX_DURATION: std::time::Duration = std::time::Duration::from_secs(60); -const BASE_URL: &str = "https://api.insee.fr/entreprises/sirene/V3"; +const BASE_URL: &str = "https://api.insee.fr/entreprises/sirene/V3.11"; pub const INITIAL_CURSOR: &str = "*"; struct EndpointConfig { diff --git a/src/connectors/insee/types/etablissement.rs b/src/connectors/insee/types/etablissement.rs index b16e1c2..5951f1d 100644 --- a/src/connectors/insee/types/etablissement.rs +++ b/src/connectors/insee/types/etablissement.rs @@ -32,7 +32,6 @@ pub struct InseeEtablissementInner { pub etablissement_siege: bool, pub nombre_periodes_etablissement: Option, pub adresse_etablissement: InseeAdresseEtablissement, - pub adresse2_etablissement: InseeAdresse2Etablissement, } #[derive(Deserialize, Debug)] @@ -55,6 +54,8 @@ pub struct InseeAdresseEtablissement { pub complement_adresse_etablissement: Option, pub numero_voie_etablissement: Option, pub indice_repetition_etablissement: Option, + pub dernier_numero_voie_etablissement: Option, + pub indice_repetition_dernier_numero_voie_etablissement: Option, pub type_voie_etablissement: Option, pub libelle_voie_etablissement: Option, pub code_postal_etablissement: Option, @@ -66,25 +67,9 @@ pub struct InseeAdresseEtablissement { pub libelle_cedex_etablissement: Option, pub code_pays_etranger_etablissement: Option, pub libelle_pays_etranger_etablissement: Option, -} - -#[derive(Deserialize, Debug, Clone)] -#[serde(rename_all = "camelCase")] -pub struct InseeAdresse2Etablissement { - complement_adresse2_etablissement: Option, - numero_voie2_etablissement: Option, - indice_repetition2_etablissement: Option, - type_voie2_etablissement: Option, - libelle_voie2_etablissement: Option, - code_postal2_etablissement: Option, - libelle_commune2_etablissement: Option, - libelle_commune_etranger2_etablissement: Option, - distribution_speciale2_etablissement: Option, - code_commune2_etablissement: Option, - code_cedex2_etablissement: Option, - libelle_cedex2_etablissement: Option, - code_pays_etranger2_etablissement: Option, - libelle_pays_etranger2_etablissement: Option, + pub identifiant_adresse_etablissement: Option, + pub coordonnee_lambert_abscisse_etablissement: Option, + pub coordonnee_lambert_ordonnee_etablissement: Option, } #[derive(Deserialize, Debug, Clone)] @@ -121,7 +106,6 @@ impl From<&InseeEtablissement> for Option { impl From for Etablissement { fn from(e: InseeEtablissementWithPeriode) -> Self { let adresse = e.content.adresse_etablissement; - let adresse2 = e.content.adresse2_etablissement; Etablissement { siret: e.content.siret, @@ -151,20 +135,26 @@ impl From for Etablissement { libelle_cedex: adresse.libelle_cedex_etablissement, code_pays_etranger: adresse.code_pays_etranger_etablissement, libelle_pays_etranger: adresse.libelle_pays_etranger_etablissement, - complement_adresse2: adresse2.complement_adresse2_etablissement, - numero_voie_2: adresse2.numero_voie2_etablissement, - indice_repetition_2: adresse2.indice_repetition2_etablissement, - type_voie_2: adresse2.type_voie2_etablissement, - libelle_voie_2: adresse2.libelle_voie2_etablissement, - code_postal_2: adresse2.code_postal2_etablissement, - libelle_commune_2: adresse2.libelle_commune2_etablissement, - libelle_commune_etranger_2: adresse2.libelle_commune_etranger2_etablissement, - distribution_speciale_2: adresse2.distribution_speciale2_etablissement, - code_commune_2: adresse2.code_commune2_etablissement, - code_cedex_2: adresse2.code_cedex2_etablissement, - libelle_cedex_2: adresse2.libelle_cedex2_etablissement, - code_pays_etranger_2: adresse2.code_pays_etranger2_etablissement, - libelle_pays_etranger_2: adresse2.libelle_pays_etranger2_etablissement, + dernier_numero_voie: adresse.dernier_numero_voie_etablissement, + identifiant_adresse: adresse.identifiant_adresse_etablissement, + indice_repetition_dernier_numero_voie: adresse + .indice_repetition_dernier_numero_voie_etablissement, + coordonnee_lambert_x: adresse.coordonnee_lambert_abscisse_etablissement, + coordonnee_lambert_y: adresse.coordonnee_lambert_ordonnee_etablissement, + complement_adresse2: None, + numero_voie_2: None, + indice_repetition_2: None, + type_voie_2: None, + libelle_voie_2: None, + code_postal_2: None, + libelle_commune_2: None, + libelle_commune_etranger_2: None, + distribution_speciale_2: None, + code_commune_2: None, + code_cedex_2: None, + libelle_cedex_2: None, + code_pays_etranger_2: None, + libelle_pays_etranger_2: None, date_debut: e.periode.date_debut, etat_administratif: e.periode.etat_administratif_etablissement, enseigne_1: e.periode.enseigne1_etablissement, diff --git a/src/connectors/insee/types/unite_legale.rs b/src/connectors/insee/types/unite_legale.rs index b3c4d98..eb2a474 100644 --- a/src/connectors/insee/types/unite_legale.rs +++ b/src/connectors/insee/types/unite_legale.rs @@ -82,7 +82,6 @@ pub struct PeriodeInseeUniteLegale { pub nomenclature_activite_principale_unite_legale: Option, pub nic_siege_unite_legale: Option, pub economie_sociale_solidaire_unite_legale: Option, - pub caractere_employeur_unite_legale: Option, pub societe_mission_unite_legale: Option, } @@ -138,8 +137,8 @@ impl From for UniteLegale { .nomenclature_activite_principale_unite_legale, nic_siege: u.periode.nic_siege_unite_legale, economie_sociale_solidaire: u.periode.economie_sociale_solidaire_unite_legale, - caractere_employeur: u.periode.caractere_employeur_unite_legale, societe_mission: u.periode.societe_mission_unite_legale, + caractere_employeur: None, } } } diff --git a/src/models/etablissement/columns.rs b/src/models/etablissement/columns.rs index d2bae7f..681cbf3 100644 --- a/src/models/etablissement/columns.rs +++ b/src/models/etablissement/columns.rs @@ -13,6 +13,8 @@ nombre_periodes, complement_adresse, numero_voie, indice_repetition, +dernier_numero_voie, +indice_repetition_dernier_numero_voie, type_voie, libelle_voie, code_postal, @@ -24,6 +26,9 @@ code_cedex, libelle_cedex, code_pays_etranger, libelle_pays_etranger, +identifiant_adresse, +coordonnee_lambert_x, +coordonnee_lambert_y, complement_adresse2, numero_voie_2, indice_repetition_2, diff --git a/src/models/etablissement/common.rs b/src/models/etablissement/common.rs index 9a2732e..41bdb67 100644 --- a/src/models/etablissement/common.rs +++ b/src/models/etablissement/common.rs @@ -1,6 +1,6 @@ -use diesel::prelude::*; use super::super::schema::etablissement; use chrono::{NaiveDate, NaiveDateTime}; +use diesel::prelude::*; use serde::Serialize; #[derive(Insertable, Queryable, Serialize, Clone, Debug)] @@ -54,4 +54,9 @@ pub struct Etablissement { pub activite_principale: Option, pub nomenclature_activite_principale: Option, pub caractere_employeur: Option, + pub dernier_numero_voie: Option, + pub indice_repetition_dernier_numero_voie: Option, + pub identifiant_adresse: Option, + pub coordonnee_lambert_x: Option, + pub coordonnee_lambert_y: Option, } diff --git a/src/models/etablissement/mod.rs b/src/models/etablissement/mod.rs index cce5f39..6063362 100644 --- a/src/models/etablissement/mod.rs +++ b/src/models/etablissement/mod.rs @@ -20,14 +20,20 @@ pub fn get(connection: &mut Connection, siret: &str) -> Result Result, Error> { +pub fn get_with_siren( + connection: &mut Connection, + siren: &str, +) -> Result, Error> { dsl::etablissement .filter(dsl::siren.eq(siren)) .load::(connection) .map_err(|error| error.into()) } -pub fn get_siege_with_siren(connection: &mut Connection, siren: &str) -> Result { +pub fn get_siege_with_siren( + connection: &mut Connection, + siren: &str, +) -> Result { dsl::etablissement .filter(dsl::siren.eq(siren).and(dsl::etablissement_siege.eq(true))) .first::(connection) @@ -77,10 +83,8 @@ impl UpdatableModel for EtablissementModel { fn swap(&self, connectors: &Connectors) -> Result<(), UpdatableError> { let mut connection = connectors.local.pool.get()?; connection.build_transaction().read_write().run(|conn| { - sql_query("ALTER TABLE etablissement RENAME TO etablissement_temp") - .execute(conn)?; - sql_query("ALTER TABLE etablissement_staging RENAME TO etablissement") - .execute(conn)?; + sql_query("ALTER TABLE etablissement RENAME TO etablissement_temp").execute(conn)?; + sql_query("ALTER TABLE etablissement_staging RENAME TO etablissement").execute(conn)?; sql_query("ALTER TABLE etablissement_temp RENAME TO etablissement_staging") .execute(conn)?; sql_query("TRUNCATE etablissement_staging").execute(conn)?; @@ -203,6 +207,12 @@ impl UpdatableModel for EtablissementModel { dsl::nomenclature_activite_principale .eq(excluded(dsl::nomenclature_activite_principale)), dsl::caractere_employeur.eq(excluded(dsl::caractere_employeur)), + dsl::dernier_numero_voie.eq(excluded(dsl::dernier_numero_voie)), + dsl::indice_repetition_dernier_numero_voie + .eq(excluded(dsl::indice_repetition_dernier_numero_voie)), + dsl::identifiant_adresse.eq(excluded(dsl::identifiant_adresse)), + dsl::coordonnee_lambert_x.eq(excluded(dsl::coordonnee_lambert_x)), + dsl::coordonnee_lambert_y.eq(excluded(dsl::coordonnee_lambert_y)), )) .execute(&mut connection)?; diff --git a/src/models/schema.rs b/src/models/schema.rs index af7fce3..3e63529 100644 --- a/src/models/schema.rs +++ b/src/models/schema.rs @@ -55,6 +55,11 @@ diesel::table! { activite_principale -> Nullable, nomenclature_activite_principale -> Nullable, caractere_employeur -> Nullable, + dernier_numero_voie -> Nullable, + indice_repetition_dernier_numero_voie -> Nullable, + identifiant_adresse -> Nullable, + coordonnee_lambert_x -> Nullable, + coordonnee_lambert_y -> Nullable, } } @@ -113,6 +118,11 @@ diesel::table! { activite_principale -> Nullable, nomenclature_activite_principale -> Nullable, caractere_employeur -> Nullable, + dernier_numero_voie -> Nullable, + indice_repetition_dernier_numero_voie -> Nullable, + identifiant_adresse -> Nullable, + coordonnee_lambert_x -> Nullable, + coordonnee_lambert_y -> Nullable, } } @@ -173,9 +183,9 @@ diesel::table! { nic_siege -> Nullable, economie_sociale_solidaire -> Nullable, #[max_length = 1] - caractere_employeur -> Nullable, - #[max_length = 1] societe_mission -> Nullable, + #[max_length = 1] + caractere_employeur -> Nullable, } } @@ -219,9 +229,9 @@ diesel::table! { nic_siege -> Nullable, economie_sociale_solidaire -> Nullable, #[max_length = 1] - caractere_employeur -> Nullable, - #[max_length = 1] societe_mission -> Nullable, + #[max_length = 1] + caractere_employeur -> Nullable, } } diff --git a/src/models/unite_legale/columns.rs b/src/models/unite_legale/columns.rs index 772fc55..fe7c23c 100644 --- a/src/models/unite_legale/columns.rs +++ b/src/models/unite_legale/columns.rs @@ -31,6 +31,6 @@ activite_principale, nomenclature_activite_principale, nic_siege, economie_sociale_solidaire, -caractere_employeur, -societe_mission +societe_mission, +caractere_employeur "#; diff --git a/src/models/unite_legale/common.rs b/src/models/unite_legale/common.rs index 23b28d0..8376080 100644 --- a/src/models/unite_legale/common.rs +++ b/src/models/unite_legale/common.rs @@ -1,6 +1,6 @@ -use diesel::prelude::*; use super::super::schema::unite_legale; use chrono::{NaiveDate, NaiveDateTime}; +use diesel::prelude::*; use serde::Serialize; #[derive(Insertable, Queryable, Serialize, Clone, Debug)] @@ -38,6 +38,6 @@ pub struct UniteLegale { pub nomenclature_activite_principale: Option, pub nic_siege: Option, pub economie_sociale_solidaire: Option, - pub caractere_employeur: Option, pub societe_mission: Option, + pub caractere_employeur: Option, } diff --git a/src/models/unite_legale/mod.rs b/src/models/unite_legale/mod.rs index 3e8fffe..93c1b05 100644 --- a/src/models/unite_legale/mod.rs +++ b/src/models/unite_legale/mod.rs @@ -63,10 +63,8 @@ impl UpdatableModel for UniteLegaleModel { fn swap(&self, connectors: &Connectors) -> Result<(), UpdatableError> { let mut connection = connectors.local.pool.get()?; connection.build_transaction().read_write().run(|conn| { - sql_query("ALTER TABLE unite_legale RENAME TO unite_legale_temp") - .execute(conn)?; - sql_query("ALTER TABLE unite_legale_staging RENAME TO unite_legale") - .execute(conn)?; + sql_query("ALTER TABLE unite_legale RENAME TO unite_legale_temp").execute(conn)?; + sql_query("ALTER TABLE unite_legale_staging RENAME TO unite_legale").execute(conn)?; sql_query("ALTER TABLE unite_legale_temp RENAME TO unite_legale_staging") .execute(conn)?; sql_query("TRUNCATE unite_legale_staging").execute(conn)?; @@ -172,8 +170,8 @@ impl UpdatableModel for UniteLegaleModel { .eq(excluded(dsl::nomenclature_activite_principale)), dsl::nic_siege.eq(excluded(dsl::nic_siege)), dsl::economie_sociale_solidaire.eq(excluded(dsl::economie_sociale_solidaire)), - dsl::caractere_employeur.eq(excluded(dsl::caractere_employeur)), dsl::societe_mission.eq(excluded(dsl::societe_mission)), + dsl::caractere_employeur.eq(excluded(dsl::caractere_employeur)), )) .execute(&mut connection)?; diff --git a/src/update/action/sync_insee.rs b/src/update/action/sync_insee.rs index d5eb610..54ed947 100644 --- a/src/update/action/sync_insee.rs +++ b/src/update/action/sync_insee.rs @@ -37,6 +37,8 @@ impl Action for SyncInseeAction { planned_count, )?; + debug!("Syncing {} {:#?}...", planned_count, group_type); + while let Some(cursor) = current_cursor { let (next_cursor, inserted_count) = model .update_daily_data(connectors, timestamp, cursor)