From 007f103ef45e081cc9e95295869da2391446c83f Mon Sep 17 00:00:00 2001 From: Jennifer Melot Date: Fri, 10 May 2024 15:34:19 -0400 Subject: [PATCH] Update organizations query to use new airtable data Temporarily points to the parat_input_test dataset --- company_linkage/sql/organizations.sql | 98 ++++++++++++--------------- 1 file changed, 45 insertions(+), 53 deletions(-) diff --git a/company_linkage/sql/organizations.sql b/company_linkage/sql/organizations.sql index 5f1434d6..97f9eeb1 100644 --- a/company_linkage/sql/organizations.sql +++ b/company_linkage/sql/organizations.sql @@ -1,11 +1,27 @@ +WITH sp_500 AS ( + SELECT DISTINCT + CSET_id + FROM + parat_input_test.groups + where + name = "S&P 500" +), +global_500 AS ( + SELECT DISTINCT + CSET_id + FROM + parat_input_test.groups + where + name = "Global 500" +) SELECT * REPLACE( ( SELECT - ARRAY_AGG(STRUCT(alias_language, + ARRAY_AGG(STRUCT(language, alias)) FROM ( SELECT - DISTINCT alias_language, + DISTINCT language, alias FROM UNNEST(aliases) a )) AS aliases, ( @@ -32,92 +48,68 @@ SELECT FROM ( SELECT CSET_id, - organizations_joined.name, + organizations.name, STRUCT(city, - province_state, - organizations_joined.country) AS location, + state_or_province, + organizations.country) AS location, website, - ARRAY_AGG(STRUCT(alias_language, + ARRAY_AGG(STRUCT(aliases.language, alias)) AS aliases, ARRAY_AGG(STRUCT(CASE - WHEN parent_acquisition IS TRUE THEN TRUE + WHEN parentage.is_acquisition_parent IS TRUE THEN TRUE ELSE FALSE END AS parent_acquisition, parent_name, parent_id)) AS parent, - ARRAY_AGG(DISTINCT permid IGNORE NULLS) AS permid, - ARRAY_AGG(STRUCT(exchange, + ARRAY_AGG(DISTINCT IF(source = "PermID", external_id, null) IGNORE NULLS) AS permid, + ARRAY_AGG(STRUCT(market as exchange, ticker)) AS market, - STRUCT(crunchbase_uuid, - crunchbase_url) AS crunchbase, + ARRAY_AGG(DISTINCT IF(source = "Crunchbase UUID", external_id, null) IGNORE NULLS) AS crunchbase_uuid, + ARRAY_AGG(DISTINCT IF(source = "Crunchbase URL", external_id, null) IGNORE NULLS) AS crunchbase_url, ARRAY_AGG(DISTINCT ror.id IGNORE NULLS) AS ror_id, - regex, - ARRAY_AGG(DISTINCT bgov_id IGNORE NULLS) AS BGOV_id, - linkedin, - CASE - WHEN in_sandp_500 IS TRUE THEN TRUE - ELSE - FALSE - END - AS in_sandp_500, - CASE - WHEN in_fortune_global_500 IS TRUE THEN TRUE - ELSE - FALSE - END - AS in_fortune_global_500, - ids_joined.comment + ARRAY_AGG(DISTINCT IF(source = "Regex", external_id, null) IGNORE NULLS) AS regex, + ARRAY_AGG(DISTINCT IF(source = "BGOV", external_id, null) IGNORE NULLS) AS BGOV_id, + ARRAY_AGG(DISTINCT IF(source = "LinkedIn", external_id, null) IGNORE NULLS) AS linkedin, + sp_500.CSET_id IS NOT NULL AS in_sandp_500, + global_500.CSET_id IS NOT NULL AS in_fortune_global_500 FROM - parat_input.organizations_joined - LEFT JOIN - parat_input.alias_joined - USING - (CSET_id) + parat_input_test.organizations LEFT JOIN - parat_input.parent_joined + parat_input_test.aliases USING (CSET_id) LEFT JOIN - parat_input.permid_joined + parat_input_test.parentage USING (CSET_id) LEFT JOIN - parat_input.market_joined + parat_input_test.ids USING (CSET_id) LEFT JOIN - parat_input.ids_joined + parat_input_test.tickers USING (CSET_id) LEFT JOIN - parat_input.grid_joined - USING - (CSET_id) + gcp_cset_ror.ror + ON + TRIM(ids.external_id) = external_ids.GRID.all and ids.source = "GRID" LEFT JOIN - parat_input.bgov_validate + sp_500 USING (CSET_id) LEFT JOIN - parat_input.linkedin_joined + global_500 USING (CSET_id) - LEFT JOIN - gcp_cset_ror.ror - ON - TRIM(grid_joined.grid) = external_ids.GRID.all GROUP BY CSET_id, name, city, - province_state, - organizations_joined.country, + state_or_province, + organizations.country, website, - crunchbase_uuid, - crunchbase_url, - regex, - linkedin, in_sandp_500, - in_fortune_global_500, - comment) \ No newline at end of file + in_fortune_global_500) \ No newline at end of file