Skip to content

Commit

Permalink
Update organizations query to use new Airtable data
Browse files Browse the repository at this point in the history
Temporarily points to the parat_input_test dataset
  • Loading branch information
jmelot committed May 15, 2024
1 parent 00ab1aa commit 007f103
Showing 1 changed file with 45 additions and 53 deletions.
98 changes: 45 additions & 53 deletions company_linkage/sql/organizations.sql
Original file line number Diff line number Diff line change
@@ -1,11 +1,27 @@
-- Group-membership lookups: each CTE yields the distinct CSET_id values that
-- parat_input_test.groups lists under one specific group name.
WITH sp_500 AS (
  -- CSET_ids whose group name is exactly "S&P 500".
  SELECT DISTINCT
    grp.CSET_id
  FROM
    parat_input_test.groups AS grp
  WHERE
    grp.name = "S&P 500"
),
global_500 AS (
  -- CSET_ids whose group name is exactly "Global 500".
  SELECT DISTINCT
    grp.CSET_id
  FROM
    parat_input_test.groups AS grp
  WHERE
    grp.name = "Global 500"
)
SELECT
* REPLACE( (
SELECT
ARRAY_AGG(STRUCT(alias_language,
ARRAY_AGG(STRUCT(language,
alias))
FROM (
SELECT
DISTINCT alias_language,
DISTINCT language,
alias
FROM
UNNEST(aliases) a )) AS aliases, (
Expand All @@ -32,92 +48,68 @@ SELECT
FROM (
SELECT
CSET_id,
organizations_joined.name,
organizations.name,
STRUCT(city,
province_state,
organizations_joined.country) AS location,
state_or_province,
organizations.country) AS location,
website,
ARRAY_AGG(STRUCT(alias_language,
ARRAY_AGG(STRUCT(aliases.language,
alias)) AS aliases,
ARRAY_AGG(STRUCT(CASE
WHEN parent_acquisition IS TRUE THEN TRUE
WHEN parentage.is_acquisition_parent IS TRUE THEN TRUE
ELSE
FALSE
END
AS parent_acquisition,
parent_name,
parent_id)) AS parent,
ARRAY_AGG(DISTINCT permid IGNORE NULLS) AS permid,
ARRAY_AGG(STRUCT(exchange,
ARRAY_AGG(DISTINCT IF(source = "PermID", external_id, null) IGNORE NULLS) AS permid,
ARRAY_AGG(STRUCT(market as exchange,
ticker)) AS market,
STRUCT(crunchbase_uuid,
crunchbase_url) AS crunchbase,
ARRAY_AGG(DISTINCT IF(source = "Crunchbase UUID", external_id, null) IGNORE NULLS) AS crunchbase_uuid,
ARRAY_AGG(DISTINCT IF(source = "Crunchbase URL", external_id, null) IGNORE NULLS) AS crunchbase_url,
ARRAY_AGG(DISTINCT ror.id IGNORE NULLS) AS ror_id,
regex,
ARRAY_AGG(DISTINCT bgov_id IGNORE NULLS) AS BGOV_id,
linkedin,
CASE
WHEN in_sandp_500 IS TRUE THEN TRUE
ELSE
FALSE
END
AS in_sandp_500,
CASE
WHEN in_fortune_global_500 IS TRUE THEN TRUE
ELSE
FALSE
END
AS in_fortune_global_500,
ids_joined.comment
ARRAY_AGG(DISTINCT IF(source = "Regex", external_id, null) IGNORE NULLS) AS regex,
ARRAY_AGG(DISTINCT IF(source = "BGOV", external_id, null) IGNORE NULLS) AS BGOV_id,
ARRAY_AGG(DISTINCT IF(source = "LinkedIn", external_id, null) IGNORE NULLS) AS linkedin,
sp_500.CSET_id IS NOT NULL AS in_sandp_500,
global_500.CSET_id IS NOT NULL AS in_fortune_global_500
FROM
parat_input.organizations_joined
LEFT JOIN
parat_input.alias_joined
USING
(CSET_id)
parat_input_test.organizations
LEFT JOIN
parat_input.parent_joined
parat_input_test.aliases
USING
(CSET_id)
LEFT JOIN
parat_input.permid_joined
parat_input_test.parentage
USING
(CSET_id)
LEFT JOIN
parat_input.market_joined
parat_input_test.ids
USING
(CSET_id)
LEFT JOIN
parat_input.ids_joined
parat_input_test.tickers
USING
(CSET_id)
LEFT JOIN
parat_input.grid_joined
USING
(CSET_id)
gcp_cset_ror.ror
ON
TRIM(ids.external_id) = external_ids.GRID.all and ids.source = "GRID"
LEFT JOIN
parat_input.bgov_validate
sp_500
USING
(CSET_id)
LEFT JOIN
parat_input.linkedin_joined
global_500
USING
(CSET_id)
LEFT JOIN
gcp_cset_ror.ror
ON
TRIM(grid_joined.grid) = external_ids.GRID.all
GROUP BY
CSET_id,
name,
city,
province_state,
organizations_joined.country,
state_or_province,
organizations.country,
website,
crunchbase_uuid,
crunchbase_url,
regex,
linkedin,
in_sandp_500,
in_fortune_global_500,
comment)
in_fortune_global_500)

0 comments on commit 007f103

Please sign in to comment.