Skip to content

Commit

Permalink
Add genAI group
Browse files: browse the repository at this point in the history
Closes #410
  • Loading branch information
jmelot committed Jun 18, 2024
1 parent 7c92bc4 commit 3daa4b5
Show file tree
Hide file tree
Showing 14 changed files with 54 additions and 15 deletions.
15 changes: 9 additions & 6 deletions company_linkage/parat_scripts/aggregate_organizations.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ def __init__(self, cset_id, name):
self.in_fortune_global_500 = False
self.in_sandp_500 = False
self.in_global_big_tech = False
self.in_gen_ai = False

def add_location(self, city, province_state, country):
"""
Expand Down Expand Up @@ -237,15 +238,16 @@ def add_linkedin(self, linkedin):
self.linkedin.append(linkedin)

def add_sandp(self, in_sandp_500):
if in_sandp_500:
self.in_sandp_500 = True
self.in_sandp_500 = bool(in_sandp_500)

def add_fortune(self, in_fortune_global_500):
if in_fortune_global_500:
self.in_fortune_global_500 = True
self.in_fortune_global_500 = bool(in_fortune_global_500)

def add_in_global_big_tech(self, in_global_big_tech):
self.in_global_big_tech = in_global_big_tech
self.in_global_big_tech = bool(in_global_big_tech)

def add_in_gen_ai(self, in_gen_ai):
self.in_gen_ai = bool(in_gen_ai)


class OrganizationAggregator:
Expand Down Expand Up @@ -418,6 +420,7 @@ def update_organization_data(self, org, org_id):
org_info.add_sandp(org["in_sandp_500"])
org_info.add_fortune(org["in_fortune_global_500"])
org_info.add_in_global_big_tech(org["in_global_big_tech"])
org_info.add_in_gen_ai(org["in_gen_ai"])

def print_output(self, output_file, local):
"""
Expand All @@ -440,7 +443,7 @@ def print_output(self, output_file, local):
"ror_id": org_info.ror, "regex": org_info.regex,
"BGOV_id": org_info.bgov_id, "linkedin": org_info.linkedin,
"in_sandp_500": org_info.in_sandp_500, "in_fortune_global_500": org_info.in_fortune_global_500,
"in_global_big_tech": org_info.in_global_big_tech,
"in_global_big_tech": org_info.in_global_big_tech, "in_gen_ai": org_info.in_gen_ai,
"children": org_info.children,
"non_agg_children": org_info.non_agg_children}
out.write(json.dumps(js, ensure_ascii=False) + "\n")
Expand Down
6 changes: 6 additions & 0 deletions company_linkage/schemas/aggregated_organizations.json
Original file line number Diff line number Diff line change
Expand Up @@ -227,6 +227,12 @@
"type": "BOOLEAN",
"description": "Boolean identifying whether the company is a big tech company."
},
{
"mode": "REQUIRED",
"name": "in_gen_ai",
"type": "BOOLEAN",
"description": "Boolean identifying whether the company is active in GenAI and LLMs."
},
{
"fields": [
{
Expand Down
1 change: 1 addition & 0 deletions company_linkage/sql/initial_visualization_data.sql
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ SELECT
in_sandp_500,
in_fortune_global_500,
in_global_big_tech,
in_gen_ai,
COALESCE(ai_pubs, 0) as ai_pubs,
COALESCE(cv_pubs, 0) as cv_pubs,
COALESCE(nlp_pubs, 0) as nlp_pubs,
Expand Down
16 changes: 15 additions & 1 deletion company_linkage/sql/organizations.sql
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,14 @@ global_big_tech AS (
where
name = "Global Big Tech"
),
gen_ai AS (
SELECT DISTINCT
new_cset_id
FROM
parat_input.groups
where
name = "GenAI Contenders"
),
mapped_parents AS (
SELECT
parentage.new_cset_id,
Expand Down Expand Up @@ -98,7 +106,8 @@ FROM (
ARRAY_AGG(DISTINCT IF(source = "LinkedIn", external_id, null) IGNORE NULLS) AS linkedin,
sp_500.new_cset_id IS NOT NULL AS in_sandp_500,
global_500.new_cset_id IS NOT NULL AS in_fortune_global_500,
global_big_tech.new_cset_id IS NOT NULL AS in_global_big_tech
global_big_tech.new_cset_id IS NOT NULL AS in_global_big_tech,
gen_ai.new_cset_id IS NOT NULL AS in_gen_ai
FROM
parat_input.organizations
LEFT JOIN
Expand Down Expand Up @@ -137,6 +146,10 @@ FROM (
global_big_tech
USING
(new_cset_id)
LEFT JOIN
gen_ai
USING
(new_cset_id)
GROUP BY
new_cset_id,
name,
Expand All @@ -151,4 +164,5 @@ FROM (
in_sandp_500,
in_fortune_global_500,
in_global_big_tech,
in_gen_ai,
new_cset_id)
3 changes: 2 additions & 1 deletion company_linkage/sql/visualization_data_omit_by_rule.sql
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,8 @@ WITH
AND ARRAY_LENGTH(market) = 0
and in_fortune_global_500 IS FALSE
and in_sandp_500 IS FALSE
and in_global_big_tech IS FALSE)
and in_global_big_tech IS FALSE
and in_gen_ai IS FALSE)
SELECT
*
FROM
Expand Down
3 changes: 3 additions & 0 deletions web/gui-v2/src/components/DetailViewMoreMetadataDialog.jsx
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,9 @@ const MoreMetadataDialog = ({
if ( data.groups.globalBigTech ) {
groups.push("Global Big Tech");
}
if ( data.groups.genAI ) {
groups.push("GenAI Contenders");
}
if ( groups.length > 0 ) {
metadata.push({
title: "Groups",
Expand Down
2 changes: 1 addition & 1 deletion web/gui-v2/src/data/companies.json

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion web/gui-v2/src/static_data/data.js

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion web/gui-v2/src/static_data/overall_data.json

Large diffs are not rendered by default.

11 changes: 9 additions & 2 deletions web/scripts/retrieve_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -312,7 +312,8 @@ def add_ranks(rows: list) -> None:
all_metrics = set()
row_and_key_groups = [(rows, "rank"),
([r for r in rows if r.get("groups", {}).get("sp500")], "sp500_rank"),
([r for r in rows if r.get("groups", {}).get("globalBigTech")], "globalBigTech_rank")]
([r for r in rows if r.get("groups", {}).get("globalBigTech")], "globalBigTech_rank"),
([r for r in rows if r.get("groups", {}).get("genAI")], "genAI_rank")]
for filtered_rows, rank_key in row_and_key_groups:
for row in filtered_rows:
for metric in row.get(metric_list_name, {}):
Expand Down Expand Up @@ -581,7 +582,8 @@ def clean_misc_fields(js: dict, refresh_images: bool, lowercase_to_orig_cname: d
js["child_crunchbase"] = clean_crunchbase(js["child_crunchbase"])
group_keys_to_names = {
"sp500": "in_sandp_500",
"globalBigTech": "in_global_big_tech"
"globalBigTech": "in_global_big_tech",
"genAI": "in_gen_ai"
}
js["groups"] = {k: js.pop(v, False) for k, v in group_keys_to_names.items()}

Expand Down Expand Up @@ -876,6 +878,11 @@ def clean(refresh_images: bool, refresh_sectors: bool) -> dict:
"cset_id": GROUP_OFFSET+502,
"rows": []
},
"genAI": {
"name": "GenAI Contenders",
"cset_id": GROUP_OFFSET + 503,
"rows": []
},
}
for row in rows:
company_rows.append(row)
Expand Down
1 change: 1 addition & 0 deletions web/tests/test_data/alphabet_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,7 @@
],
"in_sandp_500": true,
"in_global_big_tech": true,
"in_gen_ai": true,
"ai_pubs": 136,
"cv_pubs": 68,
"nlp_pubs": 1,
Expand Down
3 changes: 2 additions & 1 deletion web/tests/test_data/alphabet_output.json
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,8 @@
"crunchbase_description": "Alphabet is a holding company that provides projects with resources, freedom, and focus to make their ideas happen.",
"groups": {
"sp500": true,
"globalBigTech": true
"globalBigTech": true,
"genAI": true
},
"fields": [
{
Expand Down
1 change: 1 addition & 0 deletions web/tests/test_data/hugging_face_input.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@
],
"in_sandp_500": false,
"in_global_big_tech": false,
"in_gen_ai": false,
"ai_pubs": 18,
"cv_pubs": 1,
"nlp_pubs": 15,
Expand Down
3 changes: 2 additions & 1 deletion web/tests/test_data/hugging_face_output.json
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@
"crunchbase_description": "Hugging Face allows users to build, train, and deploy art models using the reference open source in machine learning.",
"groups": {
"sp500": false,
"globalBigTech": false
"globalBigTech": false,
"genAI": false
},
"fields": [
{
Expand Down

0 comments on commit 3daa4b5

Please sign in to comment.