Add genAI group

Closes #410
georgetown-cset · Jun 18, 2024 · 3daa4b5
1 parent 7c92bc4
commit 3daa4b5
Show file tree

Hide file tree

Showing 14 changed files with 54 additions and 15 deletions.
diff --git a/company_linkage/parat_scripts/aggregate_organizations.py b/company_linkage/parat_scripts/aggregate_organizations.py
@@ -36,6 +36,7 @@ def __init__(self, cset_id, name):
         self.in_fortune_global_500 = False
         self.in_sandp_500 = False
         self.in_global_big_tech = False
+        self.in_gen_ai = False
 
     def add_location(self, city, province_state, country):
         """
@@ -237,15 +238,16 @@ def add_linkedin(self, linkedin):
             self.linkedin.append(linkedin)
 
     def add_sandp(self, in_sandp_500):
-        if in_sandp_500:
-            self.in_sandp_500 = True
+        self.in_sandp_500 = bool(in_sandp_500)
 
     def add_fortune(self, in_fortune_global_500):
-        if in_fortune_global_500:
-            self.in_fortune_global_500 = True
+        self.in_fortune_global_500 = bool(in_fortune_global_500)
 
     def add_in_global_big_tech(self, in_global_big_tech):
-        self.in_global_big_tech = in_global_big_tech
+        self.in_global_big_tech = bool(in_global_big_tech)
+
+    def add_in_gen_ai(self, in_gen_ai):
+        self.in_gen_ai = bool(in_gen_ai)
 
 
 class OrganizationAggregator:
@@ -418,6 +420,7 @@ def update_organization_data(self, org, org_id):
         org_info.add_sandp(org["in_sandp_500"])
         org_info.add_fortune(org["in_fortune_global_500"])
         org_info.add_in_global_big_tech(org["in_global_big_tech"])
+        org_info.add_in_gen_ai(org["in_gen_ai"])
 
     def print_output(self, output_file, local):
         """
@@ -440,7 +443,7 @@ def print_output(self, output_file, local):
                   "ror_id": org_info.ror, "regex": org_info.regex,
                   "BGOV_id": org_info.bgov_id, "linkedin": org_info.linkedin,
                   "in_sandp_500": org_info.in_sandp_500, "in_fortune_global_500": org_info.in_fortune_global_500,
-                  "in_global_big_tech": org_info.in_global_big_tech,
+                  "in_global_big_tech": org_info.in_global_big_tech, "in_gen_ai": org_info.in_gen_ai,
                   "children": org_info.children,
                   "non_agg_children": org_info.non_agg_children}
             out.write(json.dumps(js, ensure_ascii=False) + "\n")

diff --git a/company_linkage/schemas/aggregated_organizations.json b/company_linkage/schemas/aggregated_organizations.json
@@ -227,6 +227,12 @@
     "type": "BOOLEAN",
     "description": "Boolean identifying whether the company is a big tech company."
   },
+  {
+    "mode": "REQUIRED",
+    "name": "in_gen_ai",
+    "type": "BOOLEAN",
+    "description": "Boolean identifying whether the company is active in GenAI and LLMs."
+  },
   {
     "fields": [
       {

diff --git a/company_linkage/sql/initial_visualization_data.sql b/company_linkage/sql/initial_visualization_data.sql
@@ -47,6 +47,7 @@ SELECT
   in_sandp_500,
   in_fortune_global_500,
   in_global_big_tech,
+  in_gen_ai,
   COALESCE(ai_pubs, 0) as ai_pubs,
   COALESCE(cv_pubs, 0) as cv_pubs,
   COALESCE(nlp_pubs, 0) as nlp_pubs,

diff --git a/company_linkage/sql/organizations.sql b/company_linkage/sql/organizations.sql
@@ -22,6 +22,14 @@ global_big_tech AS (
   where
     name = "Global Big Tech"
 ),
+gen_ai AS (
+  SELECT DISTINCT
+    new_cset_id
+  FROM
+    parat_input.groups
+  where
+    name = "GenAI Contenders"
+),
 mapped_parents AS (
   SELECT
     parentage.new_cset_id,
@@ -98,7 +106,8 @@ FROM (
     ARRAY_AGG(DISTINCT IF(source = "LinkedIn", external_id, null) IGNORE NULLS) AS linkedin,
     sp_500.new_cset_id IS NOT NULL AS in_sandp_500,
     global_500.new_cset_id IS NOT NULL AS in_fortune_global_500,
-    global_big_tech.new_cset_id IS NOT NULL AS in_global_big_tech
+    global_big_tech.new_cset_id IS NOT NULL AS in_global_big_tech,
+    gen_ai.new_cset_id IS NOT NULL AS in_gen_ai
   FROM
     parat_input.organizations
   LEFT JOIN
@@ -137,6 +146,10 @@ FROM (
     global_big_tech
   USING
     (new_cset_id)
+  LEFT JOIN
+    gen_ai
+  USING
+    (new_cset_id)
   GROUP BY
     new_cset_id,
     name,
@@ -151,4 +164,5 @@ FROM (
     in_sandp_500,
     in_fortune_global_500,
     in_global_big_tech,
+    in_gen_ai,
     new_cset_id)
diff --git a/company_linkage/sql/visualization_data_omit_by_rule.sql b/company_linkage/sql/visualization_data_omit_by_rule.sql
@@ -27,7 +27,8 @@ WITH
     AND ARRAY_LENGTH(market) = 0
     and in_fortune_global_500 IS FALSE
     and in_sandp_500 IS FALSE
-    and in_global_big_tech IS FALSE)
+    and in_global_big_tech IS FALSE
+    and in_gen_ai IS FALSE)
 SELECT
   *
 FROM

diff --git a/web/gui-v2/src/components/DetailViewMoreMetadataDialog.jsx b/web/gui-v2/src/components/DetailViewMoreMetadataDialog.jsx
@@ -89,6 +89,9 @@ const MoreMetadataDialog = ({
   if ( data.groups.globalBigTech ) {
     groups.push("Global Big Tech");
   }
+  if ( data.groups.genAI ) {
+    groups.push("GenAI Contenders");
+  }
   if ( groups.length > 0 ) {
     metadata.push({
       title: "Groups",

diff --git a/web/gui-v2/src/data/companies.json b/web/gui-v2/src/data/companies.json
diff --git a/web/gui-v2/src/static_data/data.js b/web/gui-v2/src/static_data/data.js
diff --git a/web/gui-v2/src/static_data/overall_data.json b/web/gui-v2/src/static_data/overall_data.json
diff --git a/web/scripts/retrieve_data.py b/web/scripts/retrieve_data.py
@@ -312,7 +312,8 @@ def add_ranks(rows: list) -> None:
         all_metrics = set()
         row_and_key_groups = [(rows, "rank"),
                                ([r for r in rows if r.get("groups", {}).get("sp500")], "sp500_rank"),
-                               ([r for r in rows if r.get("groups", {}).get("globalBigTech")], "globalBigTech_rank")]
+                               ([r for r in rows if r.get("groups", {}).get("globalBigTech")], "globalBigTech_rank"),
+                               ([r for r in rows if r.get("groups", {}).get("genAI")], "genAI_rank")]
         for filtered_rows, rank_key in row_and_key_groups:
             for row in filtered_rows:
                 for metric in row.get(metric_list_name, {}):
@@ -581,7 +582,8 @@ def clean_misc_fields(js: dict, refresh_images: bool, lowercase_to_orig_cname: d
     js["child_crunchbase"] = clean_crunchbase(js["child_crunchbase"])
     group_keys_to_names = {
         "sp500": "in_sandp_500",
-        "globalBigTech": "in_global_big_tech"
+        "globalBigTech": "in_global_big_tech",
+        "genAI": "in_gen_ai"
     }
     js["groups"] = {k: js.pop(v, False) for k, v in group_keys_to_names.items()}
 
@@ -876,6 +878,11 @@ def clean(refresh_images: bool, refresh_sectors: bool) -> dict:
             "cset_id": GROUP_OFFSET+502,
             "rows": []
         },
+        "genAI": {
+            "name": "GenAI Contenders",
+            "cset_id": GROUP_OFFSET + 503,
+            "rows": []
+        },
     }
     for row in rows:
         company_rows.append(row)

diff --git a/web/tests/test_data/alphabet_input.json b/web/tests/test_data/alphabet_input.json
@@ -145,6 +145,7 @@
   ],
   "in_sandp_500": true,
   "in_global_big_tech": true,
+  "in_gen_ai": true,
   "ai_pubs": 136,
   "cv_pubs": 68,
   "nlp_pubs": 1,

diff --git a/web/tests/test_data/alphabet_output.json b/web/tests/test_data/alphabet_output.json
@@ -80,7 +80,8 @@
   "crunchbase_description": "Alphabet is a holding company that provides projects with resources, freedom, and focus to make their ideas happen.",
   "groups": {
     "sp500": true,
-    "globalBigTech": true
+    "globalBigTech": true,
+    "genAI": true
   },
   "fields": [
     {

diff --git a/web/tests/test_data/hugging_face_input.json b/web/tests/test_data/hugging_face_input.json
@@ -32,6 +32,7 @@
   ],
   "in_sandp_500": false,
   "in_global_big_tech": false,
+  "in_gen_ai": false,
   "ai_pubs": 18,
   "cv_pubs": 1,
   "nlp_pubs": 15,

diff --git a/web/tests/test_data/hugging_face_output.json b/web/tests/test_data/hugging_face_output.json
@@ -34,7 +34,8 @@
   "crunchbase_description": "Hugging Face allows users to build, train, and deploy art models using the reference open source in machine learning.",
   "groups": {
     "sp500": false,
-    "globalBigTech": false
+    "globalBigTech": false,
+    "genAI": false
   },
   "fields": [
     {