-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
8 changed files
with
165 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,28 @@ | ||
import argparse | ||
|
||
from get_ai_counts import CountGetter | ||
|
||
|
||
def main() -> None:
    """Write highly cited AI publication counts to a jsonl file.

    Command-line arguments:
        output_file: Path of the jsonl file to write. It must end in
            ``.jsonl``; otherwise the usage help is printed and nothing
            is queried or written.

    This follows the same flow as the regular AI publication counts
    script, except that it reads from the highly cited publications table
    and stores the counts under the ``highly_cited_ai_pubs`` name.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "output_file",
        type=str,
        help="A jsonl file for writing output data to create new tables",
    )
    args = parser.parse_args()
    # Require a real ".jsonl" suffix. A plain substring test would also
    # accept names such as "jsonl_notes.txt". An empty string fails this
    # check too, so no separate emptiness test is needed.
    if not args.output_file.endswith(".jsonl"):
        parser.print_help()
        return

    paper_finder = CountGetter()
    # NOTE(review): presumably loads the company identifiers used by the
    # count query -- confirm against CountGetter.
    paper_finder.get_identifiers()
    # These are the only two things that make this different from running
    # AI pubs: we select from a different table...
    table_name = "staging_ai_companies_visualization.highly_cited_ai_publications"
    # ...and we write our data out under a different field name.
    companies = paper_finder.run_query_papers(
        table_name, "highly_cited_ai_pubs", by_year=True
    )
    paper_finder.write_output(companies, args.output_file)


if __name__ == "__main__":
    main()
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
[ | ||
{ | ||
"mode": "REQUIRED", | ||
"name": "CSET_id", | ||
"type": "INTEGER", | ||
"description": "The CSET_id for a company." | ||
}, | ||
{ | ||
"mode": "REQUIRED", | ||
"name": "highly_cited_ai_pubs", | ||
"type": "INTEGER", | ||
"description": "Count of highly cited AI papers." | ||
}, | ||
{ | ||
"fields": [ | ||
{ | ||
"mode": "NULLABLE", | ||
"name": "year", | ||
"type": "INTEGER", | ||
"description": "Publication year of papers." | ||
}, | ||
{ | ||
"mode": "NULLABLE", | ||
"name": "highly_cited_ai_pubs", | ||
"type": "INTEGER", | ||
"description": "Count of highly cited AI papers published in that year." | ||
} | ||
], | ||
"mode": "REPEATED", | ||
"name": "highly_cited_ai_pubs_by_year", | ||
"type": "RECORD", | ||
"description": "Highly cited AI papers by year." | ||
} | ||
] |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
-- Select the AI publications whose citation count is at or above the
-- 90th percentile for their publication year.
WITH
  -- AI publications together with their affiliation details.
  ai_pubs AS (
    SELECT
      merged_id,
      ror_id,
      org_name,
      country,
      year
    FROM
      staging_ai_companies_visualization.ai_publications
  ),

  -- Number of distinct citing papers for each publication/affiliation row.
  -- The inner join drops publications that never appear as a ref_id,
  -- so uncited papers are not part of the distribution below.
  citation_counts AS (
    SELECT
      refs.ref_id AS merged_id,
      COUNT(DISTINCT refs.merged_id) AS citation_count,
      pubs.ror_id,
      pubs.org_name,
      pubs.country,
      pubs.year
    FROM
      literature.references AS refs
    INNER JOIN
      ai_pubs AS pubs
    ON
      refs.ref_id = pubs.merged_id
    GROUP BY
      refs.ref_id,
      pubs.ror_id,
      pubs.org_name,
      pubs.country,
      pubs.year
  ),

  -- Attach the per-year 90th percentile of citation counts to every row.
  -- NOTE(review): the percentile is taken over the rows of citation_counts,
  -- so a paper with several affiliation rows contributes once per row --
  -- confirm this weighting is intended.
  with_year_threshold AS (
    SELECT
      merged_id,
      citation_count,
      ror_id,
      org_name,
      country,
      year,
      PERCENTILE_CONT(citation_count, 0.9) OVER (PARTITION BY year) AS year_p90
    FROM
      citation_counts
  )

-- Keep only the rows that meet or exceed their year's threshold.
SELECT DISTINCT
  merged_id,
  ror_id,
  org_name,
  country,
  year
FROM
  with_year_threshold
WHERE
  citation_count >= year_p90
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
27 changes: 27 additions & 0 deletions
27
company_linkage/sql/visualization_data_with_highly_cited.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,27 @@ | ||
-- Extend the visualization table with highly cited paper data.
-- Every row of the current visualization data is kept (LEFT JOIN); the
-- highly cited counts are attached by CSET id and are NULL for companies
-- that have no row in the counts table.
SELECT
  viz.*,
  counts.highly_cited_ai_pubs,
  counts.highly_cited_ai_pubs_by_year
FROM
  -- The current visualization data.
  staging_ai_companies_visualization.visualization_data_with_top_papers AS viz
LEFT JOIN
  -- The highly cited paper counts, keyed by CSET id.
  staging_ai_companies_visualization.highly_cited_paper_counts AS counts
ON
  viz.CSET_id = counts.CSET_id