From c3f6a1ef823dc99080bfe3911facee5009419b99 Mon Sep 17 00:00:00 2001 From: melissagqc Date: Wed, 4 May 2022 16:26:30 -0400 Subject: [PATCH 01/24] Census QWI finished but not completely tested. --- flowsa/data_source_scripts/Census_qwi.py | 124 ++++++++++++++++++ .../flowbyactivitymethods/Census_qwi.yaml | 25 ++++ 2 files changed, 149 insertions(+) create mode 100644 flowsa/data_source_scripts/Census_qwi.py create mode 100644 flowsa/methods/flowbyactivitymethods/Census_qwi.yaml diff --git a/flowsa/data_source_scripts/Census_qwi.py b/flowsa/data_source_scripts/Census_qwi.py new file mode 100644 index 000000000..86bd33c5c --- /dev/null +++ b/flowsa/data_source_scripts/Census_qwi.py @@ -0,0 +1,124 @@ +# BLS_QCEW.py (flowsa) +# !/usr/bin/env python3 +# coding=utf-8 +""" +Pulls Quarterly Census of Employment and Wages data in NAICS from Bureau +of Labor Statistics. Writes out to various FlowBySector class files for +these data items +EMP = Number of employees, Class = Employment +PAYANN = Annual payroll ($1,000), Class = Money +ESTAB = Number of establishments, Class = Other +This script is designed to run with a configuration parameter +--year = 'year' e.g. 2015 +""" + +import json +import pandas as pd +import numpy as np +from flowsa.location import get_all_state_FIPS_2, get_county_FIPS, US_FIPS +from flowsa.common import fba_default_grouping_fields, load_api_key +from flowsa.schema import flow_by_activity_wsec_fields, \ + flow_by_activity_mapped_wsec_fields +from flowsa.flowbyfunctions import assign_fips_location_system, \ + aggregator +from flowsa.dataclean import add_missing_flow_by_fields, \ + replace_strings_with_NoneType + + + +def census_qwi_url_helper(*, build_url, year, config, **_): + """ + This helper function uses the "build_url" input from flowbyactivity.py, + which is a base url for data imports that requires parts of the url text + string to be replaced with info specific to the data year. 
This function + does not parse the data, only modifies the urls from which data is + obtained. + :param build_url: string, base url + :param config: dictionary, items in FBA method yaml + :param args: dictionary, arguments specified when running flowbyactivity.py + flowbyactivity.py ('year' and 'source') + :return: list, urls to call, concat, parse, format into Flow-By-Activity + format + """ + quarters = [1, 2, 3, 4] + urls = [] + if int(year) >= 2015: + fips_year = str(2015) + elif int(year) >= 2013: + fips_year = str(2013) + else: + fips_year = str(2010) + county_fips_df = get_county_FIPS(fips_year) + county_fips = county_fips_df.FIPS + # url = "https://api.census.gov/data/timeseries/qwi/se?get=industry,EmpTotal,ownercode&for=county:198&in=state:02&year=2002&quarter=1&key=8690959118863c43f40a6b1b3ccb1fe3f67578de" + # urls.append(url) + for q in quarters: + for d in county_fips: + url = build_url + url = url.replace('__year__', str(year)) + userAPIKey = load_api_key(config['api_name']) + url = url.replace("__apiKey__", userAPIKey) + state_digit = str(d[0]) + str(d[1]) + county_digit = str(d[2]) + str(d[3]) + str(d[4]) + url = url.replace("__state__", state_digit) + url = url.replace("__county__", county_digit) + url = url.replace("__quarter__", str(q)) + urls.append(url) + return urls + + +def census_qwi_call(*, resp, **_): + """ + Convert response for calling url to pandas dataframe, + begin parsing df into FBA format + :param resp: df, response from url call + :return: pandas dataframe of original source data + """ + json_load = json.loads(resp.text) + # convert response to dataframe + df = pd.DataFrame(data=json_load[1:len(json_load)], columns=json_load[0]) + return df + + +def census_qwi_parse(*, df_list, year, **_): + """ + Combine, parse, and format the provided dataframes + :param df_list: list of dataframes to concat and format + :param args: dictionary, used to run flowbyactivity.py + ('year' and 'source') + :return: df, parsed and partially formatted to 
flowbyactivity + specifications + """ + # Concat dataframes + df = pd.concat(df_list, sort=False) + # drop rows don't need + # get rid of None values in EmpTotal + df = df[df.EmpTotal.notnull()] + df.loc[df['ownercode'] == 'A00', 'Owner'] = 'State and local government plus private ownership' + df.loc[df['ownercode'] == 'A01', 'Owner'] = 'Federal government' + df.loc[df['ownercode'] == 'A05', 'Owner'] = 'All Private' + df = df.reindex() + + # What to do: + # Combine the State and County into the location. + df['Location'] = df['state'] + df['county'] + + + # industry needs to be renamed Activity Produced by. + # add the Quarter and ownership codes to flowname. + + df['FlowName'] = "Employment, owner code " + df['Owner'] + "Quarter " + df["quarter"] + df = df.rename(columns={'EmpTotal': 'FlowAmount', + 'year': 'Year', + 'industry': "ActivityProducedBy"}) + + df = df.drop(columns=['state', 'county', 'Owner', 'ownercode']) + # add location system based on year of data + df = assign_fips_location_system(df, year) + # add hard code data + df['SourceName'] = 'Census_QWI' + df['FlowType'] = "ELEMENTARY_FLOW" + df['Class'] = "Employment" + return df + + diff --git a/flowsa/methods/flowbyactivitymethods/Census_qwi.yaml b/flowsa/methods/flowbyactivitymethods/Census_qwi.yaml new file mode 100644 index 000000000..20a81444d --- /dev/null +++ b/flowsa/methods/flowbyactivitymethods/Census_qwi.yaml @@ -0,0 +1,25 @@ +author: US Census Bureau +source_name: Quarterly Workforce Indicators +source_url: https://www.census.gov/data/developers/data-sets/qwi.html#ownership +bib_id: Census_qwi +api_name: Census +api_key_required: false +format: csv # comma delineated data +url: + base_url: https://api.census.gov/data/timeseries/qwi/se?get=industry,EmpTotal,ownercode&for=county:__county__&in=state:__state__&year=__year__&quarter=__quarter__&key=__apiKey__ +url_replace_fxn: census_qwi_url_helper +call_response_fxn: census_qwi_call +parse_response_fxn: census_qwi_parse +years: +- 2002 +- 2010 
+- 2011 +- 2012 +- 2013 +- 2014 +- 2015 +- 2016 +- 2017 +- 2018 +- 2019 +- 2020 From 89449097de92078636c2df16809fd75adec6b66d Mon Sep 17 00:00:00 2001 From: unknown Date: Wed, 11 May 2022 15:55:17 -0400 Subject: [PATCH 02/24] Took out the hardcoding of the URL's with issues --- flowsa/data_source_scripts/Census_qwi.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/flowsa/data_source_scripts/Census_qwi.py b/flowsa/data_source_scripts/Census_qwi.py index 86bd33c5c..92744c619 100644 --- a/flowsa/data_source_scripts/Census_qwi.py +++ b/flowsa/data_source_scripts/Census_qwi.py @@ -50,8 +50,6 @@ def census_qwi_url_helper(*, build_url, year, config, **_): fips_year = str(2010) county_fips_df = get_county_FIPS(fips_year) county_fips = county_fips_df.FIPS - # url = "https://api.census.gov/data/timeseries/qwi/se?get=industry,EmpTotal,ownercode&for=county:198&in=state:02&year=2002&quarter=1&key=8690959118863c43f40a6b1b3ccb1fe3f67578de" - # urls.append(url) for q in quarters: for d in county_fips: url = build_url @@ -64,6 +62,7 @@ def census_qwi_url_helper(*, build_url, year, config, **_): url = url.replace("__county__", county_digit) url = url.replace("__quarter__", str(q)) urls.append(url) + return urls @@ -74,9 +73,16 @@ def census_qwi_call(*, resp, **_): :param resp: df, response from url call :return: pandas dataframe of original source data """ - json_load = json.loads(resp.text) - # convert response to dataframe - df = pd.DataFrame(data=json_load[1:len(json_load)], columns=json_load[0]) + try: + json_load = json.loads(resp.text) + # convert response to dataframe + df = pd.DataFrame(data=json_load[1:len(json_load)], columns=json_load[0]) + except: + print(resp) + df = pd.DataFrame() + + + return df From 4a1d0ad43fb7a0ecfde56ef4608c6b5f0d668c41 Mon Sep 17 00:00:00 2001 From: unknown Date: Fri, 13 May 2022 09:10:31 -0400 Subject: [PATCH 03/24] added a finally condition to the try except block --- flowsa/data_source_scripts/Census_qwi.py 
| 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/flowsa/data_source_scripts/Census_qwi.py b/flowsa/data_source_scripts/Census_qwi.py index 92744c619..fb652374b 100644 --- a/flowsa/data_source_scripts/Census_qwi.py +++ b/flowsa/data_source_scripts/Census_qwi.py @@ -80,10 +80,8 @@ def census_qwi_call(*, resp, **_): except: print(resp) df = pd.DataFrame() - - - - return df + finally: + return df def census_qwi_parse(*, df_list, year, **_): From da25ef6f835ca59786efba7f82080922fb8b5ef5 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 8 Jul 2022 11:49:54 -0400 Subject: [PATCH 04/24] re-add develop branches --- requirements.txt | 6 +++--- setup.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index f9356c917..6223afaf2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List#egg=fedelemflowlist -git+https://github.com/USEPA/esupy#egg=esupy -git+https://github.com/USEPA/standardizedinventories#egg=StEWI +git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git@develop#egg=fedelemflowlist +git+https://github.com/USEPA/esupy.git@develop#egg=esupy +git+https://github.com/USEPA/standardizedinventories.git@develop#egg=StEWI pandas>=1.3.2 # Powerful data structures for data analysis, time series, and statistics. pip>=9 # The PyPA recommended tool for installing Python packages. setuptools>=41 # Fully-featured library designed to facilitate packaging Python projects. 
diff --git a/setup.py b/setup.py index 17c64e4cf..097f0e468 100644 --- a/setup.py +++ b/setup.py @@ -11,9 +11,9 @@ package_dir={'flowsa': 'flowsa'}, include_package_data=True, install_requires=[ - 'fedelemflowlist @ git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List#egg=fedelemflowlist', - 'esupy @ git+https://github.com/USEPA/esupy#egg=esupy', - 'StEWI @ git+https://github.com/USEPA/standardizedinventories#egg=StEWI', + 'fedelemflowlist @ git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git@develop#egg=fedelemflowlist', + 'esupy @ git+https://github.com/USEPA/esupy.git@develop#egg=esupy', + 'StEWI @ git+https://github.com/USEPA/standardizedinventories.git@develop#egg=StEWI', 'pandas>=1.3.2', 'pip>=9', 'setuptools>=41', From 56305e4b4e8522ec0914256aada28ce31799906c Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 8 Jul 2022 12:13:02 -0400 Subject: [PATCH 05/24] update qwi to QWI --- flowsa/data_source_scripts/{Census_qwi.py => Census_QWI.py} | 0 .../flowbyactivitymethods/{Census_qwi.yaml => Census_QWI.yaml} | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename flowsa/data_source_scripts/{Census_qwi.py => Census_QWI.py} (100%) rename flowsa/methods/flowbyactivitymethods/{Census_qwi.yaml => Census_QWI.yaml} (96%) diff --git a/flowsa/data_source_scripts/Census_qwi.py b/flowsa/data_source_scripts/Census_QWI.py similarity index 100% rename from flowsa/data_source_scripts/Census_qwi.py rename to flowsa/data_source_scripts/Census_QWI.py diff --git a/flowsa/methods/flowbyactivitymethods/Census_qwi.yaml b/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml similarity index 96% rename from flowsa/methods/flowbyactivitymethods/Census_qwi.yaml rename to flowsa/methods/flowbyactivitymethods/Census_QWI.yaml index 20a81444d..d9f8fa072 100644 --- a/flowsa/methods/flowbyactivitymethods/Census_qwi.yaml +++ b/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml @@ -1,7 +1,7 @@ author: US Census Bureau source_name: Quarterly 
Workforce Indicators source_url: https://www.census.gov/data/developers/data-sets/qwi.html#ownership -bib_id: Census_qwi +bib_id: Census_QWI api_name: Census api_key_required: false format: csv # comma delineated data From 8a5f7f66c8db7863a5a9b06c8d444656d52ba9b0 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 8 Jul 2022 12:13:24 -0400 Subject: [PATCH 06/24] add qwi to source_catalog --- flowsa/data/source_catalog.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/flowsa/data/source_catalog.yaml b/flowsa/data/source_catalog.yaml index a842d9be8..ee6dbfef8 100644 --- a/flowsa/data/source_catalog.yaml +++ b/flowsa/data/source_catalog.yaml @@ -73,6 +73,12 @@ Census_PEP_Population: sector-like_activities: False activity_schema: sector_aggregation_level: "" +Census_QWI: + class: + - Employment + sector-like_activities: True + activity_schema: NAICS_2012_Code + sector_aggregation_level: "disaggregated" Census_VIP: class: Money sector-like_activities: False From a936174e4491428505b6eee1feffe134b54032f7 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 8 Jul 2022 12:55:46 -0400 Subject: [PATCH 07/24] update qwi fba to use !script_function --- flowsa/methods/flowbyactivitymethods/Census_QWI.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml b/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml index d9f8fa072..c46488f16 100644 --- a/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml +++ b/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml @@ -7,9 +7,9 @@ api_key_required: false format: csv # comma delineated data url: base_url: https://api.census.gov/data/timeseries/qwi/se?get=industry,EmpTotal,ownercode&for=county:__county__&in=state:__state__&year=__year__&quarter=__quarter__&key=__apiKey__ -url_replace_fxn: census_qwi_url_helper -call_response_fxn: census_qwi_call -parse_response_fxn: census_qwi_parse +url_replace_fxn: !script_function:Census_QWI 
census_qwi_url_helper +call_response_fxn: !script_function:Census_QWI census_qwi_call +parse_response_fxn: !script_function:Census_QWI census_qwi_parse years: - 2002 - 2010 From 557e6ae86b23f92fde99f3f9f7de8d4126b8d0ad Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 8 Jul 2022 12:56:22 -0400 Subject: [PATCH 08/24] drop Puerto Rico from the FIPS crosswalk --- scripts/write_FIPS_xwalk_from_Census.py | 62 ++++++++++++++++--------- 1 file changed, 39 insertions(+), 23 deletions(-) diff --git a/scripts/write_FIPS_xwalk_from_Census.py b/scripts/write_FIPS_xwalk_from_Census.py index 09f9a08f4..be8cf2b7c 100644 --- a/scripts/write_FIPS_xwalk_from_Census.py +++ b/scripts/write_FIPS_xwalk_from_Census.py @@ -28,13 +28,17 @@ def stripcounty(s): def annual_fips(years): - """Fxn to pull the FIPS codes/names from the Census website. Columns are renamed amd subset.""" - # list of years to include in FIPS crosswalk + """ + Fxn to pull the FIPS codes/names from the Census website. Columns are + renamed amd subset. 
+ :param years: list, years to include in FIPS crosswalk + :return: + """ df_list = {} for year in years: - # only works for 2015 +....contacted Census on 5/1 to ask for county level - # fips for previous years + # only works for 2015 +....contacted Census on 5/1 to ask for county + # level fips for previous years if year == '2013': url = 'https://www2.census.gov/programs-surveys/popest/geographies/' + \ year + '/all-geocodes-v' + year + '.xls' @@ -43,7 +47,8 @@ def annual_fips(years): year + "/all-geocodes-v" + year + ".xlsx" r = make_url_request(url) - raw_df = pd.read_excel(io.BytesIO(r.content)).dropna().reset_index(drop=True) + raw_df = pd.read_excel(io.BytesIO(r.content)).dropna().reset_index( + drop=True) # skip the first few rows FIPS_df = pd.DataFrame(raw_df.loc[1:]).reindex() @@ -62,14 +67,16 @@ def annual_fips(years): # split df by level to return a list of dfs # use a list comprehension to split it out - FIPS_bylevel = [pd.DataFrame(y) for x, y in FIPS_df.groupby("SummaryLevel", as_index=False)] + FIPS_bylevel = [pd.DataFrame(y) for x, y in FIPS_df.groupby( + "SummaryLevel", as_index=False)] # Assume df order in list is in geolevels keys order # country does not have its own field - state_and_county_fields = {"Country": ["StateCode(FIPS)"], - "State": ["StateCode(FIPS)"], - "County_" + year: ["StateCode(FIPS)", "CountyCode(FIPS)"]} + state_and_county_fields = { + "Country": ["StateCode(FIPS)"], + "State": ["StateCode(FIPS)"], + "County_" + year: ["StateCode(FIPS)", "CountyCode(FIPS)"]} name_field = "AreaName(includinglegal/statisticalareadescription)" @@ -89,21 +96,22 @@ def annual_fips(years): # FIPS_df_new = FIPS_df for k, v in new_dfs.items(): fields_to_merge = [str(x) for x in state_and_county_fields[k]] - # FIPS_df_new = pd.merge(FIPS_df_new,v,on=fields_to_merge,how="left") FIPS_df = pd.merge(FIPS_df, v, on=fields_to_merge, how="left") # combine state and county codes FIPS_df['FIPS_' + year] = \ - FIPS_df[state_and_county_fields["County_" + 
year][0]].astype(str) + \ - FIPS_df[state_and_county_fields["County_" + year][1]].astype(str) + FIPS_df[state_and_county_fields["County_" + year][0]].astype( + str) + FIPS_df[state_and_county_fields["County_" + year][ + 1]].astype(str) fields_to_keep = ["State", "County_" + year, "FIPS_" + year] FIPS_df = FIPS_df[fields_to_keep] # Clean the county field - remove the " County" - # FIPS_df["County"] = FIPS_df["County"].apply(lambda x:stripcounty(x)) - FIPS_df["County_" + year] = FIPS_df["County_" + year].apply(stripcounty) - FIPS_df["County_" + year] = FIPS_df["County_" + year].apply(clean_str_and_capitalize) + FIPS_df["County_" + year] = FIPS_df["County_" + year].apply( + stripcounty) + FIPS_df["County_" + year] = FIPS_df["County_" + year].apply( + clean_str_and_capitalize) FIPS_df["State"] = FIPS_df["State"].apply(clean_str_and_capitalize) # add to data dictionary of fips years @@ -134,12 +142,14 @@ def read_fips_2010(): names_10 = pd.DataFrame(names_10.loc[4:]).reset_index(drop=True) # drop rows of na names_10 = names_10.loc[~names_10['2010 County Set Description'].isna()] - names_10 = names_10.loc[~names_10['FIPS County Code'].isna()].reset_index(drop=True) + names_10 = names_10.loc[~names_10['FIPS County Code'].isna()].reset_index( + drop=True) # new column of fips names_10['FIPS_2010'] = names_10['FIPS State Code'].astype(str) +\ names_10['FIPS County Code'].astype(str) # rename columns and subset df - names_10 = names_10.rename(columns={'2010 County Set Description': 'County_2010'}) + names_10 = names_10.rename(columns={'2010 County Set Description': + 'County_2010'}) names_10 = names_10[['FIPS_2010', 'County_2010']] # drop empty fips column names_10['FIPS_2010'] = names_10['FIPS_2010'].str.strip() @@ -150,7 +160,8 @@ def read_fips_2010(): if __name__ == '__main__': - # consider modifying to include data for all years, as there are county level name changes + # consider modifying to include data for all years, as there are county + # level name changes # years 
data interested in (list) years = ['2015'] @@ -163,7 +174,8 @@ def read_fips_2010(): # Accessed 04/10/2020 df = fips_dic['FIPS_2015'] - # modify columns depicting how counties have changed over the years - starting 2010 + # modify columns depicting how counties have changed over the years - + # starting 2010 # 2013 had two different/renamed fips df_13 = pd.DataFrame(df['FIPS_2015']) @@ -175,7 +187,8 @@ def read_fips_2010(): # so 2010 will have an additional row df_10 = pd.DataFrame(df_13["FIPS_2013"]) df_10['FIPS_2010'] = df_13['FIPS_2013'] - df_10 = df_10.append(pd.DataFrame([["51019", "51515"]], columns=df_10.columns)) + df_10 = df_10.append(pd.DataFrame([["51019", "51515"]], + columns=df_10.columns)) # merge 2010 with 2013 dataframe df2 = pd.merge(df_10, df_13, on="FIPS_2013", how='left')\ @@ -200,10 +213,13 @@ def read_fips_2010(): df4 = pd.merge(df4, names_13, on=["State", "FIPS_2013"], how='left') # reorder dataframe - fips_xwalk = df4[['State', 'FIPS_2010', 'County_2010', 'FIPS_2013', 'County_2013', - 'FIPS_2015', 'County_2015']] + fips_xwalk = df4[['State', 'FIPS_2010', 'County_2010', 'FIPS_2013', + 'County_2013', 'FIPS_2015', 'County_2015']] fips_xwalk = fips_xwalk.sort_values(['FIPS_2010', 'FIPS_2013', - 'FIPS_2015']).reset_index(drop=True) + 'FIPS_2015']) + # drop peurto rico data + fips_xwalk = fips_xwalk[fips_xwalk['State'] != 'Puerto rico'].reset_index( + drop=True) # write fips crosswalk as csv fips_xwalk.to_csv(datapath + "FIPS_Crosswalk.csv", index=False) From 11fadfd924b3c7a182fc918328a30c763ccef0ca Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 8 Jul 2022 14:16:29 -0400 Subject: [PATCH 09/24] updated fips crosswalk without Puerto Rico --- flowsa/data/FIPS_Crosswalk.csv | 79 ---------------------------------- 1 file changed, 79 deletions(-) diff --git a/flowsa/data/FIPS_Crosswalk.csv b/flowsa/data/FIPS_Crosswalk.csv index cc39d690e..a8ddb4fbb 100644 --- a/flowsa/data/FIPS_Crosswalk.csv +++ b/flowsa/data/FIPS_Crosswalk.csv @@ -3194,82 
+3194,3 @@ Wyoming,56039,Teton,56039,Teton,56039,Teton Wyoming,56041,Uinta,56041,Uinta,56041,Uinta Wyoming,56043,Washakie,56043,Washakie,56043,Washakie Wyoming,56045,Weston,56045,Weston,56045,Weston -Puerto rico,72000,,72000,,72000, -Puerto rico,72001,,72001,Adjuntas municipio,72001,Adjuntas municipio -Puerto rico,72003,,72003,Aguada municipio,72003,Aguada municipio -Puerto rico,72005,,72005,Aguadilla municipio,72005,Aguadilla municipio -Puerto rico,72007,,72007,Aguas buenas municipio,72007,Aguas buenas municipio -Puerto rico,72009,,72009,Aibonito municipio,72009,Aibonito municipio -Puerto rico,72011,,72011,Añasco municipio,72011,Añasco municipio -Puerto rico,72013,,72013,Arecibo municipio,72013,Arecibo municipio -Puerto rico,72015,,72015,Arroyo municipio,72015,Arroyo municipio -Puerto rico,72017,,72017,Barceloneta municipio,72017,Barceloneta municipio -Puerto rico,72019,,72019,Barranquitas municipio,72019,Barranquitas municipio -Puerto rico,72021,,72021,Bayamón municipio,72021,Bayamón municipio -Puerto rico,72023,,72023,Cabo rojo municipio,72023,Cabo rojo municipio -Puerto rico,72025,,72025,Caguas municipio,72025,Caguas municipio -Puerto rico,72027,,72027,Camuy municipio,72027,Camuy municipio -Puerto rico,72029,,72029,Canóvanas municipio,72029,Canóvanas municipio -Puerto rico,72031,,72031,Carolina municipio,72031,Carolina municipio -Puerto rico,72033,,72033,Cataño municipio,72033,Cataño municipio -Puerto rico,72035,,72035,Cayey municipio,72035,Cayey municipio -Puerto rico,72037,,72037,Ceiba municipio,72037,Ceiba municipio -Puerto rico,72039,,72039,Ciales municipio,72039,Ciales municipio -Puerto rico,72041,,72041,Cidra municipio,72041,Cidra municipio -Puerto rico,72043,,72043,Coamo municipio,72043,Coamo municipio -Puerto rico,72045,,72045,Comerío municipio,72045,Comerío municipio -Puerto rico,72047,,72047,Corozal municipio,72047,Corozal municipio -Puerto rico,72049,,72049,Culebra municipio,72049,Culebra municipio -Puerto rico,72051,,72051,Dorado 
municipio,72051,Dorado municipio -Puerto rico,72053,,72053,Fajardo municipio,72053,Fajardo municipio -Puerto rico,72054,,72054,Florida municipio,72054,Florida municipio -Puerto rico,72055,,72055,Guánica municipio,72055,Guánica municipio -Puerto rico,72057,,72057,Guayama municipio,72057,Guayama municipio -Puerto rico,72059,,72059,Guayanilla municipio,72059,Guayanilla municipio -Puerto rico,72061,,72061,Guaynabo municipio,72061,Guaynabo municipio -Puerto rico,72063,,72063,Gurabo municipio,72063,Gurabo municipio -Puerto rico,72065,,72065,Hatillo municipio,72065,Hatillo municipio -Puerto rico,72067,,72067,Hormigueros municipio,72067,Hormigueros municipio -Puerto rico,72069,,72069,Humacao municipio,72069,Humacao municipio -Puerto rico,72071,,72071,Isabela municipio,72071,Isabela municipio -Puerto rico,72073,,72073,Jayuya municipio,72073,Jayuya municipio -Puerto rico,72075,,72075,Juana díaz municipio,72075,Juana díaz municipio -Puerto rico,72077,,72077,Juncos municipio,72077,Juncos municipio -Puerto rico,72079,,72079,Lajas municipio,72079,Lajas municipio -Puerto rico,72081,,72081,Lares municipio,72081,Lares municipio -Puerto rico,72083,,72083,Las marías municipio,72083,Las marías municipio -Puerto rico,72085,,72085,Las piedras municipio,72085,Las piedras municipio -Puerto rico,72087,,72087,Loíza municipio,72087,Loíza municipio -Puerto rico,72089,,72089,Luquillo municipio,72089,Luquillo municipio -Puerto rico,72091,,72091,Manatí municipio,72091,Manatí municipio -Puerto rico,72093,,72093,Maricao municipio,72093,Maricao municipio -Puerto rico,72095,,72095,Maunabo municipio,72095,Maunabo municipio -Puerto rico,72097,,72097,Mayagüez municipio,72097,Mayagüez municipio -Puerto rico,72099,,72099,Moca municipio,72099,Moca municipio -Puerto rico,72101,,72101,Morovis municipio,72101,Morovis municipio -Puerto rico,72103,,72103,Naguabo municipio,72103,Naguabo municipio -Puerto rico,72105,,72105,Naranjito municipio,72105,Naranjito municipio -Puerto rico,72107,,72107,Orocovis 
municipio,72107,Orocovis municipio -Puerto rico,72109,,72109,Patillas municipio,72109,Patillas municipio -Puerto rico,72111,,72111,Peñuelas municipio,72111,Peñuelas municipio -Puerto rico,72113,,72113,Ponce municipio,72113,Ponce municipio -Puerto rico,72115,,72115,Quebradillas municipio,72115,Quebradillas municipio -Puerto rico,72117,,72117,Rincón municipio,72117,Rincón municipio -Puerto rico,72119,,72119,Río grande municipio,72119,Río grande municipio -Puerto rico,72121,,72121,Sabana grande municipio,72121,Sabana grande municipio -Puerto rico,72123,,72123,Salinas municipio,72123,Salinas municipio -Puerto rico,72125,,72125,San germán municipio,72125,San germán municipio -Puerto rico,72127,,72127,San juan municipio,72127,San juan municipio -Puerto rico,72129,,72129,San lorenzo municipio,72129,San lorenzo municipio -Puerto rico,72131,,72131,San sebastián municipio,72131,San sebastián municipio -Puerto rico,72133,,72133,Santa isabel municipio,72133,Santa isabel municipio -Puerto rico,72135,,72135,Toa alta municipio,72135,Toa alta municipio -Puerto rico,72137,,72137,Toa baja municipio,72137,Toa baja municipio -Puerto rico,72139,,72139,Trujillo alto municipio,72139,Trujillo alto municipio -Puerto rico,72141,,72141,Utuado municipio,72141,Utuado municipio -Puerto rico,72143,,72143,Vega alta municipio,72143,Vega alta municipio -Puerto rico,72145,,72145,Vega baja municipio,72145,Vega baja municipio -Puerto rico,72147,,72147,Vieques municipio,72147,Vieques municipio -Puerto rico,72149,,72149,Villalba municipio,72149,Villalba municipio -Puerto rico,72151,,72151,Yabucoa municipio,72151,Yabucoa municipio -Puerto rico,72153,,72153,Yauco municipio,72153,Yauco municipio From d1b52955fa9db3c683df8b098b4c5a2d823dd1cc Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Mon, 11 Jul 2022 15:30:33 -0400 Subject: [PATCH 10/24] update pandas requirment to 1.4.0 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt 
b/requirements.txt index 6223afaf2..5093a1a74 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git@develop#egg=fedelemflowlist git+https://github.com/USEPA/esupy.git@develop#egg=esupy git+https://github.com/USEPA/standardizedinventories.git@develop#egg=StEWI -pandas>=1.3.2 # Powerful data structures for data analysis, time series, and statistics. +pandas>=1.4.0 # Powerful data structures for data analysis, time series, and statistics. pip>=9 # The PyPA recommended tool for installing Python packages. setuptools>=41 # Fully-featured library designed to facilitate packaging Python projects. pyyaml>=5.3 # Yaml for python diff --git a/setup.py b/setup.py index 097f0e468..d8b6568e6 100644 --- a/setup.py +++ b/setup.py @@ -14,7 +14,7 @@ 'fedelemflowlist @ git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git@develop#egg=fedelemflowlist', 'esupy @ git+https://github.com/USEPA/esupy.git@develop#egg=esupy', 'StEWI @ git+https://github.com/USEPA/standardizedinventories.git@develop#egg=StEWI', - 'pandas>=1.3.2', + 'pandas>=1.4.0', 'pip>=9', 'setuptools>=41', 'pyyaml>=5.3', From f5232571150d911bacd7994638c491cc111513c4 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Mon, 11 Jul 2022 15:37:13 -0400 Subject: [PATCH 11/24] drop support for testing FLOWSA package on Python 3.7 --- .github/workflows/python-app.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml index fc805c820..f88bac2d7 100644 --- a/.github/workflows/python-app.yml +++ b/.github/workflows/python-app.yml @@ -31,7 +31,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, windows-latest, macos-latest] - py-version: ['3.7', '3.8', '3.9', '3.10'] + py-version: ['3.8', '3.9', '3.10'] steps: - uses: actions/checkout@v2 From c597b882c50722d19bd0ba896ad0ce1630d31b63 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Tue, 
12 Jul 2022 10:33:25 -0400 Subject: [PATCH 12/24] add 2019 BLS QCEW FBA --- flowsa/methods/flowbyactivitymethods/BLS_QCEW.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/flowsa/methods/flowbyactivitymethods/BLS_QCEW.yaml b/flowsa/methods/flowbyactivitymethods/BLS_QCEW.yaml index b5685c59c..185e7e474 100644 --- a/flowsa/methods/flowbyactivitymethods/BLS_QCEW.yaml +++ b/flowsa/methods/flowbyactivitymethods/BLS_QCEW.yaml @@ -21,3 +21,4 @@ years: - 2016 - 2017 - 2018 +- 2019 From 9233a3592619a73b8a6209884e5eff103bdd0753 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Tue, 12 Jul 2022 10:33:55 -0400 Subject: [PATCH 13/24] clean up/simplify state employment yamls --- .../methods/flowbysectormethods/Employment_state_2012.yaml | 5 ----- .../methods/flowbysectormethods/Employment_state_2013.yaml | 6 ------ .../methods/flowbysectormethods/Employment_state_2014.yaml | 6 ------ .../methods/flowbysectormethods/Employment_state_2015.yaml | 6 ------ .../methods/flowbysectormethods/Employment_state_2016.yaml | 6 ------ .../methods/flowbysectormethods/Employment_state_2017.yaml | 6 ------ 6 files changed, 35 deletions(-) diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml index 51bdc62e3..4864f86cd 100644 --- a/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml +++ b/flowsa/methods/flowbysectormethods/Employment_state_2012.yaml @@ -14,9 +14,4 @@ source_names: qcew: names: !from_index:BLS_QCEW_asets.csv qcew allocation_method: direct - allocation_source: None - allocation_source_class: None - allocation_source_year: None - allocation_flow: None - allocation_compartment: None allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml index f155da27b..2cb15697f 100644 --- a/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml +++ 
b/flowsa/methods/flowbysectormethods/Employment_state_2013.yaml @@ -10,14 +10,8 @@ source_names: year: 2013 clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec - activity_set_file: 'BLS_QCEW_asets.csv' activity_sets: qcew: names: !from_index:BLS_QCEW_asets.csv qcew allocation_method: direct - allocation_source: None - allocation_source_class: None - allocation_source_year: None - allocation_flow: None - allocation_compartment: None allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml index 7039c9c27..c9a3692d9 100644 --- a/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml +++ b/flowsa/methods/flowbysectormethods/Employment_state_2014.yaml @@ -10,14 +10,8 @@ source_names: year: 2014 clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec - activity_set_file: 'BLS_QCEW_asets.csv' activity_sets: qcew: names: !from_index:BLS_QCEW_asets.csv qcew allocation_method: direct - allocation_source: None - allocation_source_class: None - allocation_source_year: None - allocation_flow: None - allocation_compartment: None allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml index 3834d1844..819f69256 100644 --- a/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml +++ b/flowsa/methods/flowbysectormethods/Employment_state_2015.yaml @@ -10,14 +10,8 @@ source_names: year: 2015 clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec - activity_set_file: 'BLS_QCEW_asets.csv' activity_sets: qcew: 
names: !from_index:BLS_QCEW_asets.csv qcew allocation_method: direct - allocation_source: None - allocation_source_class: None - allocation_source_year: None - allocation_flow: None - allocation_compartment: None allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml index 9864d6dff..7aaf42ef5 100644 --- a/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml +++ b/flowsa/methods/flowbysectormethods/Employment_state_2016.yaml @@ -10,14 +10,8 @@ source_names: year: 2016 clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec - activity_set_file: 'BLS_QCEW_asets.csv' activity_sets: qcew: names: !from_index:BLS_QCEW_asets.csv qcew allocation_method: direct - allocation_source: None - allocation_source_class: None - allocation_source_year: None - allocation_flow: None - allocation_compartment: None allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml index 96e34e569..5a73683a4 100644 --- a/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml +++ b/flowsa/methods/flowbysectormethods/Employment_state_2017.yaml @@ -10,14 +10,8 @@ source_names: year: 2017 clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec - activity_set_file: 'BLS_QCEW_asets.csv' activity_sets: qcew: names: !from_index:BLS_QCEW_asets.csv qcew allocation_method: direct - allocation_source: None - allocation_source_class: None - allocation_source_year: None - allocation_flow: None - allocation_compartment: None allocation_from_scale: state From e8dce397e89d5b7ef4c0aaba19296815dd38eb4b Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Tue, 
12 Jul 2022 10:34:21 -0400 Subject: [PATCH 14/24] add state employment FBS for 2018 and 2019 --- .../Employment_state_2018.yaml | 17 +++++++++++++++++ .../Employment_state_2019.yaml | 17 +++++++++++++++++ 2 files changed, 34 insertions(+) create mode 100644 flowsa/methods/flowbysectormethods/Employment_state_2018.yaml create mode 100644 flowsa/methods/flowbysectormethods/Employment_state_2019.yaml diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2018.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2018.yaml new file mode 100644 index 000000000..c3437c2e8 --- /dev/null +++ b/flowsa/methods/flowbysectormethods/Employment_state_2018.yaml @@ -0,0 +1,17 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2018 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + allocation_method: direct + allocation_from_scale: state diff --git a/flowsa/methods/flowbysectormethods/Employment_state_2019.yaml b/flowsa/methods/flowbysectormethods/Employment_state_2019.yaml new file mode 100644 index 000000000..b12493c88 --- /dev/null +++ b/flowsa/methods/flowbysectormethods/Employment_state_2019.yaml @@ -0,0 +1,17 @@ +target_sector_level: NAICS_6 +target_sector_source: NAICS_2012_Code +target_geoscale: state +source_names: + "BLS_QCEW": + data_format: 'FBA' + class: Employment + geoscale_to_use: state + source_fba_load_scale: state + year: 2019 + clean_fba_df_fxn: !script_function:BLS_QCEW clean_bls_qcew_fba_for_employment_sat_table + clean_fba_w_sec_df_fxn: !script_function:BLS_QCEW bls_clean_allocation_fba_w_sec + activity_sets: + qcew: + names: !from_index:BLS_QCEW_asets.csv qcew + 
allocation_method: direct + allocation_from_scale: state From 503c1d96780bae4b8f37046ec7bdc373dd32bf45 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Tue, 12 Jul 2022 16:34:06 -0400 Subject: [PATCH 15/24] update qwi flowname, remove "owner code" --- flowsa/data_source_scripts/Census_QWI.py | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/flowsa/data_source_scripts/Census_QWI.py b/flowsa/data_source_scripts/Census_QWI.py index fb652374b..309eb546c 100644 --- a/flowsa/data_source_scripts/Census_QWI.py +++ b/flowsa/data_source_scripts/Census_QWI.py @@ -25,7 +25,6 @@ replace_strings_with_NoneType - def census_qwi_url_helper(*, build_url, year, config, **_): """ This helper function uses the "build_url" input from flowbyactivity.py, @@ -76,7 +75,8 @@ def census_qwi_call(*, resp, **_): try: json_load = json.loads(resp.text) # convert response to dataframe - df = pd.DataFrame(data=json_load[1:len(json_load)], columns=json_load[0]) + df = pd.DataFrame(data=json_load[1:len(json_load)], + columns=json_load[0]) except: print(resp) df = pd.DataFrame() @@ -94,24 +94,24 @@ def census_qwi_parse(*, df_list, year, **_): specifications """ # Concat dataframes - df = pd.concat(df_list, sort=False) + df = pd.concat(df_list, ignore_index=True) # drop rows don't need # get rid of None values in EmpTotal df = df[df.EmpTotal.notnull()] - df.loc[df['ownercode'] == 'A00', 'Owner'] = 'State and local government plus private ownership' + df.loc[df['ownercode'] == 'A00', 'Owner'] = 'State and local government ' \ + 'plus private ownership' df.loc[df['ownercode'] == 'A01', 'Owner'] = 'Federal government' df.loc[df['ownercode'] == 'A05', 'Owner'] = 'All Private' df = df.reindex() - # What to do: # Combine the State and County into the location. df['Location'] = df['state'] + df['county'] - # industry needs to be renamed Activity Produced by. # add the Quarter and ownership codes to flowname. 
- - df['FlowName'] = "Employment, owner code " + df['Owner'] + "Quarter " + df["quarter"] + df['FlowName'] = df.apply( + lambda x: f'Number of employees, {x["Owner"]}, Quarter {x["quarter"]}', + axis=1) df = df.rename(columns={'EmpTotal': 'FlowAmount', 'year': 'Year', 'industry': "ActivityProducedBy"}) @@ -124,5 +124,3 @@ def census_qwi_parse(*, df_list, year, **_): df['FlowType'] = "ELEMENTARY_FLOW" df['Class'] = "Employment" return df - - From 962ddae8241625ef589a39bcfd50bc04f3a9e3f3 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Tue, 12 Jul 2022 16:34:46 -0400 Subject: [PATCH 16/24] indicate qwi requires api key --- .../flowbyactivitymethods/Census_QWI.yaml | 26 +++++++++---------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml b/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml index c46488f16..948d5bc87 100644 --- a/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml +++ b/flowsa/methods/flowbyactivitymethods/Census_QWI.yaml @@ -3,7 +3,7 @@ source_name: Quarterly Workforce Indicators source_url: https://www.census.gov/data/developers/data-sets/qwi.html#ownership bib_id: Census_QWI api_name: Census -api_key_required: false +api_key_required: True format: csv # comma delineated data url: base_url: https://api.census.gov/data/timeseries/qwi/se?get=industry,EmpTotal,ownercode&for=county:__county__&in=state:__state__&year=__year__&quarter=__quarter__&key=__apiKey__ @@ -11,15 +11,15 @@ url_replace_fxn: !script_function:Census_QWI census_qwi_url_helper call_response_fxn: !script_function:Census_QWI census_qwi_call parse_response_fxn: !script_function:Census_QWI census_qwi_parse years: -- 2002 -- 2010 -- 2011 -- 2012 -- 2013 -- 2014 -- 2015 -- 2016 -- 2017 -- 2018 -- 2019 -- 2020 + - 2002 + - 2010 + - 2011 + - 2012 + - 2013 + - 2014 + - 2015 + - 2016 + - 2017 + - 2018 + - 2019 + - 2020 From 0b4f4cdace3f0b9a42724a22b5b9bdd8ba798475 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Wed, 13 
Jul 2022 08:01:26 -0400 Subject: [PATCH 17/24] copper myb url update --- flowsa/methods/flowbyactivitymethods/USGS_MYB_Copper.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Copper.yaml b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Copper.yaml index 1623e9be5..905a4ba84 100644 --- a/flowsa/methods/flowbyactivitymethods/USGS_MYB_Copper.yaml +++ b/flowsa/methods/flowbyactivitymethods/USGS_MYB_Copper.yaml @@ -6,7 +6,7 @@ date_generated: '2021-03-19' years_available: 2011-2015 format: xlsx url: - base_url: https://s3-us-west-2.amazonaws.com/prd-wret/assets/palladium/production/mineral-pubs/copper/myb1-2015-coppe.xlsx + base_url: https://d9-wret.s3.us-west-2.amazonaws.com/assets/palladium/production/mineral-pubs/copper/myb1-2015-coppe.xlsx url_replace_fxn: !script_function:USGS_MYB usgs_myb_url_helper call_response_fxn: !script_function:USGS_MYB usgs_copper_call parse_response_fxn: !script_function:USGS_MYB usgs_copper_parse From 9fb740af740c9a2d45749aee242e199e0fca346b Mon Sep 17 00:00:00 2001 From: Ben Young Date: Thu, 14 Jul 2022 20:19:32 -0400 Subject: [PATCH 18/24] move diff files to local dir --- .github/workflows/test_methods.yml | 2 +- flowsa/settings.py | 1 + flowsa/test_FBS_against_remote.py | 4 ++-- 3 files changed, 4 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test_methods.yml b/.github/workflows/test_methods.yml index 98062adec..c2b804245 100644 --- a/.github/workflows/test_methods.yml +++ b/.github/workflows/test_methods.yml @@ -51,7 +51,7 @@ jobs: name: FBS diff files # A file, directory or wildcard patter that describes what to upload path: | - ${{ env.LD_LIBRARY_PATH }}/python3.10/site-packages/flowsa/data/fbs_diff/*_diff.csv + ~/.local/share/flowsa/FBSComparisons/* ~/.local/share/flowsa/FlowBySector/* ~/.local/share/flowsa/Log/* if-no-files-found: warn # 'warn' or 'ignore' are also available, defaults to `warn` diff --git a/flowsa/settings.py b/flowsa/settings.py 
index 6bb2af5ab..d9467e318 100644 --- a/flowsa/settings.py +++ b/flowsa/settings.py @@ -32,6 +32,7 @@ fbsoutputpath = outputpath + 'FlowBySector/' biboutputpath = outputpath + 'Bibliography/' logoutputpath = outputpath + 'Log/' +diffpath = outputpath + 'FBSComparisons/' plotoutputpath = outputpath + 'Plots/' # ensure directories exist diff --git a/flowsa/test_FBS_against_remote.py b/flowsa/test_FBS_against_remote.py index 4a5574d1c..9ddbcc11c 100644 --- a/flowsa/test_FBS_against_remote.py +++ b/flowsa/test_FBS_against_remote.py @@ -5,7 +5,7 @@ import os from flowsa import seeAvailableFlowByModels from flowsa.metadata import set_fb_meta -from flowsa.settings import paths, datapath +from flowsa.settings import paths, diffpath from flowsa.validation import compare_FBS_results from esupy.processed_data_mgmt import download_from_remote @@ -13,7 +13,7 @@ def test_FBS_against_remote(): """Compare results for each FBS method at current HEAD with most recent FBS stored on remote server.""" - outdir = f"{datapath}fbs_diff/" + outdir = diffpath if not os.path.exists(outdir): os.mkdir(outdir) for m in seeAvailableFlowByModels("FBS", print_method=False): From 52ee73b817839d53967fac0a53e73641f8564848 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Thu, 14 Jul 2022 20:50:34 -0400 Subject: [PATCH 19/24] store known method errors and check against tests before failing --- flowsa/common.py | 10 +++++++++- flowsa/methods/method_status.yaml | 27 +++++++++++++++++++++++++++ flowsa/test_methods.py | 10 ++++++++-- 3 files changed, 44 insertions(+), 3 deletions(-) create mode 100644 flowsa/methods/method_status.yaml diff --git a/flowsa/common.py b/flowsa/common.py index 6bdff2721..c8cece8ee 100644 --- a/flowsa/common.py +++ b/flowsa/common.py @@ -18,7 +18,7 @@ flow_by_activity_wsec_fields, flow_by_activity_mapped_wsec_fields, \ activity_fields from flowsa.settings import datapath, MODULEPATH, logoutputpath, \ - sourceconfigpath, log, flowbysectormethodpath + sourceconfigpath, log, 
flowbysectormethodpath, methodpath # Sets default Sector Source Name @@ -366,3 +366,11 @@ def str2bool(v): return True else: return False + + +def check_method_status(): + """Read the current method status""" + yaml_path = methodpath + 'method_status.yaml' + with open(yaml_path, 'r') as f: + method_status = yaml.safe_load(f) + return method_status diff --git a/flowsa/methods/method_status.yaml b/flowsa/methods/method_status.yaml new file mode 100644 index 000000000..990418267 --- /dev/null +++ b/flowsa/methods/method_status.yaml @@ -0,0 +1,27 @@ +## List of current inactive method files + +# FlowByActivity + +USGS_MYB_Potash: + Active: False + Status: Error in source data links + Type: HTTPError + +USGS_MYB_Phosphate: + Active: False + Status: Error in source data links + Type: HTTPError + +USGS_MYB_Copper: + Active: False + Status: Error in source data links + Type: HTTPError + +Blackhurst_IO: + Active: True + Status: Error in HTTP request during testing only + Type: HTTPError + + +# Flow By Sector + diff --git a/flowsa/test_methods.py b/flowsa/test_methods.py index 092a1b48c..0fa1ec3fe 100644 --- a/flowsa/test_methods.py +++ b/flowsa/test_methods.py @@ -7,6 +7,7 @@ import flowsa.exceptions from flowsa.flowbyactivity import load_yaml_dict, assemble_urls_for_query,\ call_urls +from flowsa.common import check_method_status @pytest.mark.skip(reason="Perform targeted test for test_FBA_urls on PR") @@ -14,7 +15,9 @@ def test_FBA_urls(): """Test yaml_load and url access for each FBA at the latest year. 
FBA requiring API key are skipped.""" error_list = [] + method_status = check_method_status() for m in seeAvailableFlowByModels("FBA", print_method=False): + m_status = method_status.get(m) config = load_yaml_dict(m, flowbytype='FBA') year = max(config['years']) @@ -36,8 +39,11 @@ def test_FBA_urls(): except flowsa.exceptions.APIError: print('API Key required, skipping url') continue - except Exception: - error_list.append(m) + except Exception as e: + if e.__class__.__name__ == m_status.get('Type'): + print(f'Known {m_status.get("Type")} in {m}') + else: + error_list.append(m) if error_list: pytest.fail(f"Error retrieving: {', '.join([x for x in [*error_list]])}") From d454245c40bbcb8ef4aca69c2a5ef4a539d24211 Mon Sep 17 00:00:00 2001 From: Ben Young Date: Thu, 14 Jul 2022 20:51:17 -0400 Subject: [PATCH 20/24] separate jobs for FBS and FBA method testing --- .github/workflows/test_methods.yml | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) diff --git a/.github/workflows/test_methods.yml b/.github/workflows/test_methods.yml index c2b804245..f78aa8aa1 100644 --- a/.github/workflows/test_methods.yml +++ b/.github/workflows/test_methods.yml @@ -11,7 +11,7 @@ on: workflow_dispatch: # also allow manual trigger, for testing purposes jobs: - build: + FBA_testing: runs-on: ubuntu-latest steps: @@ -36,6 +36,27 @@ jobs: run: | python flowsa/test_methods.py + FBS_testing: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v2 + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: "3.10" + + - name: Update pip & install testing pkgs + run: | + python -VV + python -m pip install --upgrade pip setuptools wheel + pip install pytest pytest-cov flake8 + + # install package & dependencies + - name: Install package and dependencies + run: | + pip install . 
+ - name: Compare FBS with remote id: FBS if: always() # Proceed even if Test FBA fails From d36477e432c6ce486e83fd9f89cd5d6dccb06b9c Mon Sep 17 00:00:00 2001 From: Ben Young <44471635+bl-young@users.noreply.github.com> Date: Thu, 14 Jul 2022 21:01:00 -0400 Subject: [PATCH 21/24] Create README.md --- flowsa/methods/README.md | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 flowsa/methods/README.md diff --git a/flowsa/methods/README.md b/flowsa/methods/README.md new file mode 100644 index 000000000..4d26ea8bc --- /dev/null +++ b/flowsa/methods/README.md @@ -0,0 +1,10 @@ +## Current Status + +Known issues with existing FBA and FBS methods are shown in [method_status.yaml](method_status.yaml) + +``` + + Active: True or False indicates whether the method can still be run. + Status: Description of the current status. + Type: Type of error generated by the method, e.g. 'HTTPError' +``` From 19346259d9280b9b86e3c82605a77c232cdd41a8 Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 15 Jul 2022 14:45:45 -0400 Subject: [PATCH 22/24] drop develop from package requirements --- requirements.txt | 6 +++--- setup.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5093a1a74..52d11f020 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,6 @@ -git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git@develop#egg=fedelemflowlist -git+https://github.com/USEPA/esupy.git@develop#egg=esupy -git+https://github.com/USEPA/standardizedinventories.git@develop#egg=StEWI +git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List#egg=fedelemflowlist +git+https://github.com/USEPA/esupy#egg=esupy +git+https://github.com/USEPA/standardizedinventories#egg=StEWI pandas>=1.4.0 # Powerful data structures for data analysis, time series, and statistics. pip>=9 # The PyPA recommended tool for installing Python packages.
setuptools>=41 # Fully-featured library designed to facilitate packaging Python projects. diff --git a/setup.py b/setup.py index d8b6568e6..45537fa53 100644 --- a/setup.py +++ b/setup.py @@ -11,9 +11,9 @@ package_dir={'flowsa': 'flowsa'}, include_package_data=True, install_requires=[ - 'fedelemflowlist @ git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List.git@develop#egg=fedelemflowlist', - 'esupy @ git+https://github.com/USEPA/esupy.git@develop#egg=esupy', - 'StEWI @ git+https://github.com/USEPA/standardizedinventories.git@develop#egg=StEWI', + 'fedelemflowlist @ git+https://github.com/USEPA/Federal-LCA-Commons-Elementary-Flow-List#egg=fedelemflowlist', + 'esupy @ git+https://github.com/USEPA/esupy#egg=esupy', + 'StEWI @ git+https://github.com/USEPA/standardizedinventories#egg=StEWI', 'pandas>=1.4.0', 'pip>=9', 'setuptools>=41', From 15a5abd1af4d52d13c7497ab992405f6e85ad5aa Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 15 Jul 2022 14:46:00 -0400 Subject: [PATCH 23/24] update to v1.2.3 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 45537fa53..d22da884c 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ setup( name='flowsa', - version='1.2.2', + version='1.2.3', packages=find_packages(), package_dir={'flowsa': 'flowsa'}, include_package_data=True, From 9963c904f3b6b3c3d579513d7f6571ae7a9e6c0f Mon Sep 17 00:00:00 2001 From: catherinebirney Date: Fri, 15 Jul 2022 14:50:49 -0400 Subject: [PATCH 24/24] remove myb copper from known list of inactive FBA --- flowsa/methods/method_status.yaml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/flowsa/methods/method_status.yaml b/flowsa/methods/method_status.yaml index 990418267..7e2a074b3 100644 --- a/flowsa/methods/method_status.yaml +++ b/flowsa/methods/method_status.yaml @@ -12,11 +12,6 @@ USGS_MYB_Phosphate: Status: Error in source data links Type: HTTPError -USGS_MYB_Copper: - Active: False - Status: Error in source data links - 
Type: HTTPError - Blackhurst_IO: Active: True Status: Error in HTTP request during testing only