From 101f03d02cde0ec008188956584eb62032de88c4 Mon Sep 17 00:00:00 2001 From: Patrick Dorn Date: Wed, 1 May 2024 22:21:12 -0500 Subject: [PATCH 1/2] refactor tests to be less implementation-dependent --- tests/data/scripts/create_test_data.py | 24 +- .../data/scripts/unit/test_clean_all_years.py | 254 ++++++++++-------- tests/data/source/test_output.csv | 8 +- tests/data/source/test_src_data.csv | 18 +- 4 files changed, 181 insertions(+), 123 deletions(-) diff --git a/tests/data/scripts/create_test_data.py b/tests/data/scripts/create_test_data.py index ddecfac1..c7d3f9a7 100644 --- a/tests/data/scripts/create_test_data.py +++ b/tests/data/scripts/create_test_data.py @@ -9,24 +9,24 @@ src_input_file = 'ChicagoEnergyBenchmarking.csv' test_input_file = 'test_src_data.csv' -property_test_cases = ['United Center', 'Crown Hall', 'Art Institute', 'Marie Curie'] +properties_to_include = [ + '100856', # United Center + '256419', # Crown Hall + '160196', # The Art Institute of Chicago + '138730', # random property + '240068', # random property w/ submitted data and no GHGIntensity data + ] -def write_test_sample(reader: csv.reader, writer: csv.writer, property_test_cases: List[str]) -> csv.writer: +def write_test_sample(reader: csv.reader, writer: csv.writer, properties_to_include: List[str]) -> csv.writer: header_row = next(reader) if len(header_row) <= 0: raise EOFError('ChicagoEnergyBenchmarking CSV file is empty!') else: writer.writerow(header_row) for row in reader: - for item in row: - has_prop = False - for case in property_test_cases: - if case in item: - has_prop = True - writer.writerow(row) - break - if has_prop: - break + property_id = row[1] + if property_id in properties_to_include: + writer.writerow(row) def main(): # the first console argument is technically the python script so we skip that @@ -41,7 +41,7 @@ def main(): csvfile = open(get_test_file_path(target_path), 'w') test_file = csv.writer(csvfile) - write_test_sample(src_csv, test_file, 
property_test_cases) + write_test_sample(src_csv, test_file, properties_to_include) print('Copied source data from', src_path) print('Copied test data to', target_path) diff --git a/tests/data/scripts/unit/test_clean_all_years.py b/tests/data/scripts/unit/test_clean_all_years.py index bafe85ba..6aab16d2 100644 --- a/tests/data/scripts/unit/test_clean_all_years.py +++ b/tests/data/scripts/unit/test_clean_all_years.py @@ -1,123 +1,167 @@ import pytest -import shutil, os, pathlib, csv +import os +import csv import pandas as pd -import numpy as np -from src.data.scripts.utils import get_and_clean_csv -from src.data.scripts import clean_and_pare_down_data_all_years as clean, process_data as proc -from tests.data.scripts.utils import get_test_file_path, get_src_file_path +from src.data.scripts import clean_and_pare_down_data_all_years +from tests.data.scripts.utils import get_test_file_path src_dir = 'src' test_dir = 'tests' -src_input_file = 'ChicagoEnergyBenchmarking.csv' test_input_file = 'test_src_data.csv' test_output_file = 'test_output.csv' -@pytest.fixture -def src_building_data() -> pd.DataFrame: - test_data_path = get_test_file_path(test_input_file) - assert os.path.exists(test_data_path) - return get_and_clean_csv(test_data_path) @pytest.fixture -def csv_file() -> csv.reader: - csvfile = open(get_test_file_path(test_input_file)) - return csv.reader(csvfile) - -def test_csv_file_has_some_data(csv_file): - first_line = csv_file.__next__() - assert first_line - assert len(first_line) > 0 - -@pytest.mark.parametrize("test_input", [ - clean.string_cols, - clean.int_cols, - clean.replace_headers -]) -def test_is_not_empty(test_input): - assert len(test_input) > 0 - -def test_src_data_exists(src_building_data): - assert src_building_data is not None +def csv_reader() -> csv.reader: + '''return a csv.DictReader of our test data CSV''' -@pytest.fixture -def test_columns_are_renamed(src_building_data) -> pd.DataFrame: - df = clean.rename_columns(src_building_data) - 
assert df is not None - assert not df.columns.equals(src_building_data.columns) - return df + csv_path = get_test_file_path(test_input_file) + with open(csv_path) as filehandle: + # yield here so that the context manager (with...) + # can cleanup the open filehandle after we're done with + # the csv.DictReader + yield csv.DictReader(filehandle) -def test_data_has_positive_ghg_data(test_columns_are_renamed): - df = clean.get_buildings_with_ghg_intensity(test_columns_are_renamed) - assert df is not None - assert np.all(df['GHGIntensity'] > 0) - -def test_data_has_submitted_status(test_columns_are_renamed): - df = clean.get_submitted_data(test_columns_are_renamed) - assert np.all(df['ReportingStatus'].str.contains('Submitted')) @pytest.fixture -def test_has_last_year_of_data(test_columns_are_renamed) -> pd.DataFrame: - df = clean.get_last_year_data(test_columns_are_renamed) - assert np.all(df['ID'].value_counts() == 1) - return df +def processed_dataframe() -> pd.DataFrame: + '''Process our test data as per clean_and_pare_down_data_all_years.py + and return the resulting dataframe''' -@pytest.fixture -def fixed_strings(test_has_last_year_of_data, test_columns_are_renamed): - return clean.fix_str_cols(test_has_last_year_of_data, - test_columns_are_renamed) - -@pytest.fixture -def fixed_strings_all_years(test_columns_are_renamed): - return clean.fix_str_cols(test_columns_are_renamed, - test_columns_are_renamed) - -def test_str_values_remain_the_same_as_origin(fixed_strings_all_years, csv_file): - header_row = next(csv_file) - str_col_positions = list(map(lambda col: fixed_strings_all_years.columns.get_loc(col), clean.string_cols)) - for csv_row in csv_file: - year, id = csv_row[0], csv_row[1] - row = fixed_strings_all_years[(fixed_strings_all_years['ID'].astype(str) == id) & \ - (fixed_strings_all_years['DataYear'].astype(str) == year)] - - for col, csv_pos in zip(clean.string_cols, str_col_positions): - if all(pd.isna(row[col].to_numpy())): - continue - - # The raw GPS 
in ChicagoEnergyBenchmarking.csv has 41.880451999999998, which gets - # truncated, so we round to ignore that, since it's not a significant difference - # TODO: Fix GPS inconsistency and drop rounding - csv_value = csv_row[csv_pos] - - - # If > 10 or < -10, we truncate 0 after rounding to 6 decimals. This means this applies - # to GPS coordinates but not energy star ratings (e.g.) - if (abs(float(csv_value)) > 10): - print("df ", row[col].to_numpy(), "csv ", csv_value) - csv_float = float(csv_value) - csv_val_parsed = f'{csv_float:.9f}'.rstrip('0').rstrip('.') - else: - csv_val_parsed = csv_value - - assert row[col].to_numpy()[0] == csv_val_parsed - -def test_lat_lon_become_strings(fixed_strings): - df = fixed_strings[['Latitude','Longitude']] - assert np.all(df.dtypes == 'string') - -def test_int_values_remain_the_same_as_origin(test_has_last_year_of_data): - df = clean.fix_int_cols(test_has_last_year_of_data) - assert np.all(df[clean.int_cols].dtypes == 'Int64') - -def test_csv_is_produced(test_has_last_year_of_data): - out_file = get_test_file_path(test_output_file) - clean.output_to_csv(test_has_last_year_of_data, out_file) - assert os.path.exists(out_file) + input_filename = get_test_file_path(test_input_file) + df = clean_and_pare_down_data_all_years.process(input_filename) + assert df is not None + return df -@pytest.fixture -def process(): - return clean.process(get_src_file_path(src_input_file)) -def test_data_has_ranking_columns(process): - for col in proc.building_cols_to_rank: - assert col in process.columns +def test_data_has_positive_ghg_data(processed_dataframe): + '''confirm each property in the processed dataframe has non-zero GHGIntensity''' + + df = processed_dataframe + assert all([ghg > 0 for ghg in df['GHGIntensity']]) + + +def test_data_has_submitted_status(processed_dataframe): + '''confirm each property in the processed dataframe has a submitted status''' + + df = processed_dataframe + for status in df['ReportingStatus']: + assert status in 
('Submitted Data', 'Submitted') + + +def test_lat_long_are_unchanged(processed_dataframe, csv_reader): + '''confirm lat/long in the processed dataframe is unchanged from origin csv''' + + df = processed_dataframe + df_lattitudes = [x for x in df['Latitude']] + df_longitudes = [x for x in df['Longitude']] + df_property_ids = [x for x in df['ID']] + + for row in csv_reader: + csv_property_id = row['ID'] + csv_lat = row['Latitude'] + csv_long = row['Longitude'] + if csv_property_id in df_property_ids: + i = df_property_ids.index(csv_property_id) + assert (csv_lat, csv_long) == (df_lattitudes[i], df_longitudes[i]) + + +def test_one_entry_per_property(processed_dataframe): + '''confirm each property only has 1 entry in the processed dataframe''' + + df = processed_dataframe + assert all([count == 1 for count in df['ID'].value_counts()]) + + +def test_expected_columns_present(processed_dataframe): + '''confirm all expected columns are present in the processed dataframe''' + + df = processed_dataframe + mandatory_columns = ( + 'DataYear', + 'ID', + 'PropertyName', + 'ReportingStatus', + 'Address', + 'ZIPCode', + 'ChicagoEnergyRating', + 'ExemptFromChicagoEnergyRating', + 'CommunityArea', + 'PrimaryPropertyType', + 'GrossFloorArea', + 'TotalGHGEmissions', + 'GHGIntensity', + 'YearBuilt', + 'NumberOfBuildings', + 'WaterUse', + 'ENERGYSTARScore', + 'ElectricityUse', + 'NaturalGasUse', + 'DistrictSteamUse', + 'DistrictChilledWaterUse', + 'AllOtherFuelUse', + 'SiteEUI', + 'SourceEUI', + 'WeatherNormalizedSiteEUI', + 'WeatherNormalizedSourceEUI', + 'Latitude', + 'Longitude', + 'Location', + 'Row_ID', + 'Wards', + 'CommunityAreas', + 'ZipCodes', + 'CensusTracts', + 'HistoricalWards2003-2015', + ) + assert set(df.columns) == set(mandatory_columns) + + +def test_correct_year_selected(processed_dataframe): + '''confirm the correct DataYear is present in the processed dataframe + for a sample of properties''' + + df = processed_dataframe + + united_center_df = 
df[df['PropertyName']=='United Center'] + united_center_df.reset_index(inplace=True, drop=True) + assert len(united_center_df) == 1 + assert united_center_df.loc[0, 'DataYear'] == 2019 + + crown_hall_df = df[df['PropertyName']=='Crown Hall'] + crown_hall_df.reset_index(inplace=True, drop=True) + assert len(crown_hall_df) == 1 + assert crown_hall_df.loc[0, 'DataYear'] == 2021 + + bldg_138730_df = df[df['ID']==138730] + bldg_138730_df.reset_index(inplace=True, drop=True) + assert len(bldg_138730_df) == 1 + assert bldg_138730_df.loc[0, 'DataYear'] == 2020 + + +def test_property_count(processed_dataframe): + '''confirm the processed dataframe has the correct number of properties''' + + df = processed_dataframe + assert len(df) == 4 + + +def test_no_ghg_property_is_excluded(processed_dataframe): + '''confirm property with submitted data but no GHGIntensity data + is excluded from the processed dataframe''' + + df = processed_dataframe + # property ID 240068 is present in test source data but + # 2016-2022 submitted data has no GHGIntensity data + assert len(df[df['ID']=='240068']) == 0 + + +def test_csv_is_produced(processed_dataframe): + '''confirm clean_and_pare_down_data_all_years.output_to_csv creates + a csv on disk''' + + df = processed_dataframe + output_file_path = get_test_file_path(test_output_file) + clean_and_pare_down_data_all_years.output_to_csv(df, output_file_path) + assert os.path.exists(output_file_path) diff --git a/tests/data/source/test_output.csv b/tests/data/source/test_output.csv index 3ae6fc4c..970b5123 100644 --- a/tests/data/source/test_output.csv +++ b/tests/data/source/test_output.csv @@ -1,5 +1,5 @@ 
DataYear,ID,PropertyName,ReportingStatus,Address,ZIPCode,ChicagoEnergyRating,ExemptFromChicagoEnergyRating,CommunityArea,PrimaryPropertyType,GrossFloorArea,YearBuilt,NumberOfBuildings,WaterUse,ENERGYSTARScore,ElectricityUse,NaturalGasUse,DistrictSteamUse,DistrictChilledWaterUse,AllOtherFuelUse,SiteEUI,SourceEUI,WeatherNormalizedSiteEUI,WeatherNormalizedSourceEUI,TotalGHGEmissions,GHGIntensity,Latitude,Longitude,Location,Row_ID,Wards,CommunityAreas,ZipCodes,CensusTracts,HistoricalWards2003-2015 -2021,100856,United Center,Not Submitted,1901 W Madison St,60612,0.0,False,NEAR WEST SIDE,Indoor Arena,2289000.0,1994.0,1.0,,,,,,,,,,,,,,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2021-100856,46,29,21184,90,41 -2021,160196,The Art Institute of Chicago,Not Submitted,111 S Michigan Ave,60603,0.0,False,LOOP,Museum,1008416.0,1892.0,1.0,,,,,,,,,,,,,,41.880452,-87.624229,"(41.880452, -87.624229)",2021-160196,36,38,14311,367,22 -2021,256419,Crown Hall,Submitted,3360 S State Street,60616,1.0,False,DOUGLAS,College/University,54291.0,1955.0,1.0,,,1333307.2,0.0,451039945.6,0.0,,8332.4,10063.4,8332.4,10063.4,30138.8,555.1,41.842325,-87.62715344,"(41.842325, -87.62715344)",2021-256419,9,1,21194,377,12 -2021,256458,United Center Office Building,Not Submitted,1901 W Madison St,60612,0.0,False,,,,,,,,,,,,,,,,,,,41.88125398,-87.67448493,"(41.88125398, -87.67448493)",2021-256458,46,29,21184,90,41 +2019,100856,United Center,Submitted,1901 W Madison St,60612.0,2.0,False,NEAR WEST SIDE,Indoor Arena,960000.0,1994.0,2,206239.0,,102653875.6,15169580.2,,,,122.7,316.0,122.4,,17883.7,18.6,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2019-100856,46,29,21184,90,41 +2020,138730,Grand Blvd Plaza,Submitted Data,5401 S WENTWORTH AVE,60609.0,3.0,False,FULLER PARK,Strip Mall,138730.0,1975.0,1,,,6245386.4,5872823.7,,,,87.4,170.5,87.9,172.0,1286.6,9.3,41.79622465,-87.63030493,"(41.79622465, -87.63030493)",2020-138730,9,3,14924,224,12 +2022,160196,The Art Institute of Chicago,Submitted 
Data,111 S Michigan Ave,60603.0,1.0,False,LOOP,Museum,1008416.0,1892.0,1,,,80968968.1,158224778.6,0.0,0.0,,237.2,389.6,239.3,389.5,19068.8,18.9,41.880527821930805,-87.62420946585881,"(41.880527821930805, -87.62420946585881)",2022-160196,36,38,14311,367,22 +2021,256419,Crown Hall,Submitted,3360 S State Street,60616.0,1.0,False,DOUGLAS,College/University,54291.0,1955.0,1,,,1333307.2,0.0,451039945.6,0.0,,8332.4,10063.4,8332.4,10063.4,30138.8,555.1,41.842325,-87.62715344,"(41.842325, -87.62715344)",2021-256419,9,1,21194,377,12 diff --git a/tests/data/source/test_src_data.csv b/tests/data/source/test_src_data.csv index 70dd3cae..8a054e33 100644 --- a/tests/data/source/test_src_data.csv +++ b/tests/data/source/test_src_data.csv @@ -1,22 +1,36 @@ Data Year,ID,Property Name,Reporting Status,Address,ZIP Code,Chicago Energy Rating,Exempt From Chicago Energy Rating,Community Area,Primary Property Type,Gross Floor Area - Buildings (sq ft),Year Built,# of Buildings,Water Use (kGal),ENERGY STAR Score,Electricity Use (kBtu),Natural Gas Use (kBtu),District Steam Use (kBtu),District Chilled Water Use (kBtu),All Other Fuel Use (kBtu),Site EUI (kBtu/sq ft),Source EUI (kBtu/sq ft),Weather Normalized Site EUI (kBtu/sq ft),Weather Normalized Source EUI (kBtu/sq ft),Total GHG Emissions (Metric Tons CO2e),GHG Intensity (kg CO2e/sq ft),Latitude,Longitude,Location,Row_ID,Wards,Community Areas,Zip Codes,Census Tracts,Historical Wards 2003-2015 -2020,256458,United Center Office Building,Not Submitted,1901 W Madison St,60612,0.0,false,,,,,,,,,,,,,,,,,,,41.88125398,-87.67448493,"(41.88125398, -87.67448493)",2020-256458,46,29,21184,90,41 2020,100856,United Center,Not Submitted,1901 W Madison St,60612,0.0,false,NEAR WEST SIDE,,2289000,,,,,,,,,,,,,,,,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2020-100856,46,29,21184,90,41 +2020,138730,Grand Blvd Plaza,Submitted Data,5401 S WENTWORTH AVE,60609,3.0,false,FULLER PARK,Strip 
Mall,138730,1975,1,,,6245386.4,5872823.7,,,,87.4,170.5,87.9,172,1286.6,9.3,41.79622465,-87.63030493,"(41.79622465, -87.63030493)",2020-138730,9,3,14924,224,12 +2020,240068,The Farallon Condominium,Submitted Data,600 N Dearborn St,60654,1.0,false,NEAR NORTH SIDE,Multifamily Housing,277047,2001,1,,27,5004988.4,15776668.8,,4201811.7,,111.8,153.9,116,158.5,,,41.89268011,-87.630164,"(41.89268011, -87.630164)",2020-240068,36,37,4446,670,22 2020,160196,The Art Institute of Chicago,Submitted Data,111 S Michigan Ave,60603,2.0,false,LOOP,Museum,1008416,1892,1,,,79096897.3,160912625.4,,,,238,387.2,240.8,390.1,20891.4,20.7,41.880452,-87.624229,"(41.880452, -87.624229)",2020-160196,36,38,14311,367,22 2020,256419,Crown Hall,Submitted Data,3360 S State Street,60616,4.0,false,DOUGLAS,College/University,54291,1955,1,,,1033900.8,,5517241.1,,,120.7,175.6,127.9,184.3,527.7,9.7,41.842325,-87.62715344,"(41.842325, -87.62715344)",2020-256419,9,1,21194,377,12 2017,100856,United Center,Submitted,1901 West Madison St.,60612,,,NEAR WEST SIDE,Indoor Arena,960000,1994,2,,,111594203,8070756,,,,124.7,373.8,125.2,374.4,21061.9,21.9,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2017-100856,46,29,21184,90,41 2017,160196,The Art Institute of Chicago,Submitted,111 South Michigan Ave,60603,,,LOOP,Museum,1008416,1892,1,,,84934097,159962683,,,,242.9,431,247.3,435,24200.3,24,41.880452,-87.624229,"(41.880452, -87.624229)",2017-160196,36,38,14311,367,22 2015,160196,Art Institute of Chicago,Submitted,111 S. Michigan Ave.,60603,,,LOOP,Museum,1113416,1892,1,,,88935350,177925468,,,,239.7,418.6,243.4,422.5,27319,24.5,41.880451999999998,-87.624229,"(41.880452, -87.624229)",2015-160196,36,38,14311,367,22 +2016,138730,5401 S Wentworth - 2017 Resubmit,Submitted,5401 S. 
Wentworth,60609,,,FULLER PARK,Strip Mall,138730,1975,1,,,7995186.1,,,,,57.6,181,56.9,178.8,1478.3,10.7,41.79622465,-87.63030493,"(41.79622465, -87.63030493)",2016-138730,9,3,14924,224,12 2018,256419,Crown Hall,Not Submitted,3360 South State st,60616,0.0,false,DOUGLAS,College/University,54291,1955,1,,,,,,,,,,,,,,41.842325,-87.62715344,"(41.842325, -87.62715344)",2018-256419,9,1,21194,377,12 2014,160196,The Art Institute of Chicago,Submitted,111 South Michigan Ave,60603,,,LOOP,Museum,1008416,1892,1,,,94822312,158283369,,,,251,460,248,457,27458,27.23,41.880452,-87.624229,"(41.880452, -87.624229)",2014-160196,36,38,14311,367,22 2018,160196,The Art Institute of Chicago,Submitted,111 South Michigan Ave,60603,1.0,false,LOOP,Museum,1008416,1892,1,657,,89812131.2,169797846.5,,,,257.4,426.2,257.7,426.5,23960.3,23.8,41.880452,-87.624229,"(41.880452, -87.624229)",2018-160196,36,38,14311,367,22 2015,100856,United Center,Submitted,1901 West Madison St.,60612,,,NEAR WEST SIDE,Indoor Arena,960000,1994,1,,,11479770,12102665,,,,24.6,50.8,,,2949,3.1,41.880676719999997,-87.674182070000001,"(41.88067672, -87.67418207)",2015-100856,46,29,21184,90,41 +2017,240068,The Farallon Condominium,Submitted,600 N Dearborn St.,60654,,,NEAR NORTH SIDE,Multifamily Housing,214049,2001,1,,26,4952300,14924591,,3809916,,110.7,163.7,116.4,169.7,,,41.89268011,-87.630164,"(41.89268011, -87.630164)",2017-240068,36,37,4446,670,22 +2018,240068,The Farallon Condominium,Submitted,600 N Dearborn St.,60654,1.0,false,NEAR NORTH SIDE,Multifamily Housing,223535,2001,1,6453,29,4836086.9,16781256.1,,4447092,,116.6,157.5,117.8,158.8,,,41.89268011,-87.630164,"(41.89268011, -87.630164)",2018-240068,36,37,4446,670,22 2016,160196,The Art Institute of Chicago,Submitted,111 South Michigan Ave,60603,,,LOOP,Museum,1008416,1892,1,,,91317062,160204044.6,,,,249.4,451.2,254.1,453,25340.9,25.1,41.880452,-87.624229,"(41.880452, -87.624229)",2016-160196,36,38,14311,367,22 +2018,138730,2019 - 5401 S Wentworth,Submitted,5401 S. 
Wentworth,60609,4.0,false,FULLER PARK,Strip Mall,138730,1975,1,8244,,195112.7,,,,,1.4,3.9,1.4,3.9,32.5,0.2,41.79622465,-87.63030493,"(41.79622465, -87.63030493)",2018-138730,9,3,14924,224,12 2018,100856,United Center,Submitted,1901 West Madison St.,60612,4.0,false,NEAR WEST SIDE,Indoor Arena,960000,1994,2,12929,,11345016,9342860.4,,,,21.5,43.3,21.6,43.4,2383.6,2.5,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2018-100856,46,29,21184,90,41 2016,100856,United Center,Submitted,1901 West Madison St.,60612,,,NEAR WEST SIDE,Indoor Arena,960000,1994,1,,,113634101,8970953,,,,127.7,381.5,128.5,382.3,21421.7,22.3,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2016-100856,46,29,21184,90,41 +2016,240068,The Farallon Condominium,Submitted,600 N Dearborn St.,60654,,,NEAR NORTH SIDE,Multifamily Housing,214049,2001,1,,25,4896718.2,15069487,,4826364,,115.8,168.3,122.7,175.7,,,41.89268011,-87.630164,"(41.89268011, -87.630164)",2016-240068,36,37,4446,670,22 2019,100856,United Center,Submitted,1901 W Madison St,60612,2.0,false,NEAR WEST SIDE,Indoor Arena,960000,1994,2,206239,,102653875.6,15169580.2,,,,122.7,316,122.4,,17883.7,18.6,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2019-100856,46,29,21184,90,41 +2019,138730,Grand Blvd Plaza,Not Submitted,5401 S WENTWORTH AVE,,0.0,false,FULLER PARK,,,,,,,,,,,,,,,,,,41.79622465,-87.63030493,"(41.79622465, -87.63030493)",2019-138730,9,3,14924,224,12 2019,160196,The Art Institute of Chicago,Submitted,111 S Michigan Ave,60603,1.0,false,LOOP,Museum,1008416,1892,1,,,89515423.6,174620529.5,,,,261.9,430.4,260.4,,24167.1,24,41.880452,-87.624229,"(41.880452, -87.624229)",2019-160196,36,38,14311,367,22 +2019,240068,The Farallon Condominium,Submitted,600 N Dearborn St,60654,1.0,false,NEAR NORTH SIDE,Multifamily Housing,223535,2001,1,,30,4854412.8,16557998.2,,,,112.6,,,,,,41.89268011,-87.630164,"(41.89268011, -87.630164)",2019-240068,36,37,4446,670,22 2019,256419,Crown Hall,Submitted,3360 S State 
Street,60616,1.0,false,DOUGLAS,College/University,54291,1955,1,,,1776182.8,,6264685,,,148.1,230.4,145.5,,711.5,13.1,41.842325,-87.62715344,"(41.842325, -87.62715344)",2019-256419,9,1,21194,377,12 2021,100856,United Center,Not Submitted,1901 W Madison St,60612,0.0,false,NEAR WEST SIDE,Indoor Arena,2289000,1994,1,,,,,,,,,,,,,,41.88067672,-87.67418207,"(41.88067672, -87.67418207)",2021-100856,46,29,21184,90,41 2021,256419,Crown Hall,Submitted,3360 S State Street,60616,1.0,false,DOUGLAS,College/University,54291,1955,1,,,1333307.2,0,451039945.6,0,,8332.4,10063.4,8332.4,10063.4,30138.8,555.1,41.842325,-87.62715344,"(41.842325, -87.62715344)",2021-256419,9,1,21194,377,12 +2021,138730,Grand Blvd Plaza,Not Submitted,5401 S WENTWORTH AVE,60609,0.0,false,FULLER PARK,,138730,,,,,,,,,,,,,,,,41.79622465,-87.63030493,"(41.79622465, -87.63030493)",2021-138730,9,3,14924,224,12 2021,160196,The Art Institute of Chicago,Not Submitted,111 S Michigan Ave,60603,0.0,false,LOOP,Museum,1008416,1892,1,,,,,,,,,,,,,,41.880452,-87.624229,"(41.880452, -87.624229)",2021-160196,36,38,14311,367,22 -2021,256458,United Center Office Building,Not Submitted,1901 W Madison St,60612,0.0,false,,,,,,,,,,,,,,,,,,,41.88125398,-87.67448493,"(41.88125398, -87.67448493)",2021-256458,46,29,21184,90,41 \ No newline at end of file +2021,240068,The Farallon Condominium,Submitted,600 N Dearborn St,60654,1.0,false,NEAR NORTH SIDE,Multifamily Housing,277047,2001,1,,28,4812199.5,15477578.9,0,4759716.1,,112.1,152.4,116.5,157.4,,,41.89268011,-87.630164,"(41.89268011, -87.630164)",2021-240068,36,37,4446,670,22 +2022,138730,Grand Blvd Plaza,Submitted Data,5401 S WENTWORTH AVE,60609,0.0,false,FULLER PARK,,138730,,,,,,,,,,,,,,,,41.796234875219376,-87.63040492438661,"(41.796234875219376, -87.63040492438661)",2022-138730,9,3,14924,224,12 +2022,240068,The Farallon Condominium,Submitted Data,600 N Dearborn St,60654,1.0,false,NEAR NORTH SIDE,Multifamily 
Housing,277047,2001,1,,30,4691440.9,16881198.1,0,4426784.3,,116.3,156.1,120,160.2,,,41.89248545359695,-87.62982443970971,"(41.89248545359695, -87.62982443970971)",2022-240068,36,37,4446,670,22 +2022,256419,Crown Hall,Not Submitted,3360 S State Street,60616,0.0,false,DOUGLAS,,54291,,,,,,,,,,,,,,,,41.83292416819927,-87.62680274146655,"(41.83292416819927, -87.62680274146655)",2022-256419,9,1,21194,25,12 +2022,100856,United Center,Not Submitted,1901 W Madison St,60612,0.0,false,NEAR WEST SIDE,,2289000,,,,,,,,,,,,,,,,41.881241456803345,-87.67428253395332,"(41.881241456803345, -87.67428253395332)",2022-100856,46,29,21184,90,41 +2022,160196,The Art Institute of Chicago,Submitted Data,111 S Michigan Ave,60603,1.0,false,LOOP,Museum,1008416,1892,1,,,80968968.1,158224778.6,0,0,,237.2,389.6,239.3,389.5,19068.8,18.9,41.880527821930805,-87.62420946585881,"(41.880527821930805, -87.62420946585881)",2022-160196,36,38,14311,367,22 From 05af0f85ea2cd6c7b124de2915413ab53fd995a3 Mon Sep 17 00:00:00 2001 From: Patrick Dorn Date: Tue, 7 May 2024 21:30:58 -0500 Subject: [PATCH 2/2] variable renames per feedback --- tests/data/scripts/create_test_data.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/data/scripts/create_test_data.py b/tests/data/scripts/create_test_data.py index c7d3f9a7..c09b6dc1 100644 --- a/tests/data/scripts/create_test_data.py +++ b/tests/data/scripts/create_test_data.py @@ -9,7 +9,7 @@ src_input_file = 'ChicagoEnergyBenchmarking.csv' test_input_file = 'test_src_data.csv' -properties_to_include = [ +property_ids_to_include = [ '100856', # United Center '256419', # Crown Hall '160196', # The Art Institute of Chicago @@ -17,7 +17,7 @@ '240068', # random property w/ submitted data and no GHGIntensity data ] -def write_test_sample(reader: csv.reader, writer: csv.writer, properties_to_include: List[str]) -> csv.writer: +def write_test_sample(reader: csv.reader, writer: csv.writer, property_ids_to_include: List[str]) -> csv.writer: 
header_row = next(reader) if len(header_row) <= 0: raise EOFError('ChicagoEnergyBenchmarking CSV file is empty!') @@ -25,7 +25,7 @@ def write_test_sample(reader: csv.reader, writer: csv.writer, properties_to_incl writer.writerow(header_row) for row in reader: property_id = row[1] - if property_id in properties_to_include: + if property_id in property_ids_to_include: writer.writerow(row) def main(): @@ -41,7 +41,7 @@ def main(): csvfile = open(get_test_file_path(target_path), 'w') test_file = csv.writer(csvfile) - write_test_sample(src_csv, test_file, properties_to_include) + write_test_sample(src_csv, test_file, property_ids_to_include) print('Copied source data from', src_path) print('Copied test data to', target_path)