Skip to content

Commit

Permalink
Merge pull request #80 from vkoves/test-automation
Browse files Browse the repository at this point in the history
Fix Python Data Tests & Add to CI Via GitHub Actions
  • Loading branch information
vkoves authored Apr 2, 2024
2 parents d6afdc2 + 534ab06 commit ac2d048
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 11 deletions.
4 changes: 0 additions & 4 deletions .github/workflows/eslint.yml
Original file line number Diff line number Diff line change
@@ -1,7 +1,3 @@
# This workflow uses actions that are not certified by GitHub.
# They are provided by a third-party and are governed by
# separate terms of service, privacy policy, and support
# documentation.
# ESLint is a tool for identifying and reporting on patterns
# found in ECMAScript/JavaScript code.
# More details at https://github.com/eslint/eslint
Expand Down
28 changes: 28 additions & 0 deletions .github/workflows/pytest.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# A GitHub action to run our data tests with pytest.
# Note: the scraped copy of this file had its indentation flattened; this
# restores the conventional (and required) YAML nesting without changing
# any of the workflow's behavior.

name: Pytest Data Tests

# Run on pushes to main and on pull requests targeting main.
on:
  push:
    branches: [ "main" ]
  pull_request:
    # The branches below must be a subset of the branches above
    branches: [ "main" ]

jobs:
  pytest:
    name: 'Pytest'
    runs-on: ubuntu-latest
    steps:
      # Check out the repository so requirements.txt and the tests are available
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v4
        with:
          python-version: '3.9'
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r requirements.txt
      - name: Test with pytest
        run: |
          pytest
30 changes: 23 additions & 7 deletions tests/data/scripts/unit/test_clean_all_years.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ def src_building_data() -> pd.DataFrame:
@pytest.fixture
def csv_file() -> csv.reader:
    """Yield a csv.reader over the raw test input CSV.

    Uses a yield fixture so pytest runs the cleanup after the test
    finishes; the original version returned the reader and leaked the
    open file handle.
    """
    csvfile = open(get_test_file_path(test_input_file))
    try:
        yield csv.reader(csvfile)
    finally:
        # Close the underlying file once the dependent test is done.
        csvfile.close()

def test_csv_file_has_some_data(csv_file):
first_line = csv_file.__next__()
Expand All @@ -48,7 +48,7 @@ def test_columns_are_renamed(src_building_data) -> pd.DataFrame:
return df

def test_data_has_positive_ghg_data(test_columns_are_renamed):
    """Every building kept by the GHG-intensity filter has a positive intensity."""
    result = clean.get_buildings_with_ghg_intensity(test_columns_are_renamed)
    assert result is not None
    assert (result['GHGIntensity'] > 0).all()

Expand All @@ -64,12 +64,12 @@ def test_has_last_year_of_data(test_columns_are_renamed) -> pd.DataFrame:

@pytest.fixture
def fixed_strings(test_has_last_year_of_data, test_columns_are_renamed):
    """Latest-year data with its string columns normalized by clean.fix_str_cols."""
    fixed = clean.fix_str_cols(test_has_last_year_of_data, test_columns_are_renamed)
    return fixed

@pytest.fixture
def fixed_strings_all_years(test_columns_are_renamed):
    """All-years data with its string columns normalized by clean.fix_str_cols."""
    fixed = clean.fix_str_cols(test_columns_are_renamed, test_columns_are_renamed)
    return fixed

def test_str_values_remain_the_same_as_origin(fixed_strings_all_years, csv_file):
Expand All @@ -79,16 +79,32 @@ def test_str_values_remain_the_same_as_origin(fixed_strings_all_years, csv_file)
year, id = csv_row[0], csv_row[1]
row = fixed_strings_all_years[(fixed_strings_all_years['ID'].astype(str) == id) & \
(fixed_strings_all_years['DataYear'].astype(str) == year)]

for col, csv_pos in zip(clean.string_cols, str_col_positions):
if all(pd.isna(row[col].to_numpy())):
continue
# print("df ", row[col].to_numpy(), "csv ", csv_row[csv_pos])
assert row[col].to_numpy()[0] == csv_row[csv_pos]

# The raw GPS in ChicagoEnergyBenchmarking.csv has 41.880451999999998, which gets
# truncated, so we round to ignore that, since it's not a significant difference
# TODO: Fix GPS inconsistency and drop rounding
csv_value = csv_row[csv_pos]


# If the absolute value is > 10, format to 9 decimal places and strip trailing
# zeros. This applies to GPS coordinates but not to e.g. ENERGY STAR ratings.
if (abs(float(csv_value)) > 10):
print("df ", row[col].to_numpy(), "csv ", csv_value)
csv_float = float(csv_value)
csv_val_parsed = f'{csv_float:.9f}'.rstrip('0').rstrip('.')
else:
csv_val_parsed = csv_value

assert row[col].to_numpy()[0] == csv_val_parsed

def test_lat_lon_become_strings(fixed_strings):
df = fixed_strings[['Latitude','Longitude']]
assert np.all(df.dtypes == 'string')

def test_int_values_remain_the_same_as_origin(test_has_last_year_of_data):
df = clean.fix_int_cols(test_has_last_year_of_data)
assert np.all(df[clean.int_cols].dtypes == 'Int64')
Expand Down

0 comments on commit ac2d048

Please sign in to comment.