From 0a28506e1295615d13e13ef86f8bef97582eb1ad Mon Sep 17 00:00:00 2001
From: alexkcode <alexiskwan.uiuc@gmail.com>
Date: Tue, 5 Mar 2024 18:55:53 -0600
Subject: [PATCH] refactoring of tests continued

---
 tests/data/scripts/create_test_data.py          |  4 ++--
 tests/data/scripts/unit/test_clean_all_years.py | 16 +++++++++-------
 tests/data/scripts/utils.py                     |  9 +++++++--
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/tests/data/scripts/create_test_data.py b/tests/data/scripts/create_test_data.py
index 82c9b6fa..7041b2cc 100644
--- a/tests/data/scripts/create_test_data.py
+++ b/tests/data/scripts/create_test_data.py
@@ -2,7 +2,7 @@
 import pandas as pd, numpy as np
 from typing import List
 from src.data.scripts.utils import get_and_clean_csv
-from tests.data.scripts.utils import get_file_path
+from tests.data.scripts.utils import get_test_file_path
 
 src_dir = 'src'
 test_dir = 'tests'
@@ -12,7 +12,7 @@
 property_test_cases = ['United Center', 'Crown Hall', 'Art Institute', 'Marie Curie']
 
 def csv_rows() -> csv.reader:
-    csvfile = open(get_file_path(test_dir, test_input_file))
+    csvfile = open(get_test_file_path(test_input_file))  # helper takes only the file name; the 'tests' dir is built in
     return csv.reader(csvfile) 
 
 def get_test_sample(src_data: csv.reader, property_test_cases: List[str]) -> csv.writer:
diff --git a/tests/data/scripts/unit/test_clean_all_years.py b/tests/data/scripts/unit/test_clean_all_years.py
index 2d1dfd63..c24e559c 100644
--- a/tests/data/scripts/unit/test_clean_all_years.py
+++ b/tests/data/scripts/unit/test_clean_all_years.py
@@ -5,7 +5,7 @@
 
 from src.data.scripts.utils import get_and_clean_csv
 from src.data.scripts import clean_and_pare_down_data_all_years as clean, process_data as proc
-from tests.data.scripts.utils import get_file_path
+from tests.data.scripts.utils import get_test_file_path, get_src_file_path
 
 src_dir = 'src'
 test_dir = 'tests'
@@ -15,17 +15,19 @@
 
 @pytest.fixture
 def src_building_data() -> pd.DataFrame:
-    test_data_path = get_file_path(test_dir, test_input_file)
+    test_data_path = get_test_file_path(test_input_file)
     assert os.path.exists(test_data_path)
     return get_and_clean_csv(test_data_path)
 
 @pytest.fixture
 def csv_file() -> csv.reader:
-    csvfile = open(get_file_path(test_dir, test_input_file))
-    return csv.reader(csvfile) 
+    with open(get_test_file_path(test_input_file), newline='') as csvfile:  # newline='' per csv module docs
+        yield csv.reader(csvfile)  # yield keeps the file open for the test; the with-block closes it afterwards
 
-def test_csv_file_is_readable(csv_file):
-    csv_file
+def test_csv_file_has_some_data(csv_file):
+    first_line = next(csv_file, None)  # None instead of an uncaught StopIteration when the file is empty
+    assert first_line is not None, "test input CSV has no rows"
+    assert len(first_line) > 0, "first row of test input CSV has no fields"
 
 @pytest.mark.parametrize("test_input", [
     clean.string_cols,
@@ -77,13 +79,13 @@ def test_int_values_remain_the_same_as_origin(test_has_last_year_of_data):
     assert np.all(df[clean.int_cols].dtypes == 'Int64')
 
 def test_csv_is_produced(test_has_last_year_of_data):
-    out_file = get_file_path(test_dir, test_output_file)
+    out_file = get_test_file_path(test_output_file)
     clean.output_to_csv(test_has_last_year_of_data, out_file)
     assert os.path.exists(out_file)
 
 @pytest.fixture
 def process():
-    return clean.process(get_file_path(src_dir, src_input_file))
+    return clean.process(get_src_file_path(src_input_file))
 
 def test_data_has_ranking_columns(process):
     for col in proc.building_cols_to_rank:
diff --git a/tests/data/scripts/utils.py b/tests/data/scripts/utils.py
index f2857765..36ad00f3 100644
--- a/tests/data/scripts/utils.py
+++ b/tests/data/scripts/utils.py
@@ -1,6 +1,11 @@
 import os, pathlib
 
-def get_file_path(dir: str, f: str):
+def get_test_file_path(f: str):  # returns <cwd>/tests/data/source/<f> as a string
     curr_path = pathlib.Path(".")
-    path = curr_path.parent.absolute() / dir / "data" / "source"
+    path = curr_path.parent.absolute() / 'tests' / 'data' / 'source'  # Path(".").parent is still ".", so this resolves against the current working directory
+    return os.path.join(path, f)
+
+def get_src_file_path(f: str):  # returns <cwd>/src/data/source/<f> as a string
+    curr_path = pathlib.Path(".")
+    path = curr_path.parent.absolute() / 'src' / 'data' / 'source'  # same CWD-relative resolution as get_test_file_path
     return os.path.join(path, f)
\ No newline at end of file