diff --git a/lib/projections.tsx b/lib/projections.tsx
index 581a070..d4631a5 100644
--- a/lib/projections.tsx
+++ b/lib/projections.tsx
@@ -17,7 +17,7 @@ const months = {
 }
 
 // eslint-disable-next-line @typescript-eslint/no-inferrable-types
-const latestMonth: number = 3
+const latestMonth: number = 5
 const glueWord = latestMonth == 2 ? "and" : "through"
 const observedMonths =
   latestMonth == 1
diff --git a/python/housing_data/build_data_utils.py b/python/housing_data/build_data_utils.py
index ed325ce..8cdc0f2 100644
--- a/python/housing_data/build_data_utils.py
+++ b/python/housing_data/build_data_utils.py
@@ -80,8 +80,8 @@ def get_numerical_columns(
 CANADA_POPULATION_DIR = Path("data", "canada-population")
 
 # Last year and month for which monthly BPS data is available (and is cloned to housing-data-data).
-LATEST_MONTH = (2024, 3)
-LAST_YEAR_ANNUAL_DATA_RELEASED = False
+LATEST_MONTH = (2024, 5)
+LAST_YEAR_ANNUAL_DATA_RELEASED = True
 
 
 def write_to_json_directory(df: pd.DataFrame, path: Path) -> None:
diff --git a/python/housing_data/california_hcd_data.py b/python/housing_data/california_hcd_data.py
index 969d2f5..3b42d60 100644
--- a/python/housing_data/california_hcd_data.py
+++ b/python/housing_data/california_hcd_data.py
@@ -121,13 +121,18 @@ def _aggregate_to_geography(
     assert (wide_df[["JURS_NAME", "year"]].value_counts() == 1).all()
     wide_df = wide_df.drop(columns=["CNTY_NAME"])
     if level == "place":
-        old_rows = len(wide_df)
+        old_wide_df = wide_df
         # Add place_or_county_code
         wide_df = wide_df.merge(
            _load_fips_crosswalk(data_path), left_on="JURS_NAME", right_on="name"
         ).drop(columns=["name", "county_code"])
-        new_rows = len(wide_df)
-        assert old_rows == new_rows, f"{old_rows=} != {new_rows=}"
+        if len(old_wide_df) != len(wide_df):
+            dropped_cities = set(old_wide_df["JURS_NAME"]) - set(wide_df["JURS_NAME"])
+            added_cities = set(wide_df["JURS_NAME"]) - set(old_wide_df["JURS_NAME"])
+            raise ValueError(
+                f"wide_df had {len(old_wide_df)} rows before merge and {len(wide_df)} rows after merge. "
+                f"{dropped_cities=} {added_cities=}"
+            )
     elif level == "county":
         # Add county_code
         old_rows = len(wide_df)
@@ -167,6 +172,11 @@ def _load_fips_crosswalk(data_path: Path) -> pd.DataFrame:
                "Carmel-by-the-Sea": "CARMEL",
                "La Cañada Flintridge": "LA CANADA FLINTRIDGE",
                "Angels": "ANGELS CAMP",
+                # The crosswalk has a city called "Amador City city".
+                # I think the BPS data cleaning code messes this city up and shortens it to just "Amador".
+                # This is wrong and we should probably fix it like we fixed Jersey City, but for now
+                # let's just change it to "AMADOR" to fix the "rows dropped in merge" error.
+                "Amador City": "AMADOR",
            }
        )
        .str.upper()
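
Reviewer note: the main behavioral change in the third hunk is replacing a bare row-count assert with a ValueError that names the jurisdictions dropped or added by the crosswalk merge. Below is a minimal standalone sketch of that diagnostic pattern; the helper name `check_merge_preserved_rows` and the toy data are hypothetical, not from this repo, and only pandas is assumed.

```python
import pandas as pd


def check_merge_preserved_rows(
    before: pd.DataFrame, after: pd.DataFrame, key: str = "JURS_NAME"
) -> None:
    """Raise a descriptive error if an inner merge changed the row count.

    Reporting the symmetric difference of the key column makes it obvious
    which crosswalk entries are missing or duplicated.
    """
    if len(before) != len(after):
        dropped = set(before[key]) - set(after[key])
        added = set(after[key]) - set(before[key])
        raise ValueError(
            f"{len(before)} rows before merge, {len(after)} rows after merge. "
            f"{dropped=} {added=}"
        )


# Toy data: "AMADOR" has no crosswalk entry, so the inner merge drops its row.
cities = pd.DataFrame({"JURS_NAME": ["ANGELS CAMP", "AMADOR"], "units": [10, 5]})
crosswalk = pd.DataFrame({"name": ["ANGELS CAMP"], "place_or_county_code": ["0562"]})
merged = cities.merge(crosswalk, left_on="JURS_NAME", right_on="name")
check_merge_preserved_rows(cities, merged)  # ValueError: dropped={'AMADOR'} added=set()
```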