Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Actions] Auto-Update cookiecutter template #21

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions .cruft.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"template": "https://github.com/collijk/python-package-cookiecutter",
"commit": "7e9285f84cc6b52165dbc97b9a0d4f059d0f6818",
"commit": "cd59edc69d51f9485bca69eb940e0706bc65e9ba",
"checkout": null,
"context": {
"cookiecutter": {
Expand All @@ -11,7 +11,8 @@
"project_slug": "climate-data",
"package_name": "climate_data",
"project_short_description": "Pipelines to extract, format, and downscale ERA5 and CMIP6 data.",
"_template": "https://github.com/collijk/python-package-cookiecutter"
"_template": "https://github.com/collijk/python-package-cookiecutter",
"_commit": "cd59edc69d51f9485bca69eb940e0706bc65e9ba"
}
},
"directory": null
Expand Down
21 changes: 21 additions & 0 deletions .github/alternative_workflows/build_docs.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
name: Build and Deploy Docs

on:
  workflow_dispatch:
  pull_request:
    branches:
      - main
    types:
      - closed

jobs:
  build-and-deploy-docs:
    # Run when a PR into main is actually merged, or on manual dispatch.
    # NOTE: the original `${{ A }} or ${{ B }}` form is broken in GitHub
    # Actions: each `${{ }}` expands first, producing a literal string like
    # "false or true", and any non-empty string is truthy — so the job ran on
    # every closed PR, merged or not. Use `||` inside a single expression.
    if: ${{ github.event.pull_request.merged || github.event_name == 'workflow_dispatch' }}
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v4
        with:
          # PAT with push rights so mkdocs gh-deploy can write the gh-pages branch.
          token: ${{ secrets.GH_TOKEN }}
      - uses: ./.github/actions/python-poetry-env
      - name: Deploy docs
        run: poetry run mkdocs gh-deploy --force
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v4.0.1
rev: v5.0.0
hooks:
- id: check-ast
- id: check-added-large-files
Expand Down
154 changes: 77 additions & 77 deletions poetry.lock

Large diffs are not rendered by default.

19 changes: 10 additions & 9 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -52,24 +52,24 @@ pydantic = "^2.10.4"


[tool.poetry.group.dev.dependencies]
mkdocstrings = {version = ">=0.23", extras = ["python"]}
mkdocstrings = {version = "*", extras = ["python"]}
mkdocs-material = "*"
mkdocs-table-reader-plugin = "*"
mkdocs-gen-files = "^0.5.0"
mkdocs-gen-files = "*"
mypy = "*"
pre-commit = "*"
pymdown-extensions = "*"
pytest = "*"
pytest-github-actions-annotate-failures = "*"
pytest-cov = "*"
python-kacl = "*"
ruff = ">=0.2.0"
pandas-stubs = "^2.2.3.241009"
types-pyyaml = "^6.0.12.20240311"
types-requests = "^2.31.0.20240406"
types-tqdm = "^4.66.0.20240417"
mkdocs-literate-nav = "^0.6.1"
mkdocs-section-index = "^0.3.9"
ruff = "*"
pandas-stubs = "*"
types-pyyaml = "*"
types-requests = "*"
types-tqdm = "*"
mkdocs-literate-nav = "*"
mkdocs-section-index = "*"


[build-system]
Expand Down Expand Up @@ -111,6 +111,7 @@ ignore = [
"FBT002", # Boolean positional args are super common in clis
"PD901", # Generic df names are fine
"S311", # Not using random numbers for crypto purposes
"S101", # Use of `assert` detected
]

[tool.ruff.lint.per-file-ignores]
Expand Down
2 changes: 1 addition & 1 deletion scripts/gen_data_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -186,7 +186,7 @@

- Daily Storage Root: `{cdata.daily_results}`
- Naming Convention: `{{SCENARIO}}/{{DAILY_VARIABLE}}/{{YEAR}}.nc` (historical data only)
- `{{SCENARIO}}`: Generally, only historical data is available at the daily level, so this will be `historical`.
- `{{SCENARIO}}`: Generally, only historical data is available at the daily level, so this will be `historical`.
- `{{DAILY_VARIABLE}}`: The name of the variable being stored.
- `{{YEAR}}`: The year of the data being stored.
- Annual Storage Root: `{cdata.results}`
Expand Down
33 changes: 17 additions & 16 deletions src/climate_data/cli_options.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,6 @@

from climate_data import constants as cdc


_T = TypeVar("_T")
_P = ParamSpec("_P")

Expand Down Expand Up @@ -109,6 +108,7 @@ def with_cmip6_experiment(
help="CMIP6 experiment to extract.",
)


def with_cmip6_variable(
*,
allow_all: bool = False,
Expand Down Expand Up @@ -147,6 +147,7 @@ def with_draw(
help="Draw to process.",
)


def with_scenario(
*,
allow_all: bool = False,
Expand All @@ -168,24 +169,24 @@ def with_overwrite() -> ClickOption[_P, _T]:


__all__ = [
"with_year",
"with_month",
"with_era5_variable",
"with_era5_dataset",
"with_cmip6_source",
"RUN_ALL",
"ClickOption",
"with_choice",
"with_cmip6_experiment",
"with_target_variable",
"with_draw",
"with_scenario",
"with_overwrite",
"with_output_directory",
"with_queue",
"with_verbose",
"with_cmip6_source",
"with_debugger",
"with_draw",
"with_era5_dataset",
"with_era5_variable",
"with_input_directory",
"with_month",
"with_num_cores",
"with_output_directory",
"with_overwrite",
"with_progress_bar",
"RUN_ALL",
"ClickOption",
"with_choice",
"with_queue",
"with_scenario",
"with_target_variable",
"with_verbose",
"with_year",
]
15 changes: 11 additions & 4 deletions src/climate_data/constants.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,9 @@
from pathlib import Path
from typing import NamedTuple, Literal
from typing import Literal, NamedTuple

from pydantic import BaseModel
import numpy as np
import xarray as xr

from pydantic import BaseModel

##############
# File roots #
Expand Down Expand Up @@ -40,13 +39,16 @@

# Extraction Constants


class _ERA5Datasets(NamedTuple):
# Use named tuple so that we can access the dataset names as attributes
reanalysis_era5_land: str = "reanalysis-era5-land"
reanalysis_era5_single_levels: str = "reanalysis-era5-single-levels"


ERA5_DATASETS = _ERA5Datasets()


class _ERA5Variables(NamedTuple):
u_component_of_wind: str = "10m_u_component_of_wind"
v_component_of_wind: str = "10m_v_component_of_wind"
Expand All @@ -56,6 +58,7 @@ class _ERA5Variables(NamedTuple):
total_precipitation: str = "total_precipitation"
sea_surface_temperature: str = "sea_surface_temperature"


ERA5_VARIABLES = _ERA5Variables()

CMIP6_SOURCES = [
Expand Down Expand Up @@ -83,11 +86,13 @@ class _ERA5Variables(NamedTuple):
"NorESM2-MM",
]


class _CMIP6Experiments(NamedTuple):
ssp126: str = "ssp126"
ssp245: str = "ssp245"
ssp585: str = "ssp585"


CMIP6_EXPERIMENTS = _CMIP6Experiments()


Expand Down Expand Up @@ -161,7 +166,7 @@ def names(self) -> list[str]:
return [v.name for v in self]

def get(self, name: str) -> CMIP6Variable:
return getattr(self, name)
return getattr(self, name) # type: ignore[no-any-return]

def to_dict(self) -> dict[str, CMIP6Variable]:
return {v.name: v for v in self}
Expand All @@ -181,10 +186,12 @@ def to_dict(self) -> dict[str, CMIP6Variable]:

DRAWS = [str(d) for d in range(100)]


class _Scenarios(NamedTuple):
historical: str = "historical"
ssp126: str = "ssp126"
ssp245: str = "ssp245"
ssp585: str = "ssp585"


SCENARIOS = _Scenarios()
13 changes: 8 additions & 5 deletions src/climate_data/downscale/prepare_predictors.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import ParamSpec, TypeVar
from collections.abc import Sequence
from pathlib import Path
from typing import ParamSpec, TypeVar

import click
import numpy as np
Expand All @@ -9,12 +9,13 @@

from climate_data import (
cli_options as clio,
)
from climate_data import (
constants as cdc,
)
from climate_data.data import ClimateData
from climate_data.utils import make_raster_template


_T = TypeVar("_T")
_P = ParamSpec("_P")

Expand Down Expand Up @@ -98,7 +99,9 @@ def load_lcz_data(


def prepare_predictors_main(
lat_start: str | int, lon_start: str | int, output_dir: str | Path,
lat_start: str | int,
lon_start: str | int,
output_dir: str | Path,
) -> None:
lat_start = int(lat_start)
lon_start = int(lon_start)
Expand Down Expand Up @@ -162,8 +165,8 @@ def prepare_predictors(output_dir: str, queue: str) -> None:
runner="cdtask",
task_name="downscale prepare_predictors",
node_args={
"lat-start": clio.LATITUDES,
"lon-start": clio.LONGITUDES,
"lat-start": LATITUDES,
"lon-start": LONGITUDES,
},
task_args={
"output-dir": output_dir,
Expand Down
2 changes: 2 additions & 0 deletions src/climate_data/downscale/prepare_training_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@

from climate_data import (
cli_options as clio,
)
from climate_data import (
constants as cdc,
)
from climate_data.data import ClimateData
Expand Down
26 changes: 15 additions & 11 deletions src/climate_data/extract/cmip6.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

from climate_data import (
cli_options as clio,
)
from climate_data import (
constants as cdc,
)
from climate_data.data import ClimateData
Expand Down Expand Up @@ -40,7 +42,7 @@ def extract_cmip6_main(
cdata = ClimateData(output_dir)
meta = cdata.load_cmip6_metadata()

*_, offset, scale, table_id = cdc.CMIP6_VARIABLES.get(cmip6_variable)
*_, offset, scale, table_id = cdc.CMIP6_VARIABLES.get(cmip6_variable)

mask = (
(meta.source_id == cmip6_source)
Expand Down Expand Up @@ -106,10 +108,10 @@ def extract_cmip6_task(
overwrite: bool,
) -> None:
extract_cmip6_main(
cmip6_variable,
cmip6_experiment,
cmip6_variable,
cmip6_experiment,
cmip6_source,
output_dir,
output_dir,
overwrite,
)

Expand Down Expand Up @@ -138,14 +140,16 @@ def extract_cmip6(
capture model inclusion criteria as it does not account for the year range available
in the data. This determination is made when we process the data in later steps.
"""
sources = (
cdc.CMIP6_SOURCES if cmip6_source == clio.RUN_ALL else [cmip6_source]
)
sources = cdc.CMIP6_SOURCES if cmip6_source == clio.RUN_ALL else [cmip6_source]
experiments = (
cdc.CMIP6_EXPERIMENTS if cmip6_experiment == clio.RUN_ALL else [cmip6_experiment]
cdc.CMIP6_EXPERIMENTS
if cmip6_experiment == clio.RUN_ALL
else [cmip6_experiment]
)
variables = (
cdc.CMIP6_VARIABLES.names() if cmip6_variable == clio.RUN_ALL else [cmip6_variable]
cdc.CMIP6_VARIABLES.names()
if cmip6_variable == clio.RUN_ALL
else [cmip6_variable]
)

overwrite_arg = {"overwrite": None} if overwrite else {}
Expand All @@ -154,8 +158,8 @@ def extract_cmip6(
runner="cdtask",
task_name="extract cmip6",
node_args={
"cmip6-source": sources,
"cmip6-experiment": experiments,
"cmip6-source": list(sources),
"cmip6-experiment": list(experiments),
"cmip6-variable": variables,
},
task_args={
Expand Down
4 changes: 3 additions & 1 deletion src/climate_data/extract/elevation.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

from climate_data import (
cli_options as clio,
)
from climate_data import (
constants as cdc,
)
from climate_data.data import ClimateData
Expand Down Expand Up @@ -91,7 +93,7 @@ def extract_elevation_task(
msg = "Downloaded using aws cli, this implementation is not valid"
raise NotImplementedError(msg)

extract_elevation_main(output_dir, model_name, lat_start, lon_start)
extract_elevation_main(model_name, lat_start, lon_start, output_dir)


@click.command() # type: ignore[arg-type]
Expand Down
Loading
Loading