diff --git a/.github/workflows/Python tests.yml b/.github/workflows/Python tests.yml index d73e0a92..f2a831db 100644 --- a/.github/workflows/Python tests.yml +++ b/.github/workflows/Python tests.yml @@ -81,7 +81,7 @@ jobs: run: uv run pytest --cov=./python/pdstools --cov-report=xml --cov-config=./python/tests/.coveragerc --ignore=python/tests/test_healthcheck.py --ignore=python/tests/test_ADMTrees.py - name: Upload coverage reports to Codecov - uses: codecov/codecov-action@v5.0.7 + uses: codecov/codecov-action@v5.1.1 with: token: ${{ secrets.CODECOV_TOKEN }} fail_ci_if_error: false diff --git a/.gitignore b/.gitignore index d70b21aa..af985fde 100644 --- a/.gitignore +++ b/.gitignore @@ -34,3 +34,4 @@ python/*.ipynb_checkpoints/* **/META-INF/* r/tests/testthat/d/tmp2 **/cache +.venv diff --git a/examples/articles/ADMExplained.ipynb b/examples/articles/ADMExplained.ipynb index 90053430..10ac0c80 100644 --- a/examples/articles/ADMExplained.ipynb +++ b/examples/articles/ADMExplained.ipynb @@ -944,8 +944,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.12.3" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index f07ff425..00f7d9d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -26,20 +26,18 @@ classifiers = [ keywords = [ "pega", "pegasystems", - "pds", "pdstools", - "cdhtools", "datascientist", "tools", ] requires-python = ">=3.9" -dependencies = ['polars>=1.9', 'typing_extensions'] +dependencies = ['polars==1.16', 'typing_extensions'] [tool.setuptools.dynamic] version = {attr="pdstools.__version__"} [project.optional-dependencies] -adm = ['plotly>=5.5.0'] +adm = ['plotly[express]>=6.0.0rc0', 'requests'] pega_io = ['aioboto3', 'polars_hash'] api = ['httpx', 'pydantic', 'anyio'] healthcheck = ['pdstools[adm]', 'great_tables>=0.13', 'quarto', 'papermill', 'xlsxwriter>=3.0', 'pydot'] diff --git a/python/pdstools/adm/ADMDatamart.py 
b/python/pdstools/adm/ADMDatamart.py index 863e8ad2..0005ef6e 100644 --- a/python/pdstools/adm/ADMDatamart.py +++ b/python/pdstools/adm/ADMDatamart.py @@ -354,8 +354,8 @@ def _validate_predictor_data( def apply_predictor_categorization( self, df: Optional[pl.LazyFrame] = None, - categorization: Optional[ - Union[pl.Expr, Callable[..., pl.Expr]] + categorization: Union[ + pl.Expr, Callable[..., pl.Expr] ] = cdh_utils.default_predictor_categorization, ): """Apply a new predictor categorization to the datamart tables @@ -381,25 +381,35 @@ def apply_predictor_categorization( See also -------- - pdstools.utils.cdh_utils.default_predictor_categorization : The default + pdstools.utils.cdh_utils.default_predictor_categorization : The default method Examples -------- - >>> #TODO + >>> dm = ADMDatamart(my_data) #uses the OOTB predictor categorization + + >>> dm.apply_predictor_categorization(categorization=pl.when( + >>> pl.col("PredictorName").cast(pl.Utf8).str.contains("Propensity") + >>> ).then(pl.lit("External Model") + >>> ).otherwise(pl.lit("Adaptive Model"))) + + >>> # Now, every subsequent plot will use the custom categorization """ - if callable(categorization): - categorization: pl.Expr = categorization() + + categorization_expr: pl.Expr = ( + categorization() if callable(categorization) else categorization + ) + if df is not None: - return df.with_columns(PredictorCategory=categorization) + return df.with_columns(PredictorCategory=categorization_expr) if hasattr(self, "predictor_data") and self.predictor_data is not None: self.predictor_data = self.predictor_data.with_columns( - PredictorCategory=categorization + PredictorCategory=categorization_expr ) if hasattr(self, "combined_data") and self.combined_data is not None: self.combined_data = self.combined_data.with_columns( - PredictorCategory=categorization + PredictorCategory=categorization_expr ) def save_data( diff --git a/python/pdstools/adm/Plots.py b/python/pdstools/adm/Plots.py index 7fcfcb09..aa511c3f 
100644 --- a/python/pdstools/adm/Plots.py +++ b/python/pdstools/adm/Plots.py @@ -174,6 +174,7 @@ def distribution_graph(df: pl.LazyFrame, title: str): class Plots(LazyNamespace): dependencies = ["plotly"] + dependency_group = "adm" def __init__(self, datamart: "ADMDatamart"): self.datamart = datamart @@ -295,15 +296,15 @@ def over_time( metric_formatting = { "SuccessRate_weighted_average": ":.4%", - "Performance_weighted_average": ":.2", # is not a percentage! + "Performance_weighted_average": ":.2", # is not a percentage! "Positives": ":.d", "ResponseCount": ":.d", } if metric == "Performance": - metric_scaling:pl.Expr = pl.lit(100.0) + metric_scaling: pl.Expr = pl.lit(100.0) else: - metric_scaling:pl.Expr = pl.lit(1.0) + metric_scaling: pl.Expr = pl.lit(1.0) if self.datamart.model_data is None: raise ValueError("Visualisation requires model_data") @@ -333,9 +334,10 @@ def over_time( "SnapshotTime", every=every, group_by=grouping_columns ) .agg( - (metric_scaling*cdh_utils.weighted_average_polars( - metric, "ResponseCount" - )).name.suffix("_weighted_average") + ( + metric_scaling + * cdh_utils.weighted_average_polars(metric, "ResponseCount") + ).name.suffix("_weighted_average") ) .sort("SnapshotTime", by_col) ) @@ -660,6 +662,10 @@ def predictor_performance( Whether to facet the plot into subplots, by default None return_df : bool, optional Whether to return a dataframe instead of a plot, by default False + + See also + -------- + pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization """ metric = "PredictorPerformance" if metric == "Performance" else metric @@ -762,6 +768,31 @@ def predictor_category_performance( facet: Optional[Union[pl.Expr, str]] = None, return_df: bool = False, ): + """Plot the predictor category performance + + Parameters + ---------- + metric : str, optional + The metric to plot, by default "Performance" + active_only : bool, optional + Whether to only analyze active 
predictors, by default False + query : Optional[QUERY], optional + An optional query to apply, by default None + facet : Optional[Union[pl.Expr, str]], optional + By which columns to facet the result, by default None + return_df : bool, optional + An optional flag to get the dataframe instead, by default False + + Returns + ------- + px.Figure + A Plotly figure + + + See also + -------- + pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization + """ metric = "PredictorPerformance" if metric == "Performance" else metric # Determine columns to select and grouping @@ -847,6 +878,26 @@ def predictor_contribution( query: Optional[QUERY] = None, return_df: bool = False, ): + """Plots the predictor contribution for each configuration + + Parameters + ---------- + by : str, optional + By which column to plot the contribution, by default "Configuration" + query : Optional[QUERY], optional + An optional query to apply to the data, by default None + return_df : bool, optional + An optional flag to get a Dataframe instead, by default False + + Returns + ------- + px.Figure + A plotly figure + + See also + -------- + pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization + """ df = ( cdh_utils._apply_query( self.datamart.aggregates.last(table="combined_data"), diff --git a/python/pdstools/infinity/__init__.py b/python/pdstools/infinity/__init__.py index 4b429270..01b04b1e 100644 --- a/python/pdstools/infinity/__init__.py +++ b/python/pdstools/infinity/__init__.py @@ -1,9 +1,48 @@ """ -My module docstring - -Does this work? +Infinity API client for Pega Decision Management. 
""" -from .client import AsyncInfinity, Infinity +from importlib.util import find_spec +from typing import TYPE_CHECKING, List + +from ..utils.namespaces import MissingDependenciesException + +if TYPE_CHECKING: + from .client import Infinity + + +class DependencyNotFound: + def __init__(self, dependencies: List[str]): + self.dependencies = dependencies + self.namespace = "the DX API Client" + self.deps_group = "api" + + def __repr__(self): + return f"While importing, one or more dependencies were not found: {self.dependencies}" + + def __call__(self): + raise MissingDependenciesException( + self.dependencies, namespace=self.namespace, deps_group=self.deps_group + ) + + +def __getattr__(name: str): + """Lazy import to avoid loading httpx until needed.""" + if name == "Infinity": + missing_dependencies: List[str] = [] + if not find_spec("pydantic"): + missing_dependencies.append("pydantic") + if not find_spec("httpx"): + missing_dependencies.append("httpx") + + if missing_dependencies: + return DependencyNotFound(missing_dependencies) + + from .client import Infinity + + return Infinity + + raise AttributeError(f"module '{__name__}' has no attribute '{name}'") + -__all__ = ["Infinity", "AsyncInfinity"] +__all__ = ["Infinity"] \ No newline at end of file diff --git a/python/pdstools/pega_io/API.py b/python/pdstools/pega_io/API.py index 65f090ec..f2934f38 100644 --- a/python/pdstools/pega_io/API.py +++ b/python/pdstools/pega_io/API.py @@ -1,7 +1,5 @@ from os import PathLike -import requests - def _read_client_credential_file(credential_file: PathLike): # pragma: no cover outputdict = {} @@ -37,6 +35,8 @@ def get_token(credential_file: PathLike, verify: bool = True): # pragma: no cov explicitly set verify to False, otherwise Python will yell at you. 
""" + import requests + creds = _read_client_credential_file(credential_file) response = requests.post( url=creds["Access token endpoint"], diff --git a/python/pdstools/pega_io/File.py b/python/pdstools/pega_io/File.py index ae4082b2..0b042767 100644 --- a/python/pdstools/pega_io/File.py +++ b/python/pdstools/pega_io/File.py @@ -12,7 +12,6 @@ from typing import Iterable, List, Literal, Optional, Tuple, Union, overload import polars as pl -import requests from ..utils.cdh_utils import from_prpc_date_time @@ -94,6 +93,8 @@ def read_ds_export( logging.debug("Could not find file in directory, checking if URL") try: + import requests + response = requests.get(f"{path}/{filename}") logging.info(f"Response: {response}") if response.status_code == 200: @@ -102,6 +103,11 @@ def read_ds_export( file = BytesIO(urllib.request.urlopen(file).read()) _, extension = os.path.splitext(filename) + except ImportError: + warnings.warn( + "Unable to import `requests`, so not able to check for remote files. If you're trying to read in a file from the internet (or, for instance, using the built-in cdh_sample method), try installing the 'requests' package (`uv pip install requests`)" + ) + except Exception as e: logging.info(e) if verbose: @@ -162,19 +168,19 @@ def import_file( if extension == ".json": try: - if isinstance(file, BytesIO): - from pyarrow import json - - return pl.LazyFrame( - json.read_json( - file, - ) - ) - else: - return pl.scan_ndjson( - file, - infer_schema_length=reading_opts.pop("infer_schema_length", 10000), - ) + # if isinstance(file, BytesIO): + # from pyarrow import json + + # return pl.LazyFrame( + # json.read_json( + # file, + # ) + # ) + # else: + return pl.scan_ndjson( + file, + infer_schema_length=reading_opts.pop("infer_schema_length", 10000), + ) except Exception: # pragma: no cover try: return pl.read_json(file).lazy() diff --git a/python/pdstools/prediction/Prediction.py b/python/pdstools/prediction/Prediction.py index 9a55b410..11f202a1 100644 --- 
a/python/pdstools/prediction/Prediction.py +++ b/python/pdstools/prediction/Prediction.py @@ -201,7 +201,6 @@ def responsecount_trend( result.update_layout(yaxis_title="Responses") return result - class Prediction: """Monitor Pega Prediction Studio Predictions""" diff --git a/python/pdstools/reports/HealthCheck.qmd b/python/pdstools/reports/HealthCheck.qmd index 82442ee8..5c29400b 100644 --- a/python/pdstools/reports/HealthCheck.qmd +++ b/python/pdstools/reports/HealthCheck.qmd @@ -1920,4 +1920,5 @@ except Exception as e: # unfortunately no way to get the quarto source file name, so that is hardcoded report_utils.show_credits("pega-datascientist-tools/python/pdstools/reports/HealthCheck.qmd") + ``` \ No newline at end of file