Skip to content

Commit

Permalink
Patch optional imports, documented predictor categorization, revamped…
Browse files Browse the repository at this point in the history
… docs (#293)

* add pre-commit to dev deps, update docs

* update docs workflows

* try updated docs workflow

* see effects on docs

* try to ignore loggers

* doc revamp

* keep using doc push on every push while developing

* sphinxarg as dependency

* more conditions

* Bring back jupyter tags

* bring back vf cell tag too

* update predictor_binning

* update agb

* Update VF article

* update admexplained

* Add getting started guide

* Further docstring improvements

* Fix tests

* don't import from python

* Add inline dependencies to the cli

* Delete uv.lock

I don't want this to be part of the core lib (yet); may add it back later once we reach some more stability

* update release ci to use pypi token

* try v5 :)

* skip one test

* Release docs on public version release

* Fixed optional imports, documented predictor categorization

* tie polars to 1.16 - it was failing
  • Loading branch information
StijnKas authored Dec 9, 2024
1 parent 51ac723 commit a874e24
Show file tree
Hide file tree
Showing 11 changed files with 148 additions and 44 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/Python tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,7 @@ jobs:
run: uv run pytest --cov=./python/pdstools --cov-report=xml --cov-config=./python/tests/.coveragerc --ignore=python/tests/test_healthcheck.py --ignore=python/tests/test_ADMTrees.py

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v5.0.7
uses: codecov/codecov-action@v5.1.1
with:
token: ${{ secrets.CODECOV_TOKEN }}
fail_ci_if_error: false
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -34,3 +34,4 @@ python/*.ipynb_checkpoints/*
**/META-INF/*
r/tests/testthat/d/tmp2
**/cache
.venv
3 changes: 1 addition & 2 deletions examples/articles/ADMExplained.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -944,8 +944,7 @@
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
"pygments_lexer": "ipython3"
}
},
"nbformat": 4,
Expand Down
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,20 +26,18 @@ classifiers = [
keywords = [
"pega",
"pegasystems",
"pds",
"pdstools",
"cdhtools",
"datascientist",
"tools",
]
requires-python = ">=3.9"
dependencies = ['polars>=1.9', 'typing_extensions']
dependencies = ['polars==1.16', 'typing_extensions']

[tool.setuptools.dynamic]
version = {attr="pdstools.__version__"}

[project.optional-dependencies]
adm = ['plotly>=5.5.0']
adm = ['plotly[express]>=6.0.0rc0', 'requests']
pega_io = ['aioboto3', 'polars_hash']
api = ['httpx', 'pydantic', 'anyio']
healthcheck = ['pdstools[adm]', 'great_tables>=0.13', 'quarto', 'papermill', 'xlsxwriter>=3.0', 'pydot']
Expand Down
28 changes: 19 additions & 9 deletions python/pdstools/adm/ADMDatamart.py
Original file line number Diff line number Diff line change
Expand Up @@ -354,8 +354,8 @@ def _validate_predictor_data(
def apply_predictor_categorization(
self,
df: Optional[pl.LazyFrame] = None,
categorization: Optional[
Union[pl.Expr, Callable[..., pl.Expr]]
categorization: Union[
pl.Expr, Callable[..., pl.Expr]
] = cdh_utils.default_predictor_categorization,
):
"""Apply a new predictor categorization to the datamart tables
Expand All @@ -381,25 +381,35 @@ def apply_predictor_categorization(
See also
--------
pdstools.utils.cdh_utils.default_predictor_categorization : The default
pdstools.utils.cdh_utils.default_predictor_categorization : The default method
Examples
--------
>>> #TODO
>>> dm = ADMDatamart(my_data) #uses the OOTB predictor categorization
>>> dm.apply_predictor_categorization(categorization=pl.when(
>>> pl.col("PredictorName").cast(pl.Utf8).str.contains("Propensity")
>>> ).then(pl.lit("External Model")
>>> ).otherwise(pl.lit("Adaptive Model")))
>>> # Now, every subsequent plot will use the custom categorization
"""
if callable(categorization):
categorization: pl.Expr = categorization()

categorization_expr: pl.Expr = (
categorization() if callable(categorization) else categorization
)


if df is not None:
return df.with_columns(PredictorCategory=categorization)
return df.with_columns(PredictorCategory=categorization_expr)

if hasattr(self, "predictor_data") and self.predictor_data is not None:
self.predictor_data = self.predictor_data.with_columns(
PredictorCategory=categorization
PredictorCategory=categorization_expr
)
if hasattr(self, "combined_data") and self.combined_data is not None:
self.combined_data = self.combined_data.with_columns(
PredictorCategory=categorization
PredictorCategory=categorization_expr
)

def save_data(
Expand Down
63 changes: 57 additions & 6 deletions python/pdstools/adm/Plots.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,7 @@ def distribution_graph(df: pl.LazyFrame, title: str):

class Plots(LazyNamespace):
dependencies = ["plotly"]
dependency_group = "adm"

def __init__(self, datamart: "ADMDatamart"):
self.datamart = datamart
Expand Down Expand Up @@ -295,15 +296,15 @@ def over_time(

metric_formatting = {
"SuccessRate_weighted_average": ":.4%",
"Performance_weighted_average": ":.2", # is not a percentage!
"Performance_weighted_average": ":.2", # is not a percentage!
"Positives": ":.d",
"ResponseCount": ":.d",
}

if metric == "Performance":
metric_scaling:pl.Expr = pl.lit(100.0)
metric_scaling: pl.Expr = pl.lit(100.0)
else:
metric_scaling:pl.Expr = pl.lit(1.0)
metric_scaling: pl.Expr = pl.lit(1.0)

if self.datamart.model_data is None:
raise ValueError("Visualisation requires model_data")
Expand Down Expand Up @@ -333,9 +334,10 @@ def over_time(
"SnapshotTime", every=every, group_by=grouping_columns
)
.agg(
(metric_scaling*cdh_utils.weighted_average_polars(
metric, "ResponseCount"
)).name.suffix("_weighted_average")
(
metric_scaling
* cdh_utils.weighted_average_polars(metric, "ResponseCount")
).name.suffix("_weighted_average")
)
.sort("SnapshotTime", by_col)
)
Expand Down Expand Up @@ -660,6 +662,10 @@ def predictor_performance(
Whether to facet the plot into subplots, by default None
return_df : bool, optional
Whether to return a dataframe instead of a plot, by default False
See also
--------
pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization
"""

metric = "PredictorPerformance" if metric == "Performance" else metric
Expand Down Expand Up @@ -762,6 +768,31 @@ def predictor_category_performance(
facet: Optional[Union[pl.Expr, str]] = None,
return_df: bool = False,
):
"""Plot the predictor category performance
Parameters
----------
metric : str, optional
The metric to plot, by default "Performance"
active_only : bool, optional
Whether to only analyze active predictors, by default False
query : Optional[QUERY], optional
An optional query to apply, by default None
facet : Optional[Union[pl.Expr, str]], optional
By which columns to facet the result, by default None
return_df : bool, optional
An optional flag to get the dataframe instead, by default False
Returns
-------
px.Figure
A Plotly figure
See also
--------
pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization
"""
metric = "PredictorPerformance" if metric == "Performance" else metric

# Determine columns to select and grouping
Expand Down Expand Up @@ -847,6 +878,26 @@ def predictor_contribution(
query: Optional[QUERY] = None,
return_df: bool = False,
):
"""Plots the predictor contribution for each configuration
Parameters
----------
by : str, optional
By which column to plot the contribution, by default "Configuration"
query : Optional[QUERY], optional
An optional query to apply to the data, by default None
return_df : bool, optional
An optional flag to get a Dataframe instead, by default False
Returns
-------
px.Figure
A plotly figure
See also
--------
pdstools.adm.ADMDatamart.apply_predictor_categorization : how to override the out of the box predictor categorization
"""
df = (
cdh_utils._apply_query(
self.datamart.aggregates.last(table="combined_data"),
Expand Down
49 changes: 44 additions & 5 deletions python/pdstools/infinity/__init__.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,48 @@
"""
My module docstring
Does this work?
Infinity API client for Pega Decision Management.
"""

from .client import AsyncInfinity, Infinity
from importlib.util import find_spec
from typing import TYPE_CHECKING, List

from ..utils.namespaces import MissingDependenciesException

if TYPE_CHECKING:
from .client import Infinity


class DependencyNotFound:
    """Placeholder handed out instead of ``Infinity`` when dependencies are missing.

    Looking up the name succeeds; the failure is deferred until the
    placeholder is actually called, at which point a
    ``MissingDependenciesException`` is raised.

    Parameters
    ----------
    dependencies : List[str]
        Names of the packages that could not be found.
    """

    def __init__(self, dependencies: List[str]):
        self.dependencies = dependencies
        # Forwarded to MissingDependenciesException when the placeholder is called.
        self.namespace = "the DX API Client"
        self.deps_group = "api"

    def __repr__(self):
        return f"While importing, one or more dependencies were not found: {self.dependencies}"

    def __call__(self, *args, **kwargs):
        """Raise ``MissingDependenciesException``, whatever arguments were given.

        Arbitrary arguments are accepted (and ignored) so that a
        constructor-style call such as ``Infinity(some_arg)`` does not fail
        with an unrelated ``TypeError`` before the missing-dependency error
        can be reported.

        Raises
        ------
        MissingDependenciesException
            Always.
        """
        raise MissingDependenciesException(
            self.dependencies, namespace=self.namespace, deps_group=self.deps_group
        )


def __getattr__(name: str):
    """Resolve ``Infinity`` on first access so httpx is not loaded until needed.

    Parameters
    ----------
    name : str
        The attribute being looked up on this module.

    Returns
    -------
    The ``Infinity`` class from ``.client`` when both ``pydantic`` and
    ``httpx`` can be located, otherwise a ``DependencyNotFound`` placeholder
    listing the packages that could not be found.

    Raises
    ------
    AttributeError
        For any attribute name other than ``"Infinity"``.
    """
    # Guard clause: only "Infinity" is resolved lazily.
    if name != "Infinity":
        raise AttributeError(f"module '{__name__}' has no attribute '{name}'")

    # Check order is fixed (pydantic, then httpx) so the reported list is stable.
    required_packages = ("pydantic", "httpx")
    unavailable: List[str] = [
        package for package in required_packages if not find_spec(package)
    ]
    if unavailable:
        return DependencyNotFound(unavailable)

    from .client import Infinity

    return Infinity


__all__ = ["Infinity", "AsyncInfinity"]
__all__ = ["Infinity"]
4 changes: 2 additions & 2 deletions python/pdstools/pega_io/API.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,5 @@
from os import PathLike

import requests


def _read_client_credential_file(credential_file: PathLike): # pragma: no cover
outputdict = {}
Expand Down Expand Up @@ -37,6 +35,8 @@ def get_token(credential_file: PathLike, verify: bool = True): # pragma: no cov
explicitly set verify to False, otherwise Python will yell at you.
"""
import requests

creds = _read_client_credential_file(credential_file)
response = requests.post(
url=creds["Access token endpoint"],
Expand Down
34 changes: 20 additions & 14 deletions python/pdstools/pega_io/File.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from typing import Iterable, List, Literal, Optional, Tuple, Union, overload

import polars as pl
import requests

from ..utils.cdh_utils import from_prpc_date_time

Expand Down Expand Up @@ -94,6 +93,8 @@ def read_ds_export(
logging.debug("Could not find file in directory, checking if URL")

try:
import requests

response = requests.get(f"{path}/{filename}")
logging.info(f"Response: {response}")
if response.status_code == 200:
Expand All @@ -102,6 +103,11 @@ def read_ds_export(
file = BytesIO(urllib.request.urlopen(file).read())
_, extension = os.path.splitext(filename)

except ImportError:
warnings.warn(
"Unable to import `requests`, so not able to check for remote files. If you're trying to read in a file from the internet (or, for instance, using the built-in cdh_sample method), try installing the 'requests' package (`uv pip install requests`)"
)

except Exception as e:
logging.info(e)
if verbose:
Expand Down Expand Up @@ -162,19 +168,19 @@ def import_file(

if extension == ".json":
try:
if isinstance(file, BytesIO):
from pyarrow import json

return pl.LazyFrame(
json.read_json(
file,
)
)
else:
return pl.scan_ndjson(
file,
infer_schema_length=reading_opts.pop("infer_schema_length", 10000),
)
# if isinstance(file, BytesIO):
# from pyarrow import json

# return pl.LazyFrame(
# json.read_json(
# file,
# )
# )
# else:
return pl.scan_ndjson(
file,
infer_schema_length=reading_opts.pop("infer_schema_length", 10000),
)
except Exception: # pragma: no cover
try:
return pl.read_json(file).lazy()
Expand Down
1 change: 0 additions & 1 deletion python/pdstools/prediction/Prediction.py
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,6 @@ def responsecount_trend(
result.update_layout(yaxis_title="Responses")
return result


class Prediction:
"""Monitor Pega Prediction Studio Predictions"""

Expand Down
1 change: 1 addition & 0 deletions python/pdstools/reports/HealthCheck.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -1920,4 +1920,5 @@ except Exception as e:
# unfortunately no way to get the quarto source file name, so that is hardcoded
report_utils.show_credits("pega-datascientist-tools/python/pdstools/reports/HealthCheck.qmd")
```

0 comments on commit a874e24

Please sign in to comment.