
Commit d9a54b8
Fix warnings and deprecations from polars version bump
yusufuyanik1 committed Dec 20, 2023
1 parent 3e4f783 commit d9a54b8
Showing 22 changed files with 283 additions and 129 deletions.
10 changes: 5 additions & 5 deletions examples/articles/ADMExplained.ipynb
@@ -641,12 +641,12 @@
" pl.col(\"Contents\").cast(pl.Utf8)\n",
" ).with_columns(\n",
" pl.when(pl.col(\"Type\") == \"numeric\")\n",
" .then(pl.col(\"Contents\").map_elements(lambda col: extract_numbers_in_contents(col, 0)))\n",
" .then(pl.col(\"Contents\").apply(lambda col: extract_numbers_in_contents(col, 0)))\n",
" .otherwise(pl.lit(-9999))\n",
" .alias(\"BinLowerBound\")\n",
" .cast(pl.Float32),\n",
" pl.when(pl.col(\"Type\") == \"numeric\")\n",
" .then(pl.col(\"Contents\").map_elements(lambda col: extract_numbers_in_contents(col, 1)))\n",
" .then(pl.col(\"Contents\").apply(lambda col: extract_numbers_in_contents(col, 1)))\n",
" .otherwise(pl.lit(-9999))\n",
" .alias(\"BinUpperBound\")\n",
" .cast(pl.Float32),\n",
@@ -701,12 +701,12 @@
" pl.col(\"BinSymbol\").alias(\"Bin\"),\n",
" BinPositives.alias(\"Positives\"),\n",
" BinNegatives.alias(\"Negatives\"),\n",
" ((pl.cumsum(\"BinResponseCount\") / pl.sum(\"BinResponseCount\")) * 100).alias(\n",
" ((pl.cum_sum(\"BinResponseCount\") / pl.sum(\"BinResponseCount\")) * 100).alias(\n",
" \"Cum. Total (%)\"\n",
" ),\n",
" (pl.col(\"BinPropensity\") * 100).alias(\"Propensity (%)\"),\n",
" (pl.col(\"AdjustedPropensity\") * 100).alias(\"Adjusted Propensity (%)\"),\n",
" ((pl.cumsum(\"BinPositives\") / pl.sum(\"BinPositives\")) * 100).alias(\n",
" ((pl.cum_sum(\"BinPositives\") / pl.sum(\"BinPositives\")) * 100).alias(\n",
" \"Cum Positives (%)\"\n",
" ),\n",
" pl.col(\"ZRatio\"),\n",
@@ -873,7 +873,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
"version": "3.11.5"
},
"orig_nbformat": 4
},
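Note on the hunks above: both are mechanical renames from recent polars deprecation cycles, `Expr.apply` to `Expr.map_elements` and `pl.cumsum` to `pl.cum_sum`. A minimal sketch of the new spellings on a toy frame (column names borrowed from the notebook, data made up):

```python
import polars as pl

df = pl.DataFrame({"Contents": ["[1, 2]", "[3, 4]"], "BinPositives": [10, 30]})

out = df.with_columns(
    # old: pl.col("Contents").apply(...)
    pl.col("Contents").map_elements(len, return_dtype=pl.Int64).alias("ContentsLength"),
    # old: pl.cumsum("BinPositives") / pl.sum("BinPositives")
    ((pl.cum_sum("BinPositives") / pl.sum("BinPositives")) * 100).alias("Cum Positives (%)"),
)
```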
2 changes: 1 addition & 1 deletion examples/hds/Example_Historical_Dataset_Analysis.ipynb
@@ -1111,7 +1111,7 @@
],
"source": [
"fig, ax = plt.subplots(figsize=(10, 5))\n",
"df_plot = df[['Customer_Age', 'Day']].groupby('Day').apply(\n",
"df_plot = df[['Customer_Age', 'Day']].groupby('Day').map_elements(\n",
" lambda x:x['Customer_Age'].isnull().sum()*100/x.shape[0]).reset_index().sort_values('Day')\n",
"pal = sns.color_palette(\"Reds_d\", len(df_plot))\n",
"rank = df_plot[0].argsort()\n",
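If the `df` in this cell were a polars frame, the per-day percentage of missing `Customer_Age` could be computed without a Python callback at all; a sketch with made-up data:

```python
import polars as pl

df = pl.DataFrame({
    "Day": [1, 1, 2, 2],
    "Customer_Age": [34, None, 51, None],
})

# Percentage of missing Customer_Age per Day, expressed natively.
df_plot = (
    df.group_by("Day")
    .agg((pl.col("Customer_Age").is_null().sum() * 100 / pl.count()).alias("PctMissing"))
    .sort("Day")
)
```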
2 changes: 1 addition & 1 deletion python/pdstools/__init__.py
@@ -4,7 +4,7 @@

from polars import enable_string_cache

-enable_string_cache(True)
+enable_string_cache()

import sys
from pathlib import Path
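`enable_string_cache` no longer takes a boolean in this polars release: a bare call enables the global cache, and `pl.StringCache` scopes it to a block. A sketch:

```python
import polars as pl
from polars import enable_string_cache

enable_string_cache()  # old: enable_string_cache(True)

# Or scope the cache, so Categorical columns built inside the block
# share one category mapping and can be compared or concatenated:
with pl.StringCache():
    s1 = pl.Series("s", ["x", "y"], dtype=pl.Categorical)
    s2 = pl.Series("s", ["y", "z"], dtype=pl.Categorical)
    combined = pl.concat([s1, s2])
```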
10 changes: 6 additions & 4 deletions python/pdstools/adm/ADMDatamart.py
@@ -794,7 +794,7 @@ def _getType(val):
.group_by(by)
.agg(pl.col("Modeldata").last())
.collect()
-.with_columns(pl.col("Modeldata").apply(lambda v: _getType(v)))
+.with_columns(pl.col("Modeldata").map_elements(lambda v: _getType(v)))
.to_dicts()
)
return {key: value for key, value in [i.values() for i in types]}
@@ -1082,7 +1082,7 @@ def response_gain_df(df: any_frame, by: str = "Channel") -> any_frame:
.sort([by, "ResponseCount"], descending=True)
.with_columns(
[
-(pl.cumsum("ResponseCount") / pl.sum("ResponseCount"))
+(pl.cum_sum("ResponseCount") / pl.sum("ResponseCount"))
.over(by)
.alias("TotalResponseFraction"),
((pl.col(by).cumcount() + 1) / pl.count("ResponseCount"))
@@ -1121,7 +1121,7 @@ def models_by_positives_df(
return (
modelsByPositives.join(
modelsByPositives["Positives"].cut(
-bins=list(range(0, 210, 10)),
+breaks=list(range(0, 210, 10)),
series=False,
category_label="PositivesBin",
),
@@ -1447,7 +1447,9 @@ def exportTables(self, file: Path = "Tables.xlsx", predictorBinning=False):
for tab in self.ApplicableTablesNoPredictorBinning
}

-with Workbook(file, {"nan_inf_to_errors": True}) as wb:
+with Workbook(
+    file, options={"nan_inf_to_errors": True, "remove_timezone": True}
+) as wb:
for tab, data in tabs.items():
data = data.with_columns(
pl.col(pl.List(pl.Categorical), pl.List(pl.Utf8))
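`Series.cut` went through the same rename cycle: its `bins` argument is now `breaks`. The Workbook change above moves the xlsxwriter flags into the `options` dict; `remove_timezone` avoids errors on timezone-aware datetimes, which Excel cells cannot hold. A sketch of the `cut` change with made-up counts:

```python
import polars as pl

positives = pl.Series("Positives", [3, 17, 42, 105, 180])

# old: positives.cut(bins=list(range(0, 210, 10)))
binned = positives.cut(breaks=list(range(0, 210, 10)))
```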
16 changes: 9 additions & 7 deletions python/pdstools/adm/ADMTrees.py
@@ -54,7 +54,7 @@ def getMultiTrees(file: pl.DataFrame, n_threads=1, verbose=True, **kwargs):
pl.col("SnapshotTime")
.dt.round("1s")
.cast(pl.Utf8)
-.str.rstrip(".000000000"),
+.str.strip_chars_end(".000000000"),
pl.col("Modeldata").str.decode("base64"),
pl.col("Configuration").cast(pl.Utf8),
)
@@ -290,7 +290,9 @@ def _post_import_cleanup(self, decode, **kwargs):
self.model = self.trees["model"]["booster"]["trees"]
except Exception as e3:
try:
-self.model = self.trees["model"]["model"]["booster"]["trees"]
+self.model = self.trees["model"]["model"]["booster"][
+    "trees"
+]
except Exception as e4:
raise (e1, e2, e3, e4)

@@ -473,7 +475,9 @@ def getGainsPerSplit(self) -> Tuple[Dict, pl.DataFrame, dict]:
list(zip(total_split_list, total_gains_list)), schema=["split", "gains"]
)
gainsPerSplit = gainsPerSplit.with_columns(
predictor=pl.col("split").map_elements(lambda x: self.parseSplitValues(x)[0])
predictor=pl.col("split").map_elements(
lambda x: self.parseSplitValues(x)[0]
)
)
return splitsPerTree, gainsPerTree, gainsPerSplit

@@ -498,7 +502,7 @@ def getGroupedGainsPerSplit(self) -> pl.DataFrame:
.alias("values"),
]
)
-.with_columns(n=pl.col("gains").list.lengths())
+.with_columns(n=pl.col("gains").list.len())
)

def getSplitsRecursively(
@@ -1046,9 +1050,7 @@ def computeOverTime(self, predictorCategorization=None):
to_plot = tree.computeCategorizationOverTime(predictorCategorization)[0]
outdf.append(
pl.DataFrame(to_plot).with_columns(
-SnapshotTime=pl.lit(timestamp).str.to_date(
-    format="%Y-%m-%d %X"
-)
+SnapshotTime=pl.lit(timestamp).str.to_date(format="%Y-%m-%d %X")
)
)

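The string and list namespaces follow the same pattern: `str.rstrip` became `str.strip_chars_end` (both strip any of the given characters from the end, not a literal suffix) and `list.lengths` became `list.len`. A sketch:

```python
import polars as pl

df = pl.DataFrame({
    "SnapshotTime": ["2023-12-20 10:15:00.000000000"],
    "gains": [[0.12, 0.05, 0.31]],
})

out = df.with_columns(
    pl.col("SnapshotTime").str.strip_chars_end(".000000000"),  # old: .str.rstrip(...)
    pl.col("gains").list.len().alias("n"),                     # old: .list.lengths()
)
```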
File renamed without changes.
Empty file.
8 changes: 4 additions & 4 deletions python/pdstools/ih/legacy_IH.py
@@ -134,13 +134,13 @@ def plot_daily_cumulative_accept_rate(df, pos, neg, **kwargs):
_df, rollup, hue = get_accept_rate_time(df, pos, neg, "Date", **kwargs)

if "hue" in kwargs.keys():
_df["Total_cum"] = _df.group_by(hue)["Total"].apply(lambda x: x.cumsum())
_df["Accepted_cum"] = _df.group_by(hue)["Accepted"].apply(lambda x: x.cumsum())
_df["Total_cum"] = _df.group_by(hue)["Total"].map_elements(lambda x: x.cum_sum())
_df["Accepted_cum"] = _df.group_by(hue)["Accepted"].map_elements(lambda x: x.cum_sum())
_df["hue"] = _df[hue].agg("__".join, axis=1)
kwargs["hue"] = "hue"
else:
_df["Total_cum"] = _df["Total"].cumsum()
_df["Accepted_cum"] = _df["Accepted"].cumsum()
_df["Total_cum"] = _df["Total"].cum_sum()
_df["Accepted_cum"] = _df["Accepted"].cum_sum()
_df["Cumulative Accept Rate (%)"] = _df["Accepted_cum"] * 100 / _df["Total_cum"]

if "allTime" in kwargs.keys():
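The polars-native form of these cumulative columns would be window expressions rather than per-group callbacks; a sketch over made-up interaction history (this legacy module's frames are pandas-style, so treat this purely as an illustration of the expression API):

```python
import polars as pl

df = pl.DataFrame({
    "Channel": ["Web", "Web", "Email", "Email"],
    "Total": [100, 120, 80, 90],
    "Accepted": [10, 18, 4, 9],
})

out = df.with_columns(
    pl.col("Total").cum_sum().over("Channel").alias("Total_cum"),
    pl.col("Accepted").cum_sum().over("Channel").alias("Accepted_cum"),
).with_columns(
    (pl.col("Accepted_cum") * 100 / pl.col("Total_cum")).alias("Cumulative Accept Rate (%)")
)
```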
8 changes: 4 additions & 4 deletions python/pdstools/pega_io/API.py
@@ -7,9 +7,9 @@ def _readClientCredentialFile(credentialFile): # pragma: no cover
with open(credentialFile) as f:
for idx, line in enumerate(f.readlines()):
if (idx % 2) == 0:
-key = line.rstrip("\n")
+key = line.strip_chars_end("\n")
else:
-outputdict[key] = line.rstrip("\n")
+outputdict[key] = line.strip_chars_end("\n")
return outputdict


@@ -50,7 +50,7 @@ def get_token(credentialFile: str, verify: bool = True, **kwargs): # pragma: no
verify=verify,
).json()
if "errors" in response:
-raise ConnectionRefusedError(f"Error when connecting to infinity: {e}")
+raise ConnectionRefusedError(f"Error when connecting to infinity: {response}")
return response["access_token"]


@@ -64,7 +64,7 @@ def setupAzureOpenAI(
"2023-07-01-preview",
"2023-09-15-preview",
"2023-10-01-preview",
"2023-12-01-preview"
"2023-12-01-preview",
] = "2023-12-01-preview",
):
"""Convenience function to automagically setup Azure AD-based authentication
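For contrast: built-in Python strings keep `str.rstrip`; `strip_chars_end` is the polars spelling. A standard-library sketch of this parser, assuming the same alternating key/value line layout (function name hypothetical):

```python
def read_client_credential_file(path: str) -> dict:
    # Credential files alternate a key line and a value line.
    outputdict = {}
    with open(path) as f:
        for idx, line in enumerate(f):
            if idx % 2 == 0:
                key = line.rstrip("\n")
            else:
                outputdict[key] = line.rstrip("\n")
    return outputdict
```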
4 changes: 3 additions & 1 deletion python/pdstools/plots/plot_base.py
@@ -777,7 +777,9 @@ def plotPredictorPerformance(
"""Plots a bar chart of the performance of the predictors
By default, this plot shows the performance over all models
-Use the querying functionality to drill down into a more specific subset
+Use the querying functionality to drill down into a more specific subset.
+Picks top n predictors with highest weighted average Performance across
+models and then sorts the predictors according to the median value.
Parameters
----------
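A sketch of the selection rule this docstring describes, with hypothetical datamart columns: take the top n predictors by response-weighted mean Performance, then order those by their median:

```python
import polars as pl

df = pl.DataFrame({
    "PredictorName": ["Age", "Age", "Income", "Income", "Tenure", "Tenure"],
    "Performance": [0.62, 0.58, 0.71, 0.69, 0.55, 0.54],
    "ResponseCount": [100, 300, 50, 80, 400, 10],
})

n = 2
top_predictors = (
    df.group_by("PredictorName")
    .agg(
        # Response-weighted mean Performance per predictor.
        weighted_mean=(pl.col("Performance") * pl.col("ResponseCount")).sum()
        / pl.col("ResponseCount").sum(),
        median=pl.col("Performance").median(),
    )
    .sort("weighted_mean", descending=True)
    .head(n)
    .sort("median", descending=True)
)
```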
11 changes: 6 additions & 5 deletions python/pdstools/reports/HealthCheck.qmd
@@ -150,6 +150,7 @@ elif len(datafolder) > 0 or len(modelfilename) > 0 or len(predictorfilename) > 0
path="." if len(datafolder) == 0 else datafolder,
model_filename="" if len(modelfilename) == 0 else modelfilename,
predictor_filename="" if len(predictorfilename) == 0 else predictorfilename,
+extract_keys=True,
include_cols="pyFeatureImportance",
).fillMissing()
else:
@@ -1575,7 +1576,7 @@ Very skewed results may be caused by prioritization elements like levers and wei
# by_as_list = by if isinstance(by,list) else [by]
# sortExpr = by_as_list + [sortExpr]
-# indexExpr = (pl.int_range(1, pl.count() + 1)/ pl.count()) if index is None else (pl.cumsum(index) / pl.sum(index))
+# indexExpr = (pl.int_range(1, pl.count() + 1)/ pl.count()) if index is None else (pl.cum_sum(index) / pl.sum(index))
# gains_df = (
# df.lazy()
@@ -1584,7 +1585,7 @@
# .select(
# by_as_list +
# [indexExpr.over(by).cast(pl.Float64).alias("cum_x"),
-# (pl.cumsum(value) / pl.sum(value)).over(by).cast(pl.Float64).alias("cum_y")]
+# (pl.cum_sum(value) / pl.sum(value)).over(by).cast(pl.Float64).alias("cum_y")]
# )
# )
# # Add entry for the (0,0) point
@@ -1620,7 +1621,7 @@ def gains_table(df, value: str, index=None, by=None):
indexExpr = (
(pl.int_range(1, pl.count() + 1) / pl.count())
if index is None
-else (pl.cumsum(index) / pl.sum(index))
+else (pl.cum_sum(index) / pl.sum(index))
)
if by is None:
@@ -1631,7 +1632,7 @@
.sort(sortExpr, descending=True)
.select(
indexExpr.cast(pl.Float64).alias("cum_x"),
-(pl.cumsum(value) / pl.sum(value)).cast(pl.Float64).alias("cum_y"),
+(pl.cum_sum(value) / pl.sum(value)).cast(pl.Float64).alias("cum_y"),
),
]
)
@@ -1645,7 +1646,7 @@
by_as_list
+ [
indexExpr.over(by).cast(pl.Float64).alias("cum_x"),
-(pl.cumsum(value) / pl.sum(value))
+(pl.cum_sum(value) / pl.sum(value))
.over(by)
.cast(pl.Float64)
.alias("cum_y"),
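A usage sketch of the building blocks in the reworked `gains_table`: `pl.int_range` for the cumulative x fraction and `pl.cum_sum` for the y fraction, both windowed per group, on a made-up frame:

```python
import polars as pl

df = pl.DataFrame({
    "Channel": ["Web", "Web", "Email", "Email"],
    "ResponseCount": [400, 100, 250, 50],
})

gains = df.sort(["Channel", "ResponseCount"], descending=True).select(
    pl.col("Channel"),
    # Position within the group as a fraction of group size.
    (pl.int_range(1, pl.count() + 1) / pl.count())
    .over("Channel")
    .cast(pl.Float64)
    .alias("cum_x"),
    # Cumulative share of responses within the group.
    (pl.cum_sum("ResponseCount") / pl.sum("ResponseCount"))
    .over("Channel")
    .cast(pl.Float64)
    .alias("cum_y"),
)
```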
4 changes: 2 additions & 2 deletions python/pdstools/reports/ModelReport.qmd
@@ -260,12 +260,12 @@ human_friendly_scoredistribution = (
pl.col("BinSymbol").alias("Bin"),
pl.col("BinResponseCount").alias("Responses"),
pl.col("BinPositives").alias("Positives"),
-(100 * (pl.col("BinPositives").cumsum(reverse=True)) / pl.sum("BinPositives"))
+(100 * (pl.col("BinPositives").cum_sum(reverse=True)) / pl.sum("BinPositives"))
.round(2)
.alias("Cum. Positives (%)"),
(
100
-* (pl.col("BinResponseCount").cumsum(reverse=True))
+* (pl.col("BinResponseCount").cum_sum(reverse=True))
/ pl.sum("BinResponseCount")
)
.round(2)
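`cum_sum` keeps the `reverse` flag, which is what makes these columns "cumulative from the last bin up" percentages; a sketch:

```python
import polars as pl

df = pl.DataFrame({"BinPositives": [5, 20, 75]})

out = df.select(
    (100 * pl.col("BinPositives").cum_sum(reverse=True) / pl.sum("BinPositives"))
    .round(2)
    .alias("Cum. Positives (%)")
)
```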