Skip to content

Commit

Permalink
typo in function name weighted_average
Browse files: browse the repository at this point in the history
  • Loading branch information
yusufuyanik1 committed Oct 10, 2023
1 parent 2bd977b commit e263180
Show file tree
Hide file tree
Showing 5 changed files with 35 additions and 31 deletions.
2 changes: 1 addition & 1 deletion examples/articles/ADMExplained.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@
" model.group_by(\"ModelID\")\n",
" .agg(\n",
" number_of_predictors=pl.col(\"PredictorName\").n_unique(),\n",
" model_performance=cdh_utils.weighed_performance_polars() * 100,\n",
" model_performance=cdh_utils.weighted_performance_polars() * 100,\n",
" response_count=pl.sum(\"ResponseCount\"),\n",
" )\n",
" .collect()\n",
Expand Down
26 changes: 13 additions & 13 deletions python/pdstools/adm/ADMDatamart.py
Original file line number Diff line number Diff line change
Expand Up @@ -970,19 +970,19 @@ def model_summary(
.sum()
.alias("Count_without_responses"),
(
cdh_utils.weighed_performance_polars().alias(
"Performance_weighted"
)
cdh_utils.weighted_performance_polars()
.alias("Performance_weighted")
.fill_nan(0.5)
),
cdh_utils.weighed_average_polars(
"SuccessRate", "ResponseCount"
).alias("SuccessRate_weighted"),
cdh_utils.weighted_average_polars("SuccessRate", "ResponseCount")
.fill_nan(0.0)
.alias("SuccessRate_weighted"),
],
)
.with_columns(
(pl.col("Count_without_responses") / pl.col(f"{by}_count")).alias(
"Percentage_without_responses"
)
(pl.col("Count_without_responses") / pl.col(f"{by}_count"))
.alias("Percentage_without_responses")
.fill_nan(0.0)
)
)

Expand Down Expand Up @@ -1029,16 +1029,16 @@ def pivot_df(
df.unique(subset=[by], keep="first")
.group_by(by)
.agg(
cdh_utils.weighed_average_polars("PerformanceBin", "ResponseCount")
cdh_utils.weighted_average_polars("PerformanceBin", "ResponseCount")
)
.sort("PerformanceBin", descending=True)
.head(top_n)
.select(by)
)
df = top_n_xaxis.join(df, on=by, how="left")
df = top_n_xaxis.join(df, on=by, how="left")
if by not in ["ModelID", "Name"]:
df = df.group_by([by, "PredictorName"]).agg(
cdh_utils.weighed_average_polars("PerformanceBin", "ResponseCount")
cdh_utils.weighted_average_polars("PerformanceBin", "ResponseCount")
)
df = (
df.collect()
Expand Down Expand Up @@ -1430,7 +1430,7 @@ def exportTables(self, file: Path = "Tables.xlsx"):
from xlsxwriter import Workbook

tabs = {tab: getattr(self, tab) for tab in self.ApplicableTables}
with Workbook(file) as wb:
with Workbook(file, {"nan_inf_to_errors": True}) as wb:
for tab, data in tabs.items():
data = data.with_columns(
pl.col(pl.List(pl.Categorical), pl.List(pl.Utf8))
Expand Down
18 changes: 10 additions & 8 deletions python/pdstools/plots/plot_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import polars as pl
from .plots_plotly import ADMVisualisations as plotly
from ..utils.cdh_utils import (
weighed_performance_polars,
weighed_average_polars,
weighted_performance_polars,
weighted_average_polars,
)
from ..utils.errors import NotApplicableError
from ..utils.types import any_frame
Expand Down Expand Up @@ -116,7 +116,7 @@ def top_n(
if facets:
df = df.join(
df.group_by(facets + ["PredictorName"])
.agg(weighed_average_polars(to_plot, "ResponseCountBin"))
.agg(weighted_average_polars(to_plot, "ResponseCountBin"))
.filter(pl.col(to_plot).is_not_nan())
.group_by(*facets)
.agg(
Expand All @@ -131,7 +131,7 @@ def top_n(
else:
df = df.join(
df.group_by("PredictorName")
.agg(weighed_average_polars(to_plot, "ResponseCountBin"))
.agg(weighted_average_polars(to_plot, "ResponseCountBin"))
.filter(pl.col(to_plot).is_not_nan())
.sort(to_plot, descending=True)
.head(top_n)
Expand Down Expand Up @@ -484,10 +484,10 @@ def plotOverTime(
df.group_by_dynamic("SnapshotTime", every=every, by=group_by)
.agg(
[
weighed_average_polars("SuccessRate", "ResponseCount").alias(
weighted_average_polars("SuccessRate", "ResponseCount").alias(
"SuccessRate"
),
weighed_performance_polars().alias("weighted_performance"),
weighted_performance_polars().alias("weighted_performance"),
]
)
.with_columns(pl.col("weighted_performance") * 100)
Expand Down Expand Up @@ -963,7 +963,7 @@ def plotPredictorCategoryPerformance(
df = (
df.group_by(facets + ["ModelID", "PredictorCategory"])
.agg(
weighed_average_polars("PerformanceBin", "ResponseCountBin").alias(
weighted_average_polars("PerformanceBin", "ResponseCountBin").alias(
"PerformanceBin"
)
)
Expand Down Expand Up @@ -1062,7 +1062,9 @@ def plotPredictorContribution(
.with_columns((pl.col("PerformanceBin") - 0.5) * 2)
.group_by(by, "PredictorCategory")
.agg(
Performance=weighed_average_polars("PerformanceBin", "BinResponseCount")
Performance=weighted_average_polars(
"PerformanceBin", "BinResponseCount"
)
)
.with_columns(
Contribution=(
Expand Down
8 changes: 4 additions & 4 deletions python/pdstools/utils/cdh_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -556,7 +556,7 @@ def toPRPCDateTime(dt: datetime.datetime) -> str:
return dt.strftime("%Y%m%dT%H%M%S.%f")[:-3] + dt.strftime(" GMT%z")


def weighed_average_polars(
def weighted_average_polars(
vals: Union[str, pl.Expr], weights: Union[str, pl.Expr]
) -> pl.Expr:
if isinstance(vals, str):
Expand All @@ -566,9 +566,9 @@ def weighed_average_polars(
return ((vals * weights).sum()) / weights.sum()


def weighed_performance_polars() -> pl.Expr:
def weighted_performance_polars() -> pl.Expr:
"""Polars function to return a weighted performance"""
return weighed_average_polars("Performance", "ResponseCount")
return weighted_average_polars("Performance", "ResponseCount")


def zRatio(
Expand Down Expand Up @@ -632,7 +632,7 @@ def LogOdds(


def featureImportance(over=["PredictorName", "ModelID"]):
varImp = weighed_average_polars(
varImp = weighted_average_polars(
LogOdds(
pl.col("BinPositives"), pl.col("BinResponseCount") - pl.col("BinPositives")
),
Expand Down
12 changes: 7 additions & 5 deletions python/tests/test_cdh_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -133,7 +133,9 @@ def test_toPRPCDateTime():
== "20180316T134127.847 GMT-0456"
)
assert (
cdh_utils.toPRPCDateTime(datetime.datetime(2018, 3, 16, 13, 41, 27, 847000))[:-3]
cdh_utils.toPRPCDateTime(datetime.datetime(2018, 3, 16, 13, 41, 27, 847000))[
:-3
]
== "20180316T134127.847 GMT+0000"[:-3]
)

Expand All @@ -149,7 +151,7 @@ def test_weighted_average_polars():
output = (
input.group_by("Channel")
.agg(
cdh_utils.weighed_average_polars("SuccessRate", "ResponseCount").alias(
cdh_utils.weighted_average_polars("SuccessRate", "ResponseCount").alias(
"SuccessRate_weighted"
),
)
Expand All @@ -165,7 +167,7 @@ def test_weighted_average_polars():
output = (
input.filter(pl.col("Channel") == "SMS")
.with_columns(
cdh_utils.weighed_average_polars(
cdh_utils.weighted_average_polars(
vals="SuccessRate", weights="ResponseCount"
).alias("weighted_average")
)
Expand All @@ -184,7 +186,7 @@ def test_weighted_average_polars():
assert output.frame_equal(expected_output)


def test_weighed_performance_polars():
def test_weighted_performance_polars():
input = pl.DataFrame(
{
"Performance": [0.5, 0.8, 0.75, 0.5], # 0.6, 0.6
Expand All @@ -195,7 +197,7 @@ def test_weighed_performance_polars():

output = (
input.group_by("Channel")
.agg(cdh_utils.weighed_performance_polars())
.agg(cdh_utils.weighted_performance_polars())
.sort("Channel")
)

Expand Down

0 comments on commit e263180

Please sign in to comment.