V1 of IH analysis on conversion models
operdeck committed Dec 19, 2024
1 parent e5daf2e commit e5c150c
Showing 5 changed files with 214 additions and 132 deletions.
@@ -6,16 +6,10 @@
"metadata": {},
"outputs": [],
"source": [
"import polars as pl\n",
"from pdstools import read_ds_export, IH\n",
"from pdstools.utils import cdh_utils\n",
"from ih_helper import interaction_history\n",
"from pdstools import IH\n",
"\n",
"import plotly.io as pio\n",
"import plotly as plotly\n",
"import plotly.express as px\n",
"import plotly.graph_objs as go\n",
"from plotly.subplots import make_subplots\n",
"\n",
"plotly.offline.init_notebook_mode()\n",
"pio.renderers.default = \"vscode\""
@@ -25,7 +19,9 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"# Conversion Results"
"# Conversion Results\n",
"\n",
"Visualization of conversion modeling results from IH data."
]
},
{
@@ -37,7 +33,7 @@
"from pathlib import Path\n",
"\n",
"ih_export_file = Path(\n",
" \"./Data-pxStrategyResult_InteractionFiles_20241213T091932_GMT.zip\"\n",
" \"./Data-pxStrategyResult_InteractionFiles_20241213T091932_GMT.zip \"\n",
")\n",
"\n",
"if not ih_export_file.exists():\n",
@@ -103,17 +99,17 @@
},
{
"cell_type": "code",
"execution_count": 5,
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# ih.plots.trend_line(\n",
"# experiment_field=\"ExperimentGroup\", # should be optional, can also give query to select only the Test group\n",
"# granularity=\"1d\", # string language polars\n",
"# positive_labels=[\"Conversion\"],\n",
"# negative_labels=[\"Impression\", \"Pending\"],\n",
"# title=\"Conversion Rate trends\",\n",
"# )"
"ih.plots.trend_bar(\n",
" experiment_field=\"ExperimentGroup\",\n",
" every=\"1w\",\n",
" positive_labels=[\"Conversion\"],\n",
" negative_labels=[\"Impression\", \"Pending\"],\n",
" title=\"Conversion Rates over Time\",\n",
")"
]
},
{
@@ -136,6 +132,17 @@
" title = \"Overall Engagement\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"ih.plots.trend_line(\n",
" title=\"Engagement Rates over Time\",\n",
")"
]
}
],
"metadata": {
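The new notebook cell calls `ih.plots.trend_bar(..., every="1w", ...)`. According to the docstring added in this commit, the `every` argument uses the same duration string language as Polars. Below is a minimal sketch of the weekly bucketing that `every="1w"` implies, using toy data and illustrative column names rather than the pdstools API:

```python
import polars as pl
from datetime import datetime

# Toy IH-like data; OutcomeTime is truncated to the start of its week,
# which is the bucketing "1w" implies before grouping and plotting.
ih_sample = pl.DataFrame(
    {
        "OutcomeTime": [
            datetime(2024, 12, 2, 10, 30),
            datetime(2024, 12, 4, 15, 0),
            datetime(2024, 12, 11, 9, 45),
        ],
        "Outcome": ["Conversion", "Impression", "Impression"],
    }
)

weekly = ih_sample.with_columns(
    pl.col("OutcomeTime").dt.truncate("1w").alias("Week")
)
print(weekly)
```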
87 changes: 0 additions & 87 deletions examples/ih/ih_helper.py

This file was deleted.

74 changes: 59 additions & 15 deletions python/pdstools/ih/Aggregates.py
@@ -1,8 +1,8 @@
from itertools import chain
from typing import TYPE_CHECKING, Dict, List, Optional
from typing import TYPE_CHECKING, List, Optional
import polars as pl

from ..utils.namespaces import LazyNamespace
from ..utils.cdh_utils import safe_flatten_list

if TYPE_CHECKING:
from .IH import IH as IH_Class
@@ -15,17 +15,42 @@ def __init__(self, ih: "IH_Class"):

def summary_by_experiment(
self,
experiment_field: str,
experiment_field: Optional[str] = None,
every: Optional[str] = None,
by: Optional[List[str]] = None,
positive_labels: List[str] = None,
negative_labels: List[str] = None,
):
positive_labels: Optional[List[str]] = None,
negative_labels: Optional[List[str]] = None,
) -> pl.LazyFrame:
"""Groups the IH data summarizing into success rate (CTR) and standard error (StdErr).
if by is not None:
if isinstance(by, str):
by = [by]
else:
by = []
It groups by the "experiment field" (TODO in the future this can be optional or multiple). When
given, the 'every' argument is used to divide the timerange into buckets. It uses the same string
language as Polars.
Every interaction is considered to have only one outcome: positive, negative or none. When any
outcome in the interaction is in the positive labels, the outcome is considered positive. Next,
when any is in the negative labels, the outcome of the interaction is considered negative. Otherwise
there is no defined outcome and the interaction is ignored in calculations of success rate or error.
Parameters
----------
experiment_field : Optional[str], optional
Optional field that contains the experiments
every : Optional[str], optional
Every interval start and period length, by default None
by : Optional[List[str]], optional
Extra grouping keys, by default None
positive_labels : Optional[List[str]], optional
Outcome label(s) for the positive responses, by default None
negative_labels : Optional[List[str]], optional
Outcome label(s) for the negative responses, by default None
Returns
-------
pl.LazyFrame
A polars frame with the grouping keys and columns for the total number of Positives, Negatives,
number of Interactions, success rate (CTR) and standard error (StdErr).
"""

if positive_labels is None:
positive_labels = ["Accepted", "Accept", "Clicked", "Click"]
@@ -38,11 +63,26 @@
"NoResponse",
]

if every is not None:
source = self.ih.data.with_columns(pl.col.OutcomeTime.dt.truncate(every))
else:
source = self.ih.data

group_by_clause = safe_flatten_list(
[experiment_field] + [by] + (["OutcomeTime"] if every is not None else [])
)
if len(group_by_clause) == 0:
group_by_clause = None

summary = (
self.ih.data.filter(
source.filter(
pl.col.ExperimentGroup.is_not_null() & (pl.col.ExperimentGroup != "")
)
.group_by([experiment_field] + by + ["InteractionID"])
.group_by(
(group_by_clause + ["InteractionID"])
if group_by_clause is not None
else ["InteractionID"]
)
.agg(
# Take only one outcome per interaction. TODO should perhaps be the last one.
InteractionOutcome=pl.when(pl.col.Outcome.is_in(positive_labels).any())
@@ -51,7 +91,7 @@
.then(pl.lit(False)),
Outcomes=pl.col.Outcome.unique().sort(), # for debugging
)
.group_by([experiment_field] + by)
.group_by(group_by_clause)
.agg(
Positives=pl.col.InteractionOutcome.filter(
pl.col.InteractionOutcome
@@ -74,7 +114,11 @@
).sqrt()
)
)
.sort([experiment_field] + by)
)

if group_by_clause is None:
summary = summary.drop("literal") # created by empty group_by
else:
summary = summary.sort(group_by_clause)

return summary
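
The reworked summary_by_experiment reduces every interaction to at most one outcome (positive wins over negative, otherwise null and ignored) and then aggregates to counts, success rate (CTR) and the binomial standard error sqrt(CTR * (1 - CTR) / n). The following is a minimal standalone sketch of that aggregation in Polars, assuming toy data and illustrative column names; the exact column expressions in the committed code may differ:

```python
import polars as pl

# Toy IH-like data: several outcome rows per interaction.
ih_data = pl.LazyFrame(
    {
        "InteractionID": ["i1", "i1", "i2", "i3", "i4"],
        "ExperimentGroup": ["Test", "Test", "Test", "Control", "Control"],
        "Outcome": ["Impression", "Conversion", "Pending", "Impression", "Conversion"],
    }
)
positive_labels = ["Conversion"]
negative_labels = ["Impression", "Pending"]

summary = (
    ih_data
    # One outcome per interaction: True if any outcome is positive,
    # else False if any is negative, else null (dropped from the rates).
    .group_by(["ExperimentGroup", "InteractionID"])
    .agg(
        InteractionOutcome=pl.when(pl.col("Outcome").is_in(positive_labels).any())
        .then(pl.lit(True))
        .when(pl.col("Outcome").is_in(negative_labels).any())
        .then(pl.lit(False))
    )
    # Per experiment group: counts, CTR and its standard error.
    .group_by("ExperimentGroup")
    .agg(
        Positives=pl.col("InteractionOutcome").filter(pl.col("InteractionOutcome")).len(),
        Negatives=pl.col("InteractionOutcome").filter(~pl.col("InteractionOutcome")).len(),
        Interactions=pl.len(),
    )
    .with_columns(CTR=pl.col("Positives") / (pl.col("Positives") + pl.col("Negatives")))
    .with_columns(
        StdErr=(
            pl.col("CTR") * (1 - pl.col("CTR")) / (pl.col("Positives") + pl.col("Negatives"))
        ).sqrt()
    )
)

print(summary.collect())
```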
