diff --git a/examples/articles/GettingStarted.ipynb b/examples/articles/GettingStarted.ipynb
index e7d0f4af..32c31a12 100644
--- a/examples/articles/GettingStarted.ipynb
+++ b/examples/articles/GettingStarted.ipynb
@@ -32,17 +32,11 @@
"If you do not have Python installed, or your installed version is not compatible, `uv` will automatically install a compatible version.\n",
"\n",
"## Optional dependencies\n",
- "As of V4 of pdstools, we have made a big effort to reduce the number of big and heavy core dependencies. This means that while initial installation is very fast, you may at some points run into import errors and will be required to install additional dependency groups. If using `uv`, these can be installed with the `--extra` argument. \n",
+ "As of V4 of pdstools, we have made a big effort to reduce the number of big and heavy core dependencies. This means that while initial installation is very fast, you may at some point run into import errors and will be required to install additional dependency groups. \n",
"\n",
- "For instance, to install the optional dependencies to use the Pega DX API client, you should run \n",
- "\n",
- "```bash\n",
- "uv pip install pdstools --extra api\n",
- "```\n",
- "\n",
- "The alternative (pip-compatible) syntax for optional dependencies is:\n",
+ "To install extra dependencies, list them in square brackets after the package name. For instance, to install pdstools together with the optional dependencies for the Pega DX API client, run:\n",
"```bash\n",
- "pip install 'pdstools[api]'\n",
+ "uv pip install 'pdstools[api]'\n",
"```\n",
"\n",
"For an overview of all optional dependencies and the dependency groups they will be installed for, see the table below:\n",
diff --git a/examples/ih/Conversion_Reporting.ipynb b/examples/ih/Conversion_Reporting.ipynb
new file mode 100644
index 00000000..077263fa
--- /dev/null
+++ b/examples/ih/Conversion_Reporting.ipynb
@@ -0,0 +1,168 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pdstools import IH\n",
+ "\n",
+ "import plotly.io as pio\n",
+ "import plotly as plotly\n",
+ "\n",
+ "plotly.offline.init_notebook_mode()\n",
+ "pio.renderers.default = \"vscode\""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Conversion Results\n",
+ "\n",
+ "Visualization of conversion modeling results from Interaction History (IH) data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pathlib import Path\n",
+ "import polars as pl\n",
+ "\n",
+ "ih_export_file = Path(\n",
+ " \"./Data-pxStrategyResult_InteractionFiles_20241213T091932_GMT.zip\"\n",
+ ")\n",
+ "\n",
+ "if not ih_export_file.exists():\n",
+ " ih = IH.from_mock_data()\n",
+ "else:\n",
+ " ih = IH.from_ds_export(\n",
+ " ih_export_file,\n",
+ " query=pl.col.ExperimentGroup.is_not_null() & (pl.col.ExperimentGroup != \"\"),\n",
+ " )\n",
+ "\n",
+ "ih.aggregates.summary_success_rates(by=[\"ExperimentGroup\", \"Channel\"]).drop(\n",
+ " \"Outcomes\"\n",
+ ").collect().to_pandas().style.hide()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ih.plot.overall_gauges(\n",
+ " metric=\"Conversion\",\n",
+ " condition=\"ExperimentGroup\",\n",
+ " by=\"Channel\",\n",
+ " reference_values={\"Web\": 0.055, \"Email\": 0.09},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Detailed View \n",
+ "\n",
+ "Showing conversion rates for all actions."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ih.plot.success_rates_tree_map(metric=\"Conversion\")\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Conversion Rate Trends\n",
+ "\n",
+ "Conversion rates over time, shown as side-by-side bars and as lines (two separate plot methods), with error bars."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ih.plot.success_rates_trend_bar(\n",
+ " metric=\"Conversion\",\n",
+ " condition=\"ExperimentGroup\",\n",
+ " every=\"1w\",\n",
+ ")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ih.plot.success_rates_trend(metric=\"Conversion\", every=\"1d\")"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Engagement"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ih.plot.overall_gauges(\n",
+ " condition=\"ExperimentGroup\",\n",
+ " by=\"Channel\",\n",
+ " reference_values={\"Web\": 0.20, \"Email\": 0.20},\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ih.plot.success_rates_trend(\n",
+ " by=\"Channel\"\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": ".venv",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/examples/ih/Example_IH_Analysis.ipynb b/examples/ih/Example_IH_Analysis.ipynb
index 49c257d7..30188e6e 100644
--- a/examples/ih/Example_IH_Analysis.ipynb
+++ b/examples/ih/Example_IH_Analysis.ipynb
@@ -2,961 +2,237 @@
"cells": [
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
- "import pandas as pd\n",
- "import sys\n",
+ "from pdstools import IH\n",
+ "from pdstools.utils import cdh_utils\n",
"\n",
- "from cdhtools.IHanalysis import *\n",
- "from cdhtools.cdh_utils import readDSExport\n",
+ "import polars as pl\n",
+ "import plotly.io as pio\n",
+ "import plotly as plotly\n",
"\n",
- "import matplotlib.pyplot as plt\n",
- "%matplotlib inline"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Importing: ../../data/Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\n"
- ]
- }
- ],
- "source": [
- "df_orig = readDSExport(\"Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\", path=\"../../data\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [],
- "source": [
- "df = initial_prep(df_orig, referenceTime='pxOutcomeTime')"
+ "plotly.offline.init_notebook_mode()\n",
+ "pio.renderers.default = \"vscode\""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
- "At first, take a look into the IH dataframe, explore the columns, outcome types and business structure"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " pySubjectType | \n",
- " pxInteractionID | \n",
- " ControlGroupValidityStart | \n",
- " pyStage | \n",
- " pyJourney | \n",
- " CustomerID | \n",
- " ChannelSubGroup | \n",
- " pyChannel | \n",
- " pyCustomerSubSegment | \n",
- " pyStep | \n",
- " ... | \n",
- " pyResponse | \n",
- " pyCategory | \n",
- " ControlGroupValidityEnd | \n",
- " pxDecisionTime | \n",
- " pyLabel | \n",
- " ChannelGroup | \n",
- " pyStrategy | \n",
- " Date | \n",
- " WeekOfYear | \n",
- " Week | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 0 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " SMS | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " U+ Personal Card | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 1 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " Web | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " U+ Personal Card | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " Web | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " Visa Gold Card | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " SMS | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " MasterCard Gold | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " CDHSample-Data-Customer | \n",
- " -3586780626931683381 | \n",
- " | \n",
- " | \n",
- " | \n",
- " Customer-4118 | \n",
- " | \n",
- " Web | \n",
- " | \n",
- " | \n",
- " ... | \n",
- " | \n",
- " | \n",
- " | \n",
- " 2021-01-27 13:22:05.810000+00:00 | \n",
- " AMEXPersonal | \n",
- " | \n",
- " InitializeModelsSmall | \n",
- " 2021-01-27 | \n",
- " 4 | \n",
- " 1 | \n",
- "
\n",
- " \n",
- "
\n",
- "
5 rows × 52 columns
\n",
- "
"
- ],
- "text/plain": [
- " pySubjectType pxInteractionID ControlGroupValidityStart \\\n",
- "0 CDHSample-Data-Customer -3586780626931683381 \n",
- "1 CDHSample-Data-Customer -3586780626931683381 \n",
- "2 CDHSample-Data-Customer -3586780626931683381 \n",
- "3 CDHSample-Data-Customer -3586780626931683381 \n",
- "4 CDHSample-Data-Customer -3586780626931683381 \n",
- "\n",
- " pyStage pyJourney CustomerID ChannelSubGroup pyChannel \\\n",
- "0 Customer-4118 SMS \n",
- "1 Customer-4118 Web \n",
- "2 Customer-4118 Web \n",
- "3 Customer-4118 SMS \n",
- "4 Customer-4118 Web \n",
- "\n",
- " pyCustomerSubSegment pyStep ... pyResponse pyCategory \\\n",
- "0 ... \n",
- "1 ... \n",
- "2 ... \n",
- "3 ... \n",
- "4 ... \n",
- "\n",
- " ControlGroupValidityEnd pxDecisionTime pyLabel \\\n",
- "0 2021-01-27 13:22:05.810000+00:00 U+ Personal Card \n",
- "1 2021-01-27 13:22:05.810000+00:00 U+ Personal Card \n",
- "2 2021-01-27 13:22:05.810000+00:00 Visa Gold Card \n",
- "3 2021-01-27 13:22:05.810000+00:00 MasterCard Gold \n",
- "4 2021-01-27 13:22:05.810000+00:00 AMEXPersonal \n",
- "\n",
- " ChannelGroup pyStrategy Date WeekOfYear Week \n",
- "0 InitializeModelsSmall 2021-01-27 4 1 \n",
- "1 InitializeModelsSmall 2021-01-27 4 1 \n",
- "2 InitializeModelsSmall 2021-01-27 4 1 \n",
- "3 InitializeModelsSmall 2021-01-27 4 1 \n",
- "4 InitializeModelsSmall 2021-01-27 4 1 \n",
- "\n",
- "[5 rows x 52 columns]"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " Count | \n",
- "
\n",
- " \n",
- " pyIssue | \n",
- " pyGroup | \n",
- " pyDirection | \n",
- " pyChannel | \n",
- " pyName | \n",
- " pyOutcome | \n",
- " | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " | \n",
- " Churned | \n",
- " 5072 | \n",
- "
\n",
- " \n",
- " Loyal | \n",
- " 4928 | \n",
- "
\n",
- " \n",
- " Sales | \n",
- " CreditCards | \n",
- " Inbound | \n",
- " Web | \n",
- " AMEXPersonal | \n",
- " Clicked | \n",
- " 1487 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 6331 | \n",
- "
\n",
- " \n",
- " UPlusFinGold | \n",
- " Accepted | \n",
- " 367 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6468 | \n",
- "
\n",
- " \n",
- " UPlusFinPersonal | \n",
- " Accepted | \n",
- " 367 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6534 | \n",
- "
\n",
- " \n",
- " UPlusGold | \n",
- " Accepted | \n",
- " 1843 | \n",
- "
\n",
- " \n",
- " Clicked | \n",
- " 1204 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 7004 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 5487 | \n",
- "
\n",
- " \n",
- " UPlusPersonal | \n",
- " Accept | \n",
- " 2635 | \n",
- "
\n",
- " \n",
- " Accepted | \n",
- " 970 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 4361 | \n",
- "
\n",
- " \n",
- " VisaGold | \n",
- " Clicked | \n",
- " 1777 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 5538 | \n",
- "
\n",
- " \n",
- " Outbound | \n",
- " SMS | \n",
- " AMEXPersonal | \n",
- " Clicked | \n",
- " 1002 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 6775 | \n",
- "
\n",
- " \n",
- " MasterCardGold | \n",
- " Clicked | \n",
- " 296 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 6438 | \n",
- "
\n",
- " \n",
- " MasterCardWorld | \n",
- " Clicked | \n",
- " 342 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 5846 | \n",
- "
\n",
- " \n",
- " UPlusFinGold | \n",
- " Accepted | \n",
- " 297 | \n",
- "
\n",
- " \n",
- " Clicked | \n",
- " 265 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 7081 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6645 | \n",
- "
\n",
- " \n",
- " UPlusFinPersonal | \n",
- " Accepted | \n",
- " 311 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 6482 | \n",
- "
\n",
- " \n",
- " UPlusGold | \n",
- " Accepted | \n",
- " 1463 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 5474 | \n",
- "
\n",
- " \n",
- " UPlusPersonal | \n",
- " Accept | \n",
- " 5206 | \n",
- "
\n",
- " \n",
- " Accepted | \n",
- " 684 | \n",
- "
\n",
- " \n",
- " Clicked | \n",
- " 581 | \n",
- "
\n",
- " \n",
- " NoResponse | \n",
- " 4984 | \n",
- "
\n",
- " \n",
- " Rejected | \n",
- " 4578 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- " Count\n",
- "pyIssue pyGroup pyDirection pyChannel pyName pyOutcome \n",
- " Churned 5072\n",
- " Loyal 4928\n",
- "Sales CreditCards Inbound Web AMEXPersonal Clicked 1487\n",
- " NoResponse 6331\n",
- " UPlusFinGold Accepted 367\n",
- " Rejected 6468\n",
- " UPlusFinPersonal Accepted 367\n",
- " Rejected 6534\n",
- " UPlusGold Accepted 1843\n",
- " Clicked 1204\n",
- " NoResponse 7004\n",
- " Rejected 5487\n",
- " UPlusPersonal Accept 2635\n",
- " Accepted 970\n",
- " Rejected 4361\n",
- " VisaGold Clicked 1777\n",
- " NoResponse 5538\n",
- " Outbound SMS AMEXPersonal Clicked 1002\n",
- " NoResponse 6775\n",
- " MasterCardGold Clicked 296\n",
- " NoResponse 6438\n",
- " MasterCardWorld Clicked 342\n",
- " NoResponse 5846\n",
- " UPlusFinGold Accepted 297\n",
- " Clicked 265\n",
- " NoResponse 7081\n",
- " Rejected 6645\n",
- " UPlusFinPersonal Accepted 311\n",
- " Rejected 6482\n",
- " UPlusGold Accepted 1463\n",
- " Rejected 5474\n",
- " UPlusPersonal Accept 5206\n",
- " Accepted 684\n",
- " Clicked 581\n",
- " NoResponse 4984\n",
- " Rejected 4578"
- ]
- },
- "execution_count": 7,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "df.groupby(['pyIssue', 'pyGroup', 'pyDirection', 'pyChannel', 'pyName', 'pyOutcome']).count()[[\n",
- " 'pxInteractionID']].rename(columns={'pxInteractionID':'Count'})"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "Use \"plot_daily_accept_rate\" to plot accept rate per day to understand how accept rates changed over time. To define accept rate, enter the positive (here: Accepted) and negative (here: Rejected) behaviour in the function. use kwargs to customize the graph. If the time ticks on the x axis are too many, shrink them using 'shrinkTicks'. If data is missing in certain days, force the graph make gaps for the missing days by setting 'allTime':True. you can also define hue"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "image/png": "",
- "text/plain": [
- "