diff --git a/examples/articles/GettingStarted.ipynb b/examples/articles/GettingStarted.ipynb index e7d0f4af..32c31a12 100644 --- a/examples/articles/GettingStarted.ipynb +++ b/examples/articles/GettingStarted.ipynb @@ -32,17 +32,11 @@ "If you do not have Python or no compatible version installed, `uv` will automatically install a compatible version.\n", "\n", "## Optional dependencies\n", - "As of V4 of pdstools, we have made a big effort to reduce the number of big and heavy core dependencies. This means that while initial installation is very fast, you may at some points run into import errors and will be required to install additional dependency groups. If using `uv`, these can be installed with the `--extra` argument. \n", + "As of V4 of pdstools, we have made a big effort to reduce the number of big and heavy core dependencies. This means that while initial installation is very fast, you may at some point run into import errors and will be required to install additional dependency groups. \n", "\n", - "For instance, to install the optional dependencies to use the Pega DX API client, you should run \n", - "\n", - "```bash\n", - "uv pip install pdstools --extra api\n", - "```\n", - "\n", - "The alternative (pip-compatible) syntax for optional dependencies is:\n", + "To install extra dependencies, you can put them in square brackets after a package name. 
So, for instance, to install pdstools alongside the optional dependencies for the Pega DX API client, you should run:\n", "```bash\n", - "pip install 'pdstools[api]'\n", + "uv pip install 'pdstools[api]'\n", "```\n", "\n", "For an overview of all optional dependencies and the dependency groups they will be installed for, see the table below:\n", diff --git a/examples/ih/Conversion_Reporting.ipynb b/examples/ih/Conversion_Reporting.ipynb new file mode 100644 index 00000000..077263fa --- /dev/null +++ b/examples/ih/Conversion_Reporting.ipynb @@ -0,0 +1,168 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pdstools import IH\n", + "\n", + "import plotly.io as pio\n", + "import plotly as plotly\n", + "\n", + "plotly.offline.init_notebook_mode()\n", + "pio.renderers.default = \"vscode\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Conversion Results\n", + "\n", + "Visualization of conversion modeling results from IH data." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "import polars as pl\n", + "\n", + "ih_export_file = Path(\n", + " \"./Data-pxStrategyResult_InteractionFiles_20241213T091932_GMT.zip\"\n", + ")\n", + "\n", + "if not ih_export_file.exists():\n", + " ih = IH.from_mock_data()\n", + "else:\n", + " ih = IH.from_ds_export(\n", + " ih_export_file,\n", + " query=pl.col.ExperimentGroup.is_not_null() & (pl.col.ExperimentGroup != \"\"),\n", + " )\n", + "\n", + "ih.aggregates.summary_success_rates(by=[\"ExperimentGroup\", \"Channel\"]).drop(\n", + " \"Outcomes\"\n", + ").collect().to_pandas().style.hide()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ih.plot.overall_gauges(\n", + " metric=\"Conversion\",\n", + " condition=\"ExperimentGroup\",\n", + " by=\"Channel\",\n", + " reference_values={\"Web\": 0.055, \"Email\": 0.09},\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Detailed View \n", + "\n", + "Showing conversion rates for all actions." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ih.plot.success_rates_tree_map(metric=\"Conversion\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Conversion Rate Trends\n", + "\n", + "side-by-side bars and lines (separate methods) with error bars" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ih.plot.success_rates_trend_bar(\n", + " metric=\"Conversion\",\n", + " condition=\"ExperimentGroup\",\n", + " every=\"1w\",\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ih.plot.success_rates_trend(metric=\"Conversion\", every=\"1d\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Engagement" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ih.plot.overall_gauges(\n", + " condition=\"ExperimentGroup\",\n", + " by=\"Channel\",\n", + " reference_values={\"Web\": 0.20, \"Email\": 0.20},\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "ih.plot.success_rates_trend(\n", + " by=\"Channel\"\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": ".venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/examples/ih/Example_IH_Analysis.ipynb b/examples/ih/Example_IH_Analysis.ipynb index 49c257d7..30188e6e 100644 --- a/examples/ih/Example_IH_Analysis.ipynb +++ b/examples/ih/Example_IH_Analysis.ipynb @@ -2,961 +2,237 @@ "cells": [ { "cell_type": "code", - 
"execution_count": 1, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "import pandas as pd\n", - "import sys\n", + "from pdstools import IH\n", + "from pdstools.utils import cdh_utils\n", "\n", - "from cdhtools.IHanalysis import *\n", - "from cdhtools.cdh_utils import readDSExport\n", + "import polars as pl\n", + "import plotly.io as pio\n", + "import plotly as plotly\n", "\n", - "import matplotlib.pyplot as plt\n", - "%matplotlib inline" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Importing: ../../data/Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\n" - ] - } - ], - "source": [ - "df_orig = readDSExport(\"Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\", path=\"../../data\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "df = initial_prep(df_orig, referenceTime='pxOutcomeTime')" + "plotly.offline.init_notebook_mode()\n", + "pio.renderers.default = \"vscode\"" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "At first, take a look into the IH dataframe, explore the columns, outcome types and business structure" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
pySubjectTypepxInteractionIDControlGroupValidityStartpyStagepyJourneyCustomerIDChannelSubGrouppyChannelpyCustomerSubSegmentpyStep...pyResponsepyCategoryControlGroupValidityEndpxDecisionTimepyLabelChannelGrouppyStrategyDateWeekOfYearWeek
0CDHSample-Data-Customer-3586780626931683381Customer-4118SMS...2021-01-27 13:22:05.810000+00:00U+ Personal CardInitializeModelsSmall2021-01-2741
1CDHSample-Data-Customer-3586780626931683381Customer-4118Web...2021-01-27 13:22:05.810000+00:00U+ Personal CardInitializeModelsSmall2021-01-2741
2CDHSample-Data-Customer-3586780626931683381Customer-4118Web...2021-01-27 13:22:05.810000+00:00Visa Gold CardInitializeModelsSmall2021-01-2741
3CDHSample-Data-Customer-3586780626931683381Customer-4118SMS...2021-01-27 13:22:05.810000+00:00MasterCard GoldInitializeModelsSmall2021-01-2741
4CDHSample-Data-Customer-3586780626931683381Customer-4118Web...2021-01-27 13:22:05.810000+00:00AMEXPersonalInitializeModelsSmall2021-01-2741
\n", - "

5 rows × 52 columns

\n", - "
" - ], - "text/plain": [ - " pySubjectType pxInteractionID ControlGroupValidityStart \\\n", - "0 CDHSample-Data-Customer -3586780626931683381 \n", - "1 CDHSample-Data-Customer -3586780626931683381 \n", - "2 CDHSample-Data-Customer -3586780626931683381 \n", - "3 CDHSample-Data-Customer -3586780626931683381 \n", - "4 CDHSample-Data-Customer -3586780626931683381 \n", - "\n", - " pyStage pyJourney CustomerID ChannelSubGroup pyChannel \\\n", - "0 Customer-4118 SMS \n", - "1 Customer-4118 Web \n", - "2 Customer-4118 Web \n", - "3 Customer-4118 SMS \n", - "4 Customer-4118 Web \n", - "\n", - " pyCustomerSubSegment pyStep ... pyResponse pyCategory \\\n", - "0 ... \n", - "1 ... \n", - "2 ... \n", - "3 ... \n", - "4 ... \n", - "\n", - " ControlGroupValidityEnd pxDecisionTime pyLabel \\\n", - "0 2021-01-27 13:22:05.810000+00:00 U+ Personal Card \n", - "1 2021-01-27 13:22:05.810000+00:00 U+ Personal Card \n", - "2 2021-01-27 13:22:05.810000+00:00 Visa Gold Card \n", - "3 2021-01-27 13:22:05.810000+00:00 MasterCard Gold \n", - "4 2021-01-27 13:22:05.810000+00:00 AMEXPersonal \n", - "\n", - " ChannelGroup pyStrategy Date WeekOfYear Week \n", - "0 InitializeModelsSmall 2021-01-27 4 1 \n", - "1 InitializeModelsSmall 2021-01-27 4 1 \n", - "2 InitializeModelsSmall 2021-01-27 4 1 \n", - "3 InitializeModelsSmall 2021-01-27 4 1 \n", - "4 InitializeModelsSmall 2021-01-27 4 1 \n", - "\n", - "[5 rows x 52 columns]" - ] - }, - "execution_count": 6, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.head()" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
Count
pyIssuepyGrouppyDirectionpyChannelpyNamepyOutcome
Churned5072
Loyal4928
SalesCreditCardsInboundWebAMEXPersonalClicked1487
NoResponse6331
UPlusFinGoldAccepted367
Rejected6468
UPlusFinPersonalAccepted367
Rejected6534
UPlusGoldAccepted1843
Clicked1204
NoResponse7004
Rejected5487
UPlusPersonalAccept2635
Accepted970
Rejected4361
VisaGoldClicked1777
NoResponse5538
OutboundSMSAMEXPersonalClicked1002
NoResponse6775
MasterCardGoldClicked296
NoResponse6438
MasterCardWorldClicked342
NoResponse5846
UPlusFinGoldAccepted297
Clicked265
NoResponse7081
Rejected6645
UPlusFinPersonalAccepted311
Rejected6482
UPlusGoldAccepted1463
Rejected5474
UPlusPersonalAccept5206
Accepted684
Clicked581
NoResponse4984
Rejected4578
\n", - "
" - ], - "text/plain": [ - " Count\n", - "pyIssue pyGroup pyDirection pyChannel pyName pyOutcome \n", - " Churned 5072\n", - " Loyal 4928\n", - "Sales CreditCards Inbound Web AMEXPersonal Clicked 1487\n", - " NoResponse 6331\n", - " UPlusFinGold Accepted 367\n", - " Rejected 6468\n", - " UPlusFinPersonal Accepted 367\n", - " Rejected 6534\n", - " UPlusGold Accepted 1843\n", - " Clicked 1204\n", - " NoResponse 7004\n", - " Rejected 5487\n", - " UPlusPersonal Accept 2635\n", - " Accepted 970\n", - " Rejected 4361\n", - " VisaGold Clicked 1777\n", - " NoResponse 5538\n", - " Outbound SMS AMEXPersonal Clicked 1002\n", - " NoResponse 6775\n", - " MasterCardGold Clicked 296\n", - " NoResponse 6438\n", - " MasterCardWorld Clicked 342\n", - " NoResponse 5846\n", - " UPlusFinGold Accepted 297\n", - " Clicked 265\n", - " NoResponse 7081\n", - " Rejected 6645\n", - " UPlusFinPersonal Accepted 311\n", - " Rejected 6482\n", - " UPlusGold Accepted 1463\n", - " Rejected 5474\n", - " UPlusPersonal Accept 5206\n", - " Accepted 684\n", - " Clicked 581\n", - " NoResponse 4984\n", - " Rejected 4578" - ] - }, - "execution_count": 7, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "df.groupby(['pyIssue', 'pyGroup', 'pyDirection', 'pyChannel', 'pyName', 'pyOutcome']).count()[[\n", - " 'pxInteractionID']].rename(columns={'pxInteractionID':'Count'})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Use \"plot_daily_accept_rate\" to plot accept rate per day to understand how accept rates changed over time. To define accept rate, enter the positive (here: Accepted) and negative (here: Rejected) behaviour in the function. use kwargs to customize the graph. If the time ticks on the x axis are too many, shrink them using 'shrinkTicks'. If data is missing in certain days, force the graph make gaps for the missing days by setting 'allTime':True. 
you can also define hue" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_daily_accept_rate(df, 'Accepted', 'Rejected', \n", - " **{'hue':['pyChannel'], 'allTime':True, 'shrinkTicks':True})" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "The above graph provides detailed metric behavior over time. Instead of Accept, you can use other outcome types. To get a rolled up view, plot the accept rate graph based on a weekly axis. The week values are calculated based on the starting date of the IH file" + "# Example IH Analysis\n", + "\n", + "Interaction History (IH) is a rich source of data at the level of individual interactions from Pega DSM applications. It contains the time of the interaction, the channel, the actions/treatments, the customer ID and is used to track different types of outcomes (decisions, sends, opens, clicks, etc.). It does **not** contain customer attributes - only the IDs.\n", + "\n", + "This notebook gives some examples of data analysis on IH. Like most of pdstools, it uses [plotly](https://plotly.com/python/) for visualization and [polars](https://docs.pola.rs/) for dataframes, but the purpose of this notebook is to provide example analyses rather than re-usable code, although of course we do try to provide some generic, re-usable functions. All of the analyses should be easy to replicate in other analytical BI environments - except perhaps the analysis of model performance / AUC.\n", + "\n", + "This notebook uses sample data shipped with pdstools. Replace it with your own actual IH data and modify the analyses as appropriate." ] }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "plot_weekly_accept_rate(df, 'Accepted', 'Rejected', **{'showOutlier':True, 'hue':'pyDirection'})" + "ih = IH.from_ds_export(\n", + " \"../../data/Data-pxStrategyResult_pxInteractionHistory_20210101T010000_GMT.zip\"\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The above graphs provide insight into the accept rates on daily or weekly basis. ADM models however, take all time data every update cycle, to generate bubble charts. To view the historical cumulative accept rate, use the function below. If choosing a single model, this graph will be as if you had ADM success rate captured over time. Set 'showOutlier' to True to view outlier values" + "Preview of the raw IH data" ] }, { "cell_type": "code", - "execution_count": 10, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "plot_daily_cumulative_accept_rate(df[df['pyName']=='UPlusPersonal'], 'Accepted', 'Rejected', \n", - " **{'allTime':True, 'shrinkTicks':True, 'showOutlier':True,\n", - " 'title':'Proposition: UPlusPersonal'})" + "ih.data.head().collect()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The above graph can be done in various granularity level. For example the below graph shows the cumulative accept rate over time across all the offers" + "The same interaction can occur multiple times: once when the first decision is made, then later when responses are captured (accepted, sent, clicked, etc.). For some of the analyses it makes more sense to group by interaction first. This is what that data looks like:" ] }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "plot_daily_cumulative_accept_rate(df, 'Accepted', 'Rejected', \n", - " **{'allTime':True, 'shrinkTicks':True, 'showOutlier':True})" + "ih.aggregates._summary_interactions(by=[\"Channel\"]).head().collect()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "Below graph shows the cumulative accept rate per pyGroup, pyDirection and pyChannel" + "# Distribution Analysis\n", + "\n", + "A distribution of the offers (actions/treatments) is often the most obvious type of analysis. You can do an action distribution for specific outcomes (what is offered, what is accepted), view it conditionally (what got offered last month vs this month) - possibly with a delta view, or over time." ] }, { "cell_type": "code", - "execution_count": 12, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_daily_cumulative_accept_rate(df, 'Clicked', 'NoResponse', \n", - " **{'hue':['pyGroup', 'pyDirection', 'pyChannel'], \n", - " 'allTime':True, 'shrinkTicks':True})" - ] - }, - { - "cell_type": "markdown", + "execution_count": null, "metadata": {}, + "outputs": [], "source": [ - "In addition to accept rate, it is important to track other outcome values over time. Use 'daily' or 'weekly' to set the granularity of time axis. Instead of 'Accepted', other outcome labels can be explored over time" + "ih.plot.response_count_tree_map()\n" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "plot_outcome_count_time(df, 'Accepted', 'weekly', **{'hue':'pyIssue', 'allTime':True, 'shrinkTicks':True})" + "ih.plot.action_distribution(query=pl.col.Outcome == \"Clicked\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "below graphs puts a couple of graphs together to provide better insight at the offer level to be able to compare the accept rate, accept count and total responses per model" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "Text(0.5, 1.0, 'Offers within Inbound direction')" - ] - }, - "execution_count": 14, - "metadata": {}, - "output_type": "execute_result" - }, - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "plot_df = get_accept_rate(df[df['pyDirection']=='Inbound'], 'Accepted', 'Rejected', 'pyName')\n", + "# Responses\n", "\n", - "fig, ax = plt.subplots(2,1,figsize=(13,9), sharex=True, gridspec_kw = {'hspace':0.05})\n", - "sort = plot_df.sort_values('Accept Rate (%)', ascending=False)['pyName'].tolist()\n", - "sns.barplot(x='pyName', y='Accept Rate (%)', data=plot_df, ax=ax[0], order=sort)\n", - "sns.barplot(x='pyName', y='Accepted', data=plot_df, ax=ax[1], order=sort)\n", - "sns.pointplot(x='pyName', y='Total', data=plot_df, ax=ax[1], order=sort)\n", - "for x in ax[1].get_xmajorticklabels():\n", - " x.set_rotation(90)\n", - "ax[0].set_xlabel('')\n", - "ax[1].text(2,2000,'The bars show the accepts\\nThe line shows accept+reject')\n", - "ax[0].set_ylabel('Accept Rate (%)', fontsize=13)\n", - "ax[1].set_ylabel('Accepts', fontsize=13)\n", - "ax[0].set_title('Offers within Inbound direction')" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Another insightful graph is to see what share of a given outcome label, each offer(or direction or channel) has. For example the below graph shows that of all the historical 'Accepted' labels, 'UPlusGold' proposition has a little over 50% of all the 'Accepted' outcomes. 'UPlusFinPersonal' has roughly 10% of all time Accepted outcomes. instead of proposition level, you can set other levels (channel, direction etc)." + "A simple view of the responses over time." ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "plot_outcome_share_graph(df[df['pyChannel']=='Web'], 'Accepted', 'pyName', 'pyGroup')" + "ih.plot.response_counts(every=\"1d\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "It is also possible to see how the outcome share of a given proposition (or channel etc.) changed over time" + "Which could be viewed per channel as well:" ] }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "click_share_name_daily = get_outcome_share_time(df[df['pyChannel']=='Web'], 'Clicked', 'pyName', time='daily')\n", - "click_share_name_weekly = get_outcome_share_time(df[df['pyChannel']=='Web'], 'Clicked', 'pyName', time='weekly')" + "ih.plot.response_counts(\n", + " by=\"Channel\",\n", + " query=pl.col.Channel != \"\",\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The graph below shows among offer within Web channel, what share of Clicked outcome labels belonged to UPlusGold proposition every day. It can be seen that the value dropped significantly on 12-23" - ] - }, - { - "cell_type": "code", - "execution_count": 17, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "get_daily_graph(click_share_name_daily[click_share_name_daily['pyName']=='UPlusGold'], \n", - " 'Date', 'Clicked Share (%)', **{'shrinkTicks':True})" + "# Success Rates\n", + "\n", + "Success rates (accept rate, open rate, conversion rate) are interesting to track over time. In addition you may want to split by e.g. Channel, or contrast the rates for different experimental setups in an A-B testing set-up." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "same graph can be viewed on a weekly basis" + "Use `ih.plot.success_rates_trend` to plot success rates over time and see how they changed. Use `by` to split the trend (here: by Channel) and `query` to filter the data before plotting (here: keeping only rows with a non-empty Channel)." ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "get_daily_graph(click_share_name_weekly[click_share_name_weekly['pyName']=='UPlusGold'], \n", - " 'Week', 'Clicked Share (%)', **{'shrinkTicks':True})" + "ih.plot.success_rates_trend(\n", + " by=\"Channel\", query=pl.col.Channel.is_not_null() & (pl.col.Channel != \"\")\n", + ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The graph below shows the Accepted share between two directions: Inbound/Outbound. Of course in this case because there are only 2 directions, when one graph goes up, the other has to go down so the sum of the two per day would be 100%" + "# Model Performance\n", + "\n", + "Similar to Success Rates: typically viewed over time, likely split by channel, conditioned on variations, e.g. NB vs AGB models." ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ - "click_share_direction_daily = get_outcome_share_time(df, 'Accepted', 'pyDirection', time='daily')" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "data": { - "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAtAAAAFwCAYAAACRo0zvAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMSwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/YYfK9AAAACXBIWXMAAAsTAAALEwEAmpwYAAA3wUlEQVR4nO3deZiddX338fd39kxmsk+CGGKi7ChQjD5VsOJaFypqtWhFQVSU1oUqpWrbp1qLtWJb69qHx+US6/qouFaKG611D4qgQBExxKCQBZLMJJn9+/xxzpk5M5nlnGTmzJmZ9+u65jrn/O5z3+d7Mt7kk5/f+3dHZiJJkiSpMg1zXYAkSZI0nxigJUmSpCoYoCVJkqQqGKAlSZKkKhigJUmSpCoYoCVJkqQqNM11AdVas2ZNbty4ca7LkCRJ0gJ3ww037MrMrvHj8y5Ab9y4kS1btsx1GZIkSVrgIuKuicZt4ZAkSZKqYICWJEmSqmCAliRJkqow73qgJUmSFrOBgQG2b99Ob2/vXJeyYLS1tbF+/Xqam5srer8BWpIkaR7Zvn07nZ2dbNy4kYiY63Lmvcxk9+7dbN++nU2bNlW0jy0ckiRJ80hvby+rV682PM+QiGD16tVVzegboCVJkuYZw/PMqvbP0wAtSZKkqnR0dEz7no0bN7Jr164aVFNw/fXXc84559TkswzQkiRJUhUM0FrQhoeTvsEhevoGuX9/Pzv29bL9/gP8atd+fnFvNz//zV5u/PUefrT1Pr57xy5+cOdu7tjRw54D/WTmXJcvSVJdu/766zn77LN5znOew4knnsgLXvCCMX9/vv3tb+dhD3sYj3zkI7njjjsA2Lp1K49//OM59dRTecITnsC2bdsAuPDCC/nMZz4zsm9plnuqz7j22ms58cQTOeOMM/jc5z5Xq6/tKhyqTmYyOJwMDiX9Q8MMlH4Gx70eyrLnw/QPjn09dnvSPzh2W//QMINlzwcGx24bGBpmsHiM/rIayl8PDhVqPVxNDcHqjhZWL21lTWcra5a2sKazldVLW1jT0crqjsJj6Xlzo/8elSQtPj/5yU/4+c9/ztFHH82ZZ57Jd77zHc466ywAli9fzs0338zVV1/NpZdeype//GVe9apXccEFF3DBBRfwoQ99iFe/+tV8/vOfr/ozNm/ezMte9jK++c1vcuyxx3LeeefV4NsWGKDrwPBwefhMBkdCYDEQDk4cOkfD6RTbyoLlQNlxB0deZ1k4Lb4eH15HAmzh9WxpbgyaGxtoaghamhpobiz9FMZHx4KO1qax20rvbYpD9hvdHjQ3TbatMDYwlOze38eunn529fSxu6fwfHdPH7/c0cPOnj76Byf+M1i+pJk1HS2s7milqyxgl0J4V+doGF/a0ugFIJKkI/bmL/2cW36zb0aPefLRy/ibPzil4vc/8pGPZP369QCcfvrpbN26dSRAP//5zx95/LM/+zMAvve9743MFr/whS/k8ssvP6zP6OjoYNOmTRx33HEAnH/++Vx11VUV130kDNAVuO2efXz9lnvpLwue082Mjp+dHT8zWh6Yh45glnQqEdBSCohNhYDY1FAKomMDZGtzAx1tTcXt5SG0gZbSe5saaG4oe16+rTjWUvyM0ue1lB+neNymxkO3NTfGvAiUmUlP3yC7iwF7NGgXH/f3sau7n1vv2ceu7j729Q5OeJzWpobi7PXY2ezVZWOl8ZXtLTQ21P+fjSRpcWptbR153tjYyODg6N995X+3T/f3fFNTE8PDhUmq4eFh+vv7K/qMuWCArsAtv9nHO667HWBM8BwJkE2FWdOJZklbysLlaPgcN2taGmsYO0taPjNa2H/0+Zhtxc9tahi7zdA18yKCzrZmOtua2bhm6bTv7x8c5r79pbA9OptdCt07e/r47d5ebr57L7v390/4j6mGgFVLx85ml56Pn+le09FKW3PjbHx1SVIdqmameC586lOf4vWvfz2f+tSneNSjHgXAox/9aD75yU/
ywhe+kI997GM85jGPAQqrdtxwww380R/9EV/84hcZGBiY8tgnnngiW7du5Ze//CUPechD+MQnPjHr36fEAF2BZ5x2NOecevS8mSVV/WhpauCo5W0ctbxt2vcODyd7Dw6we38fO7v7i7PZfeze3z+mpeTG+/awu6eP/f1DEx6no7VppJVk9LFstrvYy71maSvLljT5v2lJ0qy5//77OfXUU2ltbR0JuO9+97t58YtfzJVXXklXVxcf/vCHAXjZy17Gueeey2mnncZTnvIUli6deqKqra2Nq666iqc//em0t7fzmMc8hu7u7ln/TgAx31Ya2Lx5c27ZsmWuy5Dm3MH+oZGZ7dEWkn52lkJ3d6GlZHdPP/cd6GeiU725MVi9dOwsdtckLSWrlnqhpCTVg1tvvZWTTjpprstYcCb6c42IGzJz8/j3OgMtzVNLWho5ZlU7x6xqn/a9g0PD3H9gYEzYPqSlZH8/v7i3m109/ZNeLLqivXncLPbYoF0euNu9UFKStEAZoKVFoKmxga7OVro6W6d9b2bSXXah5O6ePnaO693e3dPPrb/Zx66eyS+UbGtuGAnXXSPLAU68LODK9hYa7NmXJM0TBmhJY0QEy9qaWdbWzKYKLpTsGxwqXCjZ3c+u8r7tkf7tPu7e08tPt+/lvikvlCxfgaQwm7267HUpjK9e2uKFkpKkOWWAlnREWpsaecDyJTxg+ZJp31u6UHLsEoDFVpKyiyfv2raf3T39HJjkQsnO1qZJbmpzaEvJsjYvlJQkzSwDtKSaaWgIVi5tYeXSFo5bN/37D/QPjiz3t3tc4C61k/xyZw8/3NrP/ZNcKNnS2FBY/m9kFZJWVi1tZkV7C8uWNLNiSTMr2ptZvqSZFUtaWL6kmc62JltKJEmTMkBLqlvtLU20r2qq+ELJ+w4UWkkKd5PsmyB89/M/93Rz/4F+egcmv6tmROHOksuLAXvZkkLgXlEaa28uC98tI2PLlzTbXiJJi4ABWtKC0NTYwNrONtZ2Tr/mNkDvwBD7Dg6w9+AAew4OsOdA8fmBfvaNHzs4wPb7D7LnQD97Dw4w1c1DW5saxsxoLyuG60PC97hA3tnW7M2PJM0r27dv50//9E+55ZZbGB4e5pxzzuHKK6+kpaVl0n3e+ta38sY3vhGArVu3cs455/Czn/2sViXzpje9iY6ODi677LIjOo4BWtKi1NbcSFtzI2uXVRa4S4aHk57+QfaOBO4B9hzsH3m+b9zY9vsPcMtvCiF8sp5uKMx6d7Y2sby9ELzLZ7mXj4Tw0UBePtbW3GCft6Saykye/exnc8kll/CFL3yBoaEhLr74Yv7yL/+SK6+8ctL9ygP0fGaAlqQqNDSMrlJyTJX79g8Os/fgAHvLAvdoCC+F7/6RWe+77z848nyi1UtKWhobWN4+2nIyGr5H20sODeQtLGtrosmb40g6DN/85jdpa2vjxS9+MQCNjY388z//M5s2bWLTpk3ccsstvOc97wHgnHPO4bLLLuPaa6/l4MGDnH766ZxyyilcccUVDA4O8oIXvIAf//jHnHLKKVx99dW0t7fzjW98g8suu4zBwUEe8YhH8P73v5/W1lY2btzIli1bWLNmDVu2bOGyyy7j+uuv501vehPbtm3jzjvvZNu2bVx66aW8+tWvBuCKK67gIx/5CGvXruWYY47h4Q9/+BF/fwO0JNVIS1Pl63GXy0z29w+x50D/6Cz3wbEz4PvKAvlv9vRy62+72XtwgJ6+idfpLinNek86yz0+kBfbUbxRjrS4/fznPz8kiC5btowNGzYwODjxf3fe9ra38Z73vIcbb7wRKLRw/M///A8f/OAHOfPMM7nooot43/vexytf+UouvPBCvvGNb3D88cfzohe9iPe///1ceumlU9Z022238a1vfYvu7m5OOOEELrnkEm666SY++clPcuONNzI4OMgZZ5xhgJakxSAi6GhtoqO1ifUrq9t3YGh4TODeW2ovOTB+rPD8tr372Ht
wkL0H+xkYmnzWu7kxRi60XF7W0z2+xaSwvaXsebO3hJdm0ldfD/fcPLPHPOph8NS3zewxJ3HMMcdw5plnAnD++efzrne9iyc96Uls2rSJ448/HoALLriA9773vdMG6Kc//em0trbS2trK2rVruffee/n2t7/Ns571LNrbCxejP+MZz5iRug3QkrSANTc2FG9KU/2s94H+oTGtJuWtJ+Xhe+/BAXZ093L7vYVZ7+5J7k5ZsrSlcWT1kjFBu31s28n47R2trukt1YuTTz6Zz3zmM2PG9u3bx7Zt21ixYgXDw6MrHfX29k56nPHn9HTneFNT08ixxx+3tXX0v3ONjY2TzoTPhJoF6IjYCnQDQ8BgZm4ujr8K+NPi+Fcy8/Ja1SRJmlhEsLS1iaWtTRy9Yvqb5JQbHBqmu3ewuJJJfzF8l7WcjAvkd+zoGdnWPzT58oKNDTHSVlLe810I32NXNSms591MR1vTyOy9q5xoQarRTPF4T3jCE3j961/P1VdfzYte9CKGhoZ43etex4UXXsiDH/xg/vVf/5Xh4WHuvvtufvjDH47s19zczMDAAM3NzQBs27aN733vezzqUY/i4x//OGeddRYnnHACW7du5Y477uDYY4/lox/9KI997GMB2LhxIzfccANPfepT+exnPzttnb/3e7/HhRdeyBve8AYGBwf50pe+xMtf/vIj/v61noF+XGbuKr2IiMcB5wKnZWZfRKytcT2SpBnW1NgwcsMcmP528CWZSe/AcPHCybI2k5HlBMdefHnf/n7u3LmfvQcH2Nc7MOGNdMq1tzQWwnRZqC697hwZbx59Pf69xeetTa56IkUE11xzDX/yJ3/CW97yFoaHh3na057GW9/6VlpaWti0aRMnn3wyJ510EmecccbIfhdffDGnnnoqZ5xxBldccQUnnHAC733ve7nooos4+eSTueSSS2hra+PDH/4wz33uc0cuInzFK14BwN/8zd/wkpe8hL/+67/m7LPPnrbOM844g/POO4/TTjuNtWvX8ohHPGJmvn9O91+cGVKcgd48LkB/GrgqM79e6XE2b96cW7ZsmYUKJUnz1dBw0t07MCZg9/QN0tM7SHfxsaevMNbdOziyrfR6f3/hcarVTkqaG6MsUDeXhe/pwnkTnWUhvb250Tte6rDceuutnHTSSXNdxoIz0Z9rRNxQ6pooV8sZ6ASui4gE/k9mXgUcDzwmIq4AeoHLMvNHNaxJkrQANDZE4ULG9hYetPrwjpGZ9A0OjwnY3X0DI0G7PHzvHxfOd3b38atd+4vbB6a802VJBHS0FNpkxgbsyWfGO1obC8/L39vW5IWZUo3VMkCflZl3F9s0vhYRtxU/fxXwu8AjgE9HxINz3LR4RFwMXAywYcOGGpYsSVosImLkBjvVLjU43sDQMPvHBe7uslnvqWbG793XO7q9b3Da1hQo3AGzc4pZ8I7W5inC+ejjkmaXJ5QqUbMAnZl3Fx93RMQ1wCOB7cDnioH5hxExDKwBdo7b9yrgKii0cNSqZkmSDkdzY8PIjPiRGB5ODg4MTdB6MjBtOP/Nnt6ymfOBKZclLGkIirPbzWPD+DR94Z1jZsm9aFMLX00CdEQsBRoys7v4/MnA3wI9wOOAb0XE8UALsGvyI0mStHg0NIyuhrJu2ZEdq29wiJ7eQfb3DU3amlLeF16aGd9TvCV9afv+KW5JX86LNmdXZvrnMoOqvSawVjPQ64Brir/oJuDjmXltRLQAH4qInwH9wAXj2zckSdKRa21qpLWjkdUdR3acoeFkf//4sF3ejjI06UWb2/YfGH3dd+QXbXa2NdHV2crazrbiYytrl7WypqN1QfeFt7W1sXv3blavXm2IngGZye7du2lra6t4n5oE6My8EzhtgvF+4Pxa1CBJko5cY0OwrK2ZZW3NR3Sc0rKFpTA91UWbY1ZM6Ru9aHPPgX7uPzBwyLEjYFV7SyFUL2ujq6MQrNcWw3bpeVdnK+0t8++ecuvXr2f79u3
s3Llz+jerIm1tbaxfv77i98+//9VIkqR5LyJY0tLIkpYju2izf3CY3fv72LGvjx3dfezo7h15vrO7j53dvfzi3m52dvcxOMGMd0dr00iYXrusbfT5uLC9fElz3cz2Njc3s2nTprkuY1EzQEuSpHmrpamBByxfwgOWT33HzOHh5P4D/SPBujxs7yw+v3n7HnZ093Fggj7vlsYGusrD9bJiwB4ZK4Tt1UtbaFrA7SMqMEBLkqQFr6EhWN3RyuqOVk56wNTv7ekbZMe+3mLIHg3YO4sz21t37+eHW+9jzyTtI6uXjobs0faRtjHBu6uzlbbmxln6tpptBmhJkqQyHa1NdHR18OCuqa+47BscYldP/9iwva+XnT2jLSW3/nYfu3r6J7xgsrOtacK+7PKw3dXZxrK2prppH1GBAVqSJOkwtDY18sAVS3jgiqnbR4aGk/v294/MZI/MapcF759s28OO7t4J72LZ2tQwOptdFrbXdrbRVTbLvXppq+tv14gBWpIkaRY1NsRI//TJTL6gd2bS3TdYnL3uLV4EWezXLobtO3b28N1f7mJf7+CEn7N6actom0jZ6iNd42a5W5tsHzkSBmhJkqQ6EDG6ROCxa6duH+kdGBoJ1zuLs9rlF0Teu6+Xm+/ey+6ePiZabnv5kuYJL4bsGtdS0tFq+8hEDNCSJEnzTFtzI8esaueYVe1Tvm9oONndM/ZiyPIl/3Z29/Gjrfexo7uP/sFD20eWNDcesvLI6OvRWe5V7S00LKL2EQO0JEnSAtXYEIX1rZdNfZe9zGTfwcGxPdrjwvZt93Tz7V/sonuC9pGmhmDNmJaRYttIWdhe21m4S2RL0/xf5s8ALUmStMhFBMvbm1ne3sxx6zqnfO/B/qExF0SW+rNLLSV37+nlxl/vYff+fnKC9pGV7c0jbSKH3Iq9LGwvba3fmFq/lUmSJKnuLGlpZMPqdjasnrp9ZGBomN09/ePC9tiVSO7cuZ8d3b0MDB2atJcW71J5/LpOrnrR5tn6OofFAC1JkqQZ19zYwFHL2zhqeRuwfNL3ZSZ7DgyM6csuD9tL6vCGMwZoSZIkzZmIYOXSFlYubeGEo6ZuH6kX87+LW5IkSaohA7QkSZJUBQO0JEmSVAUDtCRJklQFA7QkSZJUBQO0JEmSVAUDtCRJklQFA7QkSZJUBQO0JEmSVAUDtCRJklQFA7QkSZJUBQO0JEmSVIWaBeiI2BoRN0fEjRGxZdy210VERsSaWtUjSZIkHY6mGn/e4zJzV/lARBwDPBnYVuNaJEmSpKrVQwvHPwOXAznXhUiSJEnTqWWATuC6iLghIi4GiIhzgbsz86c1rEOSJEk6bLVs4TgrM++OiLXA1yLiNuCNFNo3plQM3BcDbNiwYXarlCRJkqZQsxnozLy7+LgDuAZ4LLAJ+GlEbAXWAz+OiKMm2PeqzNycmZu7urpqVbIkSZJ0iJoE6IhYGhGdpecUZp1/lJlrM3NjZm4EtgNnZOY9tahJkiRJOhy1auFYB1wTEaXP/HhmXlujz5YkSZJmTE0CdGbeCZw2zXs21qIWSZIk6UhUFKAjYiXwO8Aq4D7gxsy8bzYLkyRJkurRlAE6Ip4FXAqcBewH9gHLgPaI+A7wzsy8ZraLlCRJkurFpBcRRsR/ApcBHwUenJnLMnN9Zi4DHgxcDVxWfJ8kSZK0KEw1A/3WzPyPiTZk5jbgg8AHI2LadZwlSZKkhWLSGejJwvME77tu5sqRJEmS6ltVq3BERAPwYgoXFN4B/J/MPDgbhUmSJEn1qNobqVwJPA64GXgM8G8zXpEkSZJUx6ZbheMZmfnFsqGHZ+bZxW0fBHbOYm2SJElS3ZluBvoVEfHxiFhVfP2riPibiHgS8A/ATbNbniRJklRfpgzQmfk04D+A70bEecCrgVYKa0M3A8+b7QIlSZKkejLtRYSZ+ZGI+A/gPcAfA6/IzN/OemWSJElSHZr2IsKICGAwM59D4aYq34yIi2a9MkmSJKk
OTRmgI+JcYAdwc0RsA34DPBp4bER8LSIeVIMaJUmSpLox3Qz0u4CnZOYDgOcAb8vM+zPzAuCfgH+f7QIlSZKkejJdgG6lMOsM8NviawAy86vA/5qluiRJkqS6NN1FhG8GboqI24FNwJ+Ub8zMntkqTJIkSapHUwbozHx/RHwWeBBwR2beX5uyJEmSpPpUyTJ2OyhcSChJkiQtepP2QEfERyJi41Q7R8TGiPjIjFclSZIk1ampZqC/B/wgIm4CvgbcAuwDlgEnA08CTgX+eraLlCRJkurFpAE6M/81Iq4GzgeeCbwWWAncD/wE+AxwbmYeqEGdkiRJUl2Y7iLCA8BVxR9JkiRp0Zv2Vt6SJEmSRhmgJUmSpCoYoCVJkqQqTLsO9EyJiK1ANzAEDGbm5oi4EvgDoB/4JfDizNxTq5okSZKkatV6BvpxmXl6Zm4uvv4a8NDMPBW4HXhDjeuRJEmSqlJRgI6Ixoj4q4j4RUTsLY79fkS84kg+PDOvy8zB4svvA+uP5HiSJEnSbKt0BvotwDOAvwCyOHY78PIqPiuB6yLihoi4eILtFwFfreJ4kiRJUs1V2gP9x8CjMvO3EfGB4thWYGMVn3VWZt4dEWuBr0XEbZn5XwAR8ZfAIPCxiXYsBu6LATZs2FDFR0qSJEkzq9IZ6HZgx7ixFqC30g/KzLuLjzuAa4BHAkTEhcA5wAsyMyfZ96rM3JyZm7u6uir9SEmSJGnGVRqgfwy8eNzYHwM/rGTniFgaEZ2l58CTgZ9FxFOAy4FneEtwSZIkzQeVtnBcBlwfEc8D2iPiS8Bm4HEV7r8OuCYiSp/58cy8NiLuAFoptHQAfD8zj+jCREmSJGk2VRSgM/NnEXES8CLgNuAu4KWZeW+F+98JnDbB+LFV1CpJkiTNuWkDdEQ0AT8BHpGZ/zj7JUmSJEn1a9oe6OI6zSsYXb5OkiRJWrQqvYjwX4ArirPRkiRJ0qJVaSB+OYU1ny+JiN8Cw6UNmXn8LNQlSZIk1aVKA/TfzWoVkiRJ0jxR6SocH5ntQiRJkqT5oOKe5ohYDTwC6AKiNJ6ZV89CXZIkSVJdqihAR8QTgc8C/RRW5NhTfPwVYICWJEnSolHpKhxvA/42M7uAnuLjW4B/nbXKJEmSpDpUaYA+Dnhn8XmpfeMfgEtnuB5JkiSprlUaoA8ArcXnuyNiA9ACrJyVqiRJkqQ6VWmA/i7wzOLzrwJfBL4OfG8WapIkSZLqVqWrcJzPaNi+DHgd0An802wUJUmSJNWrSteBPlj2vBe4YtYqkiRJkupYNetAPwrYTGHmeURmvnWmi5IkSZLqVaXrQL8JeCNwI7C/bFMCBmhJkiQtGpXOQL8CeExm/mA2i5EkSZLqXaWrcATwo9ksRJIkSZoPKg3QHwBeMpuFSJIkSfPBpC0cEfE1Cj3OUAjafx4RrwZ+W/6+zHzy7JUnSZIk1ZepeqD/e9zrb89mIZIkSdJ8MGmAzsw317IQSZIkaT6YchWOiGgCIjMHysYuBE4H/iszPzer1UmSJEl1ZrqLCD8FvLj0IiL+CrgKOAv4WES8dBZrkyRJkurOdAF6M/DlstevAl6amZuB84FLZqswSZIkqR5NF6BXZuZvACLiJGA58Onits8DGyv9oIjYGhE3R8SNEbGlOLYqIr4WEb8oPq6s+htIkiRJNTRdgN4fER3F55uBn2Vmb/F1UPmdDEsel5mnF2ewAV4PfCMzjwO+UXwtSZIk1a3pAvS3gbdExInAy4Fry7adwLg1oQ/DucBHis8/AjzzCI8nSZIkzarpAvRfAE8BbgGWAf9Utu0FHLpW9FQSuC4iboiIi4tj6zKzFMLvAdZVcTxJkiSp5qZswcjMXwEnRcSqzLxv3Oa3A/1VfNZZmXl3RKwFvhYRt437rIyInGjHYuC+GGDDhg1VfKQkSZI0s6abgQZggvBMZu7JzAOVflBm3l183AFcAzwSuDciHgBQfNwxyb5XZebmzNzc1dVV6UdKkiR
JM66iAH2kImJpRHSWngNPBn4GfBG4oPi2C4Av1KIeSZIk6XBVu4rG4VoHXBMRpc/8eGZeGxE/Aj4dES8B7gL+qEb1SJIkSYelJgE6M+8ETptgfDfwhFrUIEmSJM2EmrRwSJIkSQvFpDPQEfEtCkvPTSkzHz+jFUmSJEl1bKoWjq+XPV8DvIzC7bt/ReEW3s8E/u8s1SVJkiTVpUkDdGZeUXoeEV8EnpWZXysbeyLwmtktT5IkSaovlfZAnw18Y9zYt4DHzmg1kiRJUp2rNED/Gjhv3NhzgO0zW44kSZJU3ypdxu5y4LMR8QpgK4Ue6P9FIURLkiRJi0alt/L+CnAK8B/AfuA64JTM/PIs1iZJkiTVnYpvpJKZvwTeOou1SJIkSXWv4hupRMT5EXFdRNxUfP17EfHs2StNkiRJqj8VBeiIeC3wZuBaYENxeCeF3mhJkiRp0ah0BvoS4KmZ+U+M3p3wduDYWalKkiRJqlOVBuhVmXl78XkpQAcV3OpbkiRJWkgqDdC3RMQ548aeAvx0huuRJEmS6lqlq3C8EfhKRHwaaI2IdwPPA8aHakmSJGlBq3Qd6G8DvwscpHAL7wbg7Mz8wSzWJkmSJNWdimagI2JjZt4CvGrc+IMy865ZqUySJEmqQ5X2QN80yfhPZqoQSZIkaT6oNEDHIQMRzbgKhyRJkhaZKVs4IuJrFEJya0RcN27zBuDHs1WYJEmSVI+m64H+7+LjY4HvlI0PA/cA/282ipIkSZLq1ZQBOjPfDBARt2bmp2tTkiRJklS/KlqFIzM/HREdFNZ9Xg9sB76Smd2zWZwkSZJUbypdxm4z8O8U1oHeRqH/+V0R8bTM3DKL9UmSJEl1pdJVON4H/GNmPigzH5OZDwLeAbx/9kqTJEmS6k+lAfok4B/Hjf0TcGI1HxYRjRHxk4j4cvH1EyLixxFxY0T8d0QcW83xJEmSpFqrNEDfCDx03NjDiuPVeA1wa9nr9wMvyMzTgY8Df1Xl8SRJkqSaqqgHGrgO+HJEfAC4C9gIXARcFRF/XHpTZn58sgNExHrg6cAVwGtLuwDLis+XA7+ppnhJkiSp1ioN0BcBA8AFZWODxfGSpDCLPJl3ApcDnWVjLwX+PSIOAvuA362wHkmSJGlOVLqM3aYj+ZCIOAfYkZk3RMTZZZv+DHhaZv4gIv6cQl/1SyfY/2LgYoANGzYcSSmSJEnSEam0BxqAiDg6Ig5nlvhM4BkRsRX4JPD4iPgKcFpm/qD4nk8Bj55o58y8KjM3Z+bmrq6uw/h4SZIkaWZUFKAjYm1EfJ3CDVS+Xhw7LyLeV8n+mfmGzFyfmRuB5wHfBM4FlkfE8cW3PYmxFxhKkiRJdafSHuh3Ab8CuoA7imPfBP7ucD84Mwcj4mXAZyNiGLifsT3VkiRJUt2pNEA/DnhQZvZGRAJk5s6IWFvtB2bm9cD1xefXANdUewxJkiRprlTaA93HuLAdEauA+2a8IkmSJKmOVRqgrwP+MSKay8beDHxl5kuSJEmS6lelLRyXA5+n0KfcFhF7gJ9SuBBQkiRJWjQqXQf6PuD3IuLhwCYKdyPckpk5m8VJkiRJ9aaiAB0RK4H+zLwBuKE4tjQimjNzzyzWJ0mSJNWVSnugvwicMm7socAXZrYcSZIkqb5VGqBPAbaMG9sCPGxmy5EkSZLqW6UBuhdoHze2FBiY2XIkSZKk+lZpgP5v4K0R0QAQEQH8LfCd2SpMkiRJqkeVLmP35xRu3f2HEXEnhZU4+oHHz1ZhkiRJUj2qdBm7uyLiocA5wEZgK/CVzDwwe6VJkiRJ9afSGWgy8yDw/2axFkmSJKnuVdQDHRHXRcTjx409ISK+OjtlSZIkSfWp0osIzwD+a9zYt4FHzGw5kiRJUn2rNEAPA83jxhqBmNlyJEmSpPpWaYC+AXjVuLFXAj+e2XIkSZKk+lbpRYR/AVwfEX8I3A4cB5wAnD1LdUmSJEl1qaIZ6My8CTgZ+AywD/gscHJm/nQ
Wa5MkSZLqTjXL2N0DXFl6HRGnRMQbMvPVs1KZJEmSVIcq7YEGICJaI+JFEfEd4GYKq3NIkiRJi0ZFM9ARcTLwcuB8oJ1C8H5KZl43i7VJkiRJdWfKGeiIeGFEfBv4GfBY4E3AA4H7gBtnuzhJkiSp3kw3A/0RYDfw9MwcuetghMs/S5IkaXGargf6r4Ee4PMRcU1E/EFEVNU3LUmSJC0kU4bhzLwCeDDwTCApLF93N7ACOHqWa5MkSZLqzrSzyVnw1cx8NvAg4H3APcCPIuLT1XxYRDRGxE8i4svF1xERV0TE7RFxa0S4JJ4kSZLqWlXtGJn528x8C4VZ6XOBlio/7zXArWWvLwSOAU7MzJOAT1Z5PEmSJKmmDqufuTgr/e+Z+cxK94mI9cDTgQ+UDV8C/G1mDhePu+Nw6pEkSZJqpZYXBL4TuBwYLht7CHBeRGyJiK9GxHE1rEeSJEmqWk0CdEScA+zIzBvGbWoFejNzM/B/gQ9Nsv/FxZC9ZefOnbNcrSRJkjS5Ws1Anwk8IyK2UuhzfnxE/BuwHfhc8T3XAKdOtHNmXpWZmzNzc1dXVy3qlSRJkiZUkwCdmW/IzPWZuRF4HvDNzDwf+DzwuOLbHgvcXot6JEmSpMM13Z0IZ9vbgI9FxJ9RuGHLS+e4HkmSJGlKNQ/QmXk9cH3x+R4KK3NIkiRJ84K35ZYkSZKqYICWJEmSqmCAliRJkqpggJYkSZKqYICWJEmSqmCAliRJkqpggJYkSZKqYICWJEmSqmCAliRJkqpggJYkSZKqYICWJEmSqmCAliRJkqrQNNcFSJIkaZEb6IWee0d/uu8ZfWxeAk+7cq4rHMMALUmSpJmXCX37oGfH2EDccw90jwvLvXsO3T8aYGkXrD255qVPxwAtSZKkyg0Pw4HdZUH4nmIw3jFu7F4YPHjo/o2t0LkOOo6CNcfBxsdAx7rRsdLj0jXQ0Fj771cBA7QkSZJgsH/yNoqRxx2wfwcMDx66f+vyYvhdBw/cDJ1HFYNx+eNaaFsBETX/ejPJAC1JkrSQ9XWXtVGMmyEuD8sH75tg5yjMBJdmhtc9dNxMcdlPS3vNv9pcMUBLkiTNN5lw4L6y9ol7J2+jGNh/6P6NLaPBd9WDYcOjJmmj6IJG4+J4/olIkiTVi6GBcSF4knaKnh0wPHDo/i2do+H3AafD8RO1UayDJSvnfRvFXDJAS5Ikzbb+/cUAfO8EbRT3jLZYHNgN5KH7t68ZnSHuOnHiNorOo6Blac2/2mJkgJYkSTocmXDw/okvtBsfkvu7D92/oWk0/K7YAOsfMXEbRcdaaGyu/ffTpAzQkiRJ5YYGYf/OCtoo7oWh/kP3b146Gn7XPRSOfeIkbRSroMGbQs9HBmhJkrQ4DBycfIa4557RFov9O5mwjWLJytGZ4dXHjp0h7jxqdFtrZ82/mmrLAC1JkuavTOjdO/kMcXdZMO7be+j+0VgIwB3rYNkD4egzxrVRFGeLO9ZCU2vtv5/qkgFakiTNjczCxXX9PYW1ivv2FR/Lf8aPlb23d09hNnmw99BjNy0ZDcFdJ8KDzy5rnyi7+K59dd3e7U71q6YBOiIagS3A3Zl5Ttn4u4CLMrOjlvVIkqTDMDRYuChuyrA7VSgu/vR3Qw5P/3mNrYW2iJGfZYXZ4rUnFWePjzr04rvWZS7TpllT6xno1wC3AstKAxGxGVhZ4zokSVpcMgsztZOG3cmC7rjx/h4YOFDZZ7aUAm/HaPgthdsxgbhzdKylY9xYh60Tqjs1C9ARsR54OnAF8NriWCNwJfDHwLNqVYskSfPG8FBZi0MVrQ4TtUUMD07/eQ1Nh4bajrWw+iGj4y3jw2/noaG4pcMVJrRg1XIG+p3A5UD5pamvBL6Ymb8N/28WSdJCMthfWdidsv+3uL0Sze2HBtgVD5og6E4Qdst/mtpsfZCmUZMAHRHnADsy84aIOLs4djTwXODsCva/GLgYYMOGDbNWpyRpkStd1DZl2K2w1WG
i9YEPEYeG2bYVsPyYSYJux8Tht6UTGl0XQKqVyJxgncOZ/pCIvwdeCAwCbRR6oPuKP6VLZzcAd2bmsVMda/Pmzblly5ZZrFaSNC8N9sHBPYU7w/XuOYxVHY7worYxIXeqmd6yseZ2Z3ulOhYRN2Tm5vHjNfnnama+AXhDsZCzgcvKV+EojvdMF54lSQvc8HAh4JZC8MH7iz97xo3tOXSskgvbJryo7SgvapNUFf//HknSzBvorSAETzDWu3fqGeCmtsLd4JasLLQ6rNwIS1YUx1YUxkrPW5d7UZukWVHzAJ2Z1wPXTzDuGtCSVE+GhwqBdtLAu2fyYDzRjS1GxGjoLQXeVZvGBuNSCB4ztgKal8zqV5akSjgDLUkLWSYMHKx+Jvjg/dC7D5jiOpnm9nEh+MGHBuNDQvDKQhuEM8GS5jEDtCTNB6XZ4GpD8ME9MNQ3+XGjYWy4bV8Dq48dF4JXThyM7QOWtEgZoCWpVjILF7qVh92KQvBe6Ns79bFbOsaG2zXHVRCCVxZ6g10FQpKqYoCWpGoNDY5bDaKSFSOK24YHJj9uQ9PYcNuxDrpOnD4Ety2HppbZ/taSpCIDtKTFKbNwU4wJZ4InGivOBB+8v7BW8FRal41e9LZkBaw9cfq+4CUrCrPIzgZLUt0zQEtaGEp3kNu/A/bvgp4dsH/n6E9PcXz/TjiwuxCMhwcnP15D89hZ32UPhHUPnT4Ety2HxuZafGNJ0hwxQEuqX8NDhRnfCcPwzmIgLj7v2QmDByc+TttyWLoWlnZB1wnQvnrqELxkpXeIkyRNygAtqbYGeseG4TGzw2VheP9OOLBr4ptqRGMhDC/tgo6uwqoRI6/Xjj4v/dgfLEmaQQZoSUcms7C82iGzwxOF412F2zRPpHlpIQwv7SrcXW795kPDcOl12wrXEZYkzRkDtKRDDQ0U+oSn6iPeX/Z8qH+CgwS0ryq2TqyBo39n4jC8dE3hsWVpzb+mJEmHwwAtLRb9+ydvlRh/4d3B+yY+RmNLIRB3dBWWWFv3sNEA3FEMyqVe4/bV0Oh/YiRJC49/u0nz1fBw4QK7kTA8TTgeODDxcdqWj84Kd50AG886NAyXXrcu88I6SdKiZ4CW6slg38RheKJl2fbvghw69BjRWBZ+18Dqh0zQOlG23dsxS5JUFQN0JW76NHz+Twr/93Vjc/Fx/POmCcbLtjdMtL2SY5Q9b2ieYt+ybQ2Nc/0nppLMwkVzY2aDJ2ud2DX57ZqblxbCbsdaWLEBHvjwcWG4a3S2eMlKL7CTJGkWGaArseZ4ePQrCxdWDQ0ULpgaeZzg+WDvFO8pO8ZEs4czIRomD/OVhvAJ39Nc3H+qfwRMd4wJ/jEx31oChgYLF9hN10dc+pnyArvirPADThtdkq08DJdee4GdJEl1wwBdiaNPL/zMtOGhiQP58ODk4Xx8CJ/oPcMVhPzS68E+6Ouu7Biz5ZBQPl3Ir2RGf5pjTPYPgaGBsvA7SSA+cB+Qh36PxpaxrRLrThnXR1wWjr3ATpKkecu/wedSQ2Php7ltriuZXua4YD9NyB+eJuRPOZM/xTH6uqc4Rtl7Z2p2v3X5aOvEmuPgQWdOPlPsBXaSJC0KBmhVJmJ0Vpd50E4wPFzBTPwE/whoaBoNx+1r5sc/biRJUk0ZoLUwNTRAQ6srTEiSpBnnpfqSJElSFQzQkiRJUhUM0JIkSVIVDNCSJElSFQzQkiRJUhUM0JIkSVIVDNCSJElSFQzQkiRJUhUM0JIkSVIVDNCSJElSFSIz57qGqkTETuCuOfjoNcCuOfhc1Za/58XB3/Pi4O954fN3vDjM5e/5QZnZNX5w3gXouRIRWzJz81zXodnl73lx8Pe8OPh7Xvj8HS8O9fh7toVDkiRJqoIBWpIkSaqCAbpyV811AaoJf8+Lg7/nxcHf88Ln73hxqLvfsz3QkiRJUhWcgZYkSZKqYICWJEmSqmCAliRJkqpggNaiFhFNZc87ImJzRKyay5okHb6
IWOU5LGm2GaC1aEXEhcC9EXF7RDwVuAn4B+CnEfH8OS1OUsUiYkNEfLJ4p9ofAD+MiB3FsY1zXJ6kGRIRN891DSVN079FWrBeB5wAdAI/BX4nM38ZEeuArwGfmMviJFXsU8A7gRdk5hBARDQCzwU+Cfzu3JUmqRoR8ezJNgFH1bKWqbiMnRatiLgxM08vPv9NZh5dtu2mzDx1zoqTVLGI+EVmHlftNkn1JyIGgI8BEwXU52RmZ41LmpAz0FrMtkXE31OYgb4tIv4R+BzwROC3c1qZpGrcEBHvAz4C/Lo4dgxwAfCTOatK0uG4CXhHZv5s/IaIeOIc1DMhZ6C1aEXEMuBPKfwr9z3A7wMvBu4C/i4zDdHSPBARLcBLgHOBBxaH7wa+CHwwM/vmqjZJ1YmIxwB3Zea2CbZtzswtc1DWIQzQkiRJUhVchUOLVkQ0RsTLI+ItEXHmuG1/NVd1SapORLRHxOUR8ecR0RYRF0TEFyPi7RHRMdf1SapcRDQV/26+NiJuKv58NSJeERHNc11fiTPQWrQi4gNAO/BD4IXAf2bma4vbfpyZZ8xlfZIqExGfptD7vITCyjq3UliZ4xnAUZn5wjksT1IVIuITwB4K1zRsLw6vp3BNw6rMPG+OShvDAK1Fq3yljeINVd4HrAGeD3w/M39nLuuTVJnSijoRERQuAH5AZmbx9U9dUUeaPyLi9sw8vtpttWYLhxazltKTzBzMzIuBG4FvAv7fvtI8k4UZoX8vPpZeO0skzS/3RcRzI2Iko0ZEQ0ScB9w/h3WNYYDWYrYlIp5SPpCZfwt8GNg4JxVJOhxbSr3OmXlRaTAiHgJ0z1lVkg7H84DnMHqn4NuBe4FnF7fVBVs4JEkLVkRE+hedNC9FxGqAzNw917WM5wy0VCYirprrGiQdudK5bHiW5q/M3J2Zu+vx72YDtDTW5rkuQNKM8FyWFo66O58N0NJYO+a6AEkzwnNZWjjq7ny2B1qSJEmqgjPQ0gTqsd9KUvU8l6X5Zb7cJdgZaC1aEbFqsk0Ubr6wvpb1SDo8nsvSwjFf7hJsgNaiFRFDwF0U/pItyeLrB2Zmy4Q7SqornsvSwjFf7hLcNNcFSHPoTuAJmblt/IaI+PUc1CPp8HguSwvHmLsEAxdHxP+mzu4SbA+0FrN3Aisn2fb2GtYh6ci8E89laaGYF3cJtoVDkiRJqoItHFrUIuJE4FzggcWhu4EvZuatc1eVpGp5LksLx3w4n23h0KIVEX8BfJLChUY/LP4E8ImIeP1c1iapcp7L0sIxX85nWzi0aEXE7cApmTkwbrwF+HlmHjc3lUmqhueytHDMl/PZGWgtZsPA0ROMP6C4TdL84LksLRzz4ny2B1qL2aXANyLiF0BpqasNwLHAK+eqKElVuxTPZWmhuJR5cD7bwqFFLSIagEcy9kKFH2Xm0NxVJalansvSwjEfzmcDtFQmIi7OzKvmug5JR8ZzWVo46vF8tgdaGusVc12ApBnhuSwtHHV3PhugpbFirguQNCM8l6WFo+7OZ1s4pDIRsT4zt891HZKOjOeytHDU4/nsDLRUpnSCRsSL57oWSYfPc1laOOrxfHYGWppARGzLzA1zXYekI+O5LC0c9XQ+uw60Fq2IuGmyTcC6WtYi6fB5LksLx3w5nw3QWszWAb8P3D9uPIDv1r4cSYfJc1laOObF+WyA1mL2ZaAjM28cvyEirq95NZIOl+eytHDMi/PZHmhJkiSpCq7CIUmSJFXBAK1FKyJOjYjvR8SvI+KqiFhZtu2Hc1mbpMp5LksLx3w5nw3QWszeB7wJeBhwO/DfEfGQ4rbmuSpKUtU8l6WFY16cz15EqMWsMzOvLT5/R0TcAFwbES8EvDhAmj88l6WFY16czwZoLWoRsTwz9wJk5rci4g+BzwKr5rYySdXwXJYWjvlwPtvCocXsH4CTygcy8ybgCcDn5qQiSYfDc1laOObF+ewydpIkSVIVnIHWohURyyPibRFxW0TcFxG
7I+LW4tiKua5PUmU8l6WFY76czwZoLWafpnCr0LMzc1VmrgYeVxz79JxWJqkansvSwjEvzmdbOLRoRcT/ZOYJ1W6TVF88l6WFY76cz85AazG7KyIuj4h1pYGIWBcRfwH8eg7rklQdz2Vp4ZgX57MBWovZecBq4D8j4v6IuA+4nsIyOX80l4VJqornsrRwzIvz2RYOLWoRcSKwHvh+ZvaUjT+lbCF3SXXOc1laOObD+ewMtBatiHg18AXglcDPIuLcss1vnZuqJFXLc1laOObL+eydCLWYvQx4eGb2RMRG4DMRsTEz/wWIuS1NUhU8l6WFY16czwZoLWYNpf9rKDO3RsTZFE7UB1FHJ6mkaXkuSwvHvDifbeHQYnZvRJxeelE8Yc8B1gAPm6uiJFXNc1laOObF+exFhFq0ImI9MJiZ90yw7czM/M4clCWpSp7L0sIxX85nA7QkSZJUBVs4JEmSpCoYoCVJkqQqGKAlSZKkKhigJWmeiIjrI6IvIrojYm9E3BkRH42Ih1dxjK0Rcf5s1ilJC50BWpLml7dkZmdmLgceB9wFfD8injXHdUnSomGAlqR5KjPvysy/Aq4G3h0Fr4mI24qz1Nsi4u8johEgIr4EbAA+EBE9EXFdcbwpIt4YEbdHxJ6I+E5EbJ67byZJ9c0ALUnz3yeBBwInANuBpwLLgHOBi4CXAmTmHwDbgJdmZkdmPrm4/5uL730KsBr4EHBtRKys5ZeQpPnCAC1J89/24uPqzPxsZv4qC34CfBR4wmQ7RkQArwb+PDPvzMyhzPwg8Fvg6bNeuSTNQ01zXYAk6YitLz7ujojnA68FHkzhv/EtwPen2HcN0AF8KSLK76zVXHZcSVIZA7QkzX/nAXcD+4F/A54NfDUz+yPiHUB5P/PwuH13Ffd7Ymb+qBbFStJ8ZwuHJM1TEXFMRLwZuBB4DYWZ5AZgJzAQEb8LvHDcbvcAx5VeZGYC/wK8IyKOKx63IyJ+PyKOnv1vIUnzTxT+2ylJqncRcT3wKKAfSGA38F3gXzLzh8X3/G/gVRRaN74FbAVOz8yzi9ufBrwbWAV8PzOfGhFNFPqgX0qhbWM/hbaPV2Vmqb9aklRkgJYkSZKqYAuHJEmSVAUDtCRJklQFA7QkSZJUBQO0JEmSVAUDtCRJklQFA7QkSZJUBQO0JEmSVAUDtCRJklQFA7QkSZJUhf8PPso4sBs0Q7AAAAAASUVORK5CYII=", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], - "source": [ - "get_daily_graph(click_share_direction_daily, \n", - " 'Date', 'Accepted Share (%)', **{'shrinkTicks':True, 'hue':'pyDirection'})" + "ih.plot.model_performance_trend(by=\"Channel\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "The above graph can help identify how things evolve as a whole. It helps identify when the share for one direction (or channel etc.) goes down, which channel takes over goes down, which other " - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "same graph can be done for pyName as well. However, since there are usually so many pyNames, it would be hard to follow up and identify which offer had the highest share over time, and when an offer's share drops, which other offer takes over. So instead of looking over time, the below graph calculates a delta between the share percentage across two time frames. This is significanlty helpful when things in the strategy changes (priotitization, eligibility etc.) it helps identify how the system reacts once there is a change introduced." + "# Propensity Distribution\n", + "\n", + "IH also contains information about the factors that determine the prioritization of the offers: lever values, propensities etc.\n", + "\n", + "Here we show the distribution of the propensities of the offers made. \n" ] }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "", - "text/plain": [ - "
" - ] - }, - "metadata": { - "needs_background": "light" - }, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ - "plot_share_delta_graph(df[df['pyChannel']=='SMS'].reset_index(drop=True), 'Clicked', 'pyName', dates=4)" + "import plotly.figure_factory as ff\n", + "\n", + "channels = [\n", + " c\n", + " for c in ih.data.select(pl.col.Channel.unique().sort())\n", + " .collect()[\"Channel\"]\n", + " .to_list()\n", + " if c is not None and c != \"\"\n", + " # if c == \"Web\"\n", + "]\n", + "\n", + "plot_data = [\n", + " ih.data.filter(pl.col.Channel == c)\n", + " .select([\"Propensity\"])\n", + " .collect()[\"Propensity\"]\n", + " .sample(fraction=0.1)\n", + " .to_list()\n", + " for c in channels\n", + "]\n", + "\n", + "fig = ff.create_distplot(plot_data, group_labels=channels, show_hist=False)\n", + "fig.update_layout(title=\"Propensity Distribution\")\n", + "fig" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ - "In the above graph, the clicked outcome shares for MasterCardGold has increased by 5% recently. The time range can be specified either by defining a lookback window (in that case only enter an integer) or by a list of two tuples where the first tuple represents the earlier time range and the second tuple represent the recent time range" + "# Response Analysis\n", + "\n", + "Time is one of the dimensions in IH. Here we take a look at how subsequent responses relate to the original decision. 
It shows, for example, how much time there typically is between the moment of decision and the click.\n", + "\n", + "This type of analysis is usually part of attribution analysis when considering conversion modeling.\n" ] }, { @@ -964,15 +240,41 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "import plotly.express as px\n", + "\n", + "outcomes = [\n", + " c\n", + " for c in ih.data.select(pl.col.Outcome.unique().sort())\n", + " .collect()[\"Outcome\"]\n", + " .to_list()\n", + " if c is not None and c != \"\"\n", + "]\n", + "plot_data=ih.data.filter(pl.col.OutcomeTime.is_not_null()).group_by(\"InteractionID\").agg(\n", + " [pl.col.OutcomeTime.min().alias(\"Decision_Time\")]+\n", + " [pl.col.OutcomeTime.filter(pl.col.Outcome == o).max().alias(o) for o in outcomes],\n", + ").collect().unpivot(\n", + " index=[\"InteractionID\", \"Decision_Time\"],\n", + " variable_name=\"Outcome\",\n", + " value_name=\"Time\",\n", + ").with_columns(\n", + " Duration = (pl.col.Time - pl.col.Decision_Time).dt.total_seconds()\n", + ").filter(pl.col.Duration > 0)\n", + "fig = px.box(\n", + " plot_data,\n", + " x=\"Duration\",\n", + " y=\"Outcome\",\n", + " color=\"Outcome\",\n", + " template=\"pega\"\n", + ")\n", + "fig" + ] } ], "metadata": { - "interpreter": { - "hash": "0c5c31b7614ab5f7bbff6555bdc6f3ec4cea8754d51936ee45052251e94c1071" - }, "kernelspec": { - "display_name": "Python 3.9.4 64-bit ('newvfenv': conda)", + "display_name": ".venv", + "language": "python", "name": "python3" }, "language_info": { @@ -984,8 +286,7 @@ "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.9.9" + "pygments_lexer": "ipython3" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 00f7d9d3..88a8519c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = ['polars==1.16', 'typing_extensions'] version = {attr="pdstools.__version__"} 
[project.optional-dependencies] -adm = ['plotly[express]>=6.0.0rc0', 'requests'] +adm = ['plotly', 'requests'] pega_io = ['aioboto3', 'polars_hash'] api = ['httpx', 'pydantic', 'anyio'] healthcheck = ['pdstools[adm]', 'great_tables>=0.13', 'quarto', 'papermill', 'xlsxwriter>=3.0', 'pydot'] diff --git a/python/pdstools/__init__.py b/python/pdstools/__init__.py index 7d1a9cbd..cc881f7d 100644 --- a/python/pdstools/__init__.py +++ b/python/pdstools/__init__.py @@ -1,12 +1,13 @@ """Pega Data Scientist Tools Python library""" -__version__ = "4.0.0" +__version__ = "4.0.1" from pathlib import Path from polars import enable_string_cache from .adm.ADMDatamart import ADMDatamart +from .ih.IH import IH from .infinity import Infinity from .pega_io import Anonymization, read_ds_export from .prediction.Prediction import Prediction @@ -23,6 +24,7 @@ __all__ = [ "ADMDatamart", + "IH", "Anonymization", "read_ds_export", "Prediction", diff --git a/python/pdstools/adm/ADMDatamart.py b/python/pdstools/adm/ADMDatamart.py index 9e5dcfbd..fc90fadf 100644 --- a/python/pdstools/adm/ADMDatamart.py +++ b/python/pdstools/adm/ADMDatamart.py @@ -120,7 +120,9 @@ def __init__( self.aggregates = Aggregates(datamart=self) self.agb = AGB(datamart=self) self.generate = Reports(datamart=self) - self.cdh_guidelines = CDHGuidelines() + self.cdh_guidelines = ( + CDHGuidelines() + ) # not sure if this should be part of the ADM DM self.model_data = self._validate_model_data( model_df, query=query, extract_pyname_keys=extract_pyname_keys @@ -313,6 +315,11 @@ def _validate_model_data( if "Treatment" in schema.names(): self.context_keys.append("Treatment") + # Model technique (NaiveBayes or GradientBoost) added in '24 (US-648869 and related) + if "ModelTechnique" not in schema.names(): + df = df.with_columns( + ModelTechnique=pl.lit(None), + ) self.context_keys = [k for k in self.context_keys if k in schema.names()] df = df.with_columns( @@ -402,7 +409,6 @@ def apply_predictor_categorization( categorization() 
if callable(categorization) else categorization ) - if df is not None: return df.with_columns(PredictorCategory=categorization_expr) diff --git a/python/pdstools/adm/Aggregates.py b/python/pdstools/adm/Aggregates.py index c5f0dc15..ef0b39a4 100644 --- a/python/pdstools/adm/Aggregates.py +++ b/python/pdstools/adm/Aggregates.py @@ -460,8 +460,7 @@ def name_normalizer(x): .agg( pl.col("SnapshotTime").min().cast(pl.Date).alias("DateRange Min"), pl.col("SnapshotTime").max().cast(pl.Date).alias("DateRange Max"), - pl.col("Positives").sum(), - pl.col("ResponseCount").sum(), + pl.sum(["Positives", "ResponseCount"]), (cdh_utils.weighted_performance_polars() * 100).alias("Performance"), pl.col("Configuration").cast(pl.Utf8), pl.col("Configuration") @@ -469,6 +468,12 @@ def name_normalizer(x): .str.to_uppercase() .is_in([x.upper() for x in self.cdh_guidelines.standard_configurations]) .alias("isNBADModelConfiguration"), + (pl.col("ModelTechnique") == "GradientBoost") + .any(ignore_nulls=False) + .alias("usesAGB"), + (pl.col("ModelTechnique") == "GradientBoost") + .all(ignore_nulls=False) + .alias("usesAGBOnly"), actionIdentifierExpr.drop_nulls() .n_unique() .alias("Total Number of Actions"), @@ -561,7 +566,70 @@ def name_normalizer(x): ) ) - def predictor_last_snapshot(self) -> Optional[pl.DataFrame]: + def summary_by_configuration(self) -> pl.DataFrame: + """ + Generates a summary of the ADM model configurations. + + Returns + ------- + pl.DataFrame + A Polars DataFrame containing the configuration summary. 
+ """ + + action_dim_agg = [pl.col("Name").n_unique().alias("Actions")] + if "Treatment" in self.datamart.context_keys: + action_dim_agg += [ + pl.col("Treatment").n_unique().alias("Unique Treatments") + ] + else: + action_dim_agg += [pl.lit(0).alias("Unique Treatments")] + + if "Issue" in self.datamart.context_keys: + action_dim_agg += [ + pl.col("Issue").cast(pl.String).unique().alias("Used for (Issues)") + ] + + group_by_cols = ["Configuration"] + [ + c for c in ["Channel", "Direction"] if c in self.datamart.context_keys + ] + configuration_summary = ( + self.last(table="model_data") + .group_by(group_by_cols) + .agg( + [ + pl.when((pl.col("ModelTechnique") == "GradientBoost").any()) + .then(pl.lit("Yes")) + .when(pl.col("ModelTechnique").is_null().any()) + .then(pl.lit("Unknown")) + .otherwise(pl.lit("No")) + .alias("AGB") + ] + + [ + pl.col("ModelID").n_unique(), + ] + + action_dim_agg + + [pl.sum(["ResponseCount", "Positives"])], + ) + .with_columns( + [ + # pl.col("Configuration") + # .is_in(standardNBADNames.keys()) + # .alias("Standard in NBAD Framework"), + (pl.col("ModelID") / pl.col("Actions")) + .round(2) + .alias("ModelsPerAction"), + ] + ) + .sort(group_by_cols) + ) + if "Issue" in self.datamart.context_keys: + configuration_summary = configuration_summary.with_columns( + pl.col("Used for (Issues)").list.unique().list.sort().list.join(", ") + ) + + return configuration_summary + + def predictors_overview(self) -> Optional[pl.DataFrame]: """ Generate a summary of the last snapshot of predictor data. 
@@ -580,7 +648,7 @@ def predictor_last_snapshot(self) -> Optional[pl.DataFrame]: predictor_summary = ( self.last(table="predictor_data") - .filter(pl.col("PredictorName") != "Classifier") + .filter(pl.col("PredictorName") != "Classifier") # TODO not name, there is a type .join( self.last(table="model_data") .select(["ModelID"] + model_identifiers) @@ -624,7 +692,7 @@ def predictor_last_snapshot(self) -> Optional[pl.DataFrame]: ) return predictor_summary - except ValueError: + except ValueError: # really? swallowing? return None def overall_summary( @@ -730,6 +798,8 @@ def overall_summary( # TODO there was something about OmniAdaptiveModel here - but I don't recall what was the issue pl.col("usesNBAD").any(), pl.col("usesNBADOnly").all(), + pl.col("usesAGB").any(), + pl.col("usesAGBOnly").all(), # pl.lit(usesNBAD).alias("usesNBAD"), # ((pl.len() > 0) & pl.lit(usesNBAD and usesNBADOnly)).alias( # "usesNBADOnly" diff --git a/python/pdstools/adm/Reports.py b/python/pdstools/adm/Reports.py index 4fa25f5c..b3626cef 100644 --- a/python/pdstools/adm/Reports.py +++ b/python/pdstools/adm/Reports.py @@ -565,8 +565,8 @@ def excel_report( } if self.datamart.predictor_data is not None: - tabs["predictor_last_snapshot"] = ( - self.datamart.aggregates.predictor_last_snapshot() + tabs["predictors_overview"] = ( + self.datamart.aggregates.predictors_overview() ) if predictor_binning and self.datamart.predictor_data is not None: diff --git a/python/pdstools/app/decision_analyzer/pages/10_Business_Value_Analysis.py b/python/pdstools/app/decision_analyzer/pages/10_Business_Value_Analysis.py index 35a85c72..d5f56ecd 100644 --- a/python/pdstools/app/decision_analyzer/pages/10_Business_Value_Analysis.py +++ b/python/pdstools/app/decision_analyzer/pages/10_Business_Value_Analysis.py @@ -1,52 +1,61 @@ -import polars as pl import streamlit as st -from da_streamlit_utils import get_current_scope_index, st_value_distribution -from utils import NBADScope_Mapping, ensure_data - -# TODO Finish up 
to show effect on proposition distribution (side to side) - -"# Business Value Analysis" - -""" -A closer look at the values associated with actions. - -* Is my value distribution very skewed? Are there actions with significantly different values than the others? -* What's the range of the values? - -""" -ensure_data() st.warning( - "Current sample data action values are artificial so the analysis is just an example." -) - -st.session_state["sidebar"] = st.sidebar - -scope_options = st.session_state.decision_data.getPossibleScopeValues() -if "scope" not in st.session_state: - st.session_state.scope = scope_options[0] - -valueData = st.session_state.decision_data.getValueDistributionData() - -with st.container(border=True): - st.plotly_chart( - st_value_distribution(valueData, st.session_state.scope), - use_container_width=True, - ) - - scope_index = get_current_scope_index(scope_options) - st.selectbox( - "Granularity:", - options=scope_options, - format_func=lambda option: NBADScope_Mapping[option], - index=scope_index, - key="scope", - ) - -"Actions having different values:" - -st.dataframe( - valueData.filter(pl.col("Value_min") != pl.col("Value_max")).collect(), - hide_index=True, - column_config=NBADScope_Mapping, + "In maintenance!!, please see: https://streamlit-dev.dsmcloud.io/Business%20Value%20Analysis for the older version. If the link doesn't work, contact Yusuf Uyanik." ) +# import polars as pl +# import streamlit as st + +# from da_streamlit_utils import ( +# get_current_scope_index, +# st_value_distribution, +# ensure_data, +# ) +# from pdstools.decision_analyzer.utils import NBADScope_Mapping + +# # TODO Finish up to show effect on proposition distribution (side to side) + +# "# Business Value Analysis" + +# """ +# A closer look at the values associated with actions. + +# * Is my value distribution very skewed? Are there actions with significantly different values than the others? +# * What's the range of the values? 
+ +# """ +# ensure_data() +# st.warning( +# "Current sample data action values are artificial so the analysis is just an example." +# ) + +# st.session_state["sidebar"] = st.sidebar + +# scope_options = st.session_state.decision_data.getPossibleScopeValues() +# if "scope" not in st.session_state: +# st.session_state.scope = scope_options[0] + +# valueData = st.session_state.decision_data.getValueDistributionData() + +# with st.container(border=True): +# st.plotly_chart( +# st_value_distribution(valueData, st.session_state.scope), +# use_container_width=True, +# ) + +# scope_index = get_current_scope_index(scope_options) +# st.selectbox( +# "Granularity:", +# options=scope_options, +# format_func=lambda option: NBADScope_Mapping[option], +# index=scope_index, +# key="scope", +# ) + +# "Actions having different values:" + +# st.dataframe( +# valueData.filter(pl.col("Value_min") != pl.col("Value_max")).collect(), +# hide_index=True, +# column_config=NBADScope_Mapping, +# ) diff --git a/python/pdstools/app/decision_analyzer/pages/11_Business_Lever_Analysis.py b/python/pdstools/app/decision_analyzer/pages/11_Business_Lever_Analysis.py index b9523beb..3020e087 100644 --- a/python/pdstools/app/decision_analyzer/pages/11_Business_Lever_Analysis.py +++ b/python/pdstools/app/decision_analyzer/pages/11_Business_Lever_Analysis.py @@ -3,7 +3,10 @@ import polars as pl import streamlit as st -from utils import ensure_data, find_lever_value +from da_streamlit_utils import ( + ensure_data, +) +from pdstools.decision_analyzer.utils import find_lever_value # TODO not so sure what to do with this tool - maybe generalize to work across a selection not just a single action and figure out a multiplier # TODO but do show the effect of levering right away (distributions side to side) just like we should do in the thresholding analysis (share code) diff --git a/python/pdstools/app/decision_analyzer/pages/4_Action_Funnel.py b/python/pdstools/app/decision_analyzer/pages/4_Action_Funnel.py 
index 93fde563..92730742 100644 --- a/python/pdstools/app/decision_analyzer/pages/4_Action_Funnel.py +++ b/python/pdstools/app/decision_analyzer/pages/4_Action_Funnel.py @@ -36,7 +36,6 @@ st.session_state["sidebar"] = st.sidebar if "local_filters" in st.session_state: del st.session_state["local_filters"] - with st.session_state["sidebar"]: scope_options = st.session_state.decision_data.getPossibleScopeValues() stage_options = st.session_state.decision_data.getPossibleStageValues() diff --git a/python/pdstools/app/decision_analyzer/pages/8_Offer_Quality_Analysis.py b/python/pdstools/app/decision_analyzer/pages/8_Offer_Quality_Analysis.py index 07db5069..5d37cb50 100644 --- a/python/pdstools/app/decision_analyzer/pages/8_Offer_Quality_Analysis.py +++ b/python/pdstools/app/decision_analyzer/pages/8_Offer_Quality_Analysis.py @@ -1,13 +1,13 @@ import streamlit as st -from plots import getTrendChart, offer_quality_piecharts +from pdstools.decision_analyzer.plots import getTrendChart, offer_quality_piecharts from da_streamlit_utils import ( get_current_scope_index, get_current_stage_index, + ensure_data, ) -from utils import ( +from pdstools.decision_analyzer.utils import ( NBADScope_Mapping, - ensure_data, filtered_action_counts, ) diff --git a/python/pdstools/app/decision_analyzer/pages/9_Thresholding_Analysis.py b/python/pdstools/app/decision_analyzer/pages/9_Thresholding_Analysis.py index 6bcd938b..edf87779 100644 --- a/python/pdstools/app/decision_analyzer/pages/9_Thresholding_Analysis.py +++ b/python/pdstools/app/decision_analyzer/pages/9_Thresholding_Analysis.py @@ -2,8 +2,7 @@ import polars as pl import streamlit as st -from plots import distribution, threshold_deciles -from utils import ensure_data +from da_streamlit_utils import ensure_data # TODO Interactive Thresholding isn't working properly yet. Also show the total numbers. 
# TODO Instead of priority/propensity side to side have a drop-down to select which property to show @@ -80,7 +79,9 @@ # st.dataframe(plotData) st.plotly_chart( - threshold_deciles(threshold_deciles_data, thresholding_mapping[thresholding_on]), + st.session_state.decision_data.plot.threshold_deciles( + thresholding_on, thresholding_mapping[thresholding_on] + ), use_container_width=True, ) @@ -98,13 +99,11 @@ ), # Hmm, probalby not the right way # additional_filters=((pl.col(thresholding_on).list.eval(pl.element() > current_threshold)).list.any()), ) -# st.write(xxx.head().collect()) st.write( - distribution( + st.session_state.decision_data.plot.distribution( xxx, scope="pyIssue", breakdown="pyGroup", - title="Effect of Thresholding", horizontal=True, ) ) diff --git a/python/pdstools/decision_analyzer/decision_data.py b/python/pdstools/decision_analyzer/decision_data.py index 71b1a6b9..1664728a 100644 --- a/python/pdstools/decision_analyzer/decision_data.py +++ b/python/pdstools/decision_analyzer/decision_data.py @@ -81,7 +81,7 @@ def __init__(self, raw_data: pl.LazyFrame): "pyIssue", "pyGroup", "pyName", - # "pyTreatment", # should be in there dependent on what's in the data + "pyTreatment", # should be in there dependent on what's in the data "pyChannel", "pyDirection", "pxComponentName", diff --git a/python/pdstools/decision_analyzer/plots.py b/python/pdstools/decision_analyzer/plots.py index 0ba0d41f..50e5c856 100644 --- a/python/pdstools/decision_analyzer/plots.py +++ b/python/pdstools/decision_analyzer/plots.py @@ -13,8 +13,8 @@ class Plot: def __init__(self, decision_data): self._decision_data = decision_data - def threshold_deciles(self, thresholding_name, return_df=False): - df = self._decision_data.whatever_preprocessing + def threshold_deciles(self, thresholding_on, thresholding_name, return_df=False): + df = self._decision_data.getThresholdingData(thresholding_on) if return_df: return df @@ -590,12 +590,17 @@ def offer_quality_piecharts( 
class Aggregates(LazyNamespace):
    """Aggregations over Interaction History (IH) data.

    Holds a reference to the owning ``IH`` object and reads its ``data``
    frame and its ``positive_outcome_labels`` / ``negative_outcome_labels``
    dictionaries (metric name -> list of outcome labels).
    """

    def __init__(self, ih: "IH_Class"):
        super().__init__()
        self.ih = ih

    def _summary_interactions(
        self,
        by: Optional[Union[str, List[str]]] = None,
        every: Optional[Union[str, timedelta]] = None,
        query: Optional[QUERY] = None,
    ) -> pl.LazyFrame:
        """Reduce the IH data to one row per interaction (per group).

        Rows are grouped by the optional ``by`` keys, by ``OutcomeTime``
        (only when ``every`` is given, after truncating it to that interval)
        and by ``InteractionID``. For every metric in
        ``self.ih.positive_outcome_labels`` a column
        ``Interaction_Outcome_<metric>`` is produced: True when any outcome of
        the interaction is one of the positive labels, else False when any is
        one of the negative labels. There is no ``otherwise`` branch, so
        interactions matching neither set get no value for that metric.

        Parameters
        ----------
        by : Optional[Union[str, List[str]]], optional
            Additional grouping keys, by default None
        every : Optional[Union[str, timedelta]], optional
            Interval passed to ``dt.truncate`` on ``OutcomeTime``, by default None
        query : Optional[QUERY], optional
            Passed to ``cdh_utils._apply_query`` together with the source
            data before grouping, by default None

        Returns
        -------
        pl.LazyFrame
            The grouping keys, one ``Interaction_Outcome_<metric>`` column per
            metric, the last ``Propensity`` of the interaction and the sorted
            unique ``Outcomes`` seen for it.
        """
        if every is not None:
            source = self.ih.data.with_columns(pl.col.OutcomeTime.dt.truncate(every))
        else:
            source = self.ih.data

        # NOTE(review): the None check below implies safe_flatten_list can
        # return None (presumably when nothing is left to group by) - confirm
        # against cdh_utils.
        group_by_clause = cdh_utils.safe_flatten_list(
            [by] + (["OutcomeTime"] if every is not None else [])
        )

        interactions = (
            cdh_utils._apply_query(source, query)
            .group_by(
                (group_by_clause + ["InteractionID"])
                if group_by_clause is not None
                else ["InteractionID"]
            )
            .agg(
                # Take only one outcome per interaction. TODO should perhaps be the last one.
                [
                    pl.when(
                        pl.col.Outcome.is_in(
                            self.ih.positive_outcome_labels[metric]
                        ).any()
                    )
                    .then(pl.lit(True))
                    .when(
                        pl.col.Outcome.is_in(
                            self.ih.negative_outcome_labels[metric]
                        ).any()
                    )
                    .then(pl.lit(False))
                    .alias(f"Interaction_Outcome_{metric}")
                    for metric in self.ih.positive_outcome_labels.keys()
                ],
                Propensity=pl.col.Propensity.last(),
                Outcomes=pl.col.Outcome.unique().sort(),  # for debugging
            )
        )
        return interactions

    def summary_success_rates(
        self,
        by: Optional[Union[str, List[str]]] = None,
        every: Optional[Union[str, timedelta]] = None,
        query: Optional[QUERY] = None,
    ) -> pl.LazyFrame:
        """Groups the IH data summarizing into success rates (SuccessRate) and standard error (StdErr).

        It optionally groups by one or more dimensions (e.g. Experiment, Channel, Issue etc). When
        given, the 'every' argument is used to divide the timerange into buckets. It uses the same string
        language as Polars.

        Every interaction is considered to have only one outcome: positive, negative or none. When any
        outcome in the interaction is in the positive labels, the outcome is considered positive. Next,
        when any is in the negative labels, the outcome of the interaction is considered negative. Otherwise
        there is no defined outcome and the interaction is ignored in calculations of success rate or error.

        For every metric the success rate is Positives / (Positives + Negatives) and the standard
        error is sqrt(SuccessRate * (1 - SuccessRate) / (Positives + Negatives)).

        Parameters
        ----------
        by : Optional[Union[str, List[str]]], optional
            Grouping keys, by default None
        every : Optional[Union[str, timedelta]], optional
            Every interval start and period length, by default None
        query : Optional[QUERY], optional
            Forwarded to the per-interaction summary, which applies it to the
            data before aggregating, by default None

        Returns
        -------
        pl.LazyFrame
            A polars frame with the grouping keys and columns for the total number of Positives, Negatives,
            number of Interactions, success rate (SuccessRate) and standard error (StdErr).
            The Positives/Negatives/SuccessRate/StdErr columns are suffixed with the metric
            name (e.g. ``SuccessRate_Engagement``).
        """

        # Same grouping as used inside _summary_interactions, minus the
        # InteractionID that is aggregated away here.
        group_by_clause = cdh_utils.safe_flatten_list(
            [by] + (["OutcomeTime"] if every is not None else [])
        )

        summary = (
            self._summary_interactions(by, every, query)
            .group_by(group_by_clause)
            .agg(
                # Count the interactions whose outcome for the metric is True ...
                [
                    pl.col(f"Interaction_Outcome_{metric}")
                    .filter(pl.col(f"Interaction_Outcome_{metric}"))
                    .len()
                    .alias(f"Positives_{metric}")
                    for metric in self.ih.positive_outcome_labels.keys()
                ]
                # ... and those whose outcome is False.
                + [
                    pl.col(f"Interaction_Outcome_{metric}")
                    .filter(pl.col(f"Interaction_Outcome_{metric}").not_())
                    .len()
                    .alias(f"Negatives_{metric}")
                    for metric in self.ih.positive_outcome_labels.keys()
                ],
                Interactions=pl.len(),
                Outcomes=pl.col.Outcomes.list.explode()
                .unique()
                .sort()
                .drop_nulls(),  # for debugging
            )
            .with_columns(
                [
                    (
                        pl.col(f"Positives_{metric}")
                        / (
                            pl.col(f"Positives_{metric}")
                            + pl.col(f"Negatives_{metric}")
                        )
                    ).alias(f"SuccessRate_{metric}")
                    for metric in self.ih.positive_outcome_labels.keys()
                ]
            )
            .with_columns(
                [
                    (
                        (
                            pl.col(f"SuccessRate_{metric}")
                            * (1 - pl.col(f"SuccessRate_{metric}"))
                        )
                        / (
                            pl.col(f"Positives_{metric}")
                            + pl.col(f"Negatives_{metric}")
                        )
                    )
                    .sqrt()
                    .alias(f"StdErr_{metric}")
                    for metric in self.ih.positive_outcome_labels.keys()
                ]
            )
        )

        if group_by_clause is None:
            summary = summary.drop("literal")  # created by empty group_by
        else:
            summary = summary.sort(group_by_clause)

        return summary

    def summary_outcomes(
        self,
        by: Optional[Union[str, List[str]]] = None,
        every: Optional[Union[str, timedelta]] = None,
        query: Optional[QUERY] = None,
    ):
        """Count the IH records per outcome.

        Records are grouped by ``Outcome``, the optional ``by`` keys and, when
        ``every`` is given, by ``OutcomeTime`` truncated to that interval.

        Parameters
        ----------
        by : Optional[Union[str, List[str]]], optional
            Additional grouping keys, by default None
        every : Optional[Union[str, timedelta]], optional
            Interval passed to ``dt.truncate`` on ``OutcomeTime``, by default None
        query : Optional[QUERY], optional
            Passed to ``cdh_utils._apply_query`` together with the source
            data before grouping, by default None

        Returns
        -------
        A frame with the grouping keys and a ``Count`` column, sorted by
        ``Count`` and then the grouping keys. Presumably a ``pl.LazyFrame``
        like ``self.ih.data`` - callers in the plots module call
        ``.collect()`` on it.
        """

        if every is not None:
            source = self.ih.data.with_columns(pl.col.OutcomeTime.dt.truncate(every))
        else:
            source = self.ih.data

        group_by_clause = cdh_utils.safe_flatten_list(
            ["Outcome"] + [by] + (["OutcomeTime"] if every is not None else [])
        )

        summary = (
            cdh_utils._apply_query(source, query)
            .group_by(group_by_clause)
            .agg(Count=pl.len())
        ).sort(cdh_utils.safe_flatten_list(["Count"]+group_by_clause))

        return summary
@classmethod
def from_mock_data(cls, days=90, n=100000):
    """Generate a synthetic Interaction History data set.

    Creates ``n`` impressions for a single action, spread evenly over the
    last ``days`` days and alternating between the "Conversion-Test" and
    "Conversion-Control" experiment groups. A random 20% of the impressions
    get a follow-up accept/click record, and a random share of those accepts
    (50% for test, 30% for control) get a follow-up "Conversion" record.
    Records whose timestamp would fall in the future are dropped.

    Parameters
    ----------
    days : int, optional
        Number of days, counting back from now, over which the impressions
        are spread, by default 90
    n : int, optional
        Number of impressions (interactions) to generate, by default 100000

    Returns
    -------
    An instance of this class wrapping the generated data as a lazy frame.
    """
    accept_rate = 0.2
    accept_avg_duration_minutes = 10
    convert_over_accept_rate_test = 0.5
    convert_over_accept_rate_control = 0.3
    convert_avg_duration_days = 2

    now = datetime.datetime.now()

    # Alternate the two experiment groups row by row. Indexing with i % 2
    # (instead of repeating the pair int(n / 2) times) gives the same
    # sequence for even n and keeps the column at exactly n entries when n
    # is odd, so it always matches the length of the other columns.
    experiment_groups = ["Conversion-Test", "Conversion-Control"]

    # TODO maybe the timestamp formatting should come from a shared helper
    # in pdstools instead of relying on native datetimes here.
    ih_fake_impressions = pl.DataFrame(
        {
            "pxInteractionID": [str(int(1e9 + i)) for i in range(n)],
            "pyChannel": random.choices(["Web", "Email"], k=n),
            "pyIssue": "Acquisition",
            "pyGroup": "Phones",
            "pyName": "AppleIPhone1564GB",
            "ExperimentGroup": [experiment_groups[i % 2] for i in range(n)],
            "pxOutcomeTime": [
                (now - datetime.timedelta(days=i * days / n)) for i in range(n)
            ],
            # Helper columns (dropped at the end): random delays until the
            # accept resp. the conversion, uniform around the averages.
            "__AcceptDurationMinutes__": [
                random.uniform(0, 2 * accept_avg_duration_minutes) for i in range(n)
            ],
            "__ConvertDurationDays__": [
                random.uniform(0, 2 * convert_avg_duration_days) for i in range(n)
            ],
        }
    ).with_columns(
        pyOutcome=pl.when(pl.col.pyChannel == "Web")
        .then(pl.lit("Impression"))
        .otherwise(pl.lit("Pending"))
    )

    # Accepts happen some minutes after the impression they belong to.
    ih_fake_accepts = ih_fake_impressions.sample(fraction=accept_rate).with_columns(
        pl.col.pxOutcomeTime
        + pl.duration(minutes=pl.col("__AcceptDurationMinutes__")),
        pyOutcome=pl.when(pl.col.pyChannel == "Web")
        .then(pl.lit("Clicked"))
        .otherwise(pl.lit("Accepted")),
    )

    # Conversions happen some days after the accept; the test group
    # converts at a higher rate than the control group.
    ih_fake_converts = [
        ih_fake_accepts.filter(pl.col.ExperimentGroup == group)
        .sample(fraction=convert_rate)
        .with_columns(
            pl.col.pxOutcomeTime
            + pl.duration(days=pl.col("__ConvertDurationDays__")),
            pyOutcome=pl.lit("Conversion"),
        )
        for group, convert_rate in (
            ("Conversion-Test", convert_over_accept_rate_test),
            ("Conversion-Control", convert_over_accept_rate_control),
        )
    ]

    ih_data = (
        pl.concat([ih_fake_impressions, ih_fake_accepts] + ih_fake_converts)
        # Follow-up outcomes may have been pushed past "now"; drop those.
        .filter(pl.col("pxOutcomeTime") < pl.lit(now))
        .drop(
            [
                "__AcceptDurationMinutes__",
                "__ConvertDurationDays__",
            ]
        )
        .sort("pxInteractionID", "pxOutcomeTime")
    )

    # Use cls rather than the hard-coded class so subclasses get an
    # instance of their own type from this alternate constructor.
    return cls(ih_data.lazy())
specs=[[{"type": "indicator"} for c in range(cols)] for t in range(rows)], + ) + fig.update_layout( + height=270 * rows, + autosize=True, + title=title, + margin=dict(b=10, t=120, l=10, r=10), + ) + index = 0 + for row in plot_data.iter_rows(named=True): + ref_value = ( + reference_values.get(row[by], None) if reference_values else None + ) + gauge = { + "axis": {"tickformat": ",.2%"}, + "threshold": { + "line": {"color": "red", "width": 2}, + "thickness": 0.75, + "value": ref_value, + }, + } + if ref_value: + if row[f"SuccessRate_{metric}"] < ref_value: + gauge = { + "axis": {"tickformat": ",.2%"}, + "bar": { + "color": ( + "#EC5300" + if row[f"SuccessRate_{metric}"] < (0.75 * ref_value) + else "#EC9B00" + ) + }, + "threshold": { + "line": {"color": "red", "width": 2}, + "thickness": 0.75, + "value": ref_value, + }, + } + + trace1 = go.Indicator( + mode="gauge+number+delta", + number={"valueformat": ",.2%"}, + value=row[f"SuccessRate_{metric}"], + delta={"reference": ref_value, "valueformat": ",.2%"}, + title={"text": f"{row[by]}: {row[condition]}"}, + gauge=gauge, + ) + r, c = divmod(index, cols) + fig.add_trace(trace1, row=(r + 1), col=(c + 1)) + index = index + 1 + + return fig + + def response_count_tree_map( + self, + by: Optional[List[str]] = None, + title: Optional[str] = None, + query: Optional[QUERY] = None, + return_df: Optional[bool] = False, + ): + + if by is None: + by = [ + f + for f in ["Direction", "Channel", "Issue", "Group", "Name"] + if f in self.ih.data.collect_schema().names() + ] + elif isinstance(by, str): + by = [by] + + plot_data = self.ih.aggregates.summary_outcomes( + by=by, + query=query, + ) + if return_df: + return plot_data + + fig = px.treemap( + plot_data.collect(), + path=[px.Constant("ALL")] + ["Outcome"] + by, + values="Count", + color="Count", + branchvalues="total", + # color_continuous_scale=px.colors.sequential.RdBu_r, + title=title, + height=640, + template="pega", + ) + fig.update_coloraxes(showscale=False) + 
def action_distribution(
    self,
    # TODO change - one is the by, when multiple join together
    # other is the facet dimension/condition
    by: Optional[str] = "Name",
    title: Optional[str] = "Action Distribution",
    query: Optional[QUERY] = None,
    return_df: Optional[bool] = False,
):
    """Horizontal bar chart of the number of records per value of ``by``.

    Parameters
    ----------
    by : Optional[str], optional
        Column to break the counts down by; it is also used as the y-axis
        of the chart, by default "Name"
    title : Optional[str], optional
        Chart title, by default "Action Distribution"
    query : Optional[QUERY], optional
        Forwarded to ``summary_outcomes``, by default None
    return_df : Optional[bool], optional
        When true, return the (uncollected) summary data instead of the
        figure, by default False

    Returns
    -------
    The plotly figure, or the summary frame when ``return_df`` is set.
    """
    plot_data = self.ih.aggregates.summary_outcomes(by=by, query=query)

    if return_df:
        return plot_data

    fig = px.bar(
        plot_data.collect(),
        x="Count",
        # Plot the column the data was grouped by. This used to be the
        # literal "Name", which is not a column of the summary for any
        # other value of `by`.
        y=by,
        template="pega",
        title=title,
    )
    return fig
def response_counts(
    self,
    every: Union[str, timedelta] = "1d",
    by: Optional[str] = None,
    title: Optional[str] = "Responses",
    query: Optional[QUERY] = None,
    return_df: Optional[bool] = False,
):
    """Bar chart of the number of responses over time, colored by outcome.

    Parameters
    ----------
    every : Union[str, timedelta], optional
        Interval used to bucket ``OutcomeTime``, by default "1d"
    by : Optional[str], optional
        Optional extra grouping column; used as the row facet of the
        chart, by default None
    title : Optional[str], optional
        Chart title, by default "Responses"
    query : Optional[QUERY], optional
        Forwarded to ``summary_outcomes``, by default None
    return_df : Optional[bool], optional
        When true, return the summary data as a lazy frame instead of the
        figure, by default False

    Returns
    -------
    The plotly figure, or the lazy summary frame when ``return_df`` is set.
    """
    # Go straight to the aggregates namespace of the IH object (this was a
    # redundant `aggregates.ih.aggregates` round trip) and keep the frame
    # lazy, so nothing is computed when the caller only wants the data.
    plot_data = self.ih.aggregates.summary_outcomes(
        every=every, by=by, query=query
    )

    if return_df:
        return plot_data

    fig = px.bar(
        plot_data.collect(),
        x="OutcomeTime",
        y="Count",
        color="Outcome",
        template="pega",
        title=title,
        facet_row=by,
    )
    fig.update_layout(xaxis_title=None)

    return fig
Union[str, timedelta] = "1d", + by: Optional[str] = None, + title: Optional[str] = "Model Performance over Time", + query: Optional[QUERY] = None, + return_df: Optional[bool] = False, + ): + + group_by_clause = cdh_utils.safe_flatten_list([by] + ["OutcomeTime"]) + plot_data = ( + self.ih.aggregates._summary_interactions(every=every, by=by, query=query) + .filter( + pl.col.Propensity.is_not_null() + & pl.col(f"Interaction_Outcome_{metric}").is_not_null() + ) + .group_by(group_by_clause) + .agg( + pl.map_groups( + exprs=[f"Interaction_Outcome_{metric}", "Propensity"], + function=lambda data: cdh_utils.auc_from_probs(data[0], data[1]), + return_dtype=pl.Float64, + ).alias("Performance") + ) + .sort(["OutcomeTime"]) + ) + + if return_df: + return plot_data + + fig = px.line( + plot_data.collect(), + y="Performance", + x="OutcomeTime", + color=by, + template="pega", + title=title, + ) + return fig diff --git a/python/pdstools/ih/__init__.py b/python/pdstools/ih/__init__.py index e69de29b..9d3c5be9 100644 --- a/python/pdstools/ih/__init__.py +++ b/python/pdstools/ih/__init__.py @@ -0,0 +1,3 @@ +from .IH import IH + +__all__ = ["IH"] \ No newline at end of file diff --git a/python/pdstools/infinity/internal/_base_client.py b/python/pdstools/infinity/internal/_base_client.py index 47e3aa25..bb5c066e 100644 --- a/python/pdstools/infinity/internal/_base_client.py +++ b/python/pdstools/infinity/internal/_base_client.py @@ -71,7 +71,7 @@ def __init__( auth: Union[httpx.Auth, PegaOAuth], verify: bool = False, pega_version: Union[str, None] = None, - timeout: float = 20, + timeout: float = 90, ): self._base_url = self._enforce_trailing_slash(httpx.URL(base_url)) self.auth = auth @@ -178,7 +178,7 @@ def __init__( auth: Union[httpx.Auth, PegaOAuth], verify: bool = False, pega_version: Union[str, None] = None, - timeout: float = 20, + timeout: float = 90, ): super().__init__( base_url=base_url, auth=auth, verify=verify, pega_version=pega_version diff --git 
a/python/pdstools/reports/HealthCheck.qmd b/python/pdstools/reports/HealthCheck.qmd index 5c29400b..d817674a 100644 --- a/python/pdstools/reports/HealthCheck.qmd +++ b/python/pdstools/reports/HealthCheck.qmd @@ -73,22 +73,6 @@ def fig_set_xaxis_modelperformance(fig, label="Model Performance"): .update_xaxes(title=label, showticklabels=True, visible=True) ) return fig - - -# def highlight_non_standard_channels(v): -# if v not in set(standardNBADNames.values()): -# color = "orange" -# else: -# color = "" -# return "background-color: %s" % color - -# def highlight_non_standard_configurations(v): -# if v not in set(standardNBADNames.keys()): -# color = "orange" -# else: -# color = "" -# return "background-color: %s" % color - ``` ```{python} @@ -100,7 +84,9 @@ def fig_set_xaxis_modelperformance(fig, label="Model Performance"): title = "ADM Model Overview" subtitle = "Sample data" -# Insert the paths to your data files here to run the notebook from your IDE +# Insert the paths to your data files here to run the notebook from your IDE. +# Edit the _quarto.yml to enable/disable specific sections of the quarto output. +# Parameters will be overriden by quarto when a parameters yaml is provided model_file_path = None prediction_file_path = None @@ -229,7 +215,7 @@ except Exception as e: The [Plotly](https://plotly.com/python/) charts have [user controls for panning, zooming etc](https://plotly.com/chart-studio-help/zoom-pan-hover-controls/) but note that these interactive plots do not render well in portals like Sharepoint or Box. It is preferable to view them from a browser. ::: -# Overview of Channels +# Overview of the Channels In a typical NBAD setup, treatments for a channels are modelled by a channel specific model configuration as well as a cross-channel *OmniAdaptiveModel* configuration. This cross-channel configuration is typically only used as a fall-back, and, additionally, for action-level insights. 
@@ -241,13 +227,18 @@ The "OmniChannel" percentage is an indicator of the overlap of actions between c df_channel_overview = ( datamart.aggregates.summary_by_channel() .with_columns( - NBAD=pl.when(pl.col("usesNBAD")) - .then( - pl.when(pl.col("usesNBADOnly")) - .then(pl.lit("Yes")) - .otherwise(pl.lit("With additional configurations")) - ) - .otherwise(pl.lit("No")) + NBAD=pl.when(pl.col("usesNBADOnly")) + .then(pl.lit("Yes")) + .when(pl.col("usesNBAD")) + .then(pl.lit("With additional configurations")) + .otherwise(pl.lit("No")), + AGB=pl.when(pl.col("usesAGB").is_null()) + .then(pl.lit("?")) + .when(pl.col("usesAGBOnly")) + .then(pl.lit("Yes")) + .when(pl.col("usesAGB")) + .then(pl.lit("Partially")) + .otherwise(pl.lit("No")), ) .drop( [ @@ -272,7 +263,7 @@ formatted_channel_overview = ( "Treatments": ["Total Number of Treatments", "Used Treatments"], "Issues": "Issues", "OmniChannel": "OmniChannel Actions", - "CTR" : "CTR", + "CTR": "CTR", }, highlight_lists={ "Channel": cdh_guidelines.standard_channels, @@ -289,7 +280,7 @@ formatted_channel_overview = ( ) .tab_spanner( label=html("ADM Models"), - columns=["Positives", "ResponseCount", "Performance", "Configuration"], + columns=["Positives", "ResponseCount", "Performance", "Configuration", "AGB"], ) .tab_spanner( label=html("NBAD Setup"), @@ -322,6 +313,8 @@ display( "isValid", "usesNBAD", "usesNBADOnly", + "usesAGB", + "usesAGBOnly", ] ) .tab_style( @@ -339,9 +332,12 @@ display( "Performance", "Configuration", "CTR", + "AGB", "isValid", "usesNBAD", "usesNBADOnly", + "usesAGB", + "usesAGBOnly", ] ).tab_style( style=style.text(decorate="line-through"), @@ -465,6 +461,10 @@ if prediction_file_path: "isValid", ] ) + .tab_style( + style=style.text(decorate="line-through"), + locations=loc.body(rows=pl.col("isValid").not_()), + ) ) display(gt) @@ -782,7 +782,7 @@ except Exception as e: report_utils.quarto_plot_exception("Success Rates over Time", e) ``` -# Overview of Adaptive Models +# Overview of the Adaptive 
Models ```{python} n_unique_models = len(last_data.select("ModelID").unique()) # TODO or uniqueN ? @@ -797,47 +797,11 @@ There are a total of **{n_unique_models}** Adaptive Models in the latest snapsho In the standard configuration there is one Adaptive model per treatment/action for a configuration. ```{python} -action_dim_agg = [pl.col("Name").n_unique().alias("Actions")] -if report_utils.polars_col_exists(last_data, "Treatment"): - action_dim_agg += [pl.col("Treatment").n_unique().alias("Unique Treatments")] -else: - action_dim_agg += [pl.lit(0).alias("Unique Treatments")] - -# TODO work this into a get_model_overview function in the ADMDatamart -# note there already is a perhaps useful model_summary in there which returns -# some but not all the info we need here -# datamart.model_summary(context_keys=["Configuration","Channel","Direction"]).collect() - -model_overview = ( - last_data.group_by( - ["Configuration"] - + report_utils.polars_subset_to_existing_cols( - datamart_all_columns, ["Channel", "Direction"] - ) - ) - .agg( - [ - pl.col("ModelID").n_unique(), - ] - + action_dim_agg - + [pl.sum("ResponseCount"), pl.sum("Positives")] - ) - .with_columns( - [ - # pl.col("Configuration") - # .is_in(standardNBADNames.keys()) - # .alias("Standard in NBAD Framework"), - (pl.col("ModelID") / pl.col("Actions")) - .round(2) - .alias("ModelsPerAction"), - ] - ) - .sort(["Configuration", "Channel", "Direction"]) -) +model_overview = datamart.aggregates.summary_by_configuration() display( report_utils.table_standard_formatting( - model_overview, + model_overview.collect(), title="Model Overview", cdh_guidelines=cdh_guidelines, highlight_limits={ @@ -860,6 +824,7 @@ display( ModelID="Number of Models", Actions="Unique Actions", ModelsPerAction="Average number of Models per Action", + ResponseCount="Total Responses", ) ) ``` @@ -872,11 +837,9 @@ If there are any model configurations that have fewer than {configuration_respon ) configuration_overview = ( - 
datamart.model_data - # first, take max per model ID - .group_by("Configuration").agg( - pl.max("ResponseCount").alias("Responses"), pl.max("Positives") - ) + datamart.aggregates.last(table="model_data") + .group_by("Configuration") + .agg(pl.sum("ResponseCount").alias("Responses"), pl.sum("Positives")) ).collect() all_configurations = configuration_overview.select(["Configuration"]).unique() @@ -1069,58 +1032,11 @@ if datamart.predictor_data is None: ) ``` -This analysis looks at the predictors that are driving the models. - -The predictors are categorized (by color) by their “source”. - -This is something that can be configured when reading the data. By default it simply takes the first part before the dot in the predictor name, so this typically distinguishes between e.g. *Customer*, *Account*, *IH* and parameterized (*Param.*) predictors. - -You can customize this (when reading in the data) to add patterns to identify for example external scores. - -## Number of Predictors per model configuration - -This shows the total number of predictors per model configuration (this includes both active and inactive predictors). - -Note that the total number of predictors in the model data does not always equate the data from the more detailed view split by category below. 
- -```{python} -if datamart.predictor_data is not None: - context_aggregations = [] - if report_utils.polars_col_exists(datamart.combined_data, "Channel"): - context_aggregations += [pl.col("Channel").unique().alias("Used in (Channels)")] - if report_utils.polars_col_exists(datamart.combined_data, "Issue"): - context_aggregations += [pl.col("Issue").unique().alias("Used for (Issues)")] - - predictors_per_configuration = ( - datamart.combined_data.filter(pl.col("EntryType") != "Classifier") - .group_by("Configuration") - .agg( - [pl.col("PredictorName").unique().count().alias("Predictor Count")] - + context_aggregations - ) - .sort("Configuration") - .collect() - ) - - gt = report_utils.table_standard_formatting( - predictors_per_configuration, "Number of Predictors per Configuration", - cdh_guidelines=cdh_guidelines, - ).tab_style( - style=style.text(weight="bold"), - locations=loc.body(columns="Predictor Count"), - ) - gt = report_utils.table_style_predictor_count(gt, ["Predictor Count"], cdh_guidelines) - - display(gt) - -else: - report_utils.quarto_callout_no_predictor_data_warning() - -``` - ## Number of Predictors per Predictor Category -Split by category (defaults to the string before the first dot, can be overridden when reading the data). +The Predictor Categories identify the source of the predictors. By default we split by the first dot, so this distinguishes between e.g. *Customer*, *Account*, *IH* and parameterized (*Param.*) predictors. + +You can override this behavior when the data is read. The numbers here can differ from the totals above, these ones are leading. @@ -1470,8 +1386,11 @@ if datamart.predictor_data is not None: } try: + # TODO see about this warning that we're getting + # The default of observed=False is deprecated... 
+ fig = px.treemap( - missing.to_pandas(), + missing, path=path, color="Percentage without responses", template="pega", diff --git a/python/pdstools/reports/ModelReport.qmd b/python/pdstools/reports/ModelReport.qmd index b4e677ff..84a62f36 100644 --- a/python/pdstools/reports/ModelReport.qmd +++ b/python/pdstools/reports/ModelReport.qmd @@ -49,6 +49,8 @@ from pdstools.utils import report_utils ```{python} # | tags: [parameters] +# Insert the paths to your data files here to run the notebook from your IDE. +# Edit the _quarto.yml to enable/disable specific sections of the quarto output. # Parameters will be overriden by quarto when a parameters yaml is provided title = "ADM Model Details" diff --git a/python/pdstools/utils/cdh_utils.py b/python/pdstools/utils/cdh_utils.py index a2277026..953ffbb2 100644 --- a/python/pdstools/utils/cdh_utils.py +++ b/python/pdstools/utils/cdh_utils.py @@ -1,6 +1,8 @@ import datetime +from functools import partial import io import logging +from operator import is_not import re import tempfile import warnings @@ -216,8 +218,7 @@ def _extract_keys( .alias(c) for c in overlap ] - ) - .drop([f"{c}_decoded" for c in overlap]) + ).drop([f"{c}_decoded" for c in overlap]) ) @@ -258,6 +259,9 @@ def parse_pega_date_time_formats( pl.col(timestamp_col).str.to_datetime( "%d-%b-%y", strict=False, ambiguous="null" ), + pl.col(timestamp_col).str.to_datetime( + "%d%b%Y:%H:%M:%S", strict=False, ambiguous="null" + ), pl.col(timestamp_col).str.to_datetime( timestamp_fmt or "%Y", strict=False, ambiguous="null" ), @@ -474,7 +478,9 @@ def auc_to_gini(auc: float) -> float: return 2 * safe_range_auc(auc) - 1 -def _capitalize(fields: Union[str, Iterable[str]]) -> List[str]: +def _capitalize( + fields: Union[str, Iterable[str]], extra_endwords: Optional[Iterable[str]] = None +) -> List[str]: """Applies automatic capitalization, aligned with the R couterpart. 
def safe_flatten_list(alist: Optional[List]) -> Optional[List]:
    """Flatten one level of nested lists, dropping ``None`` entries.

    Strings (and any other non-list values) are kept as single items, so
    they are never split into characters. Only one level of nesting is
    flattened: a list inside a nested list is kept as-is.

    Parameters
    ----------
    alist : Optional[List]
        A list whose items are scalars, ``None``, or lists of scalars/``None``.

    Returns
    -------
    Optional[List]
        The flattened list without ``None`` values, or ``None`` when the
        input is ``None`` or nothing remains after removing ``None`` values.
    """
    if alist is None:
        return None

    flattened: List = []
    for element in alist:
        # isinstance (rather than an exact type comparison) so that list
        # subclasses are flattened as well.
        if isinstance(element, list):
            flattened.extend(item for item in element if item is not None)
        elif element is not None:
            flattened.append(element)

    # Callers rely on None (not an empty list) to signal "nothing to use".
    return flattened or None
b/python/pdstools/utils/report_utils.py @@ -1,3 +1,4 @@ +import re import traceback from typing import Dict, List, Literal, Optional, Union from IPython.display import display, Markdown @@ -124,6 +125,15 @@ def table_standard_formatting( highlight_configurations: List[str] = [], rag_styler: callable = rag_background_styler, ): + def apply_style(gt, rag, rows): + style = rag_styler(rag) + if style is not None: + gt = gt.tab_style( + style=style, + locations=loc.body(columns=col_name, rows=rows), + ) + return gt + def apply_rag_styling(gt, col_name, metric): if col_name in source_table.collect_schema().names(): min_val = cdh_guidelines.min(metric) @@ -169,42 +179,11 @@ def apply_rag_styling(gt, col_name, metric): ] # TODO consider that bad / warning rows are exclusive - def apply_style(gt, rag, rows): - style = rag_styler(rag) - if style is not None: - gt = gt.tab_style( - style=style, - locations=loc.body(columns=col_name, rows=rows), - ) - return gt - gt = apply_style(gt, "green", good_rows) gt = apply_style(gt, "amber", warning_rows) gt = apply_style(gt, "red", bad_rows) return gt - def apply_standard_name_style(gt, col_name, standard_list): - if col_name in source_table.collect_schema().names(): - values = source_table[col_name].to_list() - non_standard_rows = [ - i for i, v in enumerate(values) if v not in standard_list - ] - gt = gt.tab_style( - style=rag_styler("yellow"), - locations=loc.body(columns=col_name, rows=non_standard_rows), - ) - return gt - - def apply_configuration_style(gt, col_name): - if col_name in source_table.collect_schema().names(): - values = source_table[col_name].to_list() - multiple_config_rows = [i for i, v in enumerate(values) if v.count(",") > 1] - gt = gt.tab_style( - style=rag_styler("yellow"), - locations=loc.body(columns=col_name, rows=multiple_config_rows), - ) - return gt - gt = ( GT(source_table, rowname_col=rowname_col, groupname_col=groupname_col) .tab_options(table_font_size=8) @@ -214,57 +193,66 @@ def 
apply_configuration_style(gt, col_name): if title is not None: gt = gt.tab_header(title=title, subtitle=subtitle) - def metric_styling_model_performance(gt, cols): - return gt.fmt_number( - decimals=2, - columns=cols, - ) - - def metric_styling_percentage(gt, cols): - return gt.fmt_percent( - decimals=0, - columns=cols, - ) - - def metric_styling_ctr(gt, cols): - return gt.fmt_percent( - decimals=3, - columns=cols, - ) - - def metric_styling_default(gt, cols): - return gt.fmt_number( - decimals=0, - compact=True, - columns=cols, - ) - for metric in highlight_limits.keys(): cols = highlight_limits[metric] if isinstance(cols, str): cols = [cols] + # Highlight colors for col_name in cols: gt = apply_rag_styling(gt, col_name=col_name, metric=metric) - # gt = gt.fmt_number( - # columns=col_name, decimals=0, compact=True - # ) # default number formatting applied to everything - consider being smarter, in config + + # Value formatting match metric: case "Model Performance": - gt = metric_styling_model_performance(gt, cols) + gt = gt.fmt_number( + decimals=2, + columns=cols, + ) case "Engagement Lift": - gt = metric_styling_percentage(gt, cols) + gt = gt.fmt_percent( + decimals=0, + columns=cols, + ) case "OmniChannel": - gt = metric_styling_percentage(gt, cols) + gt = gt.fmt_percent( + decimals=0, + columns=cols, + ) case "CTR": - gt = metric_styling_ctr(gt, cols) + gt = gt.fmt_percent( + decimals=3, + columns=cols, + ) case _: - gt = metric_styling_default(gt, cols) + gt = gt.fmt_number( + decimals=0, + compact=True, + columns=cols, + ) - for metric in highlight_lists.keys(): - gt = apply_standard_name_style(gt, metric, highlight_lists[metric]) + # Highlight columns with non-standard values + def simplify_name(x: str) -> str: + if x is None: + return x + return re.sub("\\W", "", x, flags=re.IGNORECASE).upper() - for metric in highlight_configurations: - gt = apply_configuration_style(gt, metric) + for col_name in highlight_lists.keys(): + if col_name in 
source_table.collect_schema().names(): + simplified_names = [simplify_name(x) for x in highlight_lists[col_name]] + values = source_table[col_name].to_list() + non_standard_rows = [ + i + for i, v in enumerate(values) + if simplify_name(v) not in simplified_names + ] + gt = apply_style(gt, "yellow", non_standard_rows) + + # Highlight column with more than one element (assuming its a comma-separated string) + for col_name in highlight_configurations: + if col_name in source_table.collect_schema().names(): + values = source_table[col_name].to_list() + multiple_config_rows = [i for i, v in enumerate(values) if v.count(",") > 1] + gt = apply_style(gt, "yellow", multiple_config_rows) return gt diff --git a/python/tests/test_Aggregates.py b/python/tests/test_Aggregates.py index 40c9a484..85cb8876 100644 --- a/python/tests/test_Aggregates.py +++ b/python/tests/test_Aggregates.py @@ -43,13 +43,18 @@ def test_aggregate_predictor_counts(agg): def test_aggregate_summary_by_channel(agg): summary_by_channel = agg.summary_by_channel().collect() assert summary_by_channel.shape[0] == 3 - assert summary_by_channel.shape[1] == 21 + assert summary_by_channel.shape[1] == 23 assert summary_by_channel["Total Number of Actions"].to_list() == [24, 27, 19] def test_aggregate_overall_summary(agg): overall_summary = agg.overall_summary().collect() assert overall_summary.shape[0] == 1 - assert overall_summary.shape[1] == 18 + assert overall_summary.shape[1] == 20 assert overall_summary["Number of Valid Channels"].item() == 3 assert overall_summary["Total Number of Treatments"].item() == 0 + +def test_summary_by_configuration(agg): + configuration_summary = agg.summary_by_configuration().collect() + assert "AGB" in configuration_summary.columns + \ No newline at end of file diff --git a/python/tests/test_datasets.py b/python/tests/test_datasets.py index 01217fcc..4e9569e3 100644 --- a/python/tests/test_datasets.py +++ b/python/tests/test_datasets.py @@ -21,7 +21,7 @@ def __new__(cls, ldf: 
pl.LazyFrame): def test_import_CDHSample(): Sample = datasets.cdh_sample() - assert Sample.model_data.shape == (1047, 27) + assert Sample.model_data.shape == (1047, 28) def test_import_SampleTrees(): diff --git a/python/tests/test_end_to_end.py b/python/tests/test_end_to_end.py index 77bd07ae..f2545513 100644 --- a/python/tests/test_end_to_end.py +++ b/python/tests/test_end_to_end.py @@ -38,13 +38,13 @@ def __new__(cls, ldf: pl.LazyFrame): def test_end_to_end(sample: ADMDatamart): - assert sample.model_data.shape == (1047, 27) + assert sample.model_data.shape == (1047, 28) assert sample.predictor_data.shape == (70735, 39) - assert sample.combined_data.shape == (4576, 65) + assert sample.combined_data.shape == (4576, 66) - assert sample.aggregates.last().shape == (68, 27) + assert sample.aggregates.last().shape == (68, 28) assert sample.aggregates.last(table="predictor_data").shape == (4576, 39) assert sample.model_data.collect_schema()["SnapshotTime"] == pl.Datetime diff --git a/python/tests/test_healthcheck.py b/python/tests/test_healthcheck.py index c6e8984c..a843b5f6 100644 --- a/python/tests/test_healthcheck.py +++ b/python/tests/test_healthcheck.py @@ -38,7 +38,7 @@ def test_ExportTables(sample: ADMDatamart): spreadsheet = ExcelFile(excel) assert list(spreadsheet.sheet_names) == [ "modeldata_last_snapshot", - "predictor_last_snapshot", + "predictors_overview", "predictor_binning", ] # TODO we could go further and check the size of the sheets @@ -54,7 +54,7 @@ def test_ExportTables_NoBinning(sample: ADMDatamart): spreadsheet = ExcelFile(excel) assert list(spreadsheet.sheet_names) == [ "modeldata_last_snapshot", - "predictor_last_snapshot", + "predictors_overview", ] # TODO we could go further and check the size of the sheets # spreadsheet = read_excel(excel, sheet_name=None)