From eb49aacec4c141954c091cac543f4986178b555e Mon Sep 17 00:00:00 2001 From: Christine Draper Date: Fri, 3 May 2024 15:29:05 -0400 Subject: [PATCH 1/2] Fix column list --- .../customized_diagnoser.ipynb | 112 +++++++------- examples/example_notebooks/diagnoser.ipynb | 145 ++++++++++-------- .../diagnoser/models/diagnosis_report.py | 2 +- 3 files changed, 136 insertions(+), 123 deletions(-) diff --git a/examples/example_notebooks/customized_diagnoser.ipynb b/examples/example_notebooks/customized_diagnoser.ipynb index 5f183a7..77791f6 100644 --- a/examples/example_notebooks/customized_diagnoser.ipynb +++ b/examples/example_notebooks/customized_diagnoser.ipynb @@ -17,14 +17,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 38, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-05-03T02:03:15.122705Z", - "start_time": "2024-05-03T02:03:15.119284Z" + "end_time": "2024-05-03T19:28:01.733321Z", + "start_time": "2024-05-03T19:28:01.731598Z" } }, "outputs": [], @@ -47,12 +47,12 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 39, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:31.740325Z", - "start_time": "2024-05-03T02:03:15.137102Z" + "end_time": "2024-05-03T19:28:09.883007Z", + "start_time": "2024-05-03T19:28:01.757481Z" } }, "outputs": [], @@ -84,12 +84,12 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 40, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:32.055442Z", - "start_time": "2024-05-03T02:03:31.743114Z" + "end_time": "2024-05-03T19:28:09.888906Z", + "start_time": "2024-05-03T19:28:09.884819Z" } }, "outputs": [], @@ -110,11 +110,11 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 41, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T02:03:32.690456Z", - "start_time": "2024-05-03T02:03:32.056136Z" + "end_time": "2024-05-03T19:28:10.298067Z", + "start_time": "2024-05-03T19:28:09.890844Z" } }, "outputs": [ @@ -122,7 +122,7 @@ "data": { "text/plain": "(TimeRange(start=datetime.datetime(2021, 5, 20, 0, 0, tzinfo=datetime.timezone.utc), end=datetime.datetime(2024, 5, 2, 21, 0, tzinfo=datetime.timezone.utc)),\n ,\n '2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z')" }, - "execution_count": 4, + "execution_count": 41, "metadata": {}, "output_type": "execute_result" } @@ -143,12 +143,12 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 42, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.421897Z", - "start_time": "2024-05-03T02:03:32.692755Z" + "end_time": "2024-05-03T19:28:11.034284Z", + "start_time": "2024-05-03T19:28:10.300461Z" } }, "outputs": [ @@ -157,7 +157,7 @@ "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-x2hr9z \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-bx6m80 \n4 frequent-items-drift-monitor-mat0jo \n5 frequent-items-drift-monitor-01rbfl \n6 frequent-items-drift-monitor-0foigt \n7 frequent-items-drift-monitor-3c0hc2 \n8 frequent-items-drift-monitor-9gmtix \n9 elated-palegreen-jaguar-6432 \n10 inferred-data-type-fec5a735-monitor \n11 unique-ratio-b7b84aee-monitor \n12 missing-values-ratio-35881327-monitor \n13 numerical-drift-monitor-6oxi83 \n14 numerical-drift-monitor-8yugth \n15 continuous-distribution-956a280c-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n4 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n5 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n6 frequent-items-drift-analyzer-0foigt frequent_items 3 \n7 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n8 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n9 elated-palegreen-jaguar-6432-analyzer histogram 9 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 unique-ratio-b7b84aee unique_est_ratio 69 \n12 missing-values-ratio-35881327 count_null_ratio 21 \n13 numerical-drift-analyzer-6oxi83 histogram 1 \n14 numerical-drift-analyzer-8yugth histogram 1 \n15 continuous-distribution-956a280c histogram 1 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 34 30 \n1 1 34 30 \n2 1 34 30 \n3 1 34 30 \n4 1 34 30 \n5 1 34 30 \n6 1 34 30 \n7 1 34 30 \n8 1 34 30 \n9 1 75 19 \n10 1 14 14 \n11 1 104 4 \n12 1 27 3 \n13 1 2 2 \n14 1 2 2 \n15 1 2 2 \n16 1 3 2 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 1 11 1 \n1 1 11 0 \n2 1 11 0 \n3 1 11 0 \n4 1 11 2 \n5 1 11 1 \n6 1 11 0 \n7 1 11 1 \n8 1 11 1 \n9 2 8 0 \n10 14 14 2 \n11 1 1 0 \n12 1 1 0 \n13 2 2 0 \n14 2 2 0 \n15 2 2 0 \n16 1 1 0 \n\n action_targets \n0 [email] \n1 [] \n2 [] \n3 [] \n4 [email, slack] \n5 [email] \n6 [] \n7 [email] \n8 [email] \n9 [] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3134301110[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3134301110[]
3frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3134301110[]
4frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
5frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
6frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3134301110[]
7frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
8frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
9elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram917519280[]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
11unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6911044110[]
12missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211273110[]
13numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1122220[]
14numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1122220[]
15continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1122220[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2132110[]
\n
" }, - "execution_count": 5, + "execution_count": 42, "metadata": {}, "output_type": "execute_result" } @@ -181,12 +181,12 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 43, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.427628Z", - "start_time": "2024-05-03T02:03:33.422561Z" + "end_time": "2024-05-03T19:28:11.043855Z", + "start_time": "2024-05-03T19:28:11.035480Z" } }, "outputs": [ @@ -195,7 +195,7 @@ "text/plain": " monitor_id analyzer_id \\\n0 inferred-data-type-fec5a735-monitor inferred-data-type-fec5a735 \n1 missing-values-ratio-35881327-monitor missing-values-ratio-35881327 \n2 unique-ratio-b7b84aee-monitor unique-ratio-b7b84aee \n\n metric failed_count max_failed_per_column \\\n0 inferred_data_type 3 3 \n1 count_null_ratio 1 1 \n2 unique_est_ratio 1 1 \n\n min_failed_per_column avg_failed_per_column action_count action_targets \n0 3 3 2 [email, slack] \n1 1 1 0 [] \n2 1 1 0 [] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetricfailed_countmax_failed_per_columnmin_failed_per_columnavg_failed_per_columnaction_countaction_targets
0inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type33332[email, slack]
1missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio11110[]
2unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio11110[]
\n
" }, - "execution_count": 6, + "execution_count": 43, "metadata": {}, "output_type": "execute_result" } @@ -214,11 +214,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 44, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.431118Z", - "start_time": "2024-05-03T02:03:33.428470Z" + "end_time": "2024-05-03T19:28:11.048310Z", + "start_time": "2024-05-03T19:28:11.045233Z" } }, "outputs": [ @@ -226,7 +226,7 @@ "data": { "text/plain": "'frequent-items-drift-monitor-x2hr9z'" }, - "execution_count": 7, + "execution_count": 44, "metadata": {}, "output_type": "execute_result" } @@ -246,12 +246,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 45, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.434461Z", - "start_time": "2024-05-03T02:03:33.432056Z" + "end_time": "2024-05-03T19:28:11.052828Z", + "start_time": "2024-05-03T19:28:11.049460Z" } }, "outputs": [ @@ -259,7 +259,7 @@ "data": { "text/plain": "Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')])" }, - "execution_count": 8, + "execution_count": 45, "metadata": {}, "output_type": "execute_result" } @@ -279,20 +279,20 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 46, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.623760Z", - "start_time": "2024-05-03T02:03:33.435077Z" + "end_time": "2024-05-03T19:28:11.245894Z", + "start_time": "2024-05-03T19:28:11.054041Z" } }, "outputs": [ { "data": { - "text/plain": "Analyzer(metadata=Metadata(version=2, schemaVersion=1, updatedTimestamp=1714699900837, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=[, 'desc', 'issue_d', 'url'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None)))" + "text/plain": "Analyzer(metadata=Metadata(version=5, schemaVersion=1, updatedTimestamp=1714764383143, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['url', 'desc', , 'issue_d'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None)))" }, - "execution_count": 9, + "execution_count": 46, "metadata": {}, "output_type": "execute_result" } @@ -312,12 +312,12 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 47, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.839587Z", - "start_time": "2024-05-03T02:03:33.624489Z" + "end_time": "2024-05-03T19:28:11.479972Z", + "start_time": "2024-05-03T19:28:11.246810Z" } }, "outputs": [ @@ -326,7 +326,7 @@ "text/plain": " segment total_anomalies batch_count\n0 overall 34 30", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
segmenttotal_anomaliesbatch_count
0overall3430
\n
" }, - "execution_count": 10, + "execution_count": 47, "metadata": {}, "output_type": "execute_result" } @@ -351,11 +351,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 48, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.846498Z", - "start_time": "2024-05-03T02:03:33.842929Z" + "end_time": "2024-05-03T19:28:11.486357Z", + "start_time": "2024-05-03T19:28:11.483353Z" } }, "outputs": [ @@ -363,7 +363,7 @@ "data": { "text/plain": "'overall'" }, - "execution_count": 11, + "execution_count": 48, "metadata": {}, "output_type": "execute_result" } @@ -383,11 +383,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 49, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.988624Z", - "start_time": "2024-05-03T02:03:33.847364Z" + "end_time": "2024-05-03T19:28:11.643588Z", + "start_time": "2024-05-03T19:28:11.487234Z" } }, "outputs": [ @@ -396,7 +396,7 @@ "text/plain": " column total_anomalies\n0 issue_d 30\n1 url 3\n2 desc 1\n3 disbursement_method 0\n4 earliest_cr_line 0\n5 emp_length 0\n6 emp_title 0\n7 grade 0\n8 hardship_flag 0\n9 home_ownership 0\n10 initial_list_status 0\n11 last_credit_pull_d 0\n12 last_pymnt_d 0\n13 loan_status 0\n14 next_pymnt_d 0\n15 purpose 0\n16 pymnt_plan 0\n17 sub_grade 0\n18 term 0\n19 title 0\n20 verification_status 0\n21 verification_status_joint 0\n22 addr_state 0\n23 zip_code 0\n24 application_type 0\n25 debt_settlement_flag 0", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
columntotal_anomalies
0issue_d30
1url3
2desc1
3disbursement_method0
4earliest_cr_line0
5emp_length0
6emp_title0
7grade0
8hardship_flag0
9home_ownership0
10initial_list_status0
11last_credit_pull_d0
12last_pymnt_d0
13loan_status0
14next_pymnt_d0
15purpose0
16pymnt_plan0
17sub_grade0
18term0
19title0
20verification_status0
21verification_status_joint0
22addr_state0
23zip_code0
24application_type0
25debt_settlement_flag0
\n
" }, - "execution_count": 12, + "execution_count": 49, "metadata": {}, "output_type": "execute_result" } @@ -416,11 +416,11 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 50, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T02:03:33.992473Z", - "start_time": "2024-05-03T02:03:33.989400Z" + "end_time": "2024-05-03T19:28:11.647546Z", + "start_time": "2024-05-03T19:28:11.644423Z" } }, "outputs": [ @@ -428,7 +428,7 @@ "data": { "text/plain": "['issue_d',\n 'url',\n 'desc',\n 'disbursement_method',\n 'earliest_cr_line',\n 'emp_length',\n 'emp_title',\n 'grade',\n 'hardship_flag',\n 'home_ownership',\n 'initial_list_status',\n 'last_credit_pull_d',\n 'last_pymnt_d',\n 'loan_status',\n 'next_pymnt_d',\n 'purpose',\n 'pymnt_plan',\n 'sub_grade',\n 'term',\n 'title',\n 'verification_status',\n 'verification_status_joint',\n 'addr_state',\n 'zip_code',\n 'application_type',\n 'debt_settlement_flag']" }, - "execution_count": 13, + "execution_count": 50, "metadata": {}, "output_type": "execute_result" } @@ -447,11 +447,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 51, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T02:03:38.883541Z", - "start_time": "2024-05-03T02:03:33.993121Z" + "end_time": "2024-05-03T19:28:15.881932Z", + "start_time": "2024-05-03T19:28:11.648505Z" } }, "outputs": [], @@ -463,12 +463,12 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 52, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T02:03:38.892248Z", - "start_time": "2024-05-03T02:03:38.884434Z" + "end_time": "2024-05-03T19:28:15.890154Z", + "start_time": "2024-05-03T19:28:15.883421Z" } }, "outputs": [ diff --git a/examples/example_notebooks/diagnoser.ipynb b/examples/example_notebooks/diagnoser.ipynb index ff4d687..01d9b06 100644 --- a/examples/example_notebooks/diagnoser.ipynb +++ b/examples/example_notebooks/diagnoser.ipynb @@ -18,14 +18,14 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 5, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-05-03T01:31:30.980544Z", - "start_time": "2024-05-03T01:31:30.977943Z" + "end_time": "2024-05-03T19:26:07.122244Z", + "start_time": "2024-05-03T19:26:07.119688Z" } }, "outputs": [], @@ -48,11 +48,11 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:33.173740Z", - "start_time": "2024-05-03T01:31:30.988187Z" + "end_time": "2024-05-03T19:26:15.399941Z", + "start_time": "2024-05-03T19:26:07.131180Z" } }, "outputs": [], @@ -62,7 +62,7 @@ "\n", "org_id = input(\"Enter org ID\")\n", "dataset_id = input(\"Enter model/dataset ID\")\n", - "api_key = getpass.getpass()\n", + "api_key = getpass.getpass(\"Enter API key\")\n", "api_endpoint = 'https://api.whylabsapp.com'\n", "\n", "env_setup(\n", @@ -82,11 +82,11 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:33.433678Z", - "start_time": "2024-05-03T01:31:33.175869Z" + "end_time": "2024-05-03T19:26:15.406192Z", + "start_time": "2024-05-03T19:26:15.402128Z" } }, "outputs": [], @@ -106,19 +106,19 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:40.402540Z", - "start_time": "2024-05-03T01:31:33.436496Z" + "end_time": "2024-05-03T19:26:22.706626Z", + "start_time": "2024-05-03T19:26:15.407624Z" } }, "outputs": [ { "data": { - "text/plain": "MonitorDiagnosisReport(orgId='org-0', datasetId='model-0', analyzerId='frequent-items-drift-analyzer-x2hr9z', interval='2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z', expectedBatchCount=0, diagnosticData=DiagnosticDataSummary(diagnosticSegment=Segment(tags=[]), diagnosticProfile=ProfileSummary(minRowName='desc', minRowCount=1674392, maxRowName='desc', maxRowCount=1674392), diagnosticBatches=BatchesSummary(minBatchName='desc', minBatchCount=30, maxBatchName='desc', maxBatchCount=30), analysisResults=AnalysisResultsSummary(results=ResultRecord(diagnosedColumnCount=26, batchCount=30), failures=FailureRecord(totalFailuresCount=0, maxFailuresCount=0, meanFailuresCount=0, byColumnCount=[], byTypeCount=[]), anomalies=AnomalyRecord(totalAnomalyCount=34, maxAnomalyCount=30, meanAnomalyCount=11, batchCount=30, byColumnCount=[NamedCount(name='issue_d', count=30), NamedCount(name='url', count=3), NamedCount(name='desc', count=1)], byColumnBatchCount=[NamedCount(name='addr_state', count=30), NamedCount(name='application_type', count=30), NamedCount(name='debt_settlement_flag', count=30), NamedCount(name='desc', count=2), NamedCount(name='disbursement_method', count=30), NamedCount(name='earliest_cr_line', count=30), NamedCount(name='emp_length', count=30), NamedCount(name='emp_title', count=30), NamedCount(name='grade', count=30), NamedCount(name='hardship_flag', count=30), NamedCount(name='home_ownership', count=30), NamedCount(name='initial_list_status', count=30), NamedCount(name='issue_d', count=30), NamedCount(name='last_credit_pull_d', count=30), NamedCount(name='last_pymnt_d', count=30), NamedCount(name='loan_status', count=30), NamedCount(name='next_pymnt_d', count=30), NamedCount(name='purpose', count=30), NamedCount(name='pymnt_plan', count=30), NamedCount(name='sub_grade', count=30), NamedCount(name='term', count=30), NamedCount(name='title', count=30), NamedCount(name='url', count=30), NamedCount(name='verification_status', count=30), NamedCount(name='verification_status_joint', count=30), NamedCount(name='zip_code', count=30)])), targetedColumnCount=30), qualityIssues=[], conditions=[ConditionRecord(columns=['desc', 'issue_d', 'url'], info=None, summary='many values are unique across batches', name='changing_discrete'), ConditionRecord(columns=['desc'], info=None, summary='less than 500 non-null records in 50% or more of the batches', name='small_nonnull_batches')], monitor=Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')]), analyzer=Analyzer(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536888574, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=[], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None))), analyzedColumnCount=26)" + "text/plain": "MonitorDiagnosisReport(orgId='org-0', datasetId='model-0', analyzerId='frequent-items-drift-analyzer-x2hr9z', interval='2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z', expectedBatchCount=0, diagnosticData=DiagnosticDataSummary(diagnosticSegment=Segment(tags=[]), diagnosticProfile=ProfileSummary(minRowName='desc', minRowCount=1674392, maxRowName='desc', maxRowCount=1674392), diagnosticBatches=BatchesSummary(minBatchName='desc', minBatchCount=30, maxBatchName='desc', maxBatchCount=30), analysisResults=AnalysisResultsSummary(results=ResultRecord(diagnosedColumnCount=26, batchCount=30), failures=FailureRecord(totalFailuresCount=0, maxFailuresCount=0, meanFailuresCount=0, byColumnCount=[], byTypeCount=[]), anomalies=AnomalyRecord(totalAnomalyCount=34, maxAnomalyCount=30, meanAnomalyCount=11, batchCount=30, byColumnCount=[NamedCount(name='issue_d', count=30), NamedCount(name='url', count=3), NamedCount(name='desc', count=1)], byColumnBatchCount=[NamedCount(name='addr_state', count=30), NamedCount(name='application_type', count=30), NamedCount(name='debt_settlement_flag', count=30), NamedCount(name='desc', count=2), NamedCount(name='disbursement_method', count=30), NamedCount(name='earliest_cr_line', count=30), NamedCount(name='emp_length', count=30), NamedCount(name='emp_title', count=30), NamedCount(name='grade', count=30), NamedCount(name='hardship_flag', count=30), NamedCount(name='home_ownership', count=30), NamedCount(name='initial_list_status', count=30), NamedCount(name='issue_d', count=30), NamedCount(name='last_credit_pull_d', count=30), NamedCount(name='last_pymnt_d', count=30), NamedCount(name='loan_status', count=30), NamedCount(name='next_pymnt_d', count=30), NamedCount(name='purpose', count=30), NamedCount(name='pymnt_plan', count=30), NamedCount(name='sub_grade', count=30), NamedCount(name='term', count=30), NamedCount(name='title', count=30), NamedCount(name='url', count=30), NamedCount(name='verification_status', count=30), NamedCount(name='verification_status_joint', count=30), NamedCount(name='zip_code', count=30)])), targetedColumnCount=27), qualityIssues=[QualityIssueRecord(name='analyzer_changed', description='Analyzer changed within the diagnostic interval', detectors=['stale_analysis', 'changing_discrete', 'low_drift_threshold', 'missing_baseline_batches', 'small_nonnull_batches'])], conditions=[ConditionRecord(columns=['desc', 'issue_d', 'url'], info=None, summary='many values are unique across batches', name='changing_discrete'), ConditionRecord(columns=['desc'], info=None, summary='less than 500 non-null records in 50% or more of the batches', name='small_nonnull_batches')], monitor=Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')]), analyzer=Analyzer(metadata=Metadata(version=4, schemaVersion=1, updatedTimestamp=1714763295788, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['desc', , 'issue_d', 'url'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None))), analyzedColumnCount=26)" }, - "execution_count": 4, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -130,11 +130,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:40.415056Z", - "start_time": "2024-05-03T01:31:40.405237Z" + "end_time": "2024-05-03T19:26:22.723005Z", + "start_time": "2024-05-03T19:26:22.709411Z" } }, "outputs": [ @@ -146,7 +146,7 @@ "Monitor has 1 notification actions ['email'].\n", "\n", "Analyzer is drift configuration for frequent_items metric with TrailingWindow baseline.\n", - "Analyzer \"frequent-items-drift-analyzer-x2hr9z\" targets 30 columns and ran on 26 columns in the diagnosed segment.\n", + "Analyzer \"frequent-items-drift-analyzer-x2hr9z\" targets 27 columns and ran on 26 columns in the diagnosed segment.\n", "\n", "\n", "Diagnostic segment is \"overall\".\n", @@ -166,18 +166,18 @@ "\n", "No failures were detected.\n", "\n", - "No issues impacting diagnosis quality were detected\n", + "Conditions that may impact diagnosis quality include:\n", + "\t* analyzer_changed: Analyzer changed within the diagnostic interval - detectors ['stale_analysis', 'changing_discrete', 'low_drift_threshold', 'missing_baseline_batches', 'small_nonnull_batches']\n", + "\n", "Conditions that may contribute to noise include:\n", "\t* Condition changing_discrete (many values are unique across batches) for 3 columns: ['desc', 'issue_d', 'url']\n", - "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 4 columns: ['desc', 'issue_d', 'url', 'desc']\n", + "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", - "| | 0 |\n", - "|:--------|----:|\n", - "| issue_d | 30 |\n", - "| url | 3 |\n", - "| desc | 1 |\n", - "Accounting for 34 anomalies out of 34\n" + "| | 0 |\n", + "|:-----|----:|\n", + "| desc | 1 |\n", + "Accounting for 1 anomalies out of 34\n" ] } ], @@ -199,11 +199,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:40.419753Z", - "start_time": "2024-05-03T01:31:40.415990Z" + "end_time": "2024-05-03T19:26:22.728615Z", + "start_time": "2024-05-03T19:26:22.724414Z" } }, "outputs": [], @@ -214,11 +214,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:40.424467Z", - "start_time": "2024-05-03T01:31:40.420491Z" + "end_time": "2024-05-03T19:26:22.734842Z", + "start_time": "2024-05-03T19:26:22.729870Z" } }, "outputs": [ @@ -387,9 +387,21 @@ " ]\n", " }\n", " },\n", - " \"targetedColumnCount\": 30\n", + " \"targetedColumnCount\": 27\n", " },\n", - " \"qualityIssues\": [],\n", + " \"qualityIssues\": [\n", + " {\n", + " \"name\": \"analyzer_changed\",\n", + " \"description\": \"Analyzer changed within the diagnostic interval\",\n", + " \"detectors\": [\n", + " \"stale_analysis\",\n", + " \"changing_discrete\",\n", + " \"low_drift_threshold\",\n", + " \"missing_baseline_batches\",\n", + " \"small_nonnull_batches\"\n", + " ]\n", + " }\n", + " ],\n", " \"conditions\": [\n", " {\n", " \"columns\": [\n", @@ -445,10 +457,10 @@ " },\n", " \"analyzer\": {\n", " \"metadata\": {\n", - " \"version\": 1,\n", + " \"version\": 4,\n", " \"schemaVersion\": 1,\n", - " \"updatedTimestamp\": 1705536888574,\n", - " \"author\": \"user_809f777d_3741_4991_8ced_42f09b883ac7\",\n", + " \"updatedTimestamp\": 1714763295788,\n", + " \"author\": \"user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98\",\n", " \"description\": null\n", " },\n", " \"id\": \"frequent-items-drift-analyzer-x2hr9z\",\n", @@ -472,7 +484,10 @@ " \"group:discrete\"\n", " ],\n", " \"exclude\": [\n", - " \"group:output\"\n", + " \"desc\",\n", + " \"group:output\",\n", + " \"issue_d\",\n", + " \"url\"\n", " ],\n", " \"profileId\": null\n", " },\n", @@ -521,11 +536,11 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:40.435769Z", - "start_time": "2024-05-03T01:31:40.425350Z" + "end_time": "2024-05-03T19:26:22.742280Z", + "start_time": "2024-05-03T19:26:22.736043Z" } }, "outputs": [ @@ -563,11 +578,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:40.438830Z", - "start_time": "2024-05-03T01:31:40.436652Z" + "end_time": "2024-05-03T19:26:22.745719Z", + "start_time": "2024-05-03T19:26:22.743195Z" } }, "outputs": [ @@ -586,11 +601,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:41.510071Z", - "start_time": "2024-05-03T01:31:40.439570Z" + "end_time": "2024-05-03T19:26:23.502634Z", + "start_time": "2024-05-03T19:26:22.746622Z" } }, "outputs": [ @@ -625,11 +640,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 15, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:31:41.528882Z", - "start_time": "2024-05-03T01:31:41.513691Z" + "end_time": "2024-05-03T19:26:23.525937Z", + "start_time": "2024-05-03T19:26:23.508893Z" } }, "outputs": [ @@ -638,7 +653,7 @@ "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-x2hr9z \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-bx6m80 \n4 frequent-items-drift-monitor-mat0jo \n5 frequent-items-drift-monitor-01rbfl \n6 frequent-items-drift-monitor-0foigt \n7 frequent-items-drift-monitor-3c0hc2 \n8 frequent-items-drift-monitor-9gmtix \n9 elated-palegreen-jaguar-6432 \n10 inferred-data-type-fec5a735-monitor \n11 unique-ratio-b7b84aee-monitor \n12 missing-values-ratio-35881327-monitor \n13 numerical-drift-monitor-6oxi83 \n14 numerical-drift-monitor-8yugth \n15 continuous-distribution-956a280c-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n4 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n5 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n6 frequent-items-drift-analyzer-0foigt frequent_items 3 \n7 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n8 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n9 elated-palegreen-jaguar-6432-analyzer histogram 9 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 unique-ratio-b7b84aee unique_est_ratio 69 \n12 missing-values-ratio-35881327 count_null_ratio 21 \n13 numerical-drift-analyzer-6oxi83 histogram 1 \n14 numerical-drift-analyzer-8yugth histogram 1 \n15 continuous-distribution-956a280c histogram 1 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 34 30 \n1 1 34 30 \n2 1 34 30 \n3 1 34 30 \n4 1 34 30 \n5 1 34 30 \n6 1 34 30 \n7 1 34 30 \n8 1 34 30 \n9 1 75 19 \n10 1 14 14 \n11 1 104 4 \n12 1 27 3 \n13 1 2 2 \n14 1 2 2 \n15 1 2 2 \n16 1 3 2 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 1 11 1 \n1 1 11 0 \n2 1 11 0 \n3 1 11 0 \n4 1 11 2 \n5 1 11 1 \n6 1 11 0 \n7 1 11 1 \n8 1 11 1 \n9 2 8 0 \n10 14 14 2 \n11 1 1 0 \n12 1 1 0 \n13 2 2 0 \n14 2 2 0 \n15 2 2 0 \n16 1 1 0 \n\n action_targets \n0 [email] \n1 [] \n2 [] \n3 [] \n4 [email, slack] \n5 [email] \n6 [] \n7 [email] \n8 [email] \n9 [] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3134301110[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3134301110[]
3frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3134301110[]
4frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
5frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
6frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3134301110[]
7frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
8frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
9elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram917519280[]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
11unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6911044110[]
12missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211273110[]
13numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1122220[]
14numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1122220[]
15continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1122220[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2132110[]
\n
" }, - "execution_count": 11, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } @@ -661,11 +676,11 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 16, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T01:59:10.186840Z", - "start_time": "2024-05-03T01:59:03.667269Z" + "end_time": "2024-05-03T19:26:28.850168Z", + "start_time": "2024-05-03T19:26:23.527095Z" } }, "outputs": [ @@ -702,15 +717,13 @@ "\n", "Conditions that may contribute to noise include:\n", "\t* Condition changing_discrete (many values are unique across batches) for 3 columns: ['desc', 'issue_d', 'url']\n", - "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 4 columns: ['desc', 'issue_d', 'url', 'desc']\n", + "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", - "| | 0 |\n", - "|:--------|----:|\n", - "| issue_d | 30 |\n", - "| url | 3 |\n", - "| desc | 1 |\n", - "Accounting for 34 anomalies out of 34\n" + "| | 0 |\n", + "|:-----|----:|\n", + "| desc | 1 |\n", + "Accounting for 1 anomalies out of 34\n" ] } ], @@ -732,12 +745,12 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": 17, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T01:59:21.543539Z", - "start_time": "2024-05-03T01:59:21.533198Z" + "end_time": "2024-05-03T19:26:28.860332Z", + "start_time": "2024-05-03T19:26:28.851199Z" } }, "outputs": [ @@ -746,7 +759,7 @@ "text/plain": " monitor_id analyzer_id \\\n0 frequent-items-drift-monitor-x2hr9z frequent-items-drift-analyzer-x2hr9z \n1 frequent-items-drift-monitor-mat0jo frequent-items-drift-analyzer-mat0jo \n2 frequent-items-drift-monitor-01rbfl frequent-items-drift-analyzer-01rbfl \n3 frequent-items-drift-monitor-3c0hc2 frequent-items-drift-analyzer-3c0hc2 \n4 frequent-items-drift-monitor-9gmtix frequent-items-drift-analyzer-9gmtix \n5 inferred-data-type-fec5a735-monitor inferred-data-type-fec5a735 \n\n metric column_count segment_count anomaly_count \\\n0 frequent_items 3 1 34 \n1 frequent_items 3 1 34 \n2 frequent_items 3 1 34 \n3 frequent_items 3 1 34 \n4 frequent_items 3 1 34 \n5 inferred_data_type 1 1 14 \n\n max_anomaly_per_column min_anomaly_per_column avg_anomaly_per_column \\\n0 30 1 11 \n1 30 1 11 \n2 30 1 11 \n3 30 1 11 \n4 30 1 11 \n5 14 14 14 \n\n action_count action_targets \n0 1 [email] \n1 2 [email, slack] \n2 1 [email] \n3 1 [email] \n4 1 [email] \n5 2 [email, slack] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
2frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
3frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
4frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
5inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
\n
" }, - "execution_count": 21, + "execution_count": 17, "metadata": {}, "output_type": "execute_result" } diff --git a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py index 6226842..da90c02 100644 --- a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py +++ b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py @@ -187,8 +187,8 @@ def describe_conditions(self) -> str: if len(self.conditions) == 0: return "No conditions related to noise were detected." text = "Conditions that may contribute to noise include:\n" - condition_cols: List[str] = [] for condition in self.conditions: + condition_cols: List[str] = [] text += f"\t* Condition {condition.name} ({condition.summary})" if condition.columns is not None: condition_cols += condition.columns From d212c8b803f8c88d67919364aceeb038b3b949b3 Mon Sep 17 00:00:00 2001 From: Christine Draper Date: Fri, 3 May 2024 16:45:33 -0400 Subject: [PATCH 2/2] Fix column issue properly --- .../customized_diagnoser.ipynb | 114 +++++++++--------- examples/example_notebooks/diagnoser.ipynb | 114 +++++++++--------- .../diagnoser/models/diagnosis_report.py | 6 +- 3 files changed, 119 insertions(+), 115 deletions(-) diff --git a/examples/example_notebooks/customized_diagnoser.ipynb b/examples/example_notebooks/customized_diagnoser.ipynb index 77791f6..8268621 100644 --- a/examples/example_notebooks/customized_diagnoser.ipynb +++ b/examples/example_notebooks/customized_diagnoser.ipynb @@ -17,14 +17,14 @@ }, { "cell_type": "code", - "execution_count": 38, + "execution_count": 1, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-05-03T19:28:01.733321Z", - "start_time": "2024-05-03T19:28:01.731598Z" + "end_time": "2024-05-03T20:44:40.525488Z", + "start_time": "2024-05-03T20:44:40.522741Z" } }, "outputs": [], @@ -47,12 +47,12 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 2, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:09.883007Z", - "start_time": "2024-05-03T19:28:01.757481Z" + "end_time": "2024-05-03T20:44:48.257815Z", + "start_time": "2024-05-03T20:44:40.549686Z" } }, "outputs": [], @@ -84,12 +84,12 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": 3, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:09.888906Z", - "start_time": "2024-05-03T19:28:09.884819Z" + "end_time": "2024-05-03T20:44:48.555773Z", + "start_time": "2024-05-03T20:44:48.259295Z" } }, "outputs": [], @@ -110,11 +110,11 @@ }, { "cell_type": "code", - "execution_count": 41, + "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:28:10.298067Z", - "start_time": "2024-05-03T19:28:09.890844Z" + "end_time": "2024-05-03T20:44:48.967016Z", + "start_time": "2024-05-03T20:44:48.556657Z" } }, "outputs": [ @@ -122,7 +122,7 @@ "data": { "text/plain": "(TimeRange(start=datetime.datetime(2021, 5, 20, 0, 0, tzinfo=datetime.timezone.utc), end=datetime.datetime(2024, 5, 2, 21, 0, tzinfo=datetime.timezone.utc)),\n ,\n '2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z')" }, - "execution_count": 41, + "execution_count": 4, "metadata": {}, "output_type": "execute_result" } @@ -143,12 +143,12 @@ }, { "cell_type": "code", - "execution_count": 42, + "execution_count": 5, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.034284Z", - "start_time": "2024-05-03T19:28:10.300461Z" + "end_time": "2024-05-03T20:44:49.668954Z", + "start_time": "2024-05-03T20:44:48.968682Z" } }, "outputs": [ @@ -157,7 +157,7 @@ "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-x2hr9z \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-bx6m80 \n4 frequent-items-drift-monitor-mat0jo \n5 frequent-items-drift-monitor-01rbfl \n6 frequent-items-drift-monitor-0foigt \n7 frequent-items-drift-monitor-3c0hc2 \n8 frequent-items-drift-monitor-9gmtix \n9 elated-palegreen-jaguar-6432 \n10 inferred-data-type-fec5a735-monitor \n11 unique-ratio-b7b84aee-monitor \n12 missing-values-ratio-35881327-monitor \n13 numerical-drift-monitor-6oxi83 \n14 numerical-drift-monitor-8yugth \n15 continuous-distribution-956a280c-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n4 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n5 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n6 frequent-items-drift-analyzer-0foigt frequent_items 3 \n7 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n8 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n9 elated-palegreen-jaguar-6432-analyzer histogram 9 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 unique-ratio-b7b84aee unique_est_ratio 69 \n12 missing-values-ratio-35881327 count_null_ratio 21 \n13 numerical-drift-analyzer-6oxi83 histogram 1 \n14 numerical-drift-analyzer-8yugth histogram 1 \n15 continuous-distribution-956a280c histogram 1 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 34 30 \n1 1 34 30 \n2 1 34 30 \n3 1 34 30 \n4 1 34 30 \n5 1 34 30 \n6 1 34 30 \n7 1 34 30 \n8 1 34 30 \n9 1 75 19 \n10 1 14 14 \n11 1 104 4 \n12 1 27 3 \n13 1 2 2 \n14 1 2 2 \n15 1 2 2 \n16 1 3 2 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 1 11 1 \n1 1 11 0 \n2 1 11 0 \n3 1 11 0 \n4 1 11 2 \n5 1 11 1 \n6 1 11 0 \n7 1 11 1 \n8 1 11 1 \n9 2 8 0 \n10 14 14 2 \n11 1 1 0 \n12 1 1 0 \n13 2 2 0 \n14 2 2 0 \n15 2 2 0 \n16 1 1 0 \n\n action_targets \n0 [email] \n1 [] \n2 [] \n3 [] \n4 [email, slack] \n5 [email] \n6 [] \n7 [email] \n8 [email] \n9 [] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3134301110[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3134301110[]
3frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3134301110[]
4frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
5frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
6frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3134301110[]
7frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
8frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
9elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram917519280[]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
11unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6911044110[]
12missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211273110[]
13numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1122220[]
14numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1122220[]
15continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1122220[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2132110[]
\n
" }, - "execution_count": 42, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -181,12 +181,12 @@ }, { "cell_type": "code", - "execution_count": 43, + "execution_count": 6, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.043855Z", - "start_time": "2024-05-03T19:28:11.035480Z" + "end_time": "2024-05-03T20:44:49.675298Z", + "start_time": "2024-05-03T20:44:49.669786Z" } }, "outputs": [ @@ -195,7 +195,7 @@ "text/plain": " monitor_id analyzer_id \\\n0 inferred-data-type-fec5a735-monitor inferred-data-type-fec5a735 \n1 missing-values-ratio-35881327-monitor missing-values-ratio-35881327 \n2 unique-ratio-b7b84aee-monitor unique-ratio-b7b84aee \n\n metric failed_count max_failed_per_column \\\n0 inferred_data_type 3 3 \n1 count_null_ratio 1 1 \n2 unique_est_ratio 1 1 \n\n min_failed_per_column avg_failed_per_column action_count action_targets \n0 3 3 2 [email, slack] \n1 1 1 0 [] \n2 1 1 0 [] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetricfailed_countmax_failed_per_columnmin_failed_per_columnavg_failed_per_columnaction_countaction_targets
0inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type33332[email, slack]
1missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio11110[]
2unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio11110[]
\n
" }, - "execution_count": 43, + "execution_count": 6, "metadata": {}, "output_type": "execute_result" } @@ -214,11 +214,11 @@ }, { "cell_type": "code", - "execution_count": 44, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.048310Z", - "start_time": "2024-05-03T19:28:11.045233Z" + "end_time": "2024-05-03T20:44:49.678419Z", + "start_time": "2024-05-03T20:44:49.676141Z" } }, "outputs": [ @@ -226,7 +226,7 @@ "data": { "text/plain": "'frequent-items-drift-monitor-x2hr9z'" }, - "execution_count": 44, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -246,12 +246,12 @@ }, { "cell_type": "code", - "execution_count": 45, + "execution_count": 8, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.052828Z", - "start_time": "2024-05-03T19:28:11.049460Z" + "end_time": "2024-05-03T20:44:49.681587Z", + "start_time": "2024-05-03T20:44:49.679132Z" } }, "outputs": [ @@ -259,7 +259,7 @@ "data": { "text/plain": "Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')])" }, - "execution_count": 45, + "execution_count": 8, "metadata": {}, "output_type": "execute_result" } @@ -279,20 +279,20 @@ }, { "cell_type": "code", - "execution_count": 46, + "execution_count": 9, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.245894Z", - "start_time": "2024-05-03T19:28:11.054041Z" + "end_time": "2024-05-03T20:44:49.878456Z", + "start_time": "2024-05-03T20:44:49.682343Z" } }, "outputs": [ { "data": { - "text/plain": "Analyzer(metadata=Metadata(version=5, schemaVersion=1, updatedTimestamp=1714764383143, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['url', 'desc', , 'issue_d'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None)))" + "text/plain": "Analyzer(metadata=Metadata(version=6, schemaVersion=1, updatedTimestamp=1714769079201, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['issue_d', , 'desc', 'url'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None)))" }, - "execution_count": 46, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -312,12 +312,12 @@ }, { "cell_type": "code", - "execution_count": 47, + "execution_count": 10, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.479972Z", - "start_time": "2024-05-03T19:28:11.246810Z" + "end_time": "2024-05-03T20:44:50.042989Z", + "start_time": "2024-05-03T20:44:49.879144Z" } }, "outputs": [ @@ -326,7 +326,7 @@ "text/plain": " segment total_anomalies batch_count\n0 overall 34 30", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
segmenttotal_anomaliesbatch_count
0overall3430
\n
" }, - "execution_count": 47, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -351,11 +351,11 @@ }, { "cell_type": "code", - "execution_count": 48, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.486357Z", - "start_time": "2024-05-03T19:28:11.483353Z" + "end_time": "2024-05-03T20:44:50.047697Z", + "start_time": "2024-05-03T20:44:50.045178Z" } }, "outputs": [ @@ -363,7 +363,7 @@ "data": { "text/plain": "'overall'" }, - "execution_count": 48, + "execution_count": 11, "metadata": {}, "output_type": "execute_result" } @@ -383,11 +383,11 @@ }, { "cell_type": "code", - "execution_count": 49, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.643588Z", - "start_time": "2024-05-03T19:28:11.487234Z" + "end_time": "2024-05-03T20:44:50.203811Z", + "start_time": "2024-05-03T20:44:50.048446Z" } }, "outputs": [ @@ -396,7 +396,7 @@ "text/plain": " column total_anomalies\n0 issue_d 30\n1 url 3\n2 desc 1\n3 disbursement_method 0\n4 earliest_cr_line 0\n5 emp_length 0\n6 emp_title 0\n7 grade 0\n8 hardship_flag 0\n9 home_ownership 0\n10 initial_list_status 0\n11 last_credit_pull_d 0\n12 last_pymnt_d 0\n13 loan_status 0\n14 next_pymnt_d 0\n15 purpose 0\n16 pymnt_plan 0\n17 sub_grade 0\n18 term 0\n19 title 0\n20 verification_status 0\n21 verification_status_joint 0\n22 addr_state 0\n23 zip_code 0\n24 application_type 0\n25 debt_settlement_flag 0", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
columntotal_anomalies
0issue_d30
1url3
2desc1
3disbursement_method0
4earliest_cr_line0
5emp_length0
6emp_title0
7grade0
8hardship_flag0
9home_ownership0
10initial_list_status0
11last_credit_pull_d0
12last_pymnt_d0
13loan_status0
14next_pymnt_d0
15purpose0
16pymnt_plan0
17sub_grade0
18term0
19title0
20verification_status0
21verification_status_joint0
22addr_state0
23zip_code0
24application_type0
25debt_settlement_flag0
\n
" }, - "execution_count": 49, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -416,11 +416,11 @@ }, { "cell_type": "code", - "execution_count": 50, + "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:28:11.647546Z", - "start_time": "2024-05-03T19:28:11.644423Z" + "end_time": "2024-05-03T20:44:50.207865Z", + "start_time": "2024-05-03T20:44:50.204706Z" } }, "outputs": [ @@ -428,7 +428,7 @@ "data": { "text/plain": "['issue_d',\n 'url',\n 'desc',\n 'disbursement_method',\n 'earliest_cr_line',\n 'emp_length',\n 'emp_title',\n 'grade',\n 'hardship_flag',\n 'home_ownership',\n 'initial_list_status',\n 'last_credit_pull_d',\n 'last_pymnt_d',\n 'loan_status',\n 'next_pymnt_d',\n 'purpose',\n 'pymnt_plan',\n 'sub_grade',\n 'term',\n 'title',\n 'verification_status',\n 'verification_status_joint',\n 'addr_state',\n 'zip_code',\n 'application_type',\n 'debt_settlement_flag']" }, - "execution_count": 50, + "execution_count": 13, "metadata": {}, "output_type": "execute_result" } @@ -447,11 +447,11 @@ }, { "cell_type": "code", - "execution_count": 51, + "execution_count": 14, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:28:15.881932Z", - "start_time": "2024-05-03T19:28:11.648505Z" + "end_time": "2024-05-03T20:44:54.750266Z", + "start_time": "2024-05-03T20:44:50.208637Z" } }, "outputs": [], @@ -463,12 +463,12 @@ }, { "cell_type": "code", - "execution_count": 52, + "execution_count": 15, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:28:15.890154Z", - "start_time": "2024-05-03T19:28:15.883421Z" + "end_time": "2024-05-03T20:44:54.758004Z", + "start_time": "2024-05-03T20:44:54.751369Z" } }, "outputs": [ @@ -505,7 +505,7 @@ "\n", "Conditions that may contribute to noise include:\n", "\t* Condition changing_discrete (many values are unique across batches) for 3 columns: ['desc', 'issue_d', 'url']\n", - "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 4 columns: ['desc', 'issue_d', 'url', 'desc']\n", + "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", "| | 0 |\n", diff --git a/examples/example_notebooks/diagnoser.ipynb b/examples/example_notebooks/diagnoser.ipynb index 01d9b06..7118071 100644 --- a/examples/example_notebooks/diagnoser.ipynb +++ b/examples/example_notebooks/diagnoser.ipynb @@ -18,14 +18,14 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 2, "metadata": { "pycharm": { "name": "#%%\n" }, "ExecuteTime": { - "end_time": "2024-05-03T19:26:07.122244Z", - "start_time": "2024-05-03T19:26:07.119688Z" + "end_time": "2024-05-03T20:44:23.177189Z", + "start_time": "2024-05-03T20:44:23.175308Z" } }, "outputs": [], @@ -48,11 +48,11 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 3, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:15.399941Z", - "start_time": "2024-05-03T19:26:07.131180Z" + "end_time": "2024-05-03T20:44:30.824990Z", + "start_time": "2024-05-03T20:44:23.209328Z" } }, "outputs": [], @@ -82,11 +82,11 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 4, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:15.406192Z", - "start_time": "2024-05-03T19:26:15.402128Z" + "end_time": "2024-05-03T20:44:31.391534Z", + "start_time": "2024-05-03T20:44:30.827501Z" } }, "outputs": [], @@ -106,19 +106,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 5, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:22.706626Z", - "start_time": "2024-05-03T19:26:15.407624Z" + "end_time": "2024-05-03T20:44:38.796330Z", + "start_time": "2024-05-03T20:44:31.392151Z" } }, "outputs": [ { "data": { - "text/plain": "MonitorDiagnosisReport(orgId='org-0', datasetId='model-0', analyzerId='frequent-items-drift-analyzer-x2hr9z', interval='2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z', expectedBatchCount=0, diagnosticData=DiagnosticDataSummary(diagnosticSegment=Segment(tags=[]), diagnosticProfile=ProfileSummary(minRowName='desc', minRowCount=1674392, maxRowName='desc', maxRowCount=1674392), diagnosticBatches=BatchesSummary(minBatchName='desc', minBatchCount=30, maxBatchName='desc', maxBatchCount=30), analysisResults=AnalysisResultsSummary(results=ResultRecord(diagnosedColumnCount=26, batchCount=30), failures=FailureRecord(totalFailuresCount=0, maxFailuresCount=0, meanFailuresCount=0, byColumnCount=[], byTypeCount=[]), anomalies=AnomalyRecord(totalAnomalyCount=34, maxAnomalyCount=30, meanAnomalyCount=11, batchCount=30, byColumnCount=[NamedCount(name='issue_d', count=30), NamedCount(name='url', count=3), NamedCount(name='desc', count=1)], byColumnBatchCount=[NamedCount(name='addr_state', count=30), NamedCount(name='application_type', count=30), NamedCount(name='debt_settlement_flag', count=30), NamedCount(name='desc', count=2), NamedCount(name='disbursement_method', count=30), NamedCount(name='earliest_cr_line', count=30), NamedCount(name='emp_length', count=30), NamedCount(name='emp_title', count=30), NamedCount(name='grade', count=30), NamedCount(name='hardship_flag', count=30), NamedCount(name='home_ownership', count=30), NamedCount(name='initial_list_status', count=30), NamedCount(name='issue_d', count=30), NamedCount(name='last_credit_pull_d', count=30), NamedCount(name='last_pymnt_d', count=30), NamedCount(name='loan_status', count=30), NamedCount(name='next_pymnt_d', count=30), NamedCount(name='purpose', count=30), NamedCount(name='pymnt_plan', count=30), NamedCount(name='sub_grade', count=30), NamedCount(name='term', count=30), NamedCount(name='title', count=30), NamedCount(name='url', count=30), NamedCount(name='verification_status', count=30), NamedCount(name='verification_status_joint', count=30), NamedCount(name='zip_code', count=30)])), targetedColumnCount=27), qualityIssues=[QualityIssueRecord(name='analyzer_changed', description='Analyzer changed within the diagnostic interval', detectors=['stale_analysis', 'changing_discrete', 'low_drift_threshold', 'missing_baseline_batches', 'small_nonnull_batches'])], conditions=[ConditionRecord(columns=['desc', 'issue_d', 'url'], info=None, summary='many values are unique across batches', name='changing_discrete'), ConditionRecord(columns=['desc'], info=None, summary='less than 500 non-null records in 50% or more of the batches', name='small_nonnull_batches')], monitor=Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')]), analyzer=Analyzer(metadata=Metadata(version=4, schemaVersion=1, updatedTimestamp=1714763295788, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['desc', , 'issue_d', 'url'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None))), analyzedColumnCount=26)" + "text/plain": "MonitorDiagnosisReport(orgId='org-0', datasetId='model-0', analyzerId='frequent-items-drift-analyzer-x2hr9z', interval='2024-04-02T00:00:00.000Z/2024-05-02T00:00:00.000Z', expectedBatchCount=0, diagnosticData=DiagnosticDataSummary(diagnosticSegment=Segment(tags=[]), diagnosticProfile=ProfileSummary(minRowName='desc', minRowCount=1674392, maxRowName='desc', maxRowCount=1674392), diagnosticBatches=BatchesSummary(minBatchName='desc', minBatchCount=30, maxBatchName='desc', maxBatchCount=30), analysisResults=AnalysisResultsSummary(results=ResultRecord(diagnosedColumnCount=26, batchCount=30), failures=FailureRecord(totalFailuresCount=0, maxFailuresCount=0, meanFailuresCount=0, byColumnCount=[], byTypeCount=[]), anomalies=AnomalyRecord(totalAnomalyCount=34, maxAnomalyCount=30, meanAnomalyCount=11, batchCount=30, byColumnCount=[NamedCount(name='issue_d', count=30), NamedCount(name='url', count=3), NamedCount(name='desc', count=1)], byColumnBatchCount=[NamedCount(name='addr_state', count=30), NamedCount(name='application_type', count=30), NamedCount(name='debt_settlement_flag', count=30), NamedCount(name='desc', count=2), NamedCount(name='disbursement_method', count=30), NamedCount(name='earliest_cr_line', count=30), NamedCount(name='emp_length', count=30), NamedCount(name='emp_title', count=30), NamedCount(name='grade', count=30), NamedCount(name='hardship_flag', count=30), NamedCount(name='home_ownership', count=30), NamedCount(name='initial_list_status', count=30), NamedCount(name='issue_d', count=30), NamedCount(name='last_credit_pull_d', count=30), NamedCount(name='last_pymnt_d', count=30), NamedCount(name='loan_status', count=30), NamedCount(name='next_pymnt_d', count=30), NamedCount(name='purpose', count=30), NamedCount(name='pymnt_plan', count=30), NamedCount(name='sub_grade', count=30), NamedCount(name='term', count=30), NamedCount(name='title', count=30), NamedCount(name='url', count=30), NamedCount(name='verification_status', count=30), NamedCount(name='verification_status_joint', count=30), NamedCount(name='zip_code', count=30)])), targetedColumnCount=27), qualityIssues=[QualityIssueRecord(name='analyzer_changed', description='Analyzer changed within the diagnostic interval', detectors=['stale_analysis', 'changing_discrete', 'low_drift_threshold', 'missing_baseline_batches', 'small_nonnull_batches'])], conditions=[ConditionRecord(columns=['desc', 'issue_d', 'url'], info=None, summary='many values are unique across batches', name='changing_discrete'), ConditionRecord(columns=['desc'], info=None, summary='less than 500 non-null records in 50% or more of the batches', name='small_nonnull_batches')], monitor=Monitor(metadata=Metadata(version=1, schemaVersion=1, updatedTimestamp=1705536890090, author='user_809f777d_3741_4991_8ced_42f09b883ac7', description=None), id='frequent-items-drift-monitor-x2hr9z', displayName=None, tags=None, analyzerIds=['frequent-items-drift-analyzer-x2hr9z'], schedule=ImmediateSchedule(type='immediate'), disabled=False, severity=3, mode=DigestMode(type='DIGEST', filter=None, creationTimeOffset=None, datasetTimestampOffset=None, groupBy=None), actions=[GlobalAction(type='global', target='email')]), analyzer=Analyzer(metadata=Metadata(version=5, schemaVersion=1, updatedTimestamp=1714764383143, author='user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98', description=None), id='frequent-items-drift-analyzer-x2hr9z', displayName=None, tags=None, schedule=FixedCadenceSchedule(type='fixed', cadence=, exclusionRanges=None), disabled=None, disableTargetRollup=None, targetMatrix=ColumnMatrix(segments=[Segment(tags=[])], type=, include=[], exclude=['url', 'desc', , 'issue_d'], profileId=None), dataReadinessDuration=None, batchCoolDownPeriod=None, backfillGracePeriodDuration=None, config=DriftConfig(schemaVersion=None, params=None, metric=, type=, algorithm='hellinger', threshold=0.7, minBatchSize=1, baseline=TrailingWindowBaseline(datasetId=None, inheritSegment=None, type=, size=7, offset=None, exclusionRanges=None))), analyzedColumnCount=26)" }, - "execution_count": 8, + "execution_count": 5, "metadata": {}, "output_type": "execute_result" } @@ -130,11 +130,11 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 6, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:22.723005Z", - "start_time": "2024-05-03T19:26:22.709411Z" + "end_time": "2024-05-03T20:44:38.811834Z", + "start_time": "2024-05-03T20:44:38.798918Z" } }, "outputs": [ @@ -174,10 +174,12 @@ "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", - "| | 0 |\n", - "|:-----|----:|\n", - "| desc | 1 |\n", - "Accounting for 1 anomalies out of 34\n" + "| | 0 |\n", + "|:--------|----:|\n", + "| issue_d | 30 |\n", + "| url | 3 |\n", + "| desc | 1 |\n", + "Accounting for 34 anomalies out of 34\n" ] } ], @@ -199,11 +201,11 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 7, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:22.728615Z", - "start_time": "2024-05-03T19:26:22.724414Z" + "end_time": "2024-05-03T20:44:38.816760Z", + "start_time": "2024-05-03T20:44:38.813018Z" } }, "outputs": [], @@ -214,11 +216,11 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 8, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:22.734842Z", - "start_time": "2024-05-03T19:26:22.729870Z" + "end_time": "2024-05-03T20:44:38.822615Z", + "start_time": "2024-05-03T20:44:38.817801Z" } }, "outputs": [ @@ -457,9 +459,9 @@ " },\n", " \"analyzer\": {\n", " \"metadata\": {\n", - " \"version\": 4,\n", + " \"version\": 5,\n", " \"schemaVersion\": 1,\n", - " \"updatedTimestamp\": 1714763295788,\n", + " \"updatedTimestamp\": 1714764383143,\n", " \"author\": \"user_c9292ec40407f7b580f0a2c90745ebfba2b9e6ea81c848ef944d31e48a45f98\",\n", " \"description\": null\n", " },\n", @@ -484,10 +486,10 @@ " \"group:discrete\"\n", " ],\n", " \"exclude\": [\n", + " \"url\",\n", " \"desc\",\n", " \"group:output\",\n", - " \"issue_d\",\n", - " \"url\"\n", + " \"issue_d\"\n", " ],\n", " \"profileId\": null\n", " },\n", @@ -536,11 +538,11 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 9, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:22.742280Z", - "start_time": "2024-05-03T19:26:22.736043Z" + "end_time": "2024-05-03T20:44:38.832653Z", + "start_time": "2024-05-03T20:44:38.823912Z" } }, "outputs": [ @@ -578,11 +580,11 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 10, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:22.745719Z", - "start_time": "2024-05-03T19:26:22.743195Z" + "end_time": "2024-05-03T20:44:38.835957Z", + "start_time": "2024-05-03T20:44:38.833756Z" } }, "outputs": [ @@ -601,11 +603,11 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 11, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:23.502634Z", - "start_time": "2024-05-03T19:26:22.746622Z" + "end_time": "2024-05-03T20:44:39.994817Z", + "start_time": "2024-05-03T20:44:38.836984Z" } }, "outputs": [ @@ -640,11 +642,11 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 12, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:23.525937Z", - "start_time": "2024-05-03T19:26:23.508893Z" + "end_time": "2024-05-03T20:44:40.011819Z", + "start_time": "2024-05-03T20:44:39.998108Z" } }, "outputs": [ @@ -653,7 +655,7 @@ "text/plain": " monitor_id \\\n0 frequent-items-drift-monitor-x2hr9z \n1 discrete-distribution-22ef37c9-monitor \n2 smoggy-chartreuse-owl-3387 \n3 frequent-items-drift-monitor-bx6m80 \n4 frequent-items-drift-monitor-mat0jo \n5 frequent-items-drift-monitor-01rbfl \n6 frequent-items-drift-monitor-0foigt \n7 frequent-items-drift-monitor-3c0hc2 \n8 frequent-items-drift-monitor-9gmtix \n9 elated-palegreen-jaguar-6432 \n10 inferred-data-type-fec5a735-monitor \n11 unique-ratio-b7b84aee-monitor \n12 missing-values-ratio-35881327-monitor \n13 numerical-drift-monitor-6oxi83 \n14 numerical-drift-monitor-8yugth \n15 continuous-distribution-956a280c-monitor \n16 dull-floralwhite-raven-5521 \n\n analyzer_id metric column_count \\\n0 frequent-items-drift-analyzer-x2hr9z frequent_items 3 \n1 discrete-distribution-22ef37c9 frequent_items 3 \n2 smoggy-chartreuse-owl-3387-analyzer frequent_items 3 \n3 frequent-items-drift-analyzer-bx6m80 frequent_items 3 \n4 frequent-items-drift-analyzer-mat0jo frequent_items 3 \n5 frequent-items-drift-analyzer-01rbfl frequent_items 3 \n6 frequent-items-drift-analyzer-0foigt frequent_items 3 \n7 frequent-items-drift-analyzer-3c0hc2 frequent_items 3 \n8 frequent-items-drift-analyzer-9gmtix frequent_items 3 \n9 elated-palegreen-jaguar-6432-analyzer histogram 9 \n10 inferred-data-type-fec5a735 inferred_data_type 1 \n11 unique-ratio-b7b84aee unique_est_ratio 69 \n12 missing-values-ratio-35881327 count_null_ratio 21 \n13 numerical-drift-analyzer-6oxi83 histogram 1 \n14 numerical-drift-analyzer-8yugth histogram 1 \n15 continuous-distribution-956a280c histogram 1 \n16 dull-floralwhite-raven-5521-analyzer count 2 \n\n segment_count anomaly_count max_anomaly_per_column \\\n0 1 34 30 \n1 1 34 30 \n2 1 34 30 \n3 1 34 30 \n4 1 34 30 \n5 1 34 30 \n6 1 34 30 \n7 1 34 30 \n8 1 34 30 \n9 1 75 19 \n10 1 14 14 \n11 1 104 4 \n12 1 27 3 \n13 1 2 2 \n14 1 2 2 \n15 1 2 2 \n16 1 3 2 \n\n min_anomaly_per_column avg_anomaly_per_column action_count \\\n0 1 11 1 \n1 1 11 0 \n2 1 11 0 \n3 1 11 0 \n4 1 11 2 \n5 1 11 1 \n6 1 11 0 \n7 1 11 1 \n8 1 11 1 \n9 2 8 0 \n10 14 14 2 \n11 1 1 0 \n12 1 1 0 \n13 2 2 0 \n14 2 2 0 \n15 2 2 0 \n16 1 1 0 \n\n action_targets \n0 [email] \n1 [] \n2 [] \n3 [] \n4 [email, slack] \n5 [email] \n6 [] \n7 [email] \n8 [email] \n9 [] \n10 [email, slack] \n11 [] \n12 [] \n13 [] \n14 [] \n15 [] \n16 [] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1discrete-distribution-22ef37c9-monitordiscrete-distribution-22ef37c9frequent_items3134301110[]
2smoggy-chartreuse-owl-3387smoggy-chartreuse-owl-3387-analyzerfrequent_items3134301110[]
3frequent-items-drift-monitor-bx6m80frequent-items-drift-analyzer-bx6m80frequent_items3134301110[]
4frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
5frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
6frequent-items-drift-monitor-0foigtfrequent-items-drift-analyzer-0foigtfrequent_items3134301110[]
7frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
8frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
9elated-palegreen-jaguar-6432elated-palegreen-jaguar-6432-analyzerhistogram917519280[]
10inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
11unique-ratio-b7b84aee-monitorunique-ratio-b7b84aeeunique_est_ratio6911044110[]
12missing-values-ratio-35881327-monitormissing-values-ratio-35881327count_null_ratio211273110[]
13numerical-drift-monitor-6oxi83numerical-drift-analyzer-6oxi83histogram1122220[]
14numerical-drift-monitor-8yugthnumerical-drift-analyzer-8yugthhistogram1122220[]
15continuous-distribution-956a280c-monitorcontinuous-distribution-956a280chistogram1122220[]
16dull-floralwhite-raven-5521dull-floralwhite-raven-5521-analyzercount2132110[]
\n
" }, - "execution_count": 15, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -676,11 +678,11 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 13, "metadata": { "ExecuteTime": { - "end_time": "2024-05-03T19:26:28.850168Z", - "start_time": "2024-05-03T19:26:23.527095Z" + "end_time": "2024-05-03T20:44:45.451258Z", + "start_time": "2024-05-03T20:44:40.012742Z" } }, "outputs": [ @@ -720,10 +722,12 @@ "\t* Condition small_nonnull_batches (less than 500 non-null records in 50% or more of the batches) for 1 columns: ['desc']\n", "\n", "Anomalies for columns with these conditions:\n", - "| | 0 |\n", - "|:-----|----:|\n", - "| desc | 1 |\n", - "Accounting for 1 anomalies out of 34\n" + "| | 0 |\n", + "|:--------|----:|\n", + "| issue_d | 30 |\n", + "| url | 3 |\n", + "| desc | 1 |\n", + "Accounting for 34 anomalies out of 34\n" ] } ], @@ -745,12 +749,12 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "metadata": { "collapsed": false, "ExecuteTime": { - "end_time": "2024-05-03T19:26:28.860332Z", - "start_time": "2024-05-03T19:26:28.851199Z" + "end_time": "2024-05-03T20:44:45.461470Z", + "start_time": "2024-05-03T20:44:45.452512Z" } }, "outputs": [ @@ -759,7 +763,7 @@ "text/plain": " monitor_id analyzer_id \\\n0 frequent-items-drift-monitor-x2hr9z frequent-items-drift-analyzer-x2hr9z \n1 frequent-items-drift-monitor-mat0jo frequent-items-drift-analyzer-mat0jo \n2 frequent-items-drift-monitor-01rbfl frequent-items-drift-analyzer-01rbfl \n3 frequent-items-drift-monitor-3c0hc2 frequent-items-drift-analyzer-3c0hc2 \n4 frequent-items-drift-monitor-9gmtix frequent-items-drift-analyzer-9gmtix \n5 inferred-data-type-fec5a735-monitor inferred-data-type-fec5a735 \n\n metric column_count segment_count anomaly_count \\\n0 frequent_items 3 1 34 \n1 frequent_items 3 1 34 \n2 frequent_items 3 1 34 \n3 frequent_items 3 1 34 \n4 frequent_items 3 1 34 \n5 inferred_data_type 1 1 14 \n\n max_anomaly_per_column min_anomaly_per_column avg_anomaly_per_column \\\n0 30 1 11 \n1 30 1 11 \n2 30 1 11 \n3 30 1 11 \n4 30 1 11 \n5 14 14 14 \n\n action_count action_targets \n0 1 [email] \n1 2 [email, slack] \n2 1 [email] \n3 1 [email] \n4 1 [email] \n5 2 [email, slack] ", "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
monitor_idanalyzer_idmetriccolumn_countsegment_countanomaly_countmax_anomaly_per_columnmin_anomaly_per_columnavg_anomaly_per_columnaction_countaction_targets
0frequent-items-drift-monitor-x2hr9zfrequent-items-drift-analyzer-x2hr9zfrequent_items3134301111[email]
1frequent-items-drift-monitor-mat0jofrequent-items-drift-analyzer-mat0jofrequent_items3134301112[email, slack]
2frequent-items-drift-monitor-01rbflfrequent-items-drift-analyzer-01rbflfrequent_items3134301111[email]
3frequent-items-drift-monitor-3c0hc2frequent-items-drift-analyzer-3c0hc2frequent_items3134301111[email]
4frequent-items-drift-monitor-9gmtixfrequent-items-drift-analyzer-9gmtixfrequent_items3134301111[email]
5inferred-data-type-fec5a735-monitorinferred-data-type-fec5a735inferred_data_type11141414142[email, slack]
\n
" }, - "execution_count": 17, + "execution_count": 14, "metadata": {}, "output_type": "execute_result" } diff --git a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py index da90c02..20a6d24 100644 --- a/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py +++ b/whylabs_toolkit/monitor/diagnoser/models/diagnosis_report.py @@ -186,14 +186,14 @@ def describe_quality_issues(self) -> str: def describe_conditions(self) -> str: if len(self.conditions) == 0: return "No conditions related to noise were detected." + condition_cols: List[str] = [] text = "Conditions that may contribute to noise include:\n" for condition in self.conditions: - condition_cols: List[str] = [] text += f"\t* Condition {condition.name} ({condition.summary})" if condition.columns is not None: condition_cols += condition.columns - col_text = describe_truncated_list(condition_cols, 10) - text += f" for {len(condition_cols)} columns: {col_text}" + col_text = describe_truncated_list(condition.columns, 10) + text += f" for {len(condition.columns)} columns: {col_text}" text += "\n" cols = pd.Series(condition_cols).unique()