From c3075c8a4be55df5468d8f47a8fc424dd318346d Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Wed, 27 Dec 2023 21:08:56 +0530 Subject: [PATCH 01/10] pydocs-update1 --- h2o-bindings/bin/custom/python/gen_dt.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/h2o-bindings/bin/custom/python/gen_dt.py b/h2o-bindings/bin/custom/python/gen_dt.py index 1b781ad19880..98d867e42d64 100644 --- a/h2o-bindings/bin/custom/python/gen_dt.py +++ b/h2o-bindings/bin/custom/python/gen_dt.py @@ -6,3 +6,5 @@ Builds a Decision Tree (DT) on a preprocessed dataset. """ ) +examples = dict( + \ No newline at end of file From a484197982169cb1250a4656106606ceff7e6264 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Thu, 28 Dec 2023 11:13:00 +0530 Subject: [PATCH 02/10] pydocs-update2 --- h2o-bindings/bin/custom/python/gen_dt.py | 49 +++++++++++++++++++++++- 1 file changed, 48 insertions(+), 1 deletion(-) diff --git a/h2o-bindings/bin/custom/python/gen_dt.py b/h2o-bindings/bin/custom/python/gen_dt.py index 98d867e42d64..e0e4574286f1 100644 --- a/h2o-bindings/bin/custom/python/gen_dt.py +++ b/h2o-bindings/bin/custom/python/gen_dt.py @@ -7,4 +7,51 @@ """ ) examples = dict( - \ No newline at end of file + algorithm_params=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""" + data_fraction=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> 
h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, data_fraction=0.7) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""" + net_information_thresholdn=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() + From 70eb94de700633a171f0ff6cd641bc14ee39bbb5 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Thu, 28 Dec 2023 11:36:40 +0530 Subject: [PATCH 03/10] pydocs-update3 --- h2o-bindings/bin/custom/python/gen_dt.py | 88 +++++++++++++++++++++++- 1 file changed, 85 insertions(+), 3 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_dt.py b/h2o-bindings/bin/custom/python/gen_dt.py index e0e4574286f1..dbf75bf48f05 100644 --- a/h2o-bindings/bin/custom/python/gen_dt.py +++ b/h2o-bindings/bin/custom/python/gen_dt.py @@ -22,7 +22,7 @@ >>> ig = H2OInfogram(protected_columns=pcols) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() -""" +""", data_fraction=""" >>> import h2o >>> from h2o.estimators.infogram import H2OInfogram @@ -38,8 +38,8 @@ 
>>> ig = H2OInfogram(protected_columns=pcols, data_fraction=0.7) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() -""" - net_information_thresholdn=""" +""", + net_information_threshold=""" >>> import h2o >>> from h2o.estimators.infogram import H2OInfogram >>> h2o.init() @@ -54,4 +54,86 @@ >>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-1.0) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() +""", + relevance_index_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + safety_index_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + safety_index_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 
'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + top_n_features=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, top_n_features=30) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + total_information_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", +) From 8ffbafb4e204dc4617300cce0b8858f9495e1539 Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Wed, 3 Jan 2024 11:37:31 -0600 Subject: [PATCH 04/10] ht/switched file over; minor fixes; built gradle --- h2o-bindings/bin/custom/python/gen_dt.py | 131 ------------------ 
.../bin/custom/python/gen_infogram.py | 116 ++++++++++++++++ h2o-py/h2o/estimators/infogram.py | 119 ++++++++++++++++ 3 files changed, 235 insertions(+), 131 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_dt.py b/h2o-bindings/bin/custom/python/gen_dt.py index dbf75bf48f05..1b781ad19880 100644 --- a/h2o-bindings/bin/custom/python/gen_dt.py +++ b/h2o-bindings/bin/custom/python/gen_dt.py @@ -6,134 +6,3 @@ Builds a Decision Tree (DT) on a preprocessed dataset. """ ) -examples = dict( - algorithm_params=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", - data_fraction=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, data_fraction=0.7) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", - net_information_threshold=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': 
"enum", 'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-1.0) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", - relevance_index_threshold=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-1.0) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", - safety_index_threshold=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", - safety_index_threshold=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 
'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", - top_n_features=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, top_n_features=30) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", - total_information_threshold=""" ->>> import h2o ->>> from h2o.estimators.infogram import H2OInfogram ->>> h2o.init() ->>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" ->>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} ->>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) ->>> y = "default_payment_next_month" ->>> x = train.columns ->>> x.remove(y) ->>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-1.0) ->>> ig.train(y=y, x=x, training_frame=train) ->>> ig.plot() -""", -) - diff --git a/h2o-bindings/bin/custom/python/gen_infogram.py b/h2o-bindings/bin/custom/python/gen_infogram.py index 70fa33c9d27d..8e155956ccfb 100644 --- a/h2o-bindings/bin/custom/python/gen_infogram.py +++ b/h2o-bindings/bin/custom/python/gen_infogram.py @@ -435,3 +435,119 @@ def 
train_subset_models(self, model_class, y, training_frame, test_frame, protec feature set. Admissible models are also less susceptible to overfitting and train faster, while providing similar accuracy as models built using all available features. """ ) +examples = dict( + algorithm_params=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + data_fraction=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, data_fraction=0.7) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + net_information_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> 
pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + relevance_index_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + safety_index_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + top_n_features=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig 
= H2OInfogram(protected_columns=pcols, top_n_features=30) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""", + total_information_threshold=""" +>>> import h2o +>>> from h2o.estimators.infogram import H2OInfogram +>>> h2o.init() +>>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" +>>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} +>>> df = h2o.import_file(path=f, col_types=col_types) +>>> train, test = df.split_frame(seed=1) +>>> y = "default_payment_next_month" +>>> x = train.columns +>>> x.remove(y) +>>> pcols = ["SEX", "MARRIAGE", "AGE"] +>>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-1.0) +>>> ig.train(y=y, x=x, training_frame=train) +>>> ig.plot() +""" +) + + diff --git a/h2o-py/h2o/estimators/infogram.py b/h2o-py/h2o/estimators/infogram.py index 6fe54b08c1a7..e3465e065444 100644 --- a/h2o-py/h2o/estimators/infogram.py +++ b/h2o-py/h2o/estimators/infogram.py @@ -700,6 +700,23 @@ def algorithm_params(self): Customized parameters for the machine learning algorithm specified in the algorithm parameter. Type: ``dict``. 
+ + :examples: + + >>> import h2o + >>> from h2o.estimators.infogram import H2OInfogram + >>> h2o.init() + >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" + >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} + >>> df = h2o.import_file(path=f, col_types=col_types) + >>> train, test = df.split_frame(seed=1) + >>> y = "default_payment_next_month" + >>> x = train.columns + >>> x.remove(y) + >>> pcols = ["SEX", "MARRIAGE", "AGE"] + >>> ig = H2OInfogram(protected_columns=pcols) + >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.plot() """ if self._parms.get("algorithm_params") != None: algorithm_params_dict = ast.literal_eval(self._parms.get("algorithm_params")) @@ -745,6 +762,23 @@ def total_information_threshold(self): information is the x-axis of the Core Infogram. Default is -1 which gets set to 0.1. Type: ``float``, defaults to ``-1.0``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.infogram import H2OInfogram + >>> h2o.init() + >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" + >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} + >>> df = h2o.import_file(path=f, col_types=col_types) + >>> train, test = df.split_frame(seed=1) + >>> y = "default_payment_next_month" + >>> x = train.columns + >>> x.remove(y) + >>> pcols = ["SEX", "MARRIAGE", "AGE"] + >>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-1.0) + >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.plot() """ return self._parms.get("total_information_threshold") @@ -768,6 +802,23 @@ def net_information_threshold(self): the y-axis of the Core Infogram. Default is -1 which gets set to 0.1. Type: ``float``, defaults to ``-1.0``. 
+ + :examples: + + >>> import h2o + >>> from h2o.estimators.infogram import H2OInfogram + >>> h2o.init() + >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" + >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} + >>> df = h2o.import_file(path=f, col_types=col_types) + >>> train, test = df.split_frame(seed=1) + >>> y = "default_payment_next_month" + >>> x = train.columns + >>> x.remove(y) + >>> pcols = ["SEX", "MARRIAGE", "AGE"] + >>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-1.0) + >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.plot() """ return self._parms.get("net_information_threshold") @@ -792,6 +843,23 @@ def relevance_index_threshold(self): which gets set to 0.1. Type: ``float``, defaults to ``-1.0``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.infogram import H2OInfogram + >>> h2o.init() + >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" + >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} + >>> df = h2o.import_file(path=f, col_types=col_types) + >>> train, test = df.split_frame(seed=1) + >>> y = "default_payment_next_month" + >>> x = train.columns + >>> x.remove(y) + >>> pcols = ["SEX", "MARRIAGE", "AGE"] + >>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-1.0) + >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.plot() """ return self._parms.get("relevance_index_threshold") @@ -816,6 +884,23 @@ def safety_index_threshold(self): gets set to 0.1. Type: ``float``, defaults to ``-1.0``. 
+ + :examples: + + >>> import h2o + >>> from h2o.estimators.infogram import H2OInfogram + >>> h2o.init() + >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" + >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} + >>> df = h2o.import_file(path=f, col_types=col_types) + >>> train, test = df.split_frame(seed=1) + >>> y = "default_payment_next_month" + >>> x = train.columns + >>> x.remove(y) + >>> pcols = ["SEX", "MARRIAGE", "AGE"] + >>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) + >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.plot() """ return self._parms.get("safety_index_threshold") @@ -837,6 +922,23 @@ def data_fraction(self): and less than or equal to 1.0 is acceptable. Type: ``float``, defaults to ``1.0``. + + :examples: + + >>> import h2o + >>> from h2o.estimators.infogram import H2OInfogram + >>> h2o.init() + >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" + >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} + >>> df = h2o.import_file(path=f, col_types=col_types) + >>> train, test = df.split_frame(seed=1) + >>> y = "default_payment_next_month" + >>> x = train.columns + >>> x.remove(y) + >>> pcols = ["SEX", "MARRIAGE", "AGE"] + >>> ig = H2OInfogram(protected_columns=pcols, data_fraction=0.7) + >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.plot() """ return self._parms.get("data_fraction") @@ -852,6 +954,23 @@ def top_n_features(self): importance, and the top N are evaluated. Defaults to 50. Type: ``int``, defaults to ``50``. 
+ + :examples: + + >>> import h2o + >>> from h2o.estimators.infogram import H2OInfogram + >>> h2o.init() + >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" + >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} + >>> df = h2o.import_file(path=f, col_types=col_types) + >>> train, test = df.split_frame(seed=1) + >>> y = "default_payment_next_month" + >>> x = train.columns + >>> x.remove(y) + >>> pcols = ["SEX", "MARRIAGE", "AGE"] + >>> ig = H2OInfogram(protected_columns=pcols, top_n_features=30) + >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.plot() """ return self._parms.get("top_n_features") From c7ca607326c25e875844276dc2a6c09ca64f412a Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Fri, 5 Jan 2024 07:40:52 -0600 Subject: [PATCH 05/10] ht/rerun checks --- h2o-bindings/bin/custom/python/gen_infogram.py | 2 +- h2o-py/h2o/estimators/infogram.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_infogram.py b/h2o-bindings/bin/custom/python/gen_infogram.py index 8e155956ccfb..4273a0fd3c0a 100644 --- a/h2o-bindings/bin/custom/python/gen_infogram.py +++ b/h2o-bindings/bin/custom/python/gen_infogram.py @@ -21,7 +21,7 @@ def _extract_x_from_model(self): def plot(self, train=True, valid=False, xval=False, figsize=(10, 10), title="Infogram", legend_on=False, server=False): """ - Plot the infogram. By default, it will plot the infogram calculated from training dataset. + Plot the infogram. By default, it will plot the infogram calculated from training dataset. 
Note that the frame rel_cmi_frame contains the following columns: - 0: predictor names - 1: admissible diff --git a/h2o-py/h2o/estimators/infogram.py b/h2o-py/h2o/estimators/infogram.py index e3465e065444..47d69ee630a5 100644 --- a/h2o-py/h2o/estimators/infogram.py +++ b/h2o-py/h2o/estimators/infogram.py @@ -993,7 +993,7 @@ def _extract_x_from_model(self): def plot(self, train=True, valid=False, xval=False, figsize=(10, 10), title="Infogram", legend_on=False, server=False): """ - Plot the infogram. By default, it will plot the infogram calculated from training dataset. + Plot the infogram. By default, it will plot the infogram calculated from training dataset. Note that the frame rel_cmi_frame contains the following columns: - 0: predictor names - 1: admissible From f410ca3b25592c3a81e64163995ee194aedcde45 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Wed, 20 Mar 2024 11:20:18 +0530 Subject: [PATCH 06/10] review-comments-suggestions --- .../bin/custom/python/gen_infogram.py | 23 ++++++++++--------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_infogram.py b/h2o-bindings/bin/custom/python/gen_infogram.py index 4273a0fd3c0a..585df2afdf33 100644 --- a/h2o-bindings/bin/custom/python/gen_infogram.py +++ b/h2o-bindings/bin/custom/python/gen_infogram.py @@ -443,7 +443,8 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) +>>> algorithm_params = ig.algorithm_params +>>> train = df.split_frame(seed=1) >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) @@ -459,7 +460,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, 
protec >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) +>>> train = df.split_frame(seed=1) >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) @@ -475,12 +476,12 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) +>>> train = df.split_frame(seed=1) >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-1.0) +>>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-2.0) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -491,12 +492,12 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) +>>> train = df.split_frame(seed=1) >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-1.0) +>>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-3.0) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -507,12 +508,12 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> f 
= "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) +>>> train = df.split_frame(seed=1) >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) +>>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-2.0) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -523,7 +524,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) +>>> train = df.split_frame(seed=1) >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) @@ -539,12 +540,12 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> train, test = df.split_frame(seed=1) +>>> train = df.split_frame(seed=1) >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-1.0) +>>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-2.0) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """ From 06387e30feda615bbffd464ac3f2f91ed90a3430 Mon Sep 17 00:00:00 2001 From: Shaun 
<124687868+shaunyogeshwaran@users.noreply.github.com> Date: Mon, 1 Apr 2024 12:32:29 +0530 Subject: [PATCH 07/10] Include-algorithm_params --- h2o-bindings/bin/custom/python/gen_infogram.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_infogram.py b/h2o-bindings/bin/custom/python/gen_infogram.py index 585df2afdf33..48ea8e4e1a02 100644 --- a/h2o-bindings/bin/custom/python/gen_infogram.py +++ b/h2o-bindings/bin/custom/python/gen_infogram.py @@ -443,14 +443,14 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) ->>> algorithm_params = ig.algorithm_params ->>> train = df.split_frame(seed=1) +>>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) +>>> gbm_params = {'ntrees':3} >>> pcols = ["SEX", "MARRIAGE", "AGE"] >>> ig = H2OInfogram(protected_columns=pcols) ->>> ig.train(y=y, x=x, training_frame=train) +>>> ig.train(y=y, x=x, training_frame=train, algorithm_params=gbm_params) >>> ig.plot() """, data_fraction=""" From db1ba1efce4f4bc05ce6f58be87c6f576e10626c Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Mon, 10 Jun 2024 18:48:32 +0530 Subject: [PATCH 08/10] Update gen_infogram.py --- h2o-bindings/bin/custom/python/gen_infogram.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_infogram.py b/h2o-bindings/bin/custom/python/gen_infogram.py index 48ea8e4e1a02..553624eb2dd9 100644 --- a/h2o-bindings/bin/custom/python/gen_infogram.py +++ b/h2o-bindings/bin/custom/python/gen_infogram.py @@ -481,7 +481,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> 
x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-2.0) +>>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=0.2) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -497,7 +497,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-3.0) +>>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=0.2) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -513,7 +513,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-2.0) +>>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=0.2) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -545,7 +545,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-2.0) +>>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """ From a64e21559105b6fee0f9a24dffbaa5b2ef5ee522 Mon Sep 17 00:00:00 2001 From: Shaun <124687868+shaunyogeshwaran@users.noreply.github.com> Date: Fri, 2 Aug 2024 14:03:48 +0530 Subject: [PATCH 09/10] Update gen_infogram.py --- h2o-bindings/bin/custom/python/gen_infogram.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/h2o-bindings/bin/custom/python/gen_infogram.py b/h2o-bindings/bin/custom/python/gen_infogram.py index 553624eb2dd9..626f7907e3b9 100644 --- 
a/h2o-bindings/bin/custom/python/gen_infogram.py +++ b/h2o-bindings/bin/custom/python/gen_infogram.py @@ -481,7 +481,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=0.2) +>>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -497,7 +497,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=0.2) +>>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, @@ -513,7 +513,7 @@ def train_subset_models(self, model_class, y, training_frame, test_frame, protec >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] ->>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=0.2) +>>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """, From b8c9fd49fa2d4f7455c4b6d778e12c23a486aca8 Mon Sep 17 00:00:00 2001 From: Hannah Tillman Date: Thu, 15 Aug 2024 11:26:57 -0500 Subject: [PATCH 10/10] ht/gradle built --- h2o-py/h2o/estimators/infogram.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/h2o-py/h2o/estimators/infogram.py b/h2o-py/h2o/estimators/infogram.py index 47d69ee630a5..25bae354983d 100644 --- a/h2o-py/h2o/estimators/infogram.py +++ b/h2o-py/h2o/estimators/infogram.py @@ -709,13 +709,14 @@ def algorithm_params(self): >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 
'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) - >>> train, test = df.split_frame(seed=1) + >>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) + >>> gbm_params = {'ntrees':3} >>> pcols = ["SEX", "MARRIAGE", "AGE"] >>> ig = H2OInfogram(protected_columns=pcols) - >>> ig.train(y=y, x=x, training_frame=train) + >>> ig.train(y=y, x=x, training_frame=train, algorithm_params=gbm_params) >>> ig.plot() """ if self._parms.get("algorithm_params") != None: @@ -771,12 +772,12 @@ def total_information_threshold(self): >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) - >>> train, test = df.split_frame(seed=1) + >>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] - >>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=-1.0) + >>> ig = H2OInfogram(protected_columns=pcols, total_information_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """ @@ -811,12 +812,12 @@ def net_information_threshold(self): >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) - >>> train, test = df.split_frame(seed=1) + >>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] - >>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=-1.0) + >>> ig = H2OInfogram(protected_columns=pcols, net_information_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """ @@ -852,12 +853,12 @@
def relevance_index_threshold(self): >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) - >>> train, test = df.split_frame(seed=1) + >>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] - >>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=-1.0) + >>> ig = H2OInfogram(protected_columns=pcols, relevance_index_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """ @@ -893,12 +894,12 @@ def safety_index_threshold(self): >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) - >>> train, test = df.split_frame(seed=1) + >>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) >>> pcols = ["SEX", "MARRIAGE", "AGE"] - >>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=-1.0) + >>> ig = H2OInfogram(protected_columns=pcols, safety_index_threshold=0.5) >>> ig.train(y=y, x=x, training_frame=train) >>> ig.plot() """ @@ -931,7 +932,7 @@ def data_fraction(self): >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE': "enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) - >>> train, test = df.split_frame(seed=1) + >>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y) @@ -963,7 +964,7 @@ def top_n_features(self): >>> f = "https://erin-data.s3.amazonaws.com/admissible/data/taiwan_credit_card_uci.csv" >>> col_types = {'SEX': "enum", 'MARRIAGE':
"enum", 'default_payment_next_month': "enum"} >>> df = h2o.import_file(path=f, col_types=col_types) - >>> train, test = df.split_frame(seed=1) + >>> train = df.split_frame(seed=1)[0] >>> y = "default_payment_next_month" >>> x = train.columns >>> x.remove(y)