Skip to content

Commit

Permalink
limit number of points in decision tree regressor visualization (#462)
Browse files Browse the repository at this point in the history
  • Loading branch information
pplonski committed Sep 2, 2021
1 parent ea24a47 commit 839367f
Showing 1 changed file with 20 additions and 8 deletions.
28 changes: 20 additions & 8 deletions supervised/algorithms/decision_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from sklearn.tree import _tree
from dtreeviz.trees import dtreeviz
from supervised.utils.subsample import subsample


def get_rules(tree, feature_names, class_names):
Expand Down Expand Up @@ -204,14 +205,25 @@ def interpret(
if explain_level == 0:
return
try:

viz = dtreeviz(
self.model,
X_train,
y_train,
target_name="target",
feature_names=X_train.columns,
)
# 250 is the hard limit on the number of points used in the visualization;
# with too many points the final SVG plot becomes very large (can be > 100MB)
if X_train.shape[0] > 250:
x, _, y, _ = subsample(X_train, y_train, REGRESSION, 250)
viz = dtreeviz(
self.model,
x,
y,
target_name="target",
feature_names=x.columns,
)
else:
viz = dtreeviz(
self.model,
X_train,
y_train,
target_name="target",
feature_names=X_train.columns,
)
tree_file_plot = os.path.join(model_file_path, learner_name + "_tree.svg")
viz.save(tree_file_plot)
except Exception as e:
Expand Down

0 comments on commit 839367f

Please sign in to comment.