Convert scikit-learn decision trees to JSON

SKLearn has a function to convert decision trees to “graphviz” (for rendering) but I find JSON more helpful, as you can read it more easily, as well as use it in web apps. The function below will give you JSON.

The reason this is necessary (vs. simply calling json.dumps) is that the decision tree objects don’t support the interfaces the JSON library needs to run. Additionally, even if they did, the JSON library in Python dies on very small floating point numbers, which is why it’s not used at all in my version.


def treeToJson(decision_tree, feature_names=None):
  """Serialize a scikit-learn style decision tree to a JSON string.

  The JSON text is assembled by hand (no ``json`` module). Every node becomes
  an object; internal nodes carry a ``rule`` plus nested ``left`` and ``right``
  objects, leaves carry their ``value`` array. Ids, impurities and sample
  counts are emitted as JSON strings.

  Args:
    decision_tree: Either an estimator exposing ``tree_`` (and normally
      ``criterion``), or the low-level tree object itself. The tree must
      provide ``value``, ``n_outputs``, ``children_left``, ``children_right``,
      ``feature``, ``threshold``, ``impurity`` and ``n_node_samples``.
    feature_names: Optional sequence mapping feature indices to names. When
      omitted, the numeric feature index is used in the rule text. A name
      containing ``=`` is rendered as the rule ``<name> = false`` instead of
      a ``<=`` threshold comparison.

  Returns:
    The JSON document as a ``str``.
  """
  # Marker stored in children_left / children_right when a node has no child
  # (the value of sklearn.tree._tree.TREE_LEAF). Using the literal keeps this
  # function independent of scikit-learn's private module layout.
  TREE_LEAF = -1

  def escape(text):
    """Escape backslashes and double quotes for use inside a JSON string."""
    return str(text).replace("\\", "\\\\").replace('"', '\\"')

  def render_value(item):
    """Render a scalar, or (recursively) an array, as JSON text."""
    if getattr(item, "ndim", 0) > 0:
      # Multi-output trees have one row per output; str(ndarray) would give
      # space-separated numbers, which is not valid JSON.
      return "[" + ", ".join(render_value(x) for x in item) + "]"
    return str(item)

  def node_to_str(tree, node_id, criterion):
    """Return the comma-separated JSON members describing a single node."""
    if not isinstance(criterion, str):
      criterion = "impurity"

    value = tree.value[node_id]
    if tree.n_outputs == 1:
      value = value[0, :]
    jsonValue = ", ".join(render_value(x) for x in value)

    if tree.children_left[node_id] == TREE_LEAF:
      return '"id": "%s", "criterion": "%s", "impurity": "%s", "samples": "%s", "value": [%s]' \
             % (node_id,
                escape(criterion),
                tree.impurity[node_id],
                tree.n_node_samples[node_id],
                jsonValue)

    feature = tree.feature[node_id]
    if feature_names is not None:
      feature = feature_names[feature]
    # Without feature_names the feature is an integer index; convert it so the
    # "=" membership test below cannot raise TypeError.
    feature = str(feature)

    if "=" in feature:
      ruleType = "="
      ruleValue = "false"
    else:
      ruleType = "<="
      ruleValue = "%.4f" % tree.threshold[node_id]

    return '"id": "%s", "rule": "%s %s %s", "%s": "%s", "samples": "%s"' \
           % (node_id,
              escape(feature),
              ruleType,
              ruleValue,
              escape(criterion),
              tree.impurity[node_id],
              tree.n_node_samples[node_id])

  def recurse(tree, node_id, criterion, depth=0):
    """Render the subtree rooted at node_id, indented by depth levels."""
    tabs = "  " * depth
    left_child = tree.children_left[node_id]
    right_child = tree.children_right[node_id]

    parts = ["\n", tabs, "{\n", tabs, "  ", node_to_str(tree, node_id, criterion)]

    if left_child != TREE_LEAF:
      parts.extend([
          ",\n", tabs, '  "left": ',
          recurse(tree, left_child, criterion, depth + 1),
          ",\n", tabs, '  "right": ',
          recurse(tree, right_child, criterion, depth + 1),
      ])

    parts.extend([tabs, "\n", tabs, "}"])
    return "".join(parts)

  # Estimators wrap the low-level tree in `tree_`; a bare tree has no such
  # attribute and no criterion name, so its impurity is labelled generically.
  if hasattr(decision_tree, "tree_"):
    return recurse(decision_tree.tree_, 0,
                   getattr(decision_tree, "criterion", "impurity"))
  return recurse(decision_tree, 0, "impurity")

3 Replies to “Convert scikit-learn decision trees to JSON”

  1. Here’s my code using your function. I can NOT get it to run; please help, thanks.
    ——————————————————
    # #-*- coding:utf-8 -*-
    # import sys
    # reload(sys)
    # sys.setdefaultencoding(‘utf-8’)
    import numpy as np
    import pandas as pd
    from sklearn.datasets import make_classification
    from sklearn.ensemble import RandomForestClassifier
    from sklearn.tree import DecisionTreeClassifier
    import sklearn
    import numpy
    from IPython.display import display, Image
    import pydotplus
    from sklearn import tree
    from sklearn.tree import _tree
    from sklearn import tree
    import collections
    import drawtree
    import os
    from sklearn.tree._tree import TREE_LEAF

    def treeToJson(decision_tree, feature_names=None):
      """Serialize a scikit-learn style decision tree to a JSON string.

      The JSON text is assembled by hand (no ``json`` module). Every node
      becomes an object; internal nodes carry a ``rule`` plus nested ``left``
      and ``right`` objects, leaves carry their ``value`` array. Ids,
      impurities and sample counts are emitted as JSON strings.

      Args:
        decision_tree: Either an estimator exposing ``tree_`` (and normally
          ``criterion``), or the low-level tree object itself.
        feature_names: Optional sequence mapping feature indices to names.
          When omitted, the numeric feature index is used in the rule text.
          A name containing ``=`` is rendered as the rule ``<name> = false``
          instead of a ``<=`` threshold comparison.

      Returns:
        The JSON document as a ``str``.
      """
      # Marker stored in children_left / children_right when a node has no
      # child (the value of sklearn.tree._tree.TREE_LEAF).
      TREE_LEAF = -1

      def escape(text):
        """Escape backslashes and double quotes for use inside a JSON string."""
        return str(text).replace("\\", "\\\\").replace('"', '\\"')

      def render_value(item):
        """Render a scalar, or (recursively) an array, as JSON text."""
        if getattr(item, "ndim", 0) > 0:
          # Multi-output trees have one row per output; str(ndarray) would
          # give space-separated numbers, which is not valid JSON.
          return "[" + ", ".join(render_value(x) for x in item) + "]"
        return str(item)

      def node_to_str(tree, node_id, criterion):
        """Return the comma-separated JSON members describing a single node."""
        if not isinstance(criterion, str):
          criterion = "impurity"

        value = tree.value[node_id]
        if tree.n_outputs == 1:
          value = value[0, :]
        jsonValue = ", ".join(render_value(x) for x in value)

        if tree.children_left[node_id] == TREE_LEAF:
          return '"id": "%s", "criterion": "%s", "impurity": "%s", "samples": "%s", "value": [%s]' \
                 % (node_id,
                    escape(criterion),
                    tree.impurity[node_id],
                    tree.n_node_samples[node_id],
                    jsonValue)

        feature = tree.feature[node_id]
        if feature_names is not None:
          feature = feature_names[feature]
        # Without feature_names the feature is an integer index; convert it so
        # the "=" membership test below cannot raise TypeError.
        feature = str(feature)

        if "=" in feature:
          ruleType = "="
          ruleValue = "false"
        else:
          ruleType = "<="
          ruleValue = "%.4f" % tree.threshold[node_id]

        return '"id": "%s", "rule": "%s %s %s", "%s": "%s", "samples": "%s"' \
               % (node_id,
                  escape(feature),
                  ruleType,
                  ruleValue,
                  escape(criterion),
                  tree.impurity[node_id],
                  tree.n_node_samples[node_id])

      def recurse(tree, node_id, criterion, depth=0):
        """Render the subtree rooted at node_id, indented by depth levels."""
        tabs = "  " * depth
        left_child = tree.children_left[node_id]
        right_child = tree.children_right[node_id]

        parts = ["\n", tabs, "{\n", tabs, "  ", node_to_str(tree, node_id, criterion)]

        if left_child != TREE_LEAF:
          parts.extend([
              ",\n", tabs, '  "left": ',
              recurse(tree, left_child, criterion, depth + 1),
              ",\n", tabs, '  "right": ',
              recurse(tree, right_child, criterion, depth + 1),
          ])

        parts.extend([tabs, "\n", tabs, "}"])
        return "".join(parts)

      # Estimators wrap the low-level tree in `tree_`; a bare tree has no such
      # attribute and no criterion name, so its impurity is labelled generically.
      if hasattr(decision_tree, "tree_"):
        return recurse(decision_tree.tree_, 0,
                       getattr(decision_tree, "criterion", "impurity"))
      return recurse(decision_tree, 0, "impurity")

    def train():
      """Fit a decision tree classifier on a synthetic two-class dataset.

      The data comes from ``make_classification`` (1000 samples, 6 features)
      and is wrapped in a DataFrame whose columns are ``Feature 1`` through
      ``Feature 6`` followed by ``Class``.

      Returns:
        A ``(dt, X_train)`` tuple: the fitted ``DecisionTreeClassifier`` and
        the feature frame (the DataFrame without its ``Class`` column).
      """
      samples, labels = make_classification(
          n_samples=1000,
          n_features=6,
          n_informative=3,
          n_classes=2,
          random_state=0,
          shuffle=False,
      )

      # One column per generated feature, in order, then the label column.
      feature_columns = ('Feature 1', 'Feature 2', 'Feature 3',
                         'Feature 4', 'Feature 5', 'Feature 6')
      frame_data = {name: samples[:, position]
                    for position, name in enumerate(feature_columns)}
      frame_data['Class'] = labels
      df = pd.DataFrame(frame_data)

      y_train = df['Class']
      X_train = df.drop('Class', axis=1)

      dt = DecisionTreeClassifier(random_state=42)
      dt.fit(X_train, y_train)
      return dt, X_train
    #——————上面是生成决策树模型———————————–
    # os.environ["PATH"] += os.pathsep + 'C:\\Anaconda3\\Library\\bin\\graphviz'
    def draw_file(model, dot_file, png_file, X_train):
      """Export a fitted tree to Graphviz, display it, and write a PNG.

      Args:
        model: Fitted tree passed to ``tree.export_graphviz``.
        dot_file: Path the Graphviz source is written to and read back from.
        png_file: Output path handed to the ``dot`` command.
        X_train: Object whose ``columns`` supply the feature names.
      """
      from subprocess import check_call

      tree.export_graphviz(
          model,
          out_file=dot_file,
          feature_names=X_train.columns,
          filled=True,
          rounded=True,
          special_characters=True,
      )

      # Render the exported file in memory and show it inline.
      rendered = pydotplus.graph_from_dot_file(dot_file)
      display(Image(rendered.create_png()))

      # Also run the `dot` executable to produce the PNG file.
      check_call(['dot', '-Tpng', dot_file, '-o', png_file])

    # Pruning function. This is not the well-known CCP (cost-complexity)
    # pruning: a node is pruned when the smallest entry of its `value` array
    # falls below the threshold.
    def prune_index(inner_tree, index, threshold):
      """Recursively turn low-count nodes of ``inner_tree`` into leaves, in place.

      Args:
        inner_tree: Tree object with ``value``, ``children_left`` and
          ``children_right`` arrays; it is modified in place.
        index: Id of the node to examine.
        threshold: A node whose ``value[index].min()`` is below this is pruned.
      """
      below_threshold = inner_tree.value[index].min() < threshold
      if below_threshold:
        # Turn the node into a leaf by "unlinking" both of its children.
        inner_tree.children_left[index] = TREE_LEAF
        inner_tree.children_right[index] = TREE_LEAF

      # A node that is (now) a leaf has nothing further to visit.
      if inner_tree.children_left[index] == TREE_LEAF:
        return

      # Otherwise descend into the left subtree, then the right subtree.
      prune_index(inner_tree, inner_tree.children_left[index], threshold)
      prune_index(inner_tree, inner_tree.children_right[index], threshold)

    #***************************************************************

    if __name__ == '__main__':
      # Script entry point: fit the demo model, convert its tree to JSON text,
      # and print the result.
      model, X_train = train()
      model_json = treeToJson(model)
      print("model_json=", model_json)

Leave a Reply

Your email address will not be published.