123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119 |
- """ GBDT (Gradient Boosted Decision Tree).
- Author: Aymeric Damien
- Project: https://github.com/aymericdamien/TensorFlow-Examples/
- """
- from __future__ import print_function
- import numpy as np
- import tensorflow as tf
- from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
- from tensorflow.contrib.boosted_trees.proto import learner_pb2
- from tensorflow.contrib.learn import learn_runner
- # Ignore all GPUs, tf boosted trees do not benefit from them.
- # import os
- # os.environ["CUDA_VISIBLE_DEVICES"] = ""
- # Import MNIST data
- from tensorflow.examples.tutorials.mnist import input_data
# Read the MNIST data sets from /tmp/data/. Labels are requested as plain
# class indices (one_hot=False) rather than one-hot vectors.
# NOTE(review): source_url is presumably where the files are fetched from
# when they are not already in the directory -- confirm against the
# read_data_sets documentation.
mnist = input_data.read_data_sets(
    "/tmp/data/",
    one_hot=False,
    source_url='http://yann.lecun.com/exdb/mnist/',
)
# ---------------------------------------------------------------------------
# Parameters
# ---------------------------------------------------------------------------
# Directory handed to the estimator (model_dir) and to learn_runner
# (output_dir).
log_dir = "/tmp/tf_gbdt"
# Total steps to train.
# NOTE(review): not referenced anywhere in the visible code; the experiment
# is created with train_steps=None.
num_steps = 500
# The number of samples per batch.
batch_size = 1024
# The 10 digits.
num_classes = 10
# Each image is 28x28 pixels.
num_features = 784

# ---------------------------------------------------------------------------
# GBDT Parameters
# ---------------------------------------------------------------------------
learning_rate = 0.1
# L1 / L2 regularization strengths (L2 is later divided by
# examples_per_layer when the learner config is filled in).
l1_regul = 0.0
l2_regul = 1.0
examples_per_layer = 1000
num_trees = 10
max_depth = 4
def get_input_fn(x, y):
    """Return an input function producing shuffled batches of (x, y).

    Args:
        x: Image examples. They are enqueued with enqueue_many=True, so the
            first dimension is treated as the example index.
        y: Labels, passed alongside x in the same tensor list.

    Returns:
        A zero-argument callable. When called, it builds the
        tf.train.shuffle_batch op and returns a tuple of
        ({"images": <image batch>}, <label batch>).
    """
    def input_fn():
        # Queue sizing is expressed in multiples of the module-level
        # batch_size: capacity is 10 batches, and at least 2 batches stay
        # in the queue after each dequeue.
        queue_capacity = batch_size * 10
        min_remaining = batch_size * 2
        batched = tf.train.shuffle_batch(
            tensors=[x, y],
            batch_size=batch_size,
            capacity=queue_capacity,
            min_after_dequeue=min_remaining,
            enqueue_many=True,
            num_threads=4)
        images, labels = batched
        return {"images": images}, labels

    return input_fn
# Fill in the boosted-trees learner configuration proto.
learner_config = learner_pb2.LearnerConfig()
learner_config.num_classes = num_classes
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate

# Regularization: L1 is used as given; L2 is divided by examples_per_layer.
# NOTE(review): presumably this normalizes the L2 penalty per example --
# confirm against the boosted_trees learner documentation.
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer

# Tree constraints and growth / multi-class settings.
learner_config.constraints.max_tree_depth = max_depth
growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
learner_config.growing_mode = growing_mode
learner_config.multi_class_strategy = learner_pb2.LearnerConfig.DIAGONAL_HESSIAN

# Run configuration passed to the estimator (save_checkpoints_secs=300).
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)
# Create a TF Boosted trees estimator that can take in custom loss.
# It is configured from the learner config, the GBDT parameters and the run
# configuration defined above; checkpoints and model files go to log_dir.
estimator = GradientBoostedDecisionTreeClassifier(
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    num_trees=num_trees,
    center_bias=False,
    model_dir=log_dir,
    config=run_config,
)
def _make_experiment_fn(output_dir):
    """Build the Experiment for the gradient boosted decision trees model.

    Args:
        output_dir: Part of the experiment_fn signature; this function does
            not read it (the module-level estimator was already given its
            model_dir).

    Returns:
        A tf.contrib.learn.Experiment wrapping the module-level estimator,
        with input functions over the MNIST train and test splits.
    """
    train_split = mnist.train
    test_split = mnist.test

    experiment_kwargs = {
        "estimator": estimator,
        # Labels are cast to int32 before being handed to the input queue.
        "train_input_fn": get_input_fn(
            train_split.images, train_split.labels.astype(np.int32)),
        "eval_input_fn": get_input_fn(
            test_split.images, test_split.labels.astype(np.int32)),
        # No explicit step limit is passed for training.
        "train_steps": None,
        "eval_steps": 1,
        "eval_metrics": None,
    }
    return tf.contrib.learn.Experiment(**experiment_kwargs)
# Training: learn_runner builds the experiment through _make_experiment_fn
# and runs its "train_and_evaluate" schedule, with log_dir as output_dir.
learn_runner.run(
    schedule="train_and_evaluate",
    output_dir=log_dir,
    experiment_fn=_make_experiment_fn,
)
# Accuracy: run the trained estimator over the MNIST test images and print
# the fraction of examples whose most probable class equals the true label.
# (The previously built, never-used test_input_fn has been dropped.)
results = estimator.predict(x=mnist.test.images)

num_correct = 0
num_total = 0
# Pair each prediction with its label directly instead of indexing the
# labels by position and keeping a separate counter.
for prediction, label in zip(results, mnist.test.labels):
    if np.argmax(prediction['probabilities']) == int(label):
        num_correct += 1
    num_total += 1

# float() keeps true division on Python 2 as well (the file only imports
# print_function from __future__).
print(float(num_correct) / num_total)
|