  1. """ GBDT (Gradient Boosted Decision Tree).
  2. Author: Aymeric Damien
  3. Project: https://github.com/aymericdamien/TensorFlow-Examples/
  4. """
from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
from tensorflow.contrib.boosted_trees.proto import learner_pb2
from tensorflow.contrib.learn import learn_runner

# Ignore all GPUs; the TF Boosted Trees estimator does not benefit from them.
import os
os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False,
                                  source_url='http://yann.lecun.com/exdb/mnist/')

# Parameters
log_dir = "/tmp/tf_gbdt"  # Where checkpoints and summaries are written
num_steps = 500  # Total steps to train
batch_size = 1024  # The number of samples per batch
num_classes = 10  # The 10 digits
num_features = 784  # Each image is 28x28 pixels

# GBDT Parameters
learning_rate = 0.1
l1_regul = 0.
l2_regul = 1.
examples_per_layer = 1000
num_trees = 10
max_depth = 4
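
# With LAYER_BY_LAYER growing (configured below), each tree is built one depth
# level at a time and each level consumes about examples_per_layer examples,
# so the full ensemble sees roughly num_trees * max_depth * examples_per_layer
# = 40,000 training examples.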

def get_input_fn(x, y):
    """Returns an input function that shuffles and batches MNIST data."""
    def input_fn():
        # enqueue_many=True treats the first dimension of x and y as the
        # example dimension, so the full arrays can be enqueued directly.
        images_batch, labels_batch = tf.train.shuffle_batch(
            tensors=[x, y],
            batch_size=batch_size,
            capacity=batch_size * 10,
            min_after_dequeue=batch_size * 2,
            enqueue_many=True,
            num_threads=4)
        features_map = {"images": images_batch}
        return features_map, labels_batch
    return input_fn
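
# Note: shuffle_batch builds a queue-based input pipeline; tf.contrib.learn
# estimators start the required queue runners themselves during training and
# evaluation, so no manual tf.train.start_queue_runners call is needed here.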

run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

# Fill the GBDT parameters into the learner config proto.
learner_config = learner_pb2.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.num_classes = num_classes
learner_config.regularization.l1 = l1_regul
# Scale L2 per example so that the total regularization over the
# examples_per_layer examples used to grow a layer equals l2_regul.
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth
learner_config.growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
learner_config.multi_class_strategy = (
    learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)
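# DIAGONAL_HESSIAN grows a single ensemble with per-class leaf outputs from a
# diagonal approximation of the Hessian; learner_pb2 also provides the
# TREE_PER_CLASS and FULL_HESSIAN multi-class strategies.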

# Create a TF Boosted Trees estimator that can take in a custom loss.
estimator = GradientBoostedDecisionTreeClassifier(
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    model_dir=log_dir,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)
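
# Note: the GBDT estimator stops training on its own once the ensemble
# contains num_trees trees, so num_steps below only caps the number of
# training steps.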

def _make_experiment_fn(output_dir):
    """Creates the experiment for the gradient boosted decision trees."""
    train_input_fn = get_input_fn(mnist.train.images,
                                  mnist.train.labels.astype(np.int32))
    eval_input_fn = get_input_fn(mnist.test.images,
                                 mnist.test.labels.astype(np.int32))
    return tf.contrib.learn.Experiment(
        estimator=estimator,
        train_input_fn=train_input_fn,
        eval_input_fn=eval_input_fn,
        train_steps=num_steps,  # Upper bound; see the note above
        eval_steps=1,
        eval_metrics=None)

# Training
learn_runner.run(
    experiment_fn=_make_experiment_fn,
    output_dir=log_dir,
    schedule="train_and_evaluate")

# Accuracy: run prediction over the test set in its original order (one epoch,
# no shuffling) so predictions line up with mnist.test.labels. The feature key
# must match the "images" key used in training.
test_input_fn = tf.estimator.inputs.numpy_input_fn(
    x={"images": mnist.test.images}, batch_size=batch_size,
    num_epochs=1, shuffle=False)
results = estimator.predict(input_fn=test_input_fn)
acc = 0.
n = 0
for i, r in enumerate(results):
    if np.argmax(r['probabilities']) == int(mnist.test.labels[i]):
        acc += 1
    n += 1
print("Testing Accuracy:", acc / n)
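
# Alternatively, the estimator's built-in evaluate() reports accuracy
# directly (a minimal sketch under the same feature-key assumption):
#
#   eval_input_fn = tf.estimator.inputs.numpy_input_fn(
#       x={"images": mnist.test.images},
#       y=mnist.test.labels.astype(np.int32),
#       batch_size=batch_size, num_epochs=1, shuffle=False)
#   print(estimator.evaluate(input_fn=eval_input_fn))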