""" GBDT (Gradient Boosted Decision Tree).


Author: Aymeric Damien
Project: https://github.com/aymericdamien/TensorFlow-Examples/
"""

from __future__ import print_function

import numpy as np
import tensorflow as tf
from tensorflow.contrib.boosted_trees.estimator_batch.estimator import GradientBoostedDecisionTreeClassifier
from tensorflow.contrib.boosted_trees.proto import learner_pb2
from tensorflow.contrib.learn import learn_runner

# Ignore all GPUs, tf random forest does not benefit from it.
# import os
# os.environ["CUDA_VISIBLE_DEVICES"] = ""

# Import MNIST data
from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("/tmp/data/", one_hot=False,
                                  source_url='http://yann.lecun.com/exdb/mnist/')

# Parameters
log_dir = "/tmp/tf_gbdt"
num_steps = 500 # Total steps to train
batch_size = 1024 # The number of samples per batch
num_classes = 10 # The 10 digits
num_features = 784 # Each image is 28x28 pixels

# GBDT Parameters
learning_rate = 0.1
l1_regul = 0.
l2_regul = 1.
examples_per_layer = 1000
num_trees = 10
max_depth = 4


def get_input_fn(x, y):
    """Input function over MNIST data."""

    def input_fn():
        images_batch, labels_batch = tf.train.shuffle_batch(
                tensors=[x, y],
                batch_size=batch_size,
                capacity=batch_size * 10,
                min_after_dequeue=batch_size * 2,
                enqueue_many=True,
                num_threads=4)
        features_map = {"images": images_batch}
        return features_map, labels_batch

    return input_fn


learner_config = learner_pb2.LearnerConfig()
learner_config.learning_rate_tuner.fixed.learning_rate = learning_rate
learner_config.num_classes = num_classes
learner_config.regularization.l1 = l1_regul
learner_config.regularization.l2 = l2_regul / examples_per_layer
learner_config.constraints.max_tree_depth = max_depth

growing_mode = learner_pb2.LearnerConfig.LAYER_BY_LAYER
learner_config.growing_mode = growing_mode
run_config = tf.contrib.learn.RunConfig(save_checkpoints_secs=300)

learner_config.multi_class_strategy = (
    learner_pb2.LearnerConfig.DIAGONAL_HESSIAN)

# Create a TF Boosted trees estimator that can take in custom loss.
estimator = GradientBoostedDecisionTreeClassifier(
    learner_config=learner_config,
    n_classes=num_classes,
    examples_per_layer=examples_per_layer,
    model_dir=log_dir,
    num_trees=num_trees,
    center_bias=False,
    config=run_config)


def _make_experiment_fn(output_dir):
  """Creates experiment for gradient boosted decision trees."""
  train_input_fn = get_input_fn(mnist.train.images,
                            mnist.train.labels.astype(np.int32))
  eval_input_fn = get_input_fn(mnist.test.images,
                           mnist.test.labels.astype(np.int32))

  return tf.contrib.learn.Experiment(
      estimator=estimator,
      train_input_fn=train_input_fn,
      eval_input_fn=eval_input_fn,
      train_steps=None,
      eval_steps=1,
      eval_metrics=None)

# Training
learn_runner.run(
      experiment_fn=_make_experiment_fn,
      output_dir=log_dir,
      schedule="train_and_evaluate")


# Accuracy
test_input_fn = get_input_fn(
    mnist.test.images, mnist.test.labels.astype(np.int32))
results = estimator.predict(x=mnist.test.images)

acc = 0.
n = 0
for i, r in enumerate(results):
    if np.argmax(r['probabilities']) == int(mnist.test.labels[i]):
        acc += 1
    n += 1

print(acc / n)