
Converted the models repo to TF 1.0 using the upgrade script

Neal Wu, 8 years ago
parent
current commit
052e5e8b6e
71 changed files with 314 additions and 314 deletions
  1. +1 -1 autoencoder/autoencoder_models/Autoencoder.py
  2. +2 -2 autoencoder/autoencoder_models/DenoisingAutoencoder.py
  3. +3 -3 autoencoder/autoencoder_models/VariationalAutoencoder.py
  4. +0 -0 compression/decoder.py
  5. +0 -0 compression/encoder.py
  6. +0 -0 compression/msssim.py
  7. +2 -2 differential_privacy/dp_sgd/dp_mnist/dp_mnist.py
  8. +2 -2 differential_privacy/dp_sgd/dp_optimizer/utils.py
  9. +3 -3 differential_privacy/dp_sgd/per_example_gradients/per_example_gradients.py
  10. +4 -4 differential_privacy/multiple_teachers/deep_cnn.py
  11. +2 -2 differential_privacy/privacy_accountant/tf/accountant.py
  12. +3 -3 im2txt/im2txt/show_and_tell_model.py
  13. +0 -0 inception/inception/data/preprocess_imagenet_validation_data.py
  14. +0 -0 inception/inception/data/process_bounding_boxes.py
  15. +8 -8 inception/inception/image_processing.py
  16. +6 -6 inception/inception/inception_distributed_train.py
  17. +2 -2 inception/inception/inception_eval.py
  18. +3 -3 inception/inception/inception_model.py
  19. +11 -11 inception/inception/inception_train.py
  20. +19 -19 inception/inception/slim/inception_model.py
  21. +2 -2 inception/inception/slim/ops.py
  22. +1 -1 inception/inception/slim/variables.py
  23. +1 -1 namignizer/model.py
  24. +22 -22 neural_gpu/neural_gpu.py
  25. +0 -0 neural_programmer/data_utils.py
  26. +32 -32 neural_programmer/model.py
  27. +0 -0 neural_programmer/neural_programmer.py
  28. +0 -0 neural_programmer/nn_utils.py
  29. +0 -0 neural_programmer/parameters.py
  30. +0 -0 neural_programmer/wiki_data.py
  31. +6 -6 next_frame_prediction/cross_conv/model.py
  32. +1 -1 next_frame_prediction/cross_conv/reader.py
  33. +11 -11 real_nvp/real_nvp_multiscale_dataset.py
  34. +7 -7 real_nvp/real_nvp_utils.py
  35. +6 -6 slim/deployment/model_deploy.py
  36. +1 -1 slim/eval_image_classifier.py
  37. +1 -1 slim/nets/alexnet.py
  38. +1 -1 slim/nets/cifarnet.py
  39. +9 -9 slim/nets/inception_v1.py
  40. +10 -10 slim/nets/inception_v2.py
  41. +15 -15 slim/nets/inception_v3.py
  42. +10 -10 slim/nets/inception_v4.py
  43. +2 -2 slim/nets/overfeat.py
  44. +1 -1 slim/nets/vgg.py
  45. +6 -6 slim/preprocessing/cifarnet_preprocessing.py
  46. +8 -8 slim/preprocessing/inception_preprocessing.py
  47. +1 -1 slim/preprocessing/lenet_preprocessing.py
  48. +4 -4 slim/preprocessing/vgg_preprocessing.py
  49. +9 -9 slim/train_image_classifier.py
  50. +8 -8 street/python/nn_ops.py
  51. +3 -3 street/python/vgsl_input.py
  52. +3 -3 street/python/vgsl_model.py
  53. +4 -4 street/python/vgslspecs.py
  54. +0 -0 swivel/glove_to_shards.py
  55. +0 -0 swivel/nearest.py
  56. +0 -0 swivel/prep.py
  57. +2 -2 swivel/swivel.py
  58. +0 -0 swivel/text2bin.py
  59. +0 -0 swivel/wordsim.py
  60. +2 -2 syntaxnet/syntaxnet/graph_builder.py
  61. +2 -2 textsum/seq2seq_attention.py
  62. +10 -10 textsum/seq2seq_attention_model.py
  63. +1 -1 textsum/seq2seq_lib.py
  64. +9 -9 transformer/spatial_transformer.py
  65. +1 -1 tutorials/embedding/word2vec.py
  66. +1 -1 tutorials/image/cifar10/cifar10_multi_gpu_train.py
  67. +1 -1 tutorials/rnn/ptb/ptb_word_lm.py
  68. +6 -6 video_prediction/lstm_ops.py
  69. +3 -3 video_prediction/prediction_input.py
  70. +10 -10 video_prediction/prediction_model.py
  71. +10 -10 video_prediction/prediction_train.py

+ 1 - 1
autoencoder/autoencoder_models/Autoencoder.py

@@ -18,7 +18,7 @@ class Autoencoder(object):
         self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
 
         # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
         self.optimizer = optimizer.minimize(self.cost)
 
         init = tf.global_variables_initializer()
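
The rename pattern in this hunk recurs across the whole commit: the TF 0.x elementwise ops got longer names in 1.0. A minimal sketch of the new spellings, assuming a TF 1.0 runtime (the tensor values here are illustrative only):

import tensorflow as tf

x = tf.constant([1.0, 2.0, 3.0])
y = tf.constant([0.5, 0.5, 0.5])

diff = tf.subtract(x, y)    # was tf.sub
prod = tf.multiply(x, y)    # was tf.mul
cost = 0.5 * tf.reduce_sum(tf.pow(diff, 2.0))  # same squared-error form as the hunk

with tf.Session() as sess:
    print(sess.run(cost))  # 0.5 * (0.25 + 2.25 + 6.25) = 4.375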

+ 2 - 2
autoencoder/autoencoder_models/DenoisingAutoencoder.py

@@ -22,7 +22,7 @@ class AdditiveGaussianNoiseAutoencoder(object):
         self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
 
         # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
         self.optimizer = optimizer.minimize(self.cost)
 
         init = tf.global_variables_initializer()
@@ -89,7 +89,7 @@ class MaskingNoiseAutoencoder(object):
         self.reconstruction = tf.add(tf.matmul(self.hidden, self.weights['w2']), self.weights['b2'])
 
         # cost
-        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        self.cost = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
         self.optimizer = optimizer.minimize(self.cost)
 
         init = tf.global_variables_initializer()

+ 3 - 3
autoencoder/autoencoder_models/VariationalAutoencoder.py

@@ -17,13 +17,13 @@ class VariationalAutoencoder(object):
         self.z_log_sigma_sq = tf.add(tf.matmul(self.x, self.weights['log_sigma_w1']), self.weights['log_sigma_b1'])
 
         # sample from gaussian distribution
-        eps = tf.random_normal(tf.pack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32)
-        self.z = tf.add(self.z_mean, tf.mul(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))
+        eps = tf.random_normal(tf.stack([tf.shape(self.x)[0], self.n_hidden]), 0, 1, dtype = tf.float32)
+        self.z = tf.add(self.z_mean, tf.multiply(tf.sqrt(tf.exp(self.z_log_sigma_sq)), eps))
 
         self.reconstruction = tf.add(tf.matmul(self.z, self.weights['w2']), self.weights['b2'])
 
         # cost
-        reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.sub(self.reconstruction, self.x), 2.0))
+        reconstr_loss = 0.5 * tf.reduce_sum(tf.pow(tf.subtract(self.reconstruction, self.x), 2.0))
         latent_loss = -0.5 * tf.reduce_sum(1 + self.z_log_sigma_sq
                                            - tf.square(self.z_mean)
                                            - tf.exp(self.z_log_sigma_sq), 1)
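
This hunk pairs two renames: tf.pack became tf.stack, and tf.mul became tf.multiply. A minimal sketch of the shape-packing pattern used for the Gaussian sample above, with the placeholder and hidden size as illustrative assumptions:

import tensorflow as tf

x = tf.placeholder(tf.float32, [None, 784])  # illustrative input
n_hidden = 200                               # illustrative size

# tf.pack([...]) in TF 0.x; tf.stack([...]) in TF 1.0.
sample_shape = tf.stack([tf.shape(x)[0], n_hidden])
eps = tf.random_normal(sample_shape, 0, 1, dtype=tf.float32)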

+ 0 - 0
compression/decoder.py


+ 0 - 0
compression/encoder.py


+ 0 - 0
compression/msssim.py


+ 2 - 2
differential_privacy/dp_sgd/dp_mnist/dp_mnist.py

@@ -273,7 +273,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,
         images, network_parameters)
 
     cost = tf.nn.softmax_cross_entropy_with_logits(
-        logits, tf.one_hot(labels, 10))
+        logits=logits, labels=tf.one_hot(labels, 10))
 
     # The actual cost is the average across the examples.
     cost = tf.reduce_sum(cost, [0]) / batch_size
@@ -343,7 +343,7 @@ def Train(mnist_train_file, mnist_test_file, network_parameters, num_steps,
 
  # We need to maintain the initialization sequence.
     for v in tf.trainable_variables():
-      sess.run(tf.initialize_variables([v]))
+      sess.run(tf.variables_initializer([v]))
     sess.run(tf.global_variables_initializer())
     sess.run(init_ops)
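
Two API changes meet in this file: tf.nn.softmax_cross_entropy_with_logits now takes keyword arguments (logits=, labels=), and tf.initialize_variables became tf.variables_initializer. A minimal sketch under TF 1.0, with the shapes as assumptions:

import tensorflow as tf

logits = tf.random_normal([8, 10])
labels = tf.one_hot(tf.zeros([8], dtype=tf.int32), 10)

# Keywords are enforced in 1.0; the old positional order invited swapped args.
xent = tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=labels)

v = tf.Variable(0.0)
init_v = tf.variables_initializer([v])        # was tf.initialize_variables
init_all = tf.global_variables_initializer()  # was tf.initialize_all_variables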
 

+ 2 - 2
differential_privacy/dp_sgd/dp_optimizer/utils.py

@@ -236,7 +236,7 @@ def BatchClipByL2norm(t, upper_bound, name=None):
   with tf.op_scope([t, upper_bound], name, "batch_clip_by_l2norm") as name:
     saved_shape = tf.shape(t)
     batch_size = tf.slice(saved_shape, [0], [1])
-    t2 = tf.reshape(t, tf.concat(0, [batch_size, [-1]]))
+    t2 = tf.reshape(t, tf.concat(axis=0, values=[batch_size, [-1]]))
     upper_bound_inv = tf.fill(tf.slice(saved_shape, [0], [1]),
                               tf.constant(1.0/upper_bound))
     # Add a small number to avoid divide by 0
@@ -266,7 +266,7 @@ def SoftThreshold(t, threshold_ratio, name=None):
   assert threshold_ratio >= 0
   with tf.op_scope([t, threshold_ratio], name, "soft_thresholding") as name:
     saved_shape = tf.shape(t)
-    t2 = tf.reshape(t, tf.concat(0, [tf.slice(saved_shape, [0], [1]), -1]))
+    t2 = tf.reshape(t, tf.concat(axis=0, values=[tf.slice(saved_shape, [0], [1]), -1]))
     t_abs = tf.abs(t2)
     t_x = tf.sign(t2) * tf.nn.relu(t_abs -
                                    (tf.reduce_mean(t_abs, [0],
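
The tf.concat signature flipped in 1.0 from (concat_dim, values) to (values, axis); the upgrade script rewrites old positional calls into the keyword form seen above, which is unambiguous under either signature. A minimal sketch:

import tensorflow as tf

a = tf.ones([2, 3])
b = tf.zeros([2, 3])

# TF 0.x: tf.concat(0, [a, b])
# TF 1.0: tf.concat([a, b], 0), or with explicit keywords:
c = tf.concat(axis=0, values=[a, b])  # shape [4, 3]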

+ 3 - 3
differential_privacy/dp_sgd/per_example_gradients/per_example_gradients.py

@@ -189,7 +189,7 @@ class MatMulPXG(object):
     z_grads, = z_grads
     x_expanded = tf.expand_dims(x, 2)
     z_grads_expanded = tf.expand_dims(z_grads, 1)
-    return tf.mul(x_expanded, z_grads_expanded)
+    return tf.multiply(x_expanded, z_grads_expanded)
 
 
 pxg_registry.Register("MatMul", MatMulPXG)
@@ -245,7 +245,7 @@ class Conv2DPXG(object):
       num_x = int(conv_x.get_shape()[0])
       assert num_x == 1, num_x
     assert len(conv_px) == batch_size
-    conv = tf.concat(0, conv_px)
+    conv = tf.concat(axis=0, values=conv_px)
     assert int(conv.get_shape()[0]) == batch_size
     return conv, w_px
 
@@ -274,7 +274,7 @@ class Conv2DPXG(object):
                                   self.colocate_gradients_with_ops,
                                   gate_gradients=self.gate_gradients)
 
-    return tf.pack(gradients_list)
+    return tf.stack(gradients_list)
 
 pxg_registry.Register("Conv2D", Conv2DPXG)
 

+ 4 - 4
differential_privacy/multiple_teachers/deep_cnn.py

@@ -75,7 +75,7 @@ def _variable_with_weight_decay(name, shape, stddev, wd):
   var = _variable_on_cpu(name, shape,
                          tf.truncated_normal_initializer(stddev=stddev))
   if wd is not None:
-    weight_decay = tf.mul(tf.nn.l2_loss(var), wd, name='weight_loss')
+    weight_decay = tf.multiply(tf.nn.l2_loss(var), wd, name='weight_loss')
     tf.add_to_collection('losses', weight_decay)
   return var
 
@@ -398,7 +398,7 @@ def train_op_fun(total_loss, global_step):
                                   decay_steps,
                                   LEARNING_RATE_DECAY_FACTOR,
                                   staircase=True)
-  tf.scalar_summary('learning_rate', lr)
+  tf.summary.scalar('learning_rate', lr)
 
   # Generate moving averages of all losses and associated summaries.
   loss_averages_op = moving_av(total_loss)
@@ -413,7 +413,7 @@ def train_op_fun(total_loss, global_step):
 
   # Add histograms for trainable variables.
   for var in tf.trainable_variables():
-    tf.histogram_summary(var.op.name, var)
+    tf.summary.histogram(var.op.name, var)
 
   # Track the moving averages of all trainable variables.
   variable_averages = tf.train.ExponentialMovingAverage(
@@ -485,7 +485,7 @@ def train(images, labels, ckpt_path, dropout=False):
     train_op = train_op_fun(loss, global_step)
 
     # Create a saver.
-    saver = tf.train.Saver(tf.all_variables())
+    saver = tf.train.Saver(tf.global_variables())
 
     print("Graph constructed and saver created")
 

+ 2 - 2
differential_privacy/privacy_accountant/tf/accountant.py

@@ -361,12 +361,12 @@ class GaussianMomentsAccountant(MomentsAccountant):
     exponents = tf.constant([j * (j + 1.0 - 2.0 * s) / (2.0 * sigma * sigma)
                              for j in range(t + 1)], dtype=tf.float64)
     # x[i, j] = binomial[i, j] * signs[i, j] = (i choose j) * (-1)^{i-j}
-    x = tf.mul(binomial, signs)
+    x = tf.multiply(binomial, signs)
     # y[i, j] = x[i, j] * exp(exponents[j])
     #         = (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
     # Note: this computation is done by broadcasting pointwise multiplication
     # between [t+1, t+1] tensor and [t+1] tensor.
-    y = tf.mul(x, tf.exp(exponents))
+    y = tf.multiply(x, tf.exp(exponents))
     # z[i] = sum_j y[i, j]
     #      = sum_j (i choose j) * (-1)^{i-j} * exp(j(j-1)/(2 sigma^2))
     z = tf.reduce_sum(y, 1)

+ 3 - 3
im2txt/im2txt/show_and_tell_model.py

@@ -264,7 +264,7 @@ class ShowAndTellModel(object):
       if self.mode == "inference":
         # In inference mode, use concatenated states for convenient feeding and
         # fetching.
-        tf.concat(initial_state, 1, name="initial_state")
+        tf.concat(axis=1, values=initial_state, name="initial_state")
 
         # Placeholder for feeding a batch of concatenated states.
         state_feed = tf.placeholder(dtype=tf.float32,
@@ -274,11 +274,11 @@ class ShowAndTellModel(object):
 
         # Run a single LSTM step.
         lstm_outputs, state_tuple = lstm_cell(
-            inputs=tf.squeeze(self.seq_embeddings, squeeze_dims=[1]),
+            inputs=tf.squeeze(self.seq_embeddings, axis=[1]),
             state=state_tuple)
 
        # Concatenate the resulting state.
-        tf.concat(state_tuple, 1, name="state")
+        tf.concat(axis=1, values=state_tuple, name="state")
       else:
         # Run the batch of sequence embeddings through the LSTM.
         sequence_length = tf.reduce_sum(self.input_mask, 1)
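
Also in this hunk: tf.squeeze's squeeze_dims parameter was renamed axis in TF 1.0. A minimal sketch, with the embedding shape as an assumption:

import tensorflow as tf

seq_embeddings = tf.zeros([32, 1, 512])  # illustrative [batch, 1, embedding]

# squeeze_dims=[1] in TF 0.x; axis=[1] in TF 1.0.
inputs = tf.squeeze(seq_embeddings, axis=[1])  # shape [32, 512]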

+ 0 - 0
inception/inception/data/preprocess_imagenet_validation_data.py


+ 0 - 0
inception/inception/data/process_bounding_boxes.py


+ 8 - 8
inception/inception/image_processing.py

@@ -221,7 +221,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
     if not thread_id:
       image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                     bbox)
-      tf.image_summary('image_with_bounding_boxes', image_with_box)
+      tf.summary.image('image_with_bounding_boxes', image_with_box)
 
   # A large fraction of image datasets contain a human-annotated bounding
   # box delineating the region of the image containing the object of interest.
@@ -242,7 +242,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
     if not thread_id:
       image_with_distorted_box = tf.image.draw_bounding_boxes(
           tf.expand_dims(image, 0), distort_bbox)
-      tf.image_summary('images_with_distorted_bounding_box',
+      tf.summary.image('images_with_distorted_bounding_box',
                        image_with_distorted_box)
 
     # Crop the image to the specified bounding box.
@@ -259,7 +259,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
     # the third dimension.
     distorted_image.set_shape([height, width, 3])
     if not thread_id:
-      tf.image_summary('cropped_resized_image',
+      tf.summary.image('cropped_resized_image',
                        tf.expand_dims(distorted_image, 0))
 
     # Randomly flip the image horizontally.
@@ -269,7 +269,7 @@ def distort_image(image, height, width, bbox, thread_id=0, scope=None):
     distorted_image = distort_color(distorted_image, thread_id)
 
     if not thread_id:
-      tf.image_summary('final_distorted_image',
+      tf.summary.image('final_distorted_image',
                        tf.expand_dims(distorted_image, 0))
     return distorted_image
 
@@ -328,8 +328,8 @@ def image_preprocessing(image_buffer, bbox, train, thread_id=0):
     image = eval_image(image, height, width)
 
   # Finally, rescale to [-1,1] instead of [0, 1)
-  image = tf.sub(image, 0.5)
-  image = tf.mul(image, 2.0)
+  image = tf.subtract(image, 0.5)
+  image = tf.multiply(image, 2.0)
   return image
 
 
@@ -394,7 +394,7 @@ def parse_example_proto(example_serialized):
   ymax = tf.expand_dims(features['image/object/bbox/ymax'].values, 0)
 
   # Note that we impose an ordering of (y, x) just to make life difficult.
-  bbox = tf.concat(0, [ymin, xmin, ymax, xmax])
+  bbox = tf.concat(axis=0, values=[ymin, xmin, ymax, xmax])
 
   # Force the variable number of bounding boxes into the shape
   # [1, num_boxes, coords].
@@ -505,6 +505,6 @@ def batch_inputs(dataset, batch_size, train, num_preprocess_threads=None,
     images = tf.reshape(images, shape=[batch_size, height, width, depth])
 
     # Display the training images in the visualizer.
-    tf.image_summary('images', images)
+    tf.summary.image('images', images)
 
     return images, tf.reshape(label_index_batch, [batch_size])

+ 6 - 6
inception/inception/inception_distributed_train.py

@@ -133,7 +133,7 @@ def train(target, dataset, cluster_spec):
                                       FLAGS.learning_rate_decay_factor,
                                       staircase=True)
       # Add a summary to track the learning rate.
-      tf.scalar_summary('learning_rate', lr)
+      tf.summary.scalar('learning_rate', lr)
 
       # Create an optimizer that performs gradient descent.
       opt = tf.train.RMSPropOptimizer(lr,
@@ -171,8 +171,8 @@ def train(target, dataset, cluster_spec):
           loss_name = l.op.name
           # Name each loss as '(raw)' and name the moving average version of the
           # loss as the original loss name.
-          tf.scalar_summary(loss_name + ' (raw)', l)
-          tf.scalar_summary(loss_name, loss_averages.average(l))
+          tf.summary.scalar(loss_name + ' (raw)', l)
+          tf.summary.scalar(loss_name, loss_averages.average(l))
 
         # Add dependency to compute loss_averages.
         with tf.control_dependencies([loss_averages_op]):
@@ -191,7 +191,7 @@ def train(target, dataset, cluster_spec):
 
       # Add histograms for model variables.
       for var in variables_to_average:
-        tf.histogram_summary(var.op.name, var)
+        tf.summary.histogram(var.op.name, var)
 
       # Create synchronous replica optimizer.
       opt = tf.train.SyncReplicasOptimizer(
@@ -215,7 +215,7 @@ def train(target, dataset, cluster_spec):
       # Add histograms for gradients.
       for grad, var in grads:
         if grad is not None:
-          tf.histogram_summary(var.op.name + '/gradients', grad)
+          tf.summary.histogram(var.op.name + '/gradients', grad)
 
       apply_gradients_op = opt.apply_gradients(grads, global_step=global_step)
 
@@ -233,7 +233,7 @@ def train(target, dataset, cluster_spec):
       saver = tf.train.Saver()
 
       # Build the summary operation based on the TF collection of Summaries.
-      summary_op = tf.merge_all_summaries()
+      summary_op = tf.summary.merge_all()
 
       # Build an initialization operation to run below.
       init_op = tf.global_variables_initializer()
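
Most of this file's hunks are the move of the summary ops into the tf.summary namespace; the mapping is one-to-one. A compact sketch, assuming TF 1.0 and an illustrative log directory:

import tensorflow as tf

w = tf.Variable(tf.zeros([10]), name='w')

tf.summary.scalar('w_mean', tf.reduce_mean(w))   # was tf.scalar_summary
tf.summary.histogram('w', w)                     # was tf.histogram_summary
tf.summary.image('img', tf.zeros([1, 8, 8, 3]))  # was tf.image_summary
summary_op = tf.summary.merge_all()              # was tf.merge_all_summaries
writer = tf.summary.FileWriter('/tmp/logs')      # was tf.train.SummaryWriter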

+ 2 - 2
inception/inception/inception_eval.py

@@ -158,10 +158,10 @@ def evaluate(dataset):
     saver = tf.train.Saver(variables_to_restore)
 
     # Build the summary operation based on the TF collection of Summaries.
-    summary_op = tf.merge_all_summaries()
+    summary_op = tf.summary.merge_all()
 
     graph_def = tf.get_default_graph().as_graph_def()
-    summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir,
+    summary_writer = tf.summary.FileWriter(FLAGS.eval_dir,
                                             graph_def=graph_def)
 
     while True:

+ 3 - 3
inception/inception/inception_model.py

@@ -115,7 +115,7 @@ def loss(logits, labels, batch_size=None):
   # shape [FLAGS.batch_size, num_classes].
   sparse_labels = tf.reshape(labels, [batch_size, 1])
   indices = tf.reshape(tf.range(batch_size), [batch_size, 1])
-  concated = tf.concat(1, [indices, sparse_labels])
+  concated = tf.concat(axis=1, values=[indices, sparse_labels])
   num_classes = logits[0].get_shape()[-1].value
   dense_labels = tf.sparse_to_dense(concated,
                                     [batch_size, num_classes],
@@ -147,8 +147,8 @@ def _activation_summary(x):
   # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
   # session. This helps the clarity of presentation on tensorboard.
   tensor_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', x.op.name)
-  tf.contrib.deprecated.histogram_summary(tensor_name + '/activations', x)
-  tf.contrib.deprecated.scalar_summary(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
+  tf.summary.histogram(tensor_name + '/activations', x)
+  tf.summary.scalar(tensor_name + '/sparsity', tf.nn.zero_fraction(x))
 
 
 def _activation_summaries(endpoints):

+ 11 - 11
inception/inception/inception_train.py

@@ -132,8 +132,8 @@ def _tower_loss(images, labels, num_classes, scope, reuse_variables=None):
     loss_name = re.sub('%s_[0-9]*/' % inception.TOWER_NAME, '', l.op.name)
     # Name each loss as '(raw)' and name the moving average version of the loss
     # as the original loss name.
-    tf.scalar_summary(loss_name +' (raw)', l)
-    tf.scalar_summary(loss_name, loss_averages.average(l))
+    tf.summary.scalar(loss_name +' (raw)', l)
+    tf.summary.scalar(loss_name, loss_averages.average(l))
 
   with tf.control_dependencies([loss_averages_op]):
     total_loss = tf.identity(total_loss)
@@ -166,7 +166,7 @@ def _average_gradients(tower_grads):
       grads.append(expanded_g)
 
     # Average over the 'tower' dimension.
-    grad = tf.concat(0, grads)
+    grad = tf.concat(axis=0, values=grads)
     grad = tf.reduce_mean(grad, 0)
 
     # Keep in mind that the Variables are redundant because they are shared
@@ -223,8 +223,8 @@ def train(dataset):
     num_classes = dataset.num_classes() + 1
 
      # Split the batch of images and labels for towers.
-    images_splits = tf.split(0, FLAGS.num_gpus, images)
-    labels_splits = tf.split(0, FLAGS.num_gpus, labels)
+    images_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=images)
+    labels_splits = tf.split(axis=0, num_or_size_splits=FLAGS.num_gpus, value=labels)
 
     # Calculate the gradients for each model tower.
     tower_grads = []
@@ -268,20 +268,20 @@ def train(dataset):
     summaries.extend(input_summaries)
 
     # Add a summary to track the learning rate.
-    summaries.append(tf.scalar_summary('learning_rate', lr))
+    summaries.append(tf.summary.scalar('learning_rate', lr))
 
     # Add histograms for gradients.
     for grad, var in grads:
       if grad is not None:
         summaries.append(
-            tf.histogram_summary(var.op.name + '/gradients', grad))
+            tf.summary.histogram(var.op.name + '/gradients', grad))
 
     # Apply the gradients to adjust the shared variables.
     apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
 
     # Add histograms for trainable variables.
     for var in tf.trainable_variables():
-      summaries.append(tf.histogram_summary(var.op.name, var))
+      summaries.append(tf.summary.histogram(var.op.name, var))
 
     # Track the moving averages of all trainable variables.
     # Note that we maintain a "double-average" of the BatchNormalization
@@ -301,10 +301,10 @@ def train(dataset):
                         batchnorm_updates_op)
 
     # Create a saver.
-    saver = tf.train.Saver(tf.all_variables())
+    saver = tf.train.Saver(tf.global_variables())
 
     # Build the summary operation from the last tower summaries.
-    summary_op = tf.merge_summary(summaries)
+    summary_op = tf.summary.merge(summaries)
 
     # Build an initialization operation to run below.
     init = tf.global_variables_initializer()
@@ -329,7 +329,7 @@ def train(dataset):
     # Start the queue runners.
     tf.train.start_queue_runners(sess=sess)
 
-    summary_writer = tf.train.SummaryWriter(
+    summary_writer = tf.summary.FileWriter(
         FLAGS.train_dir,
         graph_def=sess.graph.as_graph_def(add_shapes=True))
 

+ 19 - 19
inception/inception/slim/inception_model.py

@@ -122,7 +122,7 @@ def inception_v3(inputs,
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 32, [1, 1])
-          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
           end_points['mixed_35x35x256a'] = net
         # mixed_1: 35 x 35 x 288.
         with tf.variable_scope('mixed_35x35x288a'):
@@ -138,7 +138,7 @@ def inception_v3(inputs,
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
-          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
           end_points['mixed_35x35x288a'] = net
         # mixed_2: 35 x 35 x 288.
         with tf.variable_scope('mixed_35x35x288b'):
@@ -154,7 +154,7 @@ def inception_v3(inputs,
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 64, [1, 1])
-          net = tf.concat([branch1x1, branch5x5, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch5x5, branch3x3dbl, branch_pool])
           end_points['mixed_35x35x288b'] = net
         # mixed_3: 17 x 17 x 768.
         with tf.variable_scope('mixed_17x17x768a'):
@@ -167,7 +167,7 @@ def inception_v3(inputs,
                                       stride=2, padding='VALID')
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
-          net = tf.concat([branch3x3, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch3x3, branch3x3dbl, branch_pool])
           end_points['mixed_17x17x768a'] = net
         # mixed4: 17 x 17 x 768.
         with tf.variable_scope('mixed_17x17x768b'):
@@ -186,7 +186,7 @@ def inception_v3(inputs,
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
           end_points['mixed_17x17x768b'] = net
         # mixed_5: 17 x 17 x 768.
         with tf.variable_scope('mixed_17x17x768c'):
@@ -205,7 +205,7 @@ def inception_v3(inputs,
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
           end_points['mixed_17x17x768c'] = net
         # mixed_6: 17 x 17 x 768.
         with tf.variable_scope('mixed_17x17x768d'):
@@ -224,7 +224,7 @@ def inception_v3(inputs,
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
           end_points['mixed_17x17x768d'] = net
         # mixed_7: 17 x 17 x 768.
         with tf.variable_scope('mixed_17x17x768e'):
@@ -243,7 +243,7 @@ def inception_v3(inputs,
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch7x7, branch7x7dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch7x7, branch7x7dbl, branch_pool])
           end_points['mixed_17x17x768e'] = net
         # Auxiliary Head logits
         aux_logits = tf.identity(end_points['mixed_17x17x768e'])
@@ -276,7 +276,7 @@ def inception_v3(inputs,
                                      stride=2, padding='VALID')
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.max_pool(net, [3, 3], stride=2, padding='VALID')
-          net = tf.concat([branch3x3, branch7x7x3, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch3x3, branch7x7x3, branch_pool])
           end_points['mixed_17x17x1280a'] = net
         # mixed_9: 8 x 8 x 2048.
         with tf.variable_scope('mixed_8x8x2048a'):
@@ -284,17 +284,17 @@ def inception_v3(inputs,
             branch1x1 = ops.conv2d(net, 320, [1, 1])
           with tf.variable_scope('branch3x3'):
             branch3x3 = ops.conv2d(net, 384, [1, 1])
-            branch3x3 = tf.concat([ops.conv2d(branch3x3, 384, [1, 3]),
-                                   ops.conv2d(branch3x3, 384, [3, 1])], 3)
+            branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]),
+                                                  ops.conv2d(branch3x3, 384, [3, 1])])
           with tf.variable_scope('branch3x3dbl'):
             branch3x3dbl = ops.conv2d(net, 448, [1, 1])
             branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
-            branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]),
-                                      ops.conv2d(branch3x3dbl, 384, [3, 1])], 3)
+            branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]),
+                                                     ops.conv2d(branch3x3dbl, 384, [3, 1])])
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool])
           end_points['mixed_8x8x2048a'] = net
         # mixed_10: 8 x 8 x 2048.
         with tf.variable_scope('mixed_8x8x2048b'):
@@ -302,17 +302,17 @@ def inception_v3(inputs,
             branch1x1 = ops.conv2d(net, 320, [1, 1])
           with tf.variable_scope('branch3x3'):
             branch3x3 = ops.conv2d(net, 384, [1, 1])
-            branch3x3 = tf.concat([ops.conv2d(branch3x3, 384, [1, 3]),
-                                   ops.conv2d(branch3x3, 384, [3, 1])], 3)
+            branch3x3 = tf.concat(axis=3, values=[ops.conv2d(branch3x3, 384, [1, 3]),
+                                                  ops.conv2d(branch3x3, 384, [3, 1])])
           with tf.variable_scope('branch3x3dbl'):
             branch3x3dbl = ops.conv2d(net, 448, [1, 1])
             branch3x3dbl = ops.conv2d(branch3x3dbl, 384, [3, 3])
-            branch3x3dbl = tf.concat([ops.conv2d(branch3x3dbl, 384, [1, 3]),
-                                      ops.conv2d(branch3x3dbl, 384, [3, 1])], 3)
+            branch3x3dbl = tf.concat(axis=3, values=[ops.conv2d(branch3x3dbl, 384, [1, 3]),
+                                                     ops.conv2d(branch3x3dbl, 384, [3, 1])])
           with tf.variable_scope('branch_pool'):
             branch_pool = ops.avg_pool(net, [3, 3])
             branch_pool = ops.conv2d(branch_pool, 192, [1, 1])
-          net = tf.concat([branch1x1, branch3x3, branch3x3dbl, branch_pool], 3)
+          net = tf.concat(axis=3, values=[branch1x1, branch3x3, branch3x3dbl, branch_pool])
           end_points['mixed_8x8x2048b'] = net
         # Final pooling and prediction
         with tf.variable_scope('logits'):

+ 2 - 2
inception/inception/slim/ops.py

@@ -331,9 +331,9 @@ def one_hot_encoding(labels, num_classes, scope=None):
     batch_size = labels.get_shape()[0]
     indices = tf.expand_dims(tf.range(0, batch_size), 1)
     labels = tf.cast(tf.expand_dims(labels, 1), indices.dtype)
-    concated = tf.concat([indices, labels], 1)
+    concated = tf.concat(axis=1, values=[indices, labels])
     onehot_labels = tf.sparse_to_dense(
-        concated, tf.pack([batch_size, num_classes]), 1.0, 0.0)
+        concated, tf.stack([batch_size, num_classes]), 1.0, 0.0)
     onehot_labels.set_shape([batch_size, num_classes])
     return onehot_labels
 

+ 1 - 1
inception/inception/slim/variables.py

@@ -240,7 +240,7 @@ def global_step(device=''):
     # Get the device for the variable.
     with tf.device(variable_device(device, 'global_step')):
       return tf.get_variable('global_step', shape=[], dtype=tf.int64,
-                             initializer=tf.zeros_initializer,
+                             initializer=tf.zeros_initializer(),
                              trainable=False, collections=collections)
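
The added parentheses matter: in TF 1.0, tf.zeros_initializer is a class and must be instantiated before being passed to tf.get_variable. A minimal sketch:

import tensorflow as tf

# Passing the bare name (TF 0.x style) hands over the class itself;
# calling it returns the initializer object that get_variable expects.
global_step = tf.get_variable('global_step', shape=[], dtype=tf.int64,
                              initializer=tf.zeros_initializer(),
                              trainable=False)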
 
 

+ 1 - 1
namignizer/model.py

@@ -64,7 +64,7 @@ class NamignizerModel(object):
                 (cell_output, state) = cell(inputs[:, time_step, :], state)
                 outputs.append(cell_output)
 
-        output = tf.reshape(tf.concat(outputs, 1), [-1, size])
+        output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, size])
         softmax_w = tf.get_variable("softmax_w", [size, vocab_size])
         softmax_b = tf.get_variable("softmax_b", [vocab_size])
         logits = tf.matmul(output, softmax_w) + softmax_b

+ 22 - 22
neural_gpu/neural_gpu.py

@@ -36,7 +36,7 @@ def conv_linear(args, kw, kh, nin, nout, rate, do_bias, bias_start, prefix):
     if len(args) == 1:
       arg = args[0]
     else:
-      arg = tf.concat(args, 3)
+      arg = tf.concat(axis=3, values=args)
     res = tf.nn.convolution(arg, k, dilation_rate=(rate, 1), padding="SAME")
     if not do_bias: return res
     with tf.device("/cpu:0"):
@@ -71,14 +71,14 @@ def place_at14(decided, selected, it):
   """Place selected at it-th coordinate of decided, dim=1 of 4."""
   slice1 = decided[:, :it, :, :]
   slice2 = decided[:, it + 1:, :, :]
-  return tf.concat([slice1, selected, slice2], 1)
+  return tf.concat(axis=1, values=[slice1, selected, slice2])
 
 
 def place_at13(decided, selected, it):
   """Place selected at it-th coordinate of decided, dim=1 of 3."""
   slice1 = decided[:, :it, :]
   slice2 = decided[:, it + 1:, :]
-  return tf.concat([slice1, selected, slice2], 1)
+  return tf.concat(axis=[slice1, selected, slice2], values=1)
 
 
 def tanh_cutoff(x, cutoff):
@@ -211,7 +211,7 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
   # beam_val is [batch_size x beam_size]; let b = batch_size * beam_size
   # decided is len x b x a x b
   # output is b x out_size; step is b x len x a x b;
-  outputs = tf.split(tf.nn.log_softmax(output), beam_size, 0)
+  outputs = tf.split(axis=0, num_or_size_splits=beam_size, value=tf.nn.log_softmax(output))
   all_beam_vals, all_beam_idx = [], []
   beam_range = 1 if is_first else beam_size
   for i in xrange(beam_range):
@@ -221,9 +221,9 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
                                  cur_beam_val], "GREPO", summarize=8)
     all_beam_vals.append(top_out + tf.expand_dims(cur_beam_val, 1))
     all_beam_idx.append(top_out_idx)
-  all_beam_idx = tf.reshape(tf.transpose(tf.concat(all_beam_idx, 1), [1, 0]),
+  all_beam_idx = tf.reshape(tf.transpose(tf.concat(axis=1, values=all_beam_idx), [1, 0]),
                             [-1])
-  top_beam, top_beam_idx = tf.nn.top_k(tf.concat(all_beam_vals, 1), k=beam_size)
+  top_beam, top_beam_idx = tf.nn.top_k(tf.concat(axis=1, values=all_beam_vals), k=beam_size)
   top_beam_idx = tf.Print(top_beam_idx, [top_beam, top_beam_idx],
                           "GREP", summarize=8)
   reordered = [[] for _ in xrange(len(tensors_to_reorder) + 1)]
@@ -236,8 +236,8 @@ def reorder_beam(beam_size, batch_size, beam_val, output, is_first,
     reordered[0].append(tf.gather(output, which_beam))
     for i, t in enumerate(tensors_to_reorder):
       reordered[i + 1].append(tf.gather(t, which_beam))
-  new_tensors = [tf.concat(t, 0) for t in reordered]
-  top_out_idx = tf.concat(top_out_idx, 0)
+  new_tensors = [tf.concat(axis=0, values=t) for t in reordered]
+  top_out_idx = tf.concat(axis=0, values=top_out_idx)
   return (top_beam, new_tensors[0], top_out_idx, new_tensors[1:])
 
 
@@ -266,9 +266,9 @@ class NeuralGPU(object):
     self.input = tf.placeholder(tf.int32, name="inp")
     self.target = tf.placeholder(tf.int32, name="tgt")
     self.prev_step = tf.placeholder(tf.float32, name="prev_step")
-    gpu_input = tf.split(self.input, num_gpus, 0)
-    gpu_target = tf.split(self.target, num_gpus, 0)
-    gpu_prev_step = tf.split(self.prev_step, num_gpus, 0)
+    gpu_input = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.input)
+    gpu_target = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.target)
+    gpu_prev_step = tf.split(axis=0, num_or_size_splits=num_gpus, value=self.prev_step)
     batch_size = tf.shape(gpu_input[0])[0]
 
     if backward:
@@ -410,7 +410,7 @@ class NeuralGPU(object):
       out_write = output_ta.write(it, output_l[:batch_size, :, :, :])
       output = tf.gather(target_emb_weights, out)
       output = tf.reshape(output, [-1, 1, nmaps])
-      output = tf.concat([output] * height, 1)
+      output = tf.concat(axis=1, values=[output] * height)
       tgt = tgts[it, :, :, :]
       selected = tf.cond(tf.less(tf.random_uniform([]), self.sampling),
                          lambda: output, lambda: tgt)
@@ -419,7 +419,7 @@ class NeuralGPU(object):
       out_idx = place_at13(
           out_idx, tf.reshape(out, [beam_size * batch_size, 1, 1]), it)
       if mem_size > 0:
-        mem = tf.concat([mem] * height, 2)
+        mem = tf.concat(axis=2, values=[mem] * height)
         dec_write = place_at14(dec_write, mem, it_incr)
       return (step, dec_write, out_write, mloss + mem_loss, nupd_in + nupd,
               out_idx, beam_cost)
@@ -459,7 +459,7 @@ class NeuralGPU(object):
                                               gpu_targets_tn)
               embedded_targets_tn = tf.transpose(
                   embedded_targets_tn, [2, 0, 1, 3])  # len x b x 1 x nmaps
-              embedded_targets_tn = tf.concat([embedded_targets_tn] * height, 2)
+              embedded_targets_tn = tf.concat(axis=2, values=[embedded_targets_tn] * height)
 
         # First image comes from start by applying convolution and adding 0s.
         start = tf.transpose(start, [0, 2, 1, 3])  # Now b x len x h x vec_s
@@ -505,7 +505,7 @@ class NeuralGPU(object):
               attn_res = attention_query(attn_q, tf.get_variable(
                   "attn_v", [height * nmaps],
                   initializer=tf.random_uniform_initializer(-0.1, 0.1)))
-              concatenated = tf.reshape(tf.concat([cell_inp, attn_res], 1),
+              concatenated = tf.reshape(tf.concat(axis=1, values=[cell_inp, attn_res]),
                                         [batch_size, 2 * height * nmaps])
               cell_inp = tf.layers.dense(
                   concatenated, height * nmaps, name="attn_merge")
@@ -519,14 +519,14 @@ class NeuralGPU(object):
                 res = tf.gather(target_emb_weights, res)
                 res *= tf.expand_dims(mask[:, 0], 1)
                 output = tf.layers.dense(
-                    tf.concat([output, res], 1), height * nmaps, name="rnnmem")
+                    tf.concat(axis=1, values=[output, res]), height * nmaps, name="rnnmem")
 
               return new_state, output, mem_loss
             # pylint: enable=cell-var-from-loop
             gpu_targets = tf.squeeze(gpu_target[gpu], [1])  # b x len
             gpu_tgt_trans = tf.transpose(gpu_targets, [1, 0])
             dec_zero = tf.zeros([batch_size, 1], dtype=tf.int32)
-            dec_inp = tf.concat([dec_zero, gpu_targets], 1)
+            dec_inp = tf.concat(axis=1, values=[dec_zero, gpu_targets])
             dec_inp = dec_inp[:, :length]
             embedded_dec_inp = tf.gather(target_emb_weights, dec_inp)
             embedded_dec_inp_proj = tf.layers.dense(
@@ -573,9 +573,9 @@ class NeuralGPU(object):
                                   height, vec_size])
 
             # Prepare for beam search.
-            tgts = tf.concat([embedded_targets_tn] * beam_size, 1)
+            tgts = tf.concat(axis=1, values=[embedded_targets_tn] * beam_size)
             beam_cost = tf.zeros([batch_size, beam_size])
-            step = tf.concat([step] * beam_size, 0)
+            step = tf.concat(axis=0, values=[step] * beam_size)
             # First step hard-coded.
             step, decided_t, output_ta, mem_loss, nupd, oi, bc = dec_step(
                 step, 0, 0, decided_t, output_ta, tgts, 0.0, 0, out_idx,
@@ -654,7 +654,7 @@ class NeuralGPU(object):
                        % (gpu, time.time() - start_time))
 
     self.updates = []
-    self.after_enc_step = tf.concat(self.after_enc_step, 0)  # Concat GPUs.
+    self.after_enc_step = tf.concat(axis=0, values=self.after_enc_step)  # Concat GPUs.
     if backward:
       tf.get_variable_scope()._reuse = False
       tf.get_variable_scope().set_caching_device(None)
@@ -667,10 +667,10 @@ class NeuralGPU(object):
 
     self.losses = [gpu_avg([gpu_losses[g][i] for g in xrange(num_gpus)])
                    for i in xrange(len(gpu_losses[0]))]
-    self.out_idx = tf.concat(gpu_out_idx, 0)
+    self.out_idx = tf.concat(axis=0, values=gpu_out_idx)
     self.grad_norms = [gpu_avg([gpu_grad_norms[g][i] for g in xrange(num_gpus)])
                        for i in xrange(len(gpu_grad_norms[0]))]
-    self.outputs = [tf.concat([gpu_outputs[g] for g in xrange(num_gpus)], 1)]
+    self.outputs = [tf.concat(axis=1, values=[gpu_outputs[g] for g in xrange(num_gpus)])]
     self.quantize_op = quantize_weights_op(512, 8)
     if backward:
       self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)

+ 0 - 0
neural_programmer/data_utils.py


+ 32 - 32
neural_programmer/model.py

@@ -121,14 +121,14 @@ class Graph():
       if (self.utility.FLAGS.rnn_dropout > 0.0):
         question_hidden = question_hidden * rnn_dropout_mask
       hidden_vectors.append(tf.expand_dims(question_hidden, 0))
-    hidden_vectors = tf.concat(0, hidden_vectors)
+    hidden_vectors = tf.concat(axis=0, values=hidden_vectors)
     return question_hidden, hidden_vectors
 
   def history_recurrent_step(self, curr_hprev, hprev):
     #A single RNN step for controller or history RNN
     return tf.tanh(
         tf.matmul(
-            tf.concat(1, [hprev, curr_hprev]), self.params[
+            tf.concat(axis=1, values=[hprev, curr_hprev]), self.params[
                 "history_recurrent"])) + self.params["history_recurrent_bias"]
 
   def question_number_softmax(self, hidden_vectors):
@@ -150,13 +150,13 @@ class Graph():
                             tf.expand_dims(
                                 tf.transpose(self.batch_ordinal_question_one), 2
                             ), [1, 1, self.utility.FLAGS.embedding_dims]), 0))))
-      question_number_softmax = tf.nn.softmax(tf.concat(1, [first, second]))
+      question_number_softmax = tf.nn.softmax(tf.concat(axis=1, values=[first, second]))
       if (self.mode == "test"):
         cond = tf.equal(question_number_softmax,
                         tf.reshape(
                             tf.reduce_max(question_number_softmax, 1),
                             [self.batch_size, 1]))
-        question_number_softmax = tf.select(
+        question_number_softmax = tf.where(
             cond,
             tf.fill(tf.shape(question_number_softmax), 1.0),
             tf.fill(tf.shape(question_number_softmax), 0.0))
@@ -164,7 +164,7 @@ class Graph():
                                           self.data_type)
       ans = tf.reshape(
           tf.reduce_sum(question_number_softmax * tf.concat(
-              1, [self.batch_question_number, self.batch_question_number_one]),
+              axis=1, values=[self.batch_question_number, self.batch_question_number_one]),
                         1), [self.batch_size, 1])
       return ans
 
@@ -225,7 +225,7 @@ class Graph():
     column_controller_vector = nn_utils.apply_dropout(
         column_controller_vector, self.utility.FLAGS.dropout, self.mode)
     self.full_column_hidden_vectors = tf.concat(
-        1, [self.column_hidden_vectors, self.word_column_hidden_vectors])
+        axis=1, values=[self.column_hidden_vectors, self.word_column_hidden_vectors])
     self.full_column_hidden_vectors += self.summary_text_entry_embeddings
     self.full_column_hidden_vectors = nn_utils.apply_dropout(
         self.full_column_hidden_vectors, self.utility.FLAGS.dropout, self.mode)
@@ -258,7 +258,7 @@ class Graph():
           temp_ans.append(curr_prob)
         else:
           temp_ans.append(tf.zeros_like(curr_prob))
-      temp_ans = tf.transpose(tf.concat(0, temp_ans))
+      temp_ans = tf.transpose(tf.concat(axis=0, values=temp_ans))
       answer += temp_ans
     return answer
 
@@ -266,7 +266,7 @@ class Graph():
     #converts soft selection to hard selection. used at test time
     cond = tf.equal(
         softmax, tf.reshape(tf.reduce_max(softmax, 1), [self.batch_size, 1]))
-    softmax = tf.select(
+    softmax = tf.where(
         cond, tf.fill(tf.shape(softmax), 1.0), tf.fill(tf.shape(softmax), 0.0))
     softmax = tf.cast(softmax, self.data_type)
     return softmax
@@ -297,7 +297,7 @@ class Graph():
       curr_prob = curr_prob * tf.expand_dims((1 - sum_prob), 2)
       curr_prob = curr_prob * tf.expand_dims(
           tf.cast((1 - sum_prob) > 0.0, self.data_type), 2)
-      answer = tf.select(select_mask, curr_prob, answer)
+      answer = tf.where(select_mask, curr_prob, answer)
       sum_prob += tf.reduce_sum(curr_prob, 2)
     return answer
 
@@ -335,11 +335,11 @@ class Graph():
                                1)  #BS * max_elements
     select_min = tf.reduce_sum(init_min * select_full_column_softmax,
                                1)  #BS * max_elements
-    select_prev = tf.concat(1, [
+    select_prev = tf.concat(axis=1, values=[
         tf.slice(select, [0, 1], [self.batch_size, self.max_elements - 1]),
         tf.cast(tf.zeros([self.batch_size, 1]), self.data_type)
     ])
-    select_next = tf.concat(1, [
+    select_next = tf.concat(axis=1, values=[
         tf.cast(tf.zeros([self.batch_size, 1]), self.data_type), tf.slice(
             select, [0, 0], [self.batch_size, self.max_elements - 1])
     ])
@@ -352,11 +352,11 @@ class Graph():
     length_content = 1
     length_select = 13
     length_print = 1
-    values = tf.concat(1, [count])
+    values = tf.concat(axis=1, values=[count])
     softmax_content = tf.slice(softmax, [0, 0],
                                [self.batch_size, length_content])
     #compute scalar output
-    output = tf.reduce_sum(tf.mul(softmax_content, values), 1)
+    output = tf.reduce_sum(tf.multiply(softmax_content, values), 1)
     #compute lookup answer
     softmax_print = tf.slice(softmax, [0, length_content + length_select],
                              [self.batch_size, length_print])
@@ -384,7 +384,7 @@ class Graph():
     ]
     select = tf.reduce_sum(
         tf.tile(tf.expand_dims(softmax_select, 2), [1, 1, self.max_elements]) *
-        tf.concat(1, select_lists), 1)
+        tf.concat(axis=1, values=select_lists), 1)
     select = select * self.select_whole_mask
     return output, select
 
@@ -396,11 +396,11 @@ class Graph():
         self.batch_question_attention_mask)  #batch_size * embedding_dims
     controller_vector = tf.nn.relu(
         tf.matmul(hprev, self.params["controller_prev"]) + tf.matmul(
-            tf.concat(1, [question_embedding, attention_vector]), self.params[
+            tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
                 "controller"]))
     column_controller_vector = tf.nn.relu(
         tf.matmul(hprev, self.params["column_controller_prev"]) + tf.matmul(
-            tf.concat(1, [question_embedding, attention_vector]), self.params[
+            tf.concat(axis=1, values=[question_embedding, attention_vector]), self.params[
                 "column_controller"]))
     controller_vector = nn_utils.apply_dropout(
         controller_vector, self.utility.FLAGS.dropout, self.mode)
@@ -413,7 +413,7 @@ class Graph():
         tf.matmul(tf.transpose(self.params_unit), tf.transpose(softmax)))
     column_controller_vector = tf.nn.relu(
         tf.matmul(
-            tf.concat(1, [
+            tf.concat(axis=1, values=[
                 column_controller_vector, weighted_op_representation
             ]), self.params["break_conditional"]))
     full_column_softmax = self.compute_column_softmax(column_controller_vector,
@@ -429,7 +429,7 @@ class Graph():
   def compute_lookup_error(self, val):
     #computes lookup error.
     cond = tf.equal(self.batch_print_answer, val)
-    inter = tf.select(
+    inter = tf.where(
         cond, self.init_print_error,
         tf.tile(
             tf.reshape(tf.constant(1e10, self.data_type), [1, 1, 1]), [
@@ -450,12 +450,12 @@ class Graph():
 
   def error_computation(self):
     #computes the error of each example in a batch
-    math_error = 0.5 * tf.square(tf.sub(self.scalar_output, self.batch_answer))
+    math_error = 0.5 * tf.square(tf.subtract(self.scalar_output, self.batch_answer))
     #scale math error
     math_error = math_error / self.rows
     math_error = tf.minimum(math_error, self.utility.FLAGS.max_math_error *
                             tf.ones(tf.shape(math_error), self.data_type))
-    self.init_print_error = tf.select(
+    self.init_print_error = tf.where(
         self.batch_gold_select, -1 * tf.log(self.batch_lookup_answer + 1e-300 +
                                             self.invert_select_full_mask), -1 *
         tf.log(1 - self.batch_lookup_answer)) * self.select_full_mask
@@ -466,24 +466,24 @@ class Graph():
       print_error += self.compute_lookup_error(val + 0.0)
     print_error = print_error * self.utility.FLAGS.print_cost / self.num_entries
     if (self.mode == "train"):
-      error = tf.select(
+      error = tf.where(
           tf.logical_and(
               tf.not_equal(self.batch_answer, 0.0),
               tf.not_equal(
                   tf.reduce_sum(tf.reduce_sum(self.batch_print_answer, 1), 1),
                   0.0)),
           self.soft_min(math_error, print_error),
-          tf.select(
+          tf.where(
               tf.not_equal(self.batch_answer, 0.0), math_error, print_error))
     else:
-      error = tf.select(
+      error = tf.where(
           tf.logical_and(
               tf.equal(self.scalar_output, 0.0),
               tf.equal(
                   tf.reduce_sum(tf.reduce_sum(self.batch_lookup_answer, 1), 1),
                   0.0)),
           tf.ones_like(math_error),
-          tf.select(
+          tf.where(
               tf.equal(self.scalar_output, 0.0), print_error, math_error))
     return error
 
@@ -558,7 +558,7 @@ class Graph():
       input_col = tf.reduce_sum(
           tf.expand_dims(soft_column_softmax, 2) *
           self.full_column_hidden_vectors, 1)
-      history_input = tf.concat(1, [input_op, input_col])
+      history_input = tf.concat(axis=1, values=[input_op, input_col])
       history_input = nn_utils.apply_dropout(
           history_input, self.utility.FLAGS.dropout, self.mode)
       hprev = self.history_recurrent_step(history_input, hprev)
@@ -567,7 +567,7 @@ class Graph():
     self.scalar_output = output
     error = self.error_computation()
     cond = tf.less(error, 0.0001, name="cond")
-    correct_add = tf.select(
+    correct_add = tf.where(
         cond, tf.fill(tf.shape(cond), 1.0), tf.fill(tf.shape(cond), 0.0))
     correct = tf.reduce_sum(correct_add)
     error = error / batch_size
@@ -579,11 +579,11 @@ class Graph():
     #Sets mask variables and performs batch processing
     self.batch_gold_select = self.batch_print_answer > 0.0
     self.full_column_mask = tf.concat(
-        1, [self.batch_number_column_mask, self.batch_word_column_mask])
+        axis=1, values=[self.batch_number_column_mask, self.batch_word_column_mask])
     self.full_processed_column = tf.concat(
-        1,
-        [self.batch_processed_number_column, self.batch_processed_word_column])
-    self.full_processed_sorted_index_column = tf.concat(1, [
+        axis=1,
+        values=[self.batch_processed_number_column, self.batch_processed_word_column])
+    self.full_processed_sorted_index_column = tf.concat(axis=1, values=[
         self.batch_processed_sorted_index_number_column,
         self.batch_processed_sorted_index_word_column
     ])
@@ -603,7 +603,7 @@ class Graph():
             tf.equal(self.batch_word_column_entry_mask,
                      self.utility.dummy_token_id)), self.data_type)
     self.select_full_mask = tf.concat(
-        1, [self.select_mask, self.select_word_mask])
+        axis=1, values=[self.select_mask, self.select_word_mask])
     self.select_whole_mask = tf.maximum(
         tf.reshape(
             tf.slice(self.select_mask, [0, 0, 0],
@@ -614,7 +614,7 @@ class Graph():
                      [self.batch_size, 1, self.max_elements]),
             [self.batch_size, self.max_elements]))
     self.invert_select_full_mask = tf.cast(
-        tf.concat(1, [
+        tf.concat(axis=1, values=[
             tf.equal(self.batch_number_column, self.utility.FLAGS.pad_int),
             tf.equal(self.batch_word_column_entry_mask,
                      self.utility.dummy_token_id)

+ 0 - 0
neural_programmer/neural_programmer.py


+ 0 - 0
neural_programmer/nn_utils.py


+ 0 - 0
neural_programmer/parameters.py


+ 0 - 0
neural_programmer/wiki_data.py


+ 6 - 6
next_frame_prediction/cross_conv/model.py

@@ -65,7 +65,7 @@ class CrossConvModel(object):
       diff = diff * 2.0 - self.params['scale']
       diff_output = self.diff_output * 2.0 - self.params['scale']
       concat_image = tf.concat(
-          1, [image, image + diff_output, image + diff, diff_output])
+          axis=1, values=[image, image + diff_output, image + diff, diff_output])
       tf.summary.image('origin_predict_expect_predictdiff', concat_image)
       self.summary_op = tf.summary.merge_all()
       return self.loss
@@ -113,7 +113,7 @@ class CrossConvModel(object):
     assert shape[1] == shape[2] and shape[1] == 128
     batch_size = shape[0]
 
-    net = tf.concat(3, [image, diff])
+    net = tf.concat(axis=3, values=[image, diff])
     with tf.variable_scope('motion_encoder'):
       with slim.arg_scope([slim.conv2d], padding='VALID'):
         net = slim.conv2d(net, 96, [5, 5], stride=1)
@@ -128,7 +128,7 @@ class CrossConvModel(object):
 
         z = tf.reshape(net, shape=[batch_size, -1])
         self.z_mean, self.z_stddev_log = tf.split(
-            split_dim=1, num_split=2, value=z)
+            axis=1, num_or_size_splits=2, value=z)
         self.z_stddev = tf.exp(self.z_stddev_log)
 
         epsilon = tf.random_normal(
@@ -174,7 +174,7 @@ class CrossConvModel(object):
   def _CrossConv(self, encoded_images):
     """Apply the motion kernel on the encoded_images."""
     cross_conved_images = []
-    kernels = tf.split(split_dim=3, num_split=4, value=self.kernel)
+    kernels = tf.split(axis=3, num_or_size_splits=4, value=self.kernel)
     for (i, encoded_image) in enumerate(encoded_images):
       with tf.variable_scope('cross_conv_%d' % i):
         kernel = kernels[i]
@@ -187,7 +187,7 @@ class CrossConvModel(object):
         for j in xrange(len(encoded_image)):
           conved_image.append(self._CrossConvHelper(
               encoded_image[j], kernel[j]))
-        cross_conved_images.append(tf.concat(0, conved_image))
+        cross_conved_images.append(tf.concat(axis=0, values=conved_image))
         sys.stderr.write('cross_conved shape: %s\n' %
                          cross_conved_images[-1].get_shape())
     return cross_conved_images
@@ -224,7 +224,7 @@ class CrossConvModel(object):
         nets.append(self._Deconv(
             cross_conved_image, 64, kernel_size=3, stride=stride))
 
-    net = tf.concat(3, nets)
+    net = tf.concat(axis=3, values=nets)
     net = slim.conv2d(net, 128, [9, 9], padding='SAME', stride=1)
     net = slim.conv2d(net, 128, [1, 1], padding='SAME', stride=1)
     net = slim.conv2d(net, 3, [1, 1], padding='SAME', stride=1)

+ 1 - 1
next_frame_prediction/cross_conv/reader.py

@@ -42,7 +42,7 @@ def SequenceToImageAndDiff(images):
     for i in xrange(0, len(resized_images)-1):
       diffs.append(resized_images[i+1] - resized_images[i])
     image_diff_list.append(
-        (tf.concat(0, resized_images[:-1]), tf.concat(0, diffs)))
+        (tf.concat(axis=0, values=resized_images[:-1]), tf.concat(axis=0, values=diffs)))
   return image_diff_list
 
 

+ 11 - 11
real_nvp/real_nvp_multiscale_dataset.py

@@ -332,7 +332,7 @@ def masked_conv_aff_coupling(input_, mask_in, dim, name,
                      residual_blocks=residual_blocks,
                      bottleneck=bottleneck, skip=skip)
         mask = tf.mod(mask_channel + mask, 2)
-        res = tf.split(res, 2, 3)
+        res = tf.split(axis=3, num_or_size_splits=2, value=res)
         shift, log_rescaling = res[-2], res[-1]
         scale = variable_on_cpu(
             "rescaling_scale", [],
@@ -486,9 +486,9 @@ def conv_ch_aff_coupling(input_, dim, name,
             scope.reuse_variables()
 
         if change_bottom:
-            input_, canvas = tf.split(input_, 2, 3)
+            input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
         else:
-            canvas, input_ = tf.split(input_, 2, 3)
+            canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
         shape = input_.get_shape().as_list()
         batch_size = shape[0]
         height = shape[1]
@@ -509,7 +509,7 @@ def conv_ch_aff_coupling(input_, dim, name,
                      train=train, weight_norm=weight_norm,
                      residual_blocks=residual_blocks,
                      bottleneck=bottleneck, skip=skip)
-        shift, log_rescaling = tf.split(res, 2, 3)
+        shift, log_rescaling = tf.split(axis=3, num_or_size_splits=2, value=res)
         scale = variable_on_cpu(
             "scale", [],
             tf.constant_initializer(1.))
@@ -570,9 +570,9 @@ def conv_ch_add_coupling(input_, dim, name,
             scope.reuse_variables()
 
         if change_bottom:
-            input_, canvas = tf.split(input_, 2, 3)
+            input_, canvas = tf.split(axis=3, num_or_size_splits=2, value=input_)
         else:
-            canvas, input_ = tf.split(input_, 2, 3)
+            canvas, input_ = tf.split(axis=3, num_or_size_splits=2, value=input_)
         shape = input_.get_shape().as_list()
         channels = shape[3]
         res = input_
@@ -736,8 +736,8 @@ def rec_masked_conv_coupling(input_, hps, scale_idx, n_scale,
                 log_diff_1 = log_diff[:, :, :, :channels]
                 log_diff_2 = log_diff[:, :, :, channels:]
             else:
-                res_1, res_2 = tf.split(res, 2, 3)
-                log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
+                res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
+                log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
             res_1, inc_log_diff = rec_masked_conv_coupling(
                 input_=res_1, hps=hps, scale_idx=scale_idx + 1, n_scale=n_scale,
                 use_batch_norm=use_batch_norm, weight_norm=weight_norm,
@@ -798,8 +798,8 @@ def rec_masked_deconv_coupling(input_, hps, scale_idx, n_scale,
                 log_diff_1 = log_diff[:, :, :, :channels]
                 log_diff_2 = log_diff[:, :, :, channels:]
             else:
-                res_1, res_2 = tf.split(res, 2, 3)
-                log_diff_1, log_diff_2 = tf.split(log_diff, 2, 3)
+                res_1, res_2 = tf.split(axis=3, num_or_size_splits=2, value=res)
+                log_diff_1, log_diff_2 = tf.split(axis=3, num_or_size_splits=2, value=log_diff)
             res_1, log_diff_1 = rec_masked_deconv_coupling(
                 input_=res_1, hps=hps,
                 scale_idx=scale_idx + 1, n_scale=n_scale,
@@ -1305,7 +1305,7 @@ class RealNVP(object):
             z_lost = z_complete
             for scale_idx in xrange(hps.n_scale - 1):
                 z_lost = squeeze_2x2_ordered(z_lost)
-                z_lost, _ = tf.split(z_lost, 2, 3)
+                z_lost, _ = tf.split(axis=3, num_or_size_splits=2, value=z_lost)
                 z_compressed = z_lost
                 z_noisy = z_lost
                 for _ in xrange(scale_idx + 1):
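
In the TF 1.0 signature the tensor belongs in value and the integer dimension in axis; swapping them (e.g. axis=res, value=3) fails because axis must be a scalar integer. A quick shape check, with an illustrative shape:

    import tensorflow as tf

    res = tf.ones([8, 16, 16, 2])
    shift, log_rescaling = tf.split(axis=3, num_or_size_splits=2, value=res)
    assert shift.get_shape().as_list() == [8, 16, 16, 1]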

+ 7 - 7
real_nvp/real_nvp_utils.py

@@ -99,8 +99,8 @@ def conv_layer(input_,
                     filter_size[1] - input_.get_shape().as_list()[2],
                     input_.get_shape().as_list()[3]
                 ])
-                res = tf.concat(1, [pad_1, res])
-                res = tf.concat(2, [pad_2, res])
+                res = tf.concat(axis=1, values=[pad_1, res])
+                res = tf.concat(axis=2, values=[pad_2, res])
         res = tf.nn.conv2d(
             input=res,
             filter=weights,
@@ -139,8 +139,8 @@ def depool_2x2(input_, stride=2):
     channels = shape[3]
     res = tf.reshape(input_, [batch_size, height, 1, width, 1, channels])
     res = tf.concat(
-        2, [res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
-    res = tf.concat(4, [
+        axis=2, values=[res, tf.zeros([batch_size, height, stride - 1, width, 1, channels])])
+    res = tf.concat(axis=4, values=[
         res, tf.zeros([batch_size, height, stride, width, stride - 1, channels])
     ])
     res = tf.reshape(res, [batch_size, stride * height, stride * width, channels])
@@ -158,11 +158,11 @@ def batch_random_flip(input_):
     height = shape[1]
     width = shape[2]
     channels = shape[3]
-    res = tf.split(0, batch_size, input_)
+    res = tf.split(axis=0, num_or_size_splits=batch_size, value=input_)
     res = [elem[0, :, :, :] for elem in res]
     res = [tf.image.random_flip_left_right(elem) for elem in res]
     res = [tf.reshape(elem, [1, height, width, channels]) for elem in res]
-    res = tf.concat(0, res)
+    res = tf.concat(axis=0, values=res)
 
     return res
 
@@ -175,7 +175,7 @@ def as_one_hot(input_, n_indices):
     n_elem = numpy.prod(shape)
     indices = tf.range(n_elem)
     indices = tf.cast(indices, tf.int64)
-    indices_input = tf.concat(0, [indices, tf.reshape(input_, [-1])])
+    indices_input = tf.concat(axis=0, values=[indices, tf.reshape(input_, [-1])])
     indices_input = tf.reshape(indices_input, [2, -1])
     indices_input = tf.transpose(indices_input)
     res = tf.sparse_to_dense(

+ 6 - 6
slim/deployment/model_deploy.py

@@ -232,10 +232,10 @@ def _gather_clone_loss(clone, num_clones, regularization_losses):
       sum_loss = tf.add_n(all_losses)
   # Add the summaries out of the clone device block.
   if clone_loss is not None:
-    tf.scalar_summary(clone.scope + '/clone_loss', clone_loss,
-                      name='clone_loss')
+    tf.summary.scalar(clone.scope + '/clone_loss', clone_loss)
   if regularization_loss is not None:
-    tf.scalar_summary('regularization_loss', regularization_loss,
-                      name='regularization_loss')
+    tf.summary.scalar('regularization_loss', regularization_loss)
   return sum_loss
 
@@ -404,12 +404,12 @@ def deploy(config,
 
     if total_loss is not None:
       # Add total_loss to summary.
-      summaries.add(tf.scalar_summary('total_loss', total_loss,
-                                      name='total_loss'))
+      summaries.add(tf.summary.scalar('total_loss', total_loss))
 
     if summaries:
       # Merge all summaries together.
-      summary_op = tf.merge_summary(list(summaries), name='summary_op')
+      summary_op = tf.summary.merge(list(summaries), name='summary_op')
     else:
       summary_op = None
 
@@ -467,9 +467,9 @@ def _add_gradients_summaries(grads_and_vars):
         grad_values = grad.values
       else:
         grad_values = grad
-      summaries.append(tf.histogram_summary(var.op.name + ':gradient',
+      summaries.append(tf.summary.histogram(var.op.name + ':gradient',
                                             grad_values))
-      summaries.append(tf.histogram_summary(var.op.name + ':gradient_norm',
+      summaries.append(tf.summary.histogram(var.op.name + ':gradient_norm',
                                             tf.global_norm([grad_values])))
     else:
       tf.logging.info('Var %s has no gradient', var.op.name)
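
The deprecated summary ops (tf.scalar_summary, tf.histogram_summary, tf.merge_summary) became tf.summary.scalar, tf.summary.histogram, and tf.summary.merge in TF 1.0. The new ops take the tag as their first argument, and that tag doubles as the op name, so the old standalone name= keyword is gone. A minimal sketch:

    import tensorflow as tf

    loss = tf.constant(0.5)
    tf.summary.scalar('total_loss', loss)  # the tag is also the op name
    summary_op = tf.summary.merge_all()    # or tf.summary.merge([...])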

+ 1 - 1
slim/eval_image_classifier.py

@@ -160,7 +160,7 @@ def main(_):
     # Print the summaries to screen.
     for name, value in names_to_values.iteritems():
       summary_name = 'eval/%s' % name
-      op = tf.scalar_summary(summary_name, value, collections=[])
+      op = tf.summary.scalar(summary_name, value, collections=[])
       op = tf.Print(op, [value], summary_name)
       tf.add_to_collection(tf.GraphKeys.SUMMARIES, op)
 

+ 1 - 1
slim/nets/alexnet.py

@@ -113,7 +113,7 @@ def alexnet_v2(inputs,
         net = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
-                          biases_initializer=tf.zeros_initializer,
+                          biases_initializer=tf.zeros_initializer(),
                           scope='fc8')
 
       # Convert end_points_collection into a end_point dict.
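
tf.zeros_initializer changed from a ready-made initializer in TF 0.x to a factory that must be called in TF 1.0, hence the added parentheses here and in the cifarnet, overfeat, vgg, street, and video_prediction hunks below. A minimal sketch:

    import tensorflow as tf

    # Without the parentheses, get_variable receives the factory itself
    # instead of an initializer object and raises an error.
    b = tf.get_variable('biases', shape=[1000],
                        initializer=tf.zeros_initializer())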

+ 1 - 1
slim/nets/cifarnet.py

@@ -77,7 +77,7 @@ def cifarnet(images, num_classes=10, is_training=False,
     net = slim.fully_connected(net, 192, scope='fc4')
     end_points['fc4'] = net
     logits = slim.fully_connected(net, num_classes,
-                                  biases_initializer=tf.zeros_initializer,
+                                  biases_initializer=tf.zeros_initializer(),
                                   weights_initializer=trunc_normal(1/192.0),
                                   weights_regularizer=None,
                                   activation_fn=None,

+ 9 - 9
slim/nets/inception_v1.py

@@ -93,7 +93,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -110,7 +110,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -132,7 +132,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -149,7 +149,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -166,7 +166,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -183,7 +183,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -200,7 +200,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -222,7 +222,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
 
@@ -239,7 +239,7 @@ def inception_v1_base(inputs,
           with tf.variable_scope('Branch_3'):
             branch_3 = slim.max_pool2d(net, [3, 3], scope='MaxPool_0a_3x3')
             branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
-          net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+          net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if final_endpoint == end_point: return net, end_points
     raise ValueError('Unknown final endpoint %s' % final_endpoint)

+ 10 - 10
slim/nets/inception_v2.py

@@ -145,7 +145,7 @@ def inception_v2_base(inputs,
               branch_3, depth(32), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
       # 28 x 28 x 256
@@ -175,7 +175,7 @@ def inception_v2_base(inputs,
               branch_3, depth(64), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
       # 28 x 28 x 320
@@ -200,7 +200,7 @@ def inception_v2_base(inputs,
         with tf.variable_scope('Branch_2'):
           branch_2 = slim.max_pool2d(
               net, [3, 3], stride=2, scope='MaxPool_1a_3x3')
-        net = tf.concat(3, [branch_0, branch_1, branch_2])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
       # 14 x 14 x 576
@@ -230,7 +230,7 @@ def inception_v2_base(inputs,
               branch_3, depth(128), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
       # 14 x 14 x 576
@@ -260,7 +260,7 @@ def inception_v2_base(inputs,
               branch_3, depth(128), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
       # 14 x 14 x 576
@@ -290,7 +290,7 @@ def inception_v2_base(inputs,
               branch_3, depth(96), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
 
@@ -321,7 +321,7 @@ def inception_v2_base(inputs,
               branch_3, depth(96), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
       # 14 x 14 x 576
@@ -346,7 +346,7 @@ def inception_v2_base(inputs,
         with tf.variable_scope('Branch_2'):
           branch_2 = slim.max_pool2d(net, [3, 3], stride=2,
                                      scope='MaxPool_1a_3x3')
-        net = tf.concat(3, [branch_0, branch_1, branch_2])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
       # 7 x 7 x 1024
@@ -376,7 +376,7 @@ def inception_v2_base(inputs,
               branch_3, depth(128), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
 
@@ -407,7 +407,7 @@ def inception_v2_base(inputs,
               branch_3, depth(128), [1, 1],
               weights_initializer=trunc_normal(0.1),
               scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
         end_points[end_point] = net
         if end_point == final_endpoint: return net, end_points
     raise ValueError('Unknown final endpoint %s' % final_endpoint)

+ 15 - 15
slim/nets/inception_v3.py

@@ -158,7 +158,7 @@ def inception_v3_base(inputs,
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(branch_3, depth(32), [1, 1],
                                  scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -182,7 +182,7 @@ def inception_v3_base(inputs,
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                  scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -205,7 +205,7 @@ def inception_v3_base(inputs,
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(branch_3, depth(64), [1, 1],
                                  scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -224,7 +224,7 @@ def inception_v3_base(inputs,
         with tf.variable_scope('Branch_2'):
           branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                      scope='MaxPool_1a_3x3')
-        net = tf.concat(3, [branch_0, branch_1, branch_2])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -253,7 +253,7 @@ def inception_v3_base(inputs,
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                  scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -282,7 +282,7 @@ def inception_v3_base(inputs,
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                  scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
       # mixed_6: 17 x 17 x 768.
@@ -310,7 +310,7 @@ def inception_v3_base(inputs,
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                  scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -339,7 +339,7 @@ def inception_v3_base(inputs,
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(branch_3, depth(192), [1, 1],
                                  scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -361,7 +361,7 @@ def inception_v3_base(inputs,
         with tf.variable_scope('Branch_2'):
           branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                      scope='MaxPool_1a_3x3')
-        net = tf.concat(3, [branch_0, branch_1, branch_2])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
       # mixed_9: 8 x 8 x 2048.
@@ -371,21 +371,21 @@ def inception_v3_base(inputs,
           branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
         with tf.variable_scope('Branch_1'):
           branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
-          branch_1 = tf.concat(3, [
+          branch_1 = tf.concat(axis=3, values=[
               slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
               slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0b_3x1')])
         with tf.variable_scope('Branch_2'):
           branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
           branch_2 = slim.conv2d(
               branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
-          branch_2 = tf.concat(3, [
+          branch_2 = tf.concat(axis=3, values=[
               slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
               slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
         with tf.variable_scope('Branch_3'):
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(
               branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
 
@@ -396,21 +396,21 @@ def inception_v3_base(inputs,
           branch_0 = slim.conv2d(net, depth(320), [1, 1], scope='Conv2d_0a_1x1')
         with tf.variable_scope('Branch_1'):
           branch_1 = slim.conv2d(net, depth(384), [1, 1], scope='Conv2d_0a_1x1')
-          branch_1 = tf.concat(3, [
+          branch_1 = tf.concat(axis=3, values=[
               slim.conv2d(branch_1, depth(384), [1, 3], scope='Conv2d_0b_1x3'),
               slim.conv2d(branch_1, depth(384), [3, 1], scope='Conv2d_0c_3x1')])
         with tf.variable_scope('Branch_2'):
           branch_2 = slim.conv2d(net, depth(448), [1, 1], scope='Conv2d_0a_1x1')
           branch_2 = slim.conv2d(
               branch_2, depth(384), [3, 3], scope='Conv2d_0b_3x3')
-          branch_2 = tf.concat(3, [
+          branch_2 = tf.concat(axis=3, values=[
               slim.conv2d(branch_2, depth(384), [1, 3], scope='Conv2d_0c_1x3'),
               slim.conv2d(branch_2, depth(384), [3, 1], scope='Conv2d_0d_3x1')])
         with tf.variable_scope('Branch_3'):
           branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
           branch_3 = slim.conv2d(
               branch_3, depth(192), [1, 1], scope='Conv2d_0b_1x1')
-        net = tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+        net = tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
       end_points[end_point] = net
       if end_point == final_endpoint: return net, end_points
     raise ValueError('Unknown final endpoint %s' % final_endpoint)

+ 10 - 10
slim/nets/inception_v4.py

@@ -49,7 +49,7 @@ def block_inception_a(inputs, scope=None, reuse=None):
       with tf.variable_scope('Branch_3'):
         branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
         branch_3 = slim.conv2d(branch_3, 96, [1, 1], scope='Conv2d_0b_1x1')
-      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 
 
 def block_reduction_a(inputs, scope=None, reuse=None):
@@ -69,7 +69,7 @@ def block_reduction_a(inputs, scope=None, reuse=None):
       with tf.variable_scope('Branch_2'):
         branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                    scope='MaxPool_1a_3x3')
-      return tf.concat(3, [branch_0, branch_1, branch_2])
+      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
 
 
 def block_inception_b(inputs, scope=None, reuse=None):
@@ -93,7 +93,7 @@ def block_inception_b(inputs, scope=None, reuse=None):
       with tf.variable_scope('Branch_3'):
         branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
         branch_3 = slim.conv2d(branch_3, 128, [1, 1], scope='Conv2d_0b_1x1')
-      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 
 
 def block_reduction_b(inputs, scope=None, reuse=None):
@@ -115,7 +115,7 @@ def block_reduction_b(inputs, scope=None, reuse=None):
       with tf.variable_scope('Branch_2'):
         branch_2 = slim.max_pool2d(inputs, [3, 3], stride=2, padding='VALID',
                                    scope='MaxPool_1a_3x3')
-      return tf.concat(3, [branch_0, branch_1, branch_2])
+      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2])
 
 
 def block_inception_c(inputs, scope=None, reuse=None):
@@ -128,20 +128,20 @@ def block_inception_c(inputs, scope=None, reuse=None):
         branch_0 = slim.conv2d(inputs, 256, [1, 1], scope='Conv2d_0a_1x1')
       with tf.variable_scope('Branch_1'):
         branch_1 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
-        branch_1 = tf.concat(3, [
+        branch_1 = tf.concat(axis=3, values=[
             slim.conv2d(branch_1, 256, [1, 3], scope='Conv2d_0b_1x3'),
             slim.conv2d(branch_1, 256, [3, 1], scope='Conv2d_0c_3x1')])
       with tf.variable_scope('Branch_2'):
         branch_2 = slim.conv2d(inputs, 384, [1, 1], scope='Conv2d_0a_1x1')
         branch_2 = slim.conv2d(branch_2, 448, [3, 1], scope='Conv2d_0b_3x1')
         branch_2 = slim.conv2d(branch_2, 512, [1, 3], scope='Conv2d_0c_1x3')
-        branch_2 = tf.concat(3, [
+        branch_2 = tf.concat(axis=3, values=[
             slim.conv2d(branch_2, 256, [1, 3], scope='Conv2d_0d_1x3'),
             slim.conv2d(branch_2, 256, [3, 1], scope='Conv2d_0e_3x1')])
       with tf.variable_scope('Branch_3'):
         branch_3 = slim.avg_pool2d(inputs, [3, 3], scope='AvgPool_0a_3x3')
         branch_3 = slim.conv2d(branch_3, 256, [1, 1], scope='Conv2d_0b_1x1')
-      return tf.concat(3, [branch_0, branch_1, branch_2, branch_3])
+      return tf.concat(axis=3, values=[branch_0, branch_1, branch_2, branch_3])
 
 
 def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
@@ -192,7 +192,7 @@ def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
         with tf.variable_scope('Branch_1'):
           branch_1 = slim.conv2d(net, 96, [3, 3], stride=2, padding='VALID',
                                  scope='Conv2d_0a_3x3')
-        net = tf.concat(3, [branch_0, branch_1])
+        net = tf.concat(axis=3, values=[branch_0, branch_1])
         if add_and_check_final('Mixed_3a', net): return net, end_points
 
       # 73 x 73 x 160
@@ -207,7 +207,7 @@ def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
           branch_1 = slim.conv2d(branch_1, 64, [7, 1], scope='Conv2d_0c_7x1')
           branch_1 = slim.conv2d(branch_1, 96, [3, 3], padding='VALID',
                                  scope='Conv2d_1a_3x3')
-        net = tf.concat(3, [branch_0, branch_1])
+        net = tf.concat(axis=3, values=[branch_0, branch_1])
         if add_and_check_final('Mixed_4a', net): return net, end_points
 
       # 71 x 71 x 192
@@ -218,7 +218,7 @@ def inception_v4_base(inputs, final_endpoint='Mixed_7d', scope=None):
         with tf.variable_scope('Branch_1'):
           branch_1 = slim.max_pool2d(net, [3, 3], stride=2, padding='VALID',
                                      scope='MaxPool_1a_3x3')
-        net = tf.concat(3, [branch_0, branch_1])
+        net = tf.concat(axis=3, values=[branch_0, branch_1])
         if add_and_check_final('Mixed_5a', net): return net, end_points
 
       # 35 x 35 x 384

+ 2 - 2
slim/nets/overfeat.py

@@ -41,7 +41,7 @@ def overfeat_arg_scope(weight_decay=0.0005):
   with slim.arg_scope([slim.conv2d, slim.fully_connected],
                       activation_fn=tf.nn.relu,
                       weights_regularizer=slim.l2_regularizer(weight_decay),
-                      biases_initializer=tf.zeros_initializer):
+                      biases_initializer=tf.zeros_initializer()):
     with slim.arg_scope([slim.conv2d], padding='SAME'):
       with slim.arg_scope([slim.max_pool2d], padding='VALID') as arg_sc:
         return arg_sc
@@ -107,7 +107,7 @@ def overfeat(inputs,
         net = slim.conv2d(net, num_classes, [1, 1],
                           activation_fn=None,
                           normalizer_fn=None,
-                          biases_initializer=tf.zeros_initializer,
+                          biases_initializer=tf.zeros_initializer(),
                           scope='fc8')
       # Convert end_points_collection into a end_point dict.
       end_points = slim.utils.convert_collection_to_dict(end_points_collection)

+ 1 - 1
slim/nets/vgg.py

@@ -58,7 +58,7 @@ def vgg_arg_scope(weight_decay=0.0005):
   with slim.arg_scope([slim.conv2d, slim.fully_connected],
                       activation_fn=tf.nn.relu,
                       weights_regularizer=slim.l2_regularizer(weight_decay),
-                      biases_initializer=tf.zeros_initializer):
+                      biases_initializer=tf.zeros_initializer()):
     with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
       return arg_sc
 

+ 6 - 6
slim/preprocessing/cifarnet_preprocessing.py

@@ -45,7 +45,7 @@ def preprocess_for_train(image,
   Returns:
     A preprocessed image.
   """
-  tf.image_summary('image', tf.expand_dims(image, 0))
+  tf.summary.image('image', tf.expand_dims(image, 0))
 
   # Transform the image to floats.
   image = tf.to_float(image)
@@ -58,7 +58,7 @@ def preprocess_for_train(image,
   # Randomly flip the image horizontally.
   distorted_image = tf.image.random_flip_left_right(distorted_image)
 
-  tf.image_summary('distorted_image', tf.expand_dims(distorted_image, 0))
+  tf.summary.image('distorted_image', tf.expand_dims(distorted_image, 0))
 
   # Because these operations are not commutative, consider randomizing
   # the order their operation.
@@ -67,7 +67,7 @@ def preprocess_for_train(image,
   distorted_image = tf.image.random_contrast(distorted_image,
                                              lower=0.2, upper=1.8)
   # Subtract off the mean and divide by the variance of the pixels.
-  return tf.image.per_image_whitening(distorted_image)
+  return tf.image.per_image_standardization(distorted_image)
 
 
 def preprocess_for_eval(image, output_height, output_width):
@@ -81,7 +81,7 @@ def preprocess_for_eval(image, output_height, output_width):
   Returns:
     A preprocessed image.
   """
-  tf.image_summary('image', tf.expand_dims(image, 0))
+  tf.summary.image('image', tf.expand_dims(image, 0))
   # Transform the image to floats.
   image = tf.to_float(image)
 
@@ -89,10 +89,10 @@ def preprocess_for_eval(image, output_height, output_width):
   resized_image = tf.image.resize_image_with_crop_or_pad(image,
                                                          output_width,
                                                          output_height)
-  tf.image_summary('resized_image', tf.expand_dims(resized_image, 0))
+  tf.summary.image('resized_image', tf.expand_dims(resized_image, 0))
 
   # Subtract off the mean and divide by the variance of the pixels.
-  return tf.image.per_image_whitening(resized_image)
+  return tf.image.per_image_standardization(resized_image)
 
 
 def preprocess_image(image, output_height, output_width, is_training=False):
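
tf.image.per_image_whitening was renamed tf.image.per_image_standardization in TF 1.0; the computation (subtract the mean, divide by the adjusted standard deviation) is unchanged. A minimal sketch:

    import tensorflow as tf

    image = tf.random_uniform([32, 32, 3])  # illustrative input
    standardized = tf.image.per_image_standardization(image)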

+ 8 - 8
slim/preprocessing/inception_preprocessing.py

@@ -192,7 +192,7 @@ def preprocess_for_train(image, height, width, bbox,
     # the coordinates are ordered [ymin, xmin, ymax, xmax].
     image_with_box = tf.image.draw_bounding_boxes(tf.expand_dims(image, 0),
                                                   bbox)
-    tf.image_summary('image_with_bounding_boxes', image_with_box)
+    tf.summary.image('image_with_bounding_boxes', image_with_box)
 
     distorted_image, distorted_bbox = distorted_bounding_box_crop(image, bbox)
     # Restore the shape since the dynamic slice based upon the bbox_size loses
@@ -200,7 +200,7 @@ def preprocess_for_train(image, height, width, bbox,
     distorted_image.set_shape([None, None, 3])
     image_with_distorted_box = tf.image.draw_bounding_boxes(
         tf.expand_dims(image, 0), distorted_bbox)
-    tf.image_summary('images_with_distorted_bounding_box',
+    tf.summary.image('images_with_distorted_bounding_box',
                      image_with_distorted_box)
 
     # This resizing operation may distort the images because the aspect
@@ -215,7 +215,7 @@ def preprocess_for_train(image, height, width, bbox,
         lambda x, method: tf.image.resize_images(x, [height, width], method=method),
         num_cases=num_resize_cases)
 
-    tf.image_summary('cropped_resized_image',
+    tf.summary.image('cropped_resized_image',
                      tf.expand_dims(distorted_image, 0))
 
     # Randomly flip the image horizontally.
@@ -227,10 +227,10 @@ def preprocess_for_train(image, height, width, bbox,
         lambda x, ordering: distort_color(x, ordering, fast_mode),
         num_cases=4)
 
-    tf.image_summary('final_distorted_image',
+    tf.summary.image('final_distorted_image',
                      tf.expand_dims(distorted_image, 0))
-    distorted_image = tf.sub(distorted_image, 0.5)
-    distorted_image = tf.mul(distorted_image, 2.0)
+    distorted_image = tf.subtract(distorted_image, 0.5)
+    distorted_image = tf.multiply(distorted_image, 2.0)
     return distorted_image
 
 
@@ -270,8 +270,8 @@ def preprocess_for_eval(image, height, width,
       image = tf.image.resize_bilinear(image, [height, width],
                                        align_corners=False)
       image = tf.squeeze(image, [0])
-    image = tf.sub(image, 0.5)
-    image = tf.mul(image, 2.0)
+    image = tf.subtract(image, 0.5)
+    image = tf.multiply(image, 2.0)
     return image
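
tf.sub, tf.mul, and tf.neg were renamed tf.subtract, tf.multiply, and tf.negative in TF 1.0 with identical semantics. The pair of ops above rescales pixels from [0, 1] to the [-1, 1] range Inception expects; a minimal sketch:

    import tensorflow as tf

    image = tf.random_uniform([299, 299, 3])  # values in [0, 1]
    image = tf.subtract(image, 0.5)           # center at zero
    image = tf.multiply(image, 2.0)           # scale to [-1, 1]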
 
 

+ 1 - 1
slim/preprocessing/lenet_preprocessing.py

@@ -39,6 +39,6 @@ def preprocess_image(image, output_height, output_width, is_training):
   image = tf.to_float(image)
   image = tf.image.resize_image_with_crop_or_pad(
       image, output_width, output_height)
-  image = tf.sub(image, 128.0)
+  image = tf.subtract(image, 128.0)
   image = tf.div(image, 128.0)
   return image

+ 4 - 4
slim/preprocessing/vgg_preprocessing.py

@@ -73,7 +73,7 @@ def _crop(image, offset_height, offset_width, crop_height, crop_width):
       ['Rank of image must be equal to 3.'])
   cropped_shape = control_flow_ops.with_dependencies(
       [rank_assertion],
-      tf.pack([crop_height, crop_width, original_shape[2]]))
+      tf.stack([crop_height, crop_width, original_shape[2]]))
 
   size_assertion = tf.Assert(
       tf.logical_and(
@@ -81,7 +81,7 @@ def _crop(image, offset_height, offset_width, crop_height, crop_width):
           tf.greater_equal(original_shape[1], crop_width)),
       ['Crop size greater than the image size.'])
 
-  offsets = tf.to_int32(tf.pack([offset_height, offset_width, 0]))
+  offsets = tf.to_int32(tf.stack([offset_height, offset_width, 0]))
 
   # Use tf.slice instead of crop_to_bounding box as it accepts tensors to
   # define the crop size.
@@ -227,10 +227,10 @@ def _mean_image_subtraction(image, means):
   if len(means) != num_channels:
     raise ValueError('len(means) must match the number of channels')
 
-  channels = tf.split(2, num_channels, image)
+  channels = tf.split(axis=2, num_or_size_splits=num_channels, value=image)
   for i in range(num_channels):
     channels[i] -= means[i]
-  return tf.concat(2, channels)
+  return tf.concat(axis=2, values=channels)
 
 
 def _smallest_size_at_least(height, width, smallest_side):
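
The per-channel mean subtraction above splits the image on the channel axis, shifts each channel, and concatenates the pieces back. A minimal sketch (the RGB means are illustrative stand-ins, not the constants defined elsewhere in this file):

    import tensorflow as tf

    image = tf.random_uniform([224, 224, 3]) * 255.0
    means = [123.68, 116.78, 103.94]  # assumed example values
    channels = tf.split(axis=2, num_or_size_splits=3, value=image)
    for i in range(3):
        channels[i] -= means[i]
    image = tf.concat(axis=2, values=channels)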

+ 9 - 9
slim/train_image_classifier.py

@@ -316,8 +316,8 @@ def _configure_optimizer(learning_rate):
 def _add_variables_summaries(learning_rate):
   summaries = []
   for variable in slim.get_model_variables():
-    summaries.append(tf.histogram_summary(variable.op.name, variable))
-  summaries.append(tf.scalar_summary('training/Learning Rate', learning_rate))
+    summaries.append(tf.summary.histogram(variable.op.name, variable))
+  summaries.append(tf.summary.scalar('training/Learning Rate', learning_rate))
   return summaries
 
 
@@ -489,17 +489,17 @@ def main(_):
     end_points = clones[0].outputs
     for end_point in end_points:
       x = end_points[end_point]
-      summaries.add(tf.histogram_summary('activations/' + end_point, x))
-      summaries.add(tf.scalar_summary('sparsity/' + end_point,
+      summaries.add(tf.summary.histogram('activations/' + end_point, x))
+      summaries.add(tf.summary.scalar('sparsity/' + end_point,
                                       tf.nn.zero_fraction(x)))
 
     # Add summaries for losses.
     for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
-      summaries.add(tf.scalar_summary('losses/%s' % loss.op.name, loss))
+      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))
 
     # Add summaries for variables.
     for variable in slim.get_model_variables():
-      summaries.add(tf.histogram_summary(variable.op.name, variable))
+      summaries.add(tf.summary.histogram(variable.op.name, variable))
 
     #################################
     # Configure the moving averages #
@@ -517,7 +517,7 @@ def main(_):
     with tf.device(deploy_config.optimizer_device()):
       learning_rate = _configure_learning_rate(dataset.num_samples, global_step)
       optimizer = _configure_optimizer(learning_rate)
-      summaries.add(tf.scalar_summary('learning_rate', learning_rate,
-                                      name='learning_rate'))
+      summaries.add(tf.summary.scalar('learning_rate', learning_rate))
 
     if FLAGS.sync_replicas:
@@ -543,7 +543,7 @@ def main(_):
         optimizer,
         var_list=variables_to_train)
     # Add total_loss to summary.
-    summaries.add(tf.scalar_summary('total_loss', total_loss,
-                                    name='total_loss'))
+    summaries.add(tf.summary.scalar('total_loss', total_loss))
 
     # Create gradient updates.
@@ -561,7 +561,7 @@ def main(_):
                                        first_clone_scope))
 
     # Merge all summaries together.
-    summary_op = tf.merge_summary(list(summaries), name='summary_op')
+    summary_op = tf.summary.merge(list(summaries), name='summary_op')
 
 
     ###########################

+ 8 - 8
street/python/nn_ops.py

@@ -92,7 +92,7 @@ def rnn_helper(inp,
     elif direction == "backward":
       out = backward
     else:
-      out = tf.concat(2, [forward, backward])
+      out = tf.concat(axis=2, values=[forward, backward])
   return out
 
 
@@ -183,7 +183,7 @@ def lstm_layer(inp,
   with tf.variable_scope(name):
     if backward:
       if length is None:
-        inp = tf.reverse(inp, [False, True, False])
+        inp = tf.reverse(inp, [1])
       else:
         inp = tf.reverse_sequence(inp, length, 1, 0)
 
@@ -217,14 +217,14 @@ def lstm_layer(inp,
 
     batch_size = shapes.tensor_dim(inp, dim=0)
     num_frames = shapes.tensor_dim(inp, dim=1)
-    prev = tf.reshape(inp, tf.pack([batch_size * num_frames, num_prev]))
+    prev = tf.reshape(inp, tf.stack([batch_size * num_frames, num_prev]))
 
     if use_native_weights:
       with tf.variable_scope("LSTMCell"):
         b = tf.get_variable(
             "B",
             shape=[4 * num_nodes],
-            initializer=tf.zeros_initializer,
+            initializer=tf.zeros_initializer(),
             dtype=tf.float32)
       biases = tf.identity(b, name="biases")
     else:
@@ -236,17 +236,17 @@ def lstm_layer(inp,
               biases, name="biases_reg"))
     prev = tf.nn.xw_plus_b(prev, w_i_m, biases)
 
-    prev = tf.reshape(prev, tf.pack([batch_size, num_frames, 4, num_nodes]))
+    prev = tf.reshape(prev, tf.stack([batch_size, num_frames, 4, num_nodes]))
     if state is None:
-      state = tf.fill(tf.pack([batch_size, num_nodes]), 0.0)
+      state = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)
     if memory is None:
-      memory = tf.fill(tf.pack([batch_size, num_nodes]), 0.0)
+      memory = tf.fill(tf.stack([batch_size, num_nodes]), 0.0)
 
     out, _, mem = rnn.variable_lstm(prev, state, memory, w_m_m, clip=clip)
 
     if backward:
       if length is None:
-        out = tf.reverse(out, [False, True, False])
+        out = tf.reverse(out, [1])
       else:
         out = tf.reverse_sequence(out, length, 1, 0)
 

+ 3 - 3
street/python/vgsl_input.py

@@ -79,7 +79,7 @@ def ImageInput(input_pattern, num_threads, shape, using_ctc, reader=None):
   # Give the images a nice name as well.
   images = tf.identity(images, name='Images')
 
-  tf.image_summary('Images', images)
+  tf.summary.image('Images', images)
   return images, heights, widths, labels, sparse_labels, truths
 
 
@@ -145,6 +145,6 @@ def _ImageProcessing(image_buffer, shape):
   image = tf.image.decode_png(image_buffer, channels=shape.depth)
   image.set_shape([shape.height, shape.width, shape.depth])
   image = tf.cast(image, tf.float32)
-  image = tf.sub(image, 128.0)
-  image = tf.mul(image, 1 / 100.0)
+  image = tf.subtract(image, 128.0)
+  image = tf.multiply(image, 1 / 100.0)
   return image

+ 3 - 3
street/python/vgsl_model.py

@@ -147,7 +147,7 @@ def Eval(train_dir,
       sequence_error=None)
   with tf.Graph().as_default():
     model = InitNetwork(eval_data, model_str, 'eval', reader=reader)
-    sw = tf.train.SummaryWriter(eval_dir)
+    sw = tf.summary.FileWriter(eval_dir)
 
     while True:
       sess = tf.Session('')
@@ -369,7 +369,7 @@ class VGSLImageModel(object):
     if self.mode == 'train':
       # Setup loss for training.
       self.loss = self._AddLossFunction(logits, height_in, out_dims, out_func)
-      tf.scalar_summary('loss', self.loss, name='loss')
+      tf.summary.scalar('loss', self.loss)
     elif out_dims == 0:
       # Be sure the labels match the output, even in eval mode.
       self.labels = tf.slice(self.labels, [0, 0], [-1, 1])
@@ -484,7 +484,7 @@ class VGSLImageModel(object):
       opt = tf.train.AdamOptimizer(learning_rate=learn_rate_dec)
     else:
       raise ValueError('Invalid optimizer type: ' + optimizer_type)
-    tf.scalar_summary('learn_rate', learn_rate_dec, name='lr_summ')
+    tf.summary.scalar('learn_rate', learn_rate_dec)
 
     self.train_op = opt.minimize(
         self.loss, global_step=self.global_step, name='train')

+ 4 - 4
street/python/vgslspecs.py

@@ -149,7 +149,7 @@ class VGSLSpecs(object):
     else:
       lengths = tf.ones_like(lengths)
     if factor != 1:
-      lengths = tf.mul(lengths, tf.cast(factor, tf.float32))
+      lengths = tf.multiply(lengths, tf.cast(factor, tf.float32))
     return tf.cast(lengths, tf.int32)
 
   def BuildFromString(self, prev_layer, index):
@@ -235,7 +235,7 @@ class VGSLSpecs(object):
         final_factors = self.reduction_factors
     if index == len(self.model_str):
       raise ValueError('Missing ) at end of parallel!' + self.model_str)
-    return tf.concat(num_dims - 1, layers), index + 1
+    return tf.concat(axis=num_dims - 1, values=layers), index + 1
 
   def AddConvLayer(self, prev_layer, index):
     """Add a single standard convolutional layer.
@@ -342,7 +342,7 @@ class VGSLSpecs(object):
         factor1 = tf.cast(self.reduction_factors[i], tf.float32)
         factor2 = tf.cast(prev_shape[i], tf.float32)
         divisor = tf.cast(result_shape[i], tf.float32)
-        self.reduction_factors[i] = tf.div(tf.mul(factor1, factor2), divisor)
+        self.reduction_factors[i] = tf.div(tf.multiply(factor1, factor2), divisor)
     return layer, m.end()
 
   def AddFCLayer(self, prev_layer, index):
@@ -401,7 +401,7 @@ class VGSLSpecs(object):
                             name + '_forward')
       back = self._LSTMLayer(prev_layer, 'backward', dim, True, depth,
                              name + '_reverse')
-      return tf.concat(3, [fwd, back], name=name + '_concat'), m.end()
+      return tf.concat(axis=3, values=[fwd, back], name=name + '_concat'), m.end()
     if direction == 'f':
       direction = 'forward'
     elif direction == 'r':

+ 0 - 0
swivel/glove_to_shards.py


+ 0 - 0
swivel/nearest.py


+ 0 - 0
swivel/prep.py


+ 2 - 2
swivel/swivel.py

@@ -135,8 +135,8 @@ def count_matrix_input(filenames, submatrix_rows, submatrix_cols):
   sparse_local_col = features['sparse_local_col'].values
   sparse_count = features['sparse_value'].values
 
-  sparse_indices = tf.concat([tf.expand_dims(sparse_local_row, 1),
-                              tf.expand_dims(sparse_local_col, 1)], 1)
+  sparse_indices = tf.concat(axis=1, values=[tf.expand_dims(sparse_local_row, 1),
+                                             tf.expand_dims(sparse_local_col, 1)])
   count = tf.sparse_to_dense(sparse_indices, [submatrix_rows, submatrix_cols],
                              sparse_count)
 

+ 0 - 0
swivel/text2bin.py


+ 0 - 0
swivel/wordsim.py


+ 2 - 2
syntaxnet/syntaxnet/graph_builder.py

@@ -69,7 +69,7 @@ def EmbeddingLookupFeatures(params, sparse_features, allow_weights):
 
   if allow_weights:
     # Multiply by weights, reshaping to allow broadcast.
-    broadcast_weights_shape = tf.concat([tf.shape(weights), [1]], 0)
+    broadcast_weights_shape = tf.concat(axis=0, values=[tf.shape(weights), [1]])
     embeddings *= tf.reshape(weights, broadcast_weights_shape)
 
   # Sum embeddings by index.
@@ -330,7 +330,7 @@ class GreedyParser(object):
                                            i,
                                            return_average=return_average))
 
-    last_layer = tf.concat(embeddings, 1)
+    last_layer = tf.concat(axis=1, values=embeddings)
     last_layer_size = self.embedding_size
 
     # Create ReLU layers.

+ 2 - 2
textsum/seq2seq_attention.py

@@ -86,7 +86,7 @@ def _Train(model, data_batcher):
     saver = tf.train.Saver()
     # Train dir is different from log_root to avoid summary directory
     # conflict with Supervisor.
-    summary_writer = tf.train.SummaryWriter(FLAGS.train_dir)
+    summary_writer = tf.summary.FileWriter(FLAGS.train_dir)
     sv = tf.train.Supervisor(logdir=FLAGS.log_root,
                              is_chief=True,
                              saver=saver,
@@ -119,7 +119,7 @@ def _Eval(model, data_batcher, vocab=None):
   """Runs model eval."""
   model.build_graph()
   saver = tf.train.Saver()
-  summary_writer = tf.train.SummaryWriter(FLAGS.eval_dir)
+  summary_writer = tf.summary.FileWriter(FLAGS.eval_dir)
   sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
   running_avg_loss = 0
   step = 0

+ 10 - 10
textsum/seq2seq_attention_model.py

@@ -139,10 +139,10 @@ class Seq2SeqAttentionModel(object):
     vsize = self._vocab.NumIds()
 
     with tf.variable_scope('seq2seq'):
-      encoder_inputs = tf.unpack(tf.transpose(self._articles))
-      decoder_inputs = tf.unpack(tf.transpose(self._abstracts))
-      targets = tf.unpack(tf.transpose(self._targets))
-      loss_weights = tf.unpack(tf.transpose(self._loss_weights))
+      encoder_inputs = tf.unstack(tf.transpose(self._articles))
+      decoder_inputs = tf.unstack(tf.transpose(self._abstracts))
+      targets = tf.unstack(tf.transpose(self._targets))
+      loss_weights = tf.unstack(tf.transpose(self._loss_weights))
       article_lens = self._article_lens
 
       # Embedding shared by the input and outputs.
@@ -195,7 +195,7 @@ class Seq2SeqAttentionModel(object):
 
         encoder_outputs = [tf.reshape(x, [hps.batch_size, 1, 2*hps.num_hidden])
                            for x in encoder_outputs]
-        self._enc_top_states = tf.concat(1, encoder_outputs)
+        self._enc_top_states = tf.concat(axis=1, values=encoder_outputs)
         self._dec_in_state = fw_state
         # During decoding, follow up _dec_in_state are fed from beam_search.
         # dec_out_state are stored by beam_search for next step feeding.
@@ -218,7 +218,7 @@ class Seq2SeqAttentionModel(object):
           best_outputs = [tf.argmax(x, 1) for x in model_outputs]
           tf.logging.info('best_outputs%s', best_outputs[0].get_shape())
           self._outputs = tf.concat(
-              1, [tf.reshape(x, [hps.batch_size, 1]) for x in best_outputs])
+              axis=1, values=[tf.reshape(x, [hps.batch_size, 1]) for x in best_outputs])
 
           self._topk_log_probs, self._topk_ids = tf.nn.top_k(
               tf.log(tf.nn.softmax(model_outputs[-1])), hps.batch_size*2)
@@ -236,7 +236,7 @@ class Seq2SeqAttentionModel(object):
         else:
           self._loss = tf.nn.seq2seq.sequence_loss(
               model_outputs, targets, loss_weights)
-        tf.scalar_summary('loss', tf.minimum(12.0, self._loss))
+        tf.summary.scalar('loss', tf.minimum(12.0, self._loss))
 
   def _add_train_op(self):
     """Sets self._train_op, op to run for training."""
@@ -250,9 +250,9 @@ class Seq2SeqAttentionModel(object):
     with tf.device(self._get_gpu(self._num_gpus-1)):
       grads, global_norm = tf.clip_by_global_norm(
           tf.gradients(self._loss, tvars), hps.max_grad_norm)
-    tf.scalar_summary('global_norm', global_norm)
+    tf.summary.scalar('global_norm', global_norm)
     optimizer = tf.train.GradientDescentOptimizer(self._lr_rate)
-    tf.scalar_summary('learning rate', self._lr_rate)
+    tf.summary.scalar('learning rate', self._lr_rate)
     self._train_op = optimizer.apply_gradients(
         zip(grads, tvars), global_step=self.global_step, name='train_step')
 
@@ -296,4 +296,4 @@ class Seq2SeqAttentionModel(object):
     self.global_step = tf.Variable(0, name='global_step', trainable=False)
     if self._hps.mode == 'train':
       self._add_train_op()
-    self._summaries = tf.merge_all_summaries()
+    self._summaries = tf.summary.merge_all()
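
tf.pack and tf.unpack were renamed tf.stack and tf.unstack in TF 1.0. Unstacking a transposed [batch, time] tensor, as build_graph does above, yields one [batch] tensor per time step; a sketch with assumed dimensions:

    import tensorflow as tf

    articles = tf.ones([4, 10], dtype=tf.int32)           # [batch, time]
    encoder_inputs = tf.unstack(tf.transpose(articles))   # 10 tensors of [4]
    restored = tf.stack(encoder_inputs)                   # back to [10, 4]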

+ 1 - 1
textsum/seq2seq_lib.py

@@ -127,7 +127,7 @@ def linear(args, output_size, bias, bias_start=0.0, scope=None):
     if len(args) == 1:
       res = tf.matmul(args[0], matrix)
     else:
-      res = tf.matmul(tf.concat(1, args), matrix)
+      res = tf.matmul(tf.concat(axis=1, values=args), matrix)
     if not bias:
       return res
     bias_term = tf.get_variable(

+ 9 - 9
transformer/spatial_transformer.py

@@ -53,7 +53,7 @@ def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs):
     def _repeat(x, n_repeats):
         with tf.variable_scope('_repeat'):
             rep = tf.transpose(
-                tf.expand_dims(tf.ones(shape=tf.pack([n_repeats, ])), 1), [1, 0])
+                tf.expand_dims(tf.ones(shape=tf.stack([n_repeats, ])), 1), [1, 0])
             rep = tf.cast(rep, 'int32')
             x = tf.matmul(tf.reshape(x, (-1, 1)), rep)
             return tf.reshape(x, [-1])
@@ -102,7 +102,7 @@ def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs):
 
             # use indices to lookup pixels in the flat image and restore
             # channels dim
-            im_flat = tf.reshape(im, tf.pack([-1, channels]))
+            im_flat = tf.reshape(im, tf.stack([-1, channels]))
             im_flat = tf.cast(im_flat, 'float32')
             Ia = tf.gather(im_flat, idx_a)
             Ib = tf.gather(im_flat, idx_b)
@@ -128,16 +128,16 @@ def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs):
             #                         np.linspace(-1, 1, height))
             #  ones = np.ones(np.prod(x_t.shape))
             #  grid = np.vstack([x_t.flatten(), y_t.flatten(), ones])
-            x_t = tf.matmul(tf.ones(shape=tf.pack([height, 1])),
+            x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])),
                             tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0]))
             y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1),
-                            tf.ones(shape=tf.pack([1, width])))
+                            tf.ones(shape=tf.stack([1, width])))
 
             x_t_flat = tf.reshape(x_t, (1, -1))
             y_t_flat = tf.reshape(y_t, (1, -1))
 
             ones = tf.ones_like(x_t_flat)
-            grid = tf.concat(0, [x_t_flat, y_t_flat, ones])
+            grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones])
             return grid
 
     def _transform(theta, input_dim, out_size):
@@ -157,11 +157,11 @@ def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs):
             grid = _meshgrid(out_height, out_width)
             grid = tf.expand_dims(grid, 0)
             grid = tf.reshape(grid, [-1])
-            grid = tf.tile(grid, tf.pack([num_batch]))
-            grid = tf.reshape(grid, tf.pack([num_batch, 3, -1]))
+            grid = tf.tile(grid, tf.stack([num_batch]))
+            grid = tf.reshape(grid, tf.stack([num_batch, 3, -1]))
 
             # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s)
-            T_g = tf.batch_matmul(theta, grid)
+            T_g = tf.matmul(theta, grid)
             x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1])
             y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1])
             x_s_flat = tf.reshape(x_s, [-1])
@@ -172,7 +172,7 @@ def transformer(U, theta, out_size, name='SpatialTransformer', **kwargs):
                 out_size)
 
             output = tf.reshape(
-                input_transformed, tf.pack([num_batch, out_height, out_width, num_channels]))
+                input_transformed, tf.stack([num_batch, out_height, out_width, num_channels]))
             return output
 
     with tf.variable_scope(name):
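
tf.batch_matmul was folded into tf.matmul in TF 1.0, which now handles batched operands directly. A minimal sketch with illustrative shapes:

    import tensorflow as tf

    theta = tf.ones([8, 2, 3])    # batch of 2x3 affine transforms
    grid = tf.ones([8, 3, 100])   # batch of homogeneous grid coordinates
    t_g = tf.matmul(theta, grid)  # [8, 2, 100], multiplied per batch entry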

+ 1 - 1
tutorials/embedding/word2vec.py

@@ -246,7 +246,7 @@ class Word2Vec(object):
     sampled_b = tf.nn.embedding_lookup(sm_b, sampled_ids)
 
     # True logits: [batch_size, 1]
-    true_logits = tf.reduce_sum(tf.mul(example_emb, true_w), 1) + true_b
+    true_logits = tf.reduce_sum(tf.multiply(example_emb, true_w), 1) + true_b
 
     # Sampled logits: [batch_size, num_sampled]
     # We replicate sampled noise labels for all examples in the batch

+ 1 - 1
tutorials/image/cifar10/cifar10_multi_gpu_train.py

@@ -124,7 +124,7 @@ def average_gradients(tower_grads):
       grads.append(expanded_g)
 
     # Average over the 'tower' dimension.
-    grad = tf.concat(grads, 0)
+    grad = tf.concat(axis=0, values=grads)
     grad = tf.reduce_mean(grad, 0)
 
     # Keep in mind that the Variables are redundant because they are shared

+ 1 - 1
tutorials/rnn/ptb/ptb_word_lm.py

@@ -146,7 +146,7 @@ class PTBModel(object):
         (cell_output, state) = cell(inputs[:, time_step, :], state)
         outputs.append(cell_output)
 
-    output = tf.reshape(tf.concat(outputs, 1), [-1, size])
+    output = tf.reshape(tf.concat(axis=1, values=outputs), [-1, size])
     softmax_w = tf.get_variable(
         "softmax_w", [size, vocab_size], dtype=data_type())
     softmax_b = tf.get_variable("softmax_b", [vocab_size], dtype=data_type())
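
In this hunk and the cifar10_multi_gpu_train.py hunk above, TF 1.0 reordered tf.concat from concat(concat_dim, values) to concat(values, axis); with keywords, axis must receive the integer dimension and values the list of tensors. Call sites that were already in the new positional order can trip up the automated script into swapping the keywords, which yields an invalid call. A minimal sketch of the correct TF 1.0 forms, assuming TensorFlow 1.x (shapes are illustrative):

    import tensorflow as tf

    outputs = [tf.ones([2, 5]), tf.ones([2, 5])]
    # Keyword form: axis is the dimension, values is the tensor list.
    merged = tf.concat(axis=1, values=outputs)  # shape [2, 10]
    # Equivalent TF 1.0 positional form:
    merged_too = tf.concat(outputs, 1)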

+ 6 - 6
video_prediction/lstm_ops.py

@@ -23,7 +23,7 @@ from tensorflow.contrib.slim import layers
 
 def init_state(inputs,
                state_shape,
-               state_initializer=tf.zeros_initializer,
+               state_initializer=tf.zeros_initializer(),
                dtype=tf.float32):
   """Helper function to create an initial state given inputs.
 
@@ -45,7 +45,7 @@ def init_state(inputs,
     batch_size = 0
 
   initial_state = state_initializer(
-      tf.pack([batch_size] + state_shape),
+      tf.stack([batch_size] + state_shape),
       dtype=dtype)
   initial_state.set_shape([inferred_batch_size] + state_shape)
 
@@ -89,8 +89,8 @@ def basic_conv_lstm_cell(inputs,
                          reuse=reuse):
     inputs.get_shape().assert_has_rank(4)
     state.get_shape().assert_has_rank(4)
-    c, h = tf.split(3, 2, state)
-    inputs_h = tf.concat(3, [inputs, h])
+    c, h = tf.split(axis=3, num_or_size_splits=2, value=state)
+    inputs_h = tf.concat(axis=3, values=[inputs, h])
     # Parameters of gates are concatenated into one conv for efficiency.
     i_j_f_o = layers.conv2d(inputs_h,
                             4 * num_channels, [filter_size, filter_size],
@@ -99,12 +99,12 @@ def basic_conv_lstm_cell(inputs,
                             scope='Gates')
 
     # i = input_gate, j = new_input, f = forget_gate, o = output_gate
-    i, j, f, o = tf.split(3, 4, i_j_f_o)
+    i, j, f, o = tf.split(axis=3, num_or_size_splits=4, value=i_j_f_o)
 
     new_c = c * tf.sigmoid(f + forget_bias) + tf.sigmoid(i) * tf.tanh(j)
     new_h = tf.tanh(new_c) * tf.sigmoid(o)
 
-    return new_h, tf.concat(3, [new_c, new_h])
+    return new_h, tf.concat(axis=3, values=[new_c, new_h])
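
Two more TF 1.0 signature changes show up in this file. tf.split moved from split(split_dim, num_splits, value) to split(value, num_or_size_splits, axis), which the keyword form above makes explicit, and tf.zeros_initializer became a function that must be called to obtain the initializer, hence the added parentheses. A minimal sketch, assuming TensorFlow 1.x (shapes are illustrative):

    import tensorflow as tf

    state = tf.ones([8, 16, 16, 64])
    # TF 0.x: tf.split(3, 2, state). TF 1.0, written with keywords:
    c, h = tf.split(axis=3, num_or_size_splits=2, value=state)  # two [8, 16, 16, 32] tensors

    # tf.zeros_initializer() now returns the initializer callable:
    init = tf.zeros_initializer()
    zeros = init([8, 16, 16, 32], dtype=tf.float32)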
 
 
 

+ 3 - 3
video_prediction/prediction_input.py

@@ -97,11 +97,11 @@ def build_tfrecord_input(training=True):
       action = tf.reshape(features[action_name], shape=[1, STATE_DIM])
       action_seq.append(action)
 
-  image_seq = tf.concat(0, image_seq)
+  image_seq = tf.concat(axis=0, values=image_seq)
 
   if FLAGS.use_state:
-    state_seq = tf.concat(0, state_seq)
-    action_seq = tf.concat(0, action_seq)
+    state_seq = tf.concat(axis=0, values=state_seq)
+    action_seq = tf.concat(axis=0, values=action_seq)
     [image_batch, action_batch, state_batch] = tf.train.batch(
         [image_seq, action_seq, state_seq],
         FLAGS.batch_size,

+ 10 - 10
video_prediction/prediction_model.py

@@ -109,7 +109,7 @@ def construct_model(images,
         prev_image = image
 
       # Predicted state is always fed back in
-      state_action = tf.concat(1, [action, current_state])
+      state_action = tf.concat(axis=1, values=[action, current_state])
 
       enc0 = slim.layers.conv2d(
           prev_image,
@@ -144,7 +144,7 @@ def construct_model(images,
       smear = tf.tile(
           smear, [1, int(enc2.get_shape()[1]), int(enc2.get_shape()[2]), 1])
       if use_state:
-        enc2 = tf.concat(3, [enc2, smear])
+        enc2 = tf.concat(axis=3, values=[enc2, smear])
       enc3 = slim.layers.conv2d(
           enc2, hidden4.get_shape()[3], [1, 1], stride=1, scope='conv4')
 
@@ -158,7 +158,7 @@ def construct_model(images,
           enc4, lstm_state6, lstm_size[5], scope='state6')  # 16x16
       hidden6 = tf_layers.layer_norm(hidden6, scope='layer_norm7')
       # Skip connection.
-      hidden6 = tf.concat(3, [hidden6, enc1])  # both 16x16
+      hidden6 = tf.concat(axis=3, values=[hidden6, enc1])  # both 16x16
 
       enc5 = slim.layers.conv2d_transpose(
           hidden6, hidden6.get_shape()[3], 3, stride=2, scope='convt2')
@@ -167,7 +167,7 @@ def construct_model(images,
       hidden7 = tf_layers.layer_norm(hidden7, scope='layer_norm8')
 
       # Skip connection.
-      hidden7 = tf.concat(3, [hidden7, enc0])  # both 32x32
+      hidden7 = tf.concat(axis=3, values=[hidden7, enc0])  # both 32x32
 
       enc6 = slim.layers.conv2d_transpose(
           hidden7,
@@ -207,7 +207,7 @@ def construct_model(images,
       masks = tf.reshape(
           tf.nn.softmax(tf.reshape(masks, [-1, num_masks + 1])),
           [int(batch_size), int(img_height), int(img_width), num_masks + 1])
-      mask_list = tf.split(3, num_masks + 1, masks)
+      mask_list = tf.split(axis=3, num_or_size_splits=num_masks + 1, value=masks)
       output = mask_list[0] * prev_image
       for layer, mask in zip(transformed, mask_list[1:]):
         output += layer * mask
@@ -277,8 +277,8 @@ def cdna_transformation(prev_image, cdna_input, num_masks, color_channels):
   cdna_kerns /= norm_factor
 
   cdna_kerns = tf.tile(cdna_kerns, [1, 1, 1, color_channels, 1])
-  cdna_kerns = tf.split(0, batch_size, cdna_kerns)
-  prev_images = tf.split(0, batch_size, prev_image)
+  cdna_kerns = tf.split(axis=0, num_or_size_splits=batch_size, value=cdna_kerns)
+  prev_images = tf.split(axis=0, num_or_size_splits=batch_size, value=prev_image)
 
   # Transform image.
   transformed = []
@@ -288,8 +288,8 @@ def cdna_transformation(prev_image, cdna_input, num_masks, color_channels):
       kernel = tf.expand_dims(kernel, -1)
     transformed.append(
         tf.nn.depthwise_conv2d(preimg, kernel, [1, 1, 1, 1], 'SAME'))
-  transformed = tf.concat(0, transformed)
-  transformed = tf.split(3, num_masks, transformed)
+  transformed = tf.concat(axis=0, values=transformed)
+  transformed = tf.split(axis=3, num_or_size_splits=num_masks, value=transformed)
   return transformed
 
 
@@ -314,7 +314,7 @@ def dna_transformation(prev_image, dna_input):
           tf.expand_dims(
               tf.slice(prev_image_pad, [0, xkern, ykern, 0],
                        [-1, image_height, image_width, -1]), [3]))
-  inputs = tf.concat(3, inputs)
+  inputs = tf.concat(axis=3, values=inputs)
 
   # Normalize channels to 1.
   kernel = tf.nn.relu(dna_input - RELU_SHIFT) + RELU_SHIFT

+ 10 - 10
video_prediction/prediction_train.py

@@ -113,11 +113,11 @@ class Model(object):
     summaries = []
 
     # Split into timesteps.
-    actions = tf.split(1, actions.get_shape()[1], actions)
+    actions = tf.split(axis=1, num_or_size_splits=actions.get_shape()[1], value=actions)
     actions = [tf.squeeze(act) for act in actions]
-    states = tf.split(1, states.get_shape()[1], states)
+    states = tf.split(axis=1, num_or_size_splits=states.get_shape()[1], value=states)
     states = [tf.squeeze(st) for st in states]
-    images = tf.split(1, images.get_shape()[1], images)
+    images = tf.split(axis=1, num_or_size_splits=images.get_shape()[1], value=images)
     images = [tf.squeeze(img) for img in images]
 
     if reuse_scope is None:
@@ -157,8 +157,8 @@ class Model(object):
       psnr_i = peak_signal_to_noise_ratio(x, gx)
       psnr_all += psnr_i
       summaries.append(
-          tf.scalar_summary(prefix + '_recon_cost' + str(i), recon_cost))
-      summaries.append(tf.scalar_summary(prefix + '_psnr' + str(i), psnr_i))
+          tf.summary.scalar(prefix + '_recon_cost' + str(i), recon_cost))
+      summaries.append(tf.summary.scalar(prefix + '_psnr' + str(i), psnr_i))
       loss += recon_cost
 
     for i, state, gen_state in zip(
@@ -166,19 +166,19 @@ class Model(object):
         gen_states[FLAGS.context_frames - 1:]):
       state_cost = mean_squared_error(state, gen_state) * 1e-4
       summaries.append(
-          tf.scalar_summary(prefix + '_state_cost' + str(i), state_cost))
+          tf.summary.scalar(prefix + '_state_cost' + str(i), state_cost))
       loss += state_cost
-    summaries.append(tf.scalar_summary(prefix + '_psnr_all', psnr_all))
+    summaries.append(tf.summary.scalar(prefix + '_psnr_all', psnr_all))
     self.psnr_all = psnr_all
 
     self.loss = loss = loss / np.float32(len(images) - FLAGS.context_frames)
 
-    summaries.append(tf.scalar_summary(prefix + '_loss', loss))
+    summaries.append(tf.summary.scalar(prefix + '_loss', loss))
 
     self.lr = tf.placeholder_with_default(FLAGS.learning_rate, ())
 
     self.train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)
-    self.summ_op = tf.merge_summary(summaries)
+    self.summ_op = tf.summary.merge(summaries)
 
 
 def main(unused_argv):
@@ -200,7 +200,7 @@ def main(unused_argv):
 
   # Make training session.
   sess = tf.InteractiveSession()
-  summary_writer = tf.train.SummaryWriter(
+  summary_writer = tf.summary.FileWriter(
       FLAGS.event_log_dir, graph=sess.graph, flush_secs=10)
 
   if FLAGS.pretrained_model:
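
Finally, the summary API moved under the tf.summary namespace in TF 1.0: tf.scalar_summary -> tf.summary.scalar, tf.merge_summary -> tf.summary.merge, and tf.train.SummaryWriter -> tf.summary.FileWriter. A minimal sketch, assuming TensorFlow 1.x (the log directory is illustrative):

    import tensorflow as tf

    loss = tf.constant(0.25)
    loss_summ = tf.summary.scalar('loss', loss)
    summ_op = tf.summary.merge([loss_summ])
    writer = tf.summary.FileWriter('/tmp/train_logs')  # path is illustrative
    with tf.Session() as sess:
        writer.add_summary(sess.run(summ_op), global_step=0)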