dp_optimizer.py 9.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242
  1. # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """Differentially private optimizers.
  16. """
  17. from __future__ import division
  18. import tensorflow as tf
  19. from differential_privacy.dp_sgd.dp_optimizer import utils
  20. from differential_privacy.dp_sgd.per_example_gradients import per_example_gradients
  21. class DPGradientDescentOptimizer(tf.train.GradientDescentOptimizer):
  22. """Differentially private gradient descent optimizer.
  23. """
  24. def __init__(self, learning_rate, eps_delta, sanitizer,
  25. sigma=None, use_locking=False, name="DPGradientDescent",
  26. batches_per_lot=1):
  27. """Construct a differentially private gradient descent optimizer.
  28. The optimizer uses fixed privacy budget for each batch of training.
  29. Args:
  30. learning_rate: for GradientDescentOptimizer.
  31. eps_delta: EpsDelta pair for each epoch.
  32. sanitizer: for sanitizing the graident.
  33. sigma: noise sigma. If None, use eps_delta pair to compute sigma;
  34. otherwise use supplied sigma directly.
  35. use_locking: use locking.
  36. name: name for the object.
  37. batches_per_lot: Number of batches in a lot.
  38. """
  39. super(DPGradientDescentOptimizer, self).__init__(learning_rate,
  40. use_locking, name)
  41. # Also, if needed, define the gradient accumulators
  42. self._batches_per_lot = batches_per_lot
  43. self._grad_accum_dict = {}
  44. if batches_per_lot > 1:
  45. self._batch_count = tf.Variable(1, dtype=tf.int32, trainable=False,
  46. name="batch_count")
  47. var_list = tf.trainable_variables()
  48. with tf.variable_scope("grad_acc_for"):
  49. for var in var_list:
  50. v_grad_accum = tf.Variable(tf.zeros_like(var),
  51. trainable=False,
  52. name=utils.GetTensorOpName(var))
  53. self._grad_accum_dict[var.name] = v_grad_accum
  54. self._eps_delta = eps_delta
  55. self._sanitizer = sanitizer
  56. self._sigma = sigma
  57. def compute_sanitized_gradients(self, loss, var_list=None,
  58. add_noise=True):
  59. """Compute the sanitized gradients.
  60. Args:
  61. loss: the loss tensor.
  62. var_list: the optional variables.
  63. add_noise: if true, then add noise. Always clip.
  64. Returns:
  65. a pair of (list of sanitized gradients) and privacy spending accumulation
  66. operations.
  67. Raises:
  68. TypeError: if var_list contains non-variable.
  69. """
  70. self._assert_valid_dtypes([loss])
  71. xs = [tf.convert_to_tensor(x) for x in var_list]
  72. px_grads = per_example_gradients.PerExampleGradients(loss, xs)
  73. sanitized_grads = []
  74. for px_grad, v in zip(px_grads, var_list):
  75. tensor_name = utils.GetTensorOpName(v)
  76. sanitized_grad = self._sanitizer.sanitize(
  77. px_grad, self._eps_delta, sigma=self._sigma,
  78. tensor_name=tensor_name, add_noise=add_noise,
  79. num_examples=self._batches_per_lot * tf.slice(
  80. tf.shape(px_grad), [0], [1]))
  81. sanitized_grads.append(sanitized_grad)
  82. return sanitized_grads
  83. def minimize(self, loss, global_step=None, var_list=None,
  84. name=None):
  85. """Minimize using sanitized gradients.
  86. This gets a var_list which is the list of trainable variables.
  87. For each var in var_list, we defined a grad_accumulator variable
  88. during init. When batches_per_lot > 1, we accumulate the gradient
  89. update in those. At the end of each lot, we apply the update back to
  90. the variable. This has the effect that for each lot we compute
  91. gradients at the point at the beginning of the lot, and then apply one
  92. update at the end of the lot. In other words, semantically, we are doing
  93. SGD with one lot being the equivalent of one usual batch of size
  94. batch_size * batches_per_lot.
  95. This allows us to simulate larger batches than our memory size would permit.
  96. The lr and the num_steps are in the lot world.
  97. Args:
  98. loss: the loss tensor.
  99. global_step: the optional global step.
  100. var_list: the optional variables.
  101. name: the optional name.
  102. Returns:
  103. the operation that runs one step of DP gradient descent.
  104. """
  105. # First validate the var_list
  106. if var_list is None:
  107. var_list = tf.trainable_variables()
  108. for var in var_list:
  109. if not isinstance(var, tf.Variable):
  110. raise TypeError("Argument is not a variable.Variable: %s" % var)
  111. # Modification: apply gradient once every batches_per_lot many steps.
  112. # This may lead to smaller error
  113. if self._batches_per_lot == 1:
  114. sanitized_grads = self.compute_sanitized_gradients(
  115. loss, var_list=var_list)
  116. grads_and_vars = zip(sanitized_grads, var_list)
  117. self._assert_valid_dtypes([v for g, v in grads_and_vars if g is not None])
  118. apply_grads = self.apply_gradients(grads_and_vars,
  119. global_step=global_step, name=name)
  120. return apply_grads
  121. # Condition for deciding whether to accumulate the gradient
  122. # or actually apply it.
  123. # we use a private self_batch_count to keep track of number of batches.
  124. # global step will count number of lots processed.
  125. update_cond = tf.equal(tf.constant(0),
  126. tf.mod(self._batch_count,
  127. tf.constant(self._batches_per_lot)))
  128. # Things to do for batches other than last of the lot.
  129. # Add non-noisy clipped grads to shadow variables.
  130. def non_last_in_lot_op(loss, var_list):
  131. """Ops to do for a typical batch.
  132. For a batch that is not the last one in the lot, we simply compute the
  133. sanitized gradients and apply them to the grad_acc variables.
  134. Args:
  135. loss: loss function tensor
  136. var_list: list of variables
  137. Returns:
  138. A tensorflow op to do the updates to the gradient accumulators
  139. """
  140. sanitized_grads = self.compute_sanitized_gradients(
  141. loss, var_list=var_list, add_noise=False)
  142. update_ops_list = []
  143. for var, grad in zip(var_list, sanitized_grads):
  144. grad_acc_v = self._grad_accum_dict[var.name]
  145. update_ops_list.append(grad_acc_v.assign_add(grad))
  146. update_ops_list.append(self._batch_count.assign_add(1))
  147. return tf.group(*update_ops_list)
  148. # Things to do for last batch of a lot.
  149. # Add noisy clipped grads to accumulator.
  150. # Apply accumulated grads to vars.
  151. def last_in_lot_op(loss, var_list, global_step):
  152. """Ops to do for last batch in a lot.
  153. For the last batch in the lot, we first add the sanitized gradients to
  154. the gradient acc variables, and then apply these
  155. values over to the original variables (via an apply gradient)
  156. Args:
  157. loss: loss function tensor
  158. var_list: list of variables
  159. global_step: optional global step to be passed to apply_gradients
  160. Returns:
  161. A tensorflow op to push updates from shadow vars to real vars.
  162. """
  163. # We add noise in the last lot. This is why we need this code snippet
  164. # that looks almost identical to the non_last_op case here.
  165. sanitized_grads = self.compute_sanitized_gradients(
  166. loss, var_list=var_list, add_noise=True)
  167. normalized_grads = []
  168. for var, grad in zip(var_list, sanitized_grads):
  169. grad_acc_v = self._grad_accum_dict[var.name]
  170. # To handle the lr difference per lot vs per batch, we divide the
  171. # update by number of batches per lot.
  172. normalized_grad = tf.div(grad_acc_v.assign_add(grad),
  173. tf.to_float(self._batches_per_lot))
  174. normalized_grads.append(normalized_grad)
  175. with tf.control_dependencies(normalized_grads):
  176. grads_and_vars = zip(normalized_grads, var_list)
  177. self._assert_valid_dtypes(
  178. [v for g, v in grads_and_vars if g is not None])
  179. apply_san_grads = self.apply_gradients(grads_and_vars,
  180. global_step=global_step,
  181. name="apply_grads")
  182. # Now reset the accumulators to zero
  183. resets_list = []
  184. with tf.control_dependencies([apply_san_grads]):
  185. for _, acc in self._grad_accum_dict.items():
  186. reset = tf.assign(acc, tf.zeros_like(acc))
  187. resets_list.append(reset)
  188. resets_list.append(self._batch_count.assign_add(1))
  189. last_step_update = tf.group(*([apply_san_grads] + resets_list))
  190. return last_step_update
  191. # pylint: disable=g-long-lambda
  192. update_op = tf.cond(update_cond,
  193. lambda: last_in_lot_op(
  194. loss, var_list,
  195. global_step),
  196. lambda: non_last_in_lot_op(
  197. loss, var_list))
  198. return tf.group(update_op)