sanitizer.py 4.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124
  # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
  #
  # Licensed under the Apache License, Version 2.0 (the "License");
  # you may not use this file except in compliance with the License.
  # You may obtain a copy of the License at
  #
  # http://www.apache.org/licenses/LICENSE-2.0
  #
  # Unless required by applicable law or agreed to in writing, software
  # distributed under the License is distributed on an "AS IS" BASIS,
  # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  # See the License for the specific language governing permissions and
  # limitations under the License.
  # ==============================================================================
  """Defines Sanitizer class for sanitizing tensors.

  A sanitizer first limits the sensitivity of a tensor and then adds noise
  to the tensor. The parameters are determined by the privacy_spending and the
  other parameters. It also uses an accountant to keep track of the privacy
  spending.
  """
  21. from __future__ import division
  22. import collections
  23. import tensorflow as tf
  24. from differential_privacy.dp_sgd.dp_optimizer import utils
  # Clipping configuration for a tensor. `l2norm_bound` is the bound handed to
  # the clipping helper and also the factor that scales the noise sigma;
  # `clip` says whether clipping is applied at all.
  ClipOption = collections.namedtuple("ClipOption", ["l2norm_bound", "clip"])
  class AmortizedGaussianSanitizer(object):
      """Sanitizer with Gaussian noise and amortized privacy spending accounting.

      This sanitizes a tensor by first clipping the tensor, summing the tensor
      and then adding an appropriate amount of noise. It also uses an amortized
      accountant to keep track of privacy spending.
      """

      def __init__(self, accountant, default_option):
          """Construct an AmortizedGaussianSanitizer.

          Args:
            accountant: the privacy accountant. Expect an amortized one.
            default_option: the default ClipOption.
          """
          self._accountant = accountant
          self._default_option = default_option
          # Per-tensor overrides keyed by tensor name; filled by set_option().
          self._options = {}

      def set_option(self, tensor_name, option):
          """Set options for an individual tensor.

          Args:
            tensor_name: the name of the tensor.
            option: clip option.
          """
          self._options[tensor_name] = option

      def sanitize(self, x, eps_delta, sigma=None,
                   option=ClipOption(None, None), tensor_name=None,
                   num_examples=None, add_noise=True):
          """Sanitize the given tensor.

          This sanitizes a given tensor by first applying l2 norm clipping and
          then adding Gaussian noise. It calls the privacy accountant for
          updating the privacy spending.

          Args:
            x: the tensor to sanitize.
            eps_delta: a pair of eps, delta for (eps,delta)-DP. Used to
              compute sigma if sigma is None; it is also forwarded to the
              accountant when noise is added.
            sigma: if sigma is not None, use sigma.
            option: a ClipOption which, if supplied (l2norm_bound not None),
              is used for clipping and adding noise.
            tensor_name: the name of the tensor, used to look up an option
              registered through set_option().
            num_examples: if None, use the number of "rows" of x.
            add_noise: if True, then add noise, else just clip.

          Returns:
            The sanitized tensor: x (clipped if requested) summed over axis 0
            and, when add_noise is True, with Gaussian noise added. The op
            that accumulates privacy spending is not returned; it is attached
            as a control dependency of the noise addition.
          """
          if sigma is None:
              # pylint: disable=unpacking-non-sequence
              eps, delta = eps_delta
              with tf.control_dependencies(
                      [tf.Assert(tf.greater(eps, 0),
                                 ["eps needs to be greater than 0"]),
                       tf.Assert(tf.greater(delta, 0),
                                 ["delta needs to be greater than 0"])]):
                  # The following formula is taken from
                  #   Dwork and Roth, The Algorithmic Foundations of
                  #   Differential Privacy, Appendix A.
                  #   http://www.cis.upenn.edu/~aaroth/Papers/privacybook.pdf
                  sigma = tf.sqrt(2.0 * tf.log(1.25 / delta)) / eps

          l2norm_bound, clip = option
          if l2norm_bound is None:
              # No explicit option given: start from the default ...
              l2norm_bound, clip = self._default_option
              # ... and let a per-tensor option take precedence over it.
              # NOTE(review): the pasted source lost its indentation; this
              # lookup is reconstructed as nested under the fallback branch so
              # that an explicitly supplied `option` wins - confirm upstream.
              if ((tensor_name is not None) and
                      (tensor_name in self._options)):
                  l2norm_bound, clip = self._options[tensor_name]
          if clip:
              x = utils.BatchClipByL2norm(x, l2norm_bound)

          if add_noise:
              if num_examples is None:
                  # First entry of x's shape, kept as a length-1 tensor.
                  num_examples = tf.slice(tf.shape(x), [0], [1])
              privacy_accum_op = self._accountant.accumulate_privacy_spending(
                  eps_delta, sigma, num_examples)
              # Tie the accounting op to the noisy result so that evaluating
              # the sanitized tensor also records the privacy spending.
              with tf.control_dependencies([privacy_accum_op]):
                  saned_x = utils.AddGaussianNoise(tf.reduce_sum(x, 0),
                                                   sigma * l2norm_bound)
          else:
              saned_x = tf.reduce_sum(x, 0)
          return saned_x