gaussian_moments.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324
  1. # Copyright 2016 The TensorFlow Authors. All Rights Reserved.
  2. #
  3. # Licensed under the Apache License, Version 2.0 (the "License");
  4. # you may not use this file except in compliance with the License.
  5. # You may obtain a copy of the License at
  6. #
  7. # http://www.apache.org/licenses/LICENSE-2.0
  8. #
  9. # Unless required by applicable law or agreed to in writing, software
  10. # distributed under the License is distributed on an "AS IS" BASIS,
  11. # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  12. # See the License for the specific language governing permissions and
  13. # limitations under the License.
  14. # ==============================================================================
  15. """A standalone utility for computing the log moments.
  16. The utility for computing the log moments. It consists of two methods.
  17. compute_log_moment(q, sigma, T, lmbd) computes the log moment with sampling
  18. probability q, noise sigma, order lmbd, and T steps. get_privacy_spent computes
  19. delta (or eps) given log moments and eps (or delta).
  20. Example use:
  21. Suppose that we have run an algorithm with parameters, an array of
  22. (q1, sigma1, T1) ... (qk, sigmak, Tk), and we wish to compute eps for a given
  23. delta. The example code would be:
  24. max_lmbd = 32
  25. lmbds = xrange(1, max_lmbd + 1)
  26. log_moments = []
  27. for lmbd in lmbds:
  28. log_moment = 0
  29. for q, sigma, T in parameters:
  30. log_moment += compute_log_moment(q, sigma, T, lmbd)
  31. log_moments.append((lmbd, log_moment))
  32. eps, delta = get_privacy_spent(log_moments, target_delta=delta)
  33. To verify that the I1 >= I2 (see comments in GaussianMomentsAccountant in
  34. accountant.py for the context), run the same loop above with verify=True
  35. passed to compute_log_moment.
  36. """
  37. import math
  38. import sys
  39. import numpy as np
  40. import scipy.integrate as integrate
  41. import scipy.stats
  42. from sympy.mpmath import mp
  43. def _to_np_float64(v):
  44. if math.isnan(v) or math.isinf(v):
  45. return np.inf
  46. return np.float64(v)
  47. ######################
  48. # FLOAT64 ARITHMETIC #
  49. ######################
  50. def pdf_gauss(x, sigma, mean=0):
  51. return scipy.stats.norm.pdf(x, loc=mean, scale=sigma)
  52. def cropped_ratio(a, b):
  53. if a < 1E-50 and b < 1E-50:
  54. return 1.
  55. else:
  56. return a / b
  57. def integral_inf(fn):
  58. integral, _ = integrate.quad(fn, -np.inf, np.inf)
  59. return integral
  60. def integral_bounded(fn, lb, ub):
  61. integral, _ = integrate.quad(fn, lb, ub)
  62. return integral
  63. def distributions(sigma, q):
  64. mu0 = lambda y: pdf_gauss(y, sigma=sigma, mean=0.0)
  65. mu1 = lambda y: pdf_gauss(y, sigma=sigma, mean=1.0)
  66. mu = lambda y: (1 - q) * mu0(y) + q * mu1(y)
  67. return mu0, mu1, mu
  68. def compute_a(sigma, q, lmbd, verbose=False):
  69. lmbd_int = int(math.ceil(lmbd))
  70. if lmbd_int == 0:
  71. return 1.0
  72. a_lambda_first_term_exact = 0
  73. a_lambda_second_term_exact = 0
  74. for i in xrange(lmbd_int + 1):
  75. coef_i = scipy.special.binom(lmbd_int, i) * (q ** i)
  76. s1, s2 = 0, 0
  77. for j in xrange(i + 1):
  78. coef_j = scipy.special.binom(i, j) * (-1) ** (i - j)
  79. s1 += coef_j * np.exp((j * j - j) / (2.0 * (sigma ** 2)))
  80. s2 += coef_j * np.exp((j * j + j) / (2.0 * (sigma ** 2)))
  81. a_lambda_first_term_exact += coef_i * s1
  82. a_lambda_second_term_exact += coef_i * s2
  83. a_lambda_exact = ((1.0 - q) * a_lambda_first_term_exact +
  84. q * a_lambda_second_term_exact)
  85. if verbose:
  86. print "A: by binomial expansion {} = {} + {}".format(
  87. a_lambda_exact,
  88. (1.0 - q) * a_lambda_first_term_exact,
  89. q * a_lambda_second_term_exact)
  90. return _to_np_float64(a_lambda_exact)
  91. def compute_b(sigma, q, lmbd, verbose=False):
  92. mu0, _, mu = distributions(sigma, q)
  93. b_lambda_fn = lambda z: mu0(z) * np.power(cropped_ratio(mu0(z), mu(z)), lmbd)
  94. b_lambda = integral_inf(b_lambda_fn)
  95. m = sigma ** 2 * (np.log((2. - q) / (1. - q)) + 1. / (2 * sigma ** 2))
  96. b_fn = lambda z: (np.power(mu0(z) / mu(z), lmbd) -
  97. np.power(mu(-z) / mu0(z), lmbd))
  98. if verbose:
  99. print "M =", m
  100. print "f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m))
  101. assert b_fn(-m) < 0 and b_fn(m) < 0
  102. b_lambda_int1_fn = lambda z: (mu0(z) *
  103. np.power(cropped_ratio(mu0(z), mu(z)), lmbd))
  104. b_lambda_int2_fn = lambda z: (mu0(z) *
  105. np.power(cropped_ratio(mu(z), mu0(z)), lmbd))
  106. b_int1 = integral_bounded(b_lambda_int1_fn, -m, m)
  107. b_int2 = integral_bounded(b_lambda_int2_fn, -m, m)
  108. a_lambda_m1 = compute_a(sigma, q, lmbd - 1)
  109. b_bound = a_lambda_m1 + b_int1 - b_int2
  110. if verbose:
  111. print "B: by numerical integration", b_lambda
  112. print "B must be no more than ", b_bound
  113. print b_lambda, b_bound
  114. return _to_np_float64(b_lambda)
  115. ###########################
  116. # MULTIPRECISION ROUTINES #
  117. ###########################
  118. def pdf_gauss_mp(x, sigma, mean):
  119. return mp.mpf(1.) / mp.sqrt(mp.mpf("2.") * sigma ** 2 * mp.pi) * mp.exp(
  120. - (x - mean) ** 2 / (mp.mpf("2.") * sigma ** 2))
  121. def integral_inf_mp(fn):
  122. integral, _ = mp.quad(fn, [-mp.inf, mp.inf], error=True)
  123. return integral
  124. def integral_bounded_mp(fn, lb, ub):
  125. integral, _ = mp.quad(fn, [lb, ub], error=True)
  126. return integral
  127. def distributions_mp(sigma, q):
  128. mu0 = lambda y: pdf_gauss_mp(y, sigma=sigma, mean=mp.mpf(0))
  129. mu1 = lambda y: pdf_gauss_mp(y, sigma=sigma, mean=mp.mpf(1))
  130. mu = lambda y: (1 - q) * mu0(y) + q * mu1(y)
  131. return mu0, mu1, mu
  132. def compute_a_mp(sigma, q, lmbd, verbose=False):
  133. lmbd_int = int(math.ceil(lmbd))
  134. if lmbd_int == 0:
  135. return 1.0
  136. mu0, mu1, mu = distributions_mp(sigma, q)
  137. a_lambda_fn = lambda z: mu(z) * (mu(z) / mu0(z)) ** lmbd_int
  138. a_lambda_first_term_fn = lambda z: mu0(z) * (mu(z) / mu0(z)) ** lmbd_int
  139. a_lambda_second_term_fn = lambda z: mu1(z) * (mu(z) / mu0(z)) ** lmbd_int
  140. a_lambda = integral_inf_mp(a_lambda_fn)
  141. a_lambda_first_term = integral_inf_mp(a_lambda_first_term_fn)
  142. a_lambda_second_term = integral_inf_mp(a_lambda_second_term_fn)
  143. if verbose:
  144. print "A: by numerical integration {} = {} + {}".format(
  145. a_lambda,
  146. (1 - q) * a_lambda_first_term,
  147. q * a_lambda_second_term)
  148. return _to_np_float64(a_lambda)
  149. def compute_b_mp(sigma, q, lmbd, verbose=False):
  150. lmbd_int = int(math.ceil(lmbd))
  151. if lmbd_int == 0:
  152. return 1.0
  153. mu0, _, mu = distributions_mp(sigma, q)
  154. b_lambda_fn = lambda z: mu0(z) * (mu0(z) / mu(z)) ** lmbd_int
  155. b_lambda = integral_inf_mp(b_lambda_fn)
  156. m = sigma ** 2 * (mp.log((2 - q) / (1 - q)) + 1 / (2 * (sigma ** 2)))
  157. b_fn = lambda z: ((mu0(z) / mu(z)) ** lmbd_int -
  158. (mu(-z) / mu0(z)) ** lmbd_int)
  159. if verbose:
  160. print "M =", m
  161. print "f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m))
  162. assert b_fn(-m) < 0 and b_fn(m) < 0
  163. b_lambda_int1_fn = lambda z: mu0(z) * (mu0(z) / mu(z)) ** lmbd_int
  164. b_lambda_int2_fn = lambda z: mu0(z) * (mu(z) / mu0(z)) ** lmbd_int
  165. b_int1 = integral_bounded_mp(b_lambda_int1_fn, -m, m)
  166. b_int2 = integral_bounded_mp(b_lambda_int2_fn, -m, m)
  167. a_lambda_m1 = compute_a_mp(sigma, q, lmbd - 1)
  168. b_bound = a_lambda_m1 + b_int1 - b_int2
  169. if verbose:
  170. print "B by numerical integration", b_lambda
  171. print "B must be no more than ", b_bound
  172. assert b_lambda < b_bound + 1e-5
  173. return _to_np_float64(b_lambda)
  174. def _compute_delta(log_moments, eps):
  175. """Compute delta for given log_moments and eps.
  176. Args:
  177. log_moments: the log moments of privacy loss, in the form of pairs
  178. of (moment_order, log_moment)
  179. eps: the target epsilon.
  180. Returns:
  181. delta
  182. """
  183. min_delta = 1.0
  184. for moment_order, log_moment in log_moments:
  185. if moment_order == 0:
  186. continue
  187. if math.isinf(log_moment) or math.isnan(log_moment):
  188. sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
  189. continue
  190. if log_moment < moment_order * eps:
  191. min_delta = min(min_delta,
  192. math.exp(log_moment - moment_order * eps))
  193. return min_delta
  194. def _compute_eps(log_moments, delta):
  195. """Compute epsilon for given log_moments and delta.
  196. Args:
  197. log_moments: the log moments of privacy loss, in the form of pairs
  198. of (moment_order, log_moment)
  199. delta: the target delta.
  200. Returns:
  201. epsilon
  202. """
  203. min_eps = float("inf")
  204. for moment_order, log_moment in log_moments:
  205. if moment_order == 0:
  206. continue
  207. if math.isinf(log_moment) or math.isnan(log_moment):
  208. sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order)
  209. continue
  210. min_eps = min(min_eps, (log_moment - math.log(delta)) / moment_order)
  211. return min_eps
  212. def compute_log_moment(q, sigma, steps, lmbd, verify=False, verbose=False):
  213. """Compute the log moment of Gaussian mechanism for given parameters.
  214. Args:
  215. q: the sampling ratio.
  216. sigma: the noise sigma.
  217. steps: the number of steps.
  218. lmbd: the moment order.
  219. verify: if False, only compute the symbolic version. If True, computes
  220. both symbolic and numerical solutions and verifies the results match.
  221. verbose: if True, print out debug information.
  222. Returns:
  223. the log moment with type np.float64, could be np.inf.
  224. """
  225. moment = compute_a(sigma, q, lmbd, verbose=verbose)
  226. if verify:
  227. mp.dps = 50
  228. moment_a_mp = compute_a_mp(sigma, q, lmbd, verbose=verbose)
  229. moment_b_mp = compute_b_mp(sigma, q, lmbd, verbose=verbose)
  230. np.testing.assert_allclose(moment, moment_a_mp, rtol=1e-10)
  231. if not np.isinf(moment_a_mp):
  232. # The following test fails for (1, np.inf)!
  233. np.testing.assert_array_less(moment_b_mp, moment_a_mp)
  234. if np.isinf(moment):
  235. return np.inf
  236. else:
  237. return np.log(moment) * steps
  238. def get_privacy_spent(log_moments, target_eps=None, target_delta=None):
  239. """Compute delta (or eps) for given eps (or delta) from log moments.
  240. Args:
  241. log_moments: array of (moment_order, log_moment) pairs.
  242. target_eps: if not None, the epsilon for which we would like to compute
  243. corresponding delta value.
  244. target_delta: if not None, the delta for which we would like to compute
  245. corresponding epsilon value. Exactly one of target_eps and target_delta
  246. is None.
  247. Returns:
  248. eps, delta pair
  249. """
  250. assert (target_eps is None) ^ (target_delta is None)
  251. assert not ((target_eps is None) and (target_delta is None))
  252. if target_eps is not None:
  253. return (target_eps, _compute_delta(log_moments, target_eps))
  254. else:
  255. return (_compute_eps(log_moments, target_delta), target_delta)