# Copyright 2016 The TensorFlow Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================== """A standalone utility for computing the log moments. The utility for computing the log moments. It consists of two methods. compute_log_moment(q, sigma, T, lmbd) computes the log moment with sampling probability q, noise sigma, order lmbd, and T steps. get_privacy_spent computes delta (or eps) given log moments and eps (or delta). Example use: Suppose that we have run an algorithm with parameters, an array of (q1, sigma1, T1) ... (qk, sigmak, Tk), and we wish to compute eps for a given delta. The example code would be: max_lmbd = 32 lmbds = xrange(1, max_lmbd + 1) log_moments = [] for lmbd in lmbds: log_moment = 0 for q, sigma, T in parameters: log_moment += compute_log_moment(q, sigma, T, lmbd) log_moments.append((lmbd, log_moment)) eps, delta = get_privacy_spent(log_moments, target_delta=delta) To verify that the I1 >= I2 (see comments in GaussianMomentsAccountant in accountant.py for the context), run the same loop above with verify=True passed to compute_log_moment. """ import math import sys import numpy as np import scipy.integrate as integrate import scipy.stats from sympy.mpmath import mp def _to_np_float64(v): if math.isnan(v) or math.isinf(v): return np.inf return np.float64(v) ###################### # FLOAT64 ARITHMETIC # ###################### def pdf_gauss(x, sigma, mean=0): return scipy.stats.norm.pdf(x, loc=mean, scale=sigma) def cropped_ratio(a, b): if a < 1E-50 and b < 1E-50: return 1. else: return a / b def integral_inf(fn): integral, _ = integrate.quad(fn, -np.inf, np.inf) return integral def integral_bounded(fn, lb, ub): integral, _ = integrate.quad(fn, lb, ub) return integral def distributions(sigma, q): mu0 = lambda y: pdf_gauss(y, sigma=sigma, mean=0.0) mu1 = lambda y: pdf_gauss(y, sigma=sigma, mean=1.0) mu = lambda y: (1 - q) * mu0(y) + q * mu1(y) return mu0, mu1, mu def compute_a(sigma, q, lmbd, verbose=False): lmbd_int = int(math.ceil(lmbd)) if lmbd_int == 0: return 1.0 a_lambda_first_term_exact = 0 a_lambda_second_term_exact = 0 for i in xrange(lmbd_int + 1): coef_i = scipy.special.binom(lmbd_int, i) * (q ** i) s1, s2 = 0, 0 for j in xrange(i + 1): coef_j = scipy.special.binom(i, j) * (-1) ** (i - j) s1 += coef_j * np.exp((j * j - j) / (2.0 * (sigma ** 2))) s2 += coef_j * np.exp((j * j + j) / (2.0 * (sigma ** 2))) a_lambda_first_term_exact += coef_i * s1 a_lambda_second_term_exact += coef_i * s2 a_lambda_exact = ((1.0 - q) * a_lambda_first_term_exact + q * a_lambda_second_term_exact) if verbose: print "A: by binomial expansion {} = {} + {}".format( a_lambda_exact, (1.0 - q) * a_lambda_first_term_exact, q * a_lambda_second_term_exact) return _to_np_float64(a_lambda_exact) def compute_b(sigma, q, lmbd, verbose=False): mu0, _, mu = distributions(sigma, q) b_lambda_fn = lambda z: mu0(z) * np.power(cropped_ratio(mu0(z), mu(z)), lmbd) b_lambda = integral_inf(b_lambda_fn) m = sigma ** 2 * (np.log((2. - q) / (1. - q)) + 1. / (2 * sigma ** 2)) b_fn = lambda z: (np.power(mu0(z) / mu(z), lmbd) - np.power(mu(-z) / mu0(z), lmbd)) if verbose: print "M =", m print "f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m)) assert b_fn(-m) < 0 and b_fn(m) < 0 b_lambda_int1_fn = lambda z: (mu0(z) * np.power(cropped_ratio(mu0(z), mu(z)), lmbd)) b_lambda_int2_fn = lambda z: (mu0(z) * np.power(cropped_ratio(mu(z), mu0(z)), lmbd)) b_int1 = integral_bounded(b_lambda_int1_fn, -m, m) b_int2 = integral_bounded(b_lambda_int2_fn, -m, m) a_lambda_m1 = compute_a(sigma, q, lmbd - 1) b_bound = a_lambda_m1 + b_int1 - b_int2 if verbose: print "B: by numerical integration", b_lambda print "B must be no more than ", b_bound print b_lambda, b_bound return _to_np_float64(b_lambda) ########################### # MULTIPRECISION ROUTINES # ########################### def pdf_gauss_mp(x, sigma, mean): return mp.mpf(1.) / mp.sqrt(mp.mpf("2.") * sigma ** 2 * mp.pi) * mp.exp( - (x - mean) ** 2 / (mp.mpf("2.") * sigma ** 2)) def integral_inf_mp(fn): integral, _ = mp.quad(fn, [-mp.inf, mp.inf], error=True) return integral def integral_bounded_mp(fn, lb, ub): integral, _ = mp.quad(fn, [lb, ub], error=True) return integral def distributions_mp(sigma, q): mu0 = lambda y: pdf_gauss_mp(y, sigma=sigma, mean=mp.mpf(0)) mu1 = lambda y: pdf_gauss_mp(y, sigma=sigma, mean=mp.mpf(1)) mu = lambda y: (1 - q) * mu0(y) + q * mu1(y) return mu0, mu1, mu def compute_a_mp(sigma, q, lmbd, verbose=False): lmbd_int = int(math.ceil(lmbd)) if lmbd_int == 0: return 1.0 mu0, mu1, mu = distributions_mp(sigma, q) a_lambda_fn = lambda z: mu(z) * (mu(z) / mu0(z)) ** lmbd_int a_lambda_first_term_fn = lambda z: mu0(z) * (mu(z) / mu0(z)) ** lmbd_int a_lambda_second_term_fn = lambda z: mu1(z) * (mu(z) / mu0(z)) ** lmbd_int a_lambda = integral_inf_mp(a_lambda_fn) a_lambda_first_term = integral_inf_mp(a_lambda_first_term_fn) a_lambda_second_term = integral_inf_mp(a_lambda_second_term_fn) if verbose: print "A: by numerical integration {} = {} + {}".format( a_lambda, (1 - q) * a_lambda_first_term, q * a_lambda_second_term) return _to_np_float64(a_lambda) def compute_b_mp(sigma, q, lmbd, verbose=False): lmbd_int = int(math.ceil(lmbd)) if lmbd_int == 0: return 1.0 mu0, _, mu = distributions_mp(sigma, q) b_lambda_fn = lambda z: mu0(z) * (mu0(z) / mu(z)) ** lmbd_int b_lambda = integral_inf_mp(b_lambda_fn) m = sigma ** 2 * (mp.log((2 - q) / (1 - q)) + 1 / (2 * (sigma ** 2))) b_fn = lambda z: ((mu0(z) / mu(z)) ** lmbd_int - (mu(-z) / mu0(z)) ** lmbd_int) if verbose: print "M =", m print "f(-M) = {} f(M) = {}".format(b_fn(-m), b_fn(m)) assert b_fn(-m) < 0 and b_fn(m) < 0 b_lambda_int1_fn = lambda z: mu0(z) * (mu0(z) / mu(z)) ** lmbd_int b_lambda_int2_fn = lambda z: mu0(z) * (mu(z) / mu0(z)) ** lmbd_int b_int1 = integral_bounded_mp(b_lambda_int1_fn, -m, m) b_int2 = integral_bounded_mp(b_lambda_int2_fn, -m, m) a_lambda_m1 = compute_a_mp(sigma, q, lmbd - 1) b_bound = a_lambda_m1 + b_int1 - b_int2 if verbose: print "B by numerical integration", b_lambda print "B must be no more than ", b_bound assert b_lambda < b_bound + 1e-5 return _to_np_float64(b_lambda) def _compute_delta(log_moments, eps): """Compute delta for given log_moments and eps. Args: log_moments: the log moments of privacy loss, in the form of pairs of (moment_order, log_moment) eps: the target epsilon. Returns: delta """ min_delta = 1.0 for moment_order, log_moment in log_moments: if moment_order == 0: continue if math.isinf(log_moment) or math.isnan(log_moment): sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order) continue if log_moment < moment_order * eps: min_delta = min(min_delta, math.exp(log_moment - moment_order * eps)) return min_delta def _compute_eps(log_moments, delta): """Compute epsilon for given log_moments and delta. Args: log_moments: the log moments of privacy loss, in the form of pairs of (moment_order, log_moment) delta: the target delta. Returns: epsilon """ min_eps = float("inf") for moment_order, log_moment in log_moments: if moment_order == 0: continue if math.isinf(log_moment) or math.isnan(log_moment): sys.stderr.write("The %d-th order is inf or Nan\n" % moment_order) continue min_eps = min(min_eps, (log_moment - math.log(delta)) / moment_order) return min_eps def compute_log_moment(q, sigma, steps, lmbd, verify=False, verbose=False): """Compute the log moment of Gaussian mechanism for given parameters. Args: q: the sampling ratio. sigma: the noise sigma. steps: the number of steps. lmbd: the moment order. verify: if False, only compute the symbolic version. If True, computes both symbolic and numerical solutions and verifies the results match. verbose: if True, print out debug information. Returns: the log moment with type np.float64, could be np.inf. """ moment = compute_a(sigma, q, lmbd, verbose=verbose) if verify: mp.dps = 50 moment_a_mp = compute_a_mp(sigma, q, lmbd, verbose=verbose) moment_b_mp = compute_b_mp(sigma, q, lmbd, verbose=verbose) np.testing.assert_allclose(moment, moment_a_mp, rtol=1e-10) if not np.isinf(moment_a_mp): # The following test fails for (1, np.inf)! np.testing.assert_array_less(moment_b_mp, moment_a_mp) if np.isinf(moment): return np.inf else: return np.log(moment) * steps def get_privacy_spent(log_moments, target_eps=None, target_delta=None): """Compute delta (or eps) for given eps (or delta) from log moments. Args: log_moments: array of (moment_order, log_moment) pairs. target_eps: if not None, the epsilon for which we would like to compute corresponding delta value. target_delta: if not None, the delta for which we would like to compute corresponding epsilon value. Exactly one of target_eps and target_delta is None. Returns: eps, delta pair """ assert (target_eps is None) ^ (target_delta is None) assert not ((target_eps is None) and (target_delta is None)) if target_eps is not None: return (target_eps, _compute_delta(log_moments, target_eps)) else: return (_compute_eps(log_moments, target_delta), target_delta)