aggregation.py 4.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132
# Copyright 2016 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
  15. from __future__ import absolute_import
  16. from __future__ import division
  17. from __future__ import print_function
  18. import numpy as np
  19. def labels_from_probs(probs):
  20. """
  21. Helper function: computes argmax along last dimension of array to obtain
  22. labels (max prob or max logit value)
  23. :param probs: numpy array where probabilities or logits are on last dimension
  24. :return: array with same shape as input besides last dimension with shape 1
  25. now containing the labels
  26. """
  27. # Compute last axis index
  28. last_axis = len(np.shape(probs)) - 1
  29. # Label is argmax over last dimension
  30. labels = np.argmax(probs, axis=last_axis)
  31. # Return as np.int32
  32. return np.asarray(labels, dtype=np.int32)
  33. def noisy_max(logits, lap_scale, return_clean_votes=False):
  34. """
  35. This aggregation mechanism takes the softmax/logit output of several models
  36. resulting from inference on identical inputs and computes the noisy-max of
  37. the votes for candidate classes to select a label for each sample: it
  38. adds Laplacian noise to label counts and returns the most frequent label.
  39. :param logits: logits or probabilities for each sample
  40. :param lap_scale: scale of the Laplacian noise to be added to counts
  41. :param return_clean_votes: if set to True, also returns clean votes (without
  42. Laplacian noise). This can be used to perform the
  43. privacy analysis of this aggregation mechanism.
  44. :return: pair of result and (if clean_votes is set to True) the clean counts
  45. for each class per sample and the the original labels produced by
  46. the teachers.
  47. """
  48. # Compute labels from logits/probs and reshape array properly
  49. labels = labels_from_probs(logits)
  50. labels_shape = np.shape(labels)
  51. labels = labels.reshape((labels_shape[0], labels_shape[1]))
  52. # Initialize array to hold final labels
  53. result = np.zeros(int(labels_shape[1]))
  54. if return_clean_votes:
  55. # Initialize array to hold clean votes for each sample
  56. clean_votes = np.zeros((int(labels_shape[1]), 10))
  57. # Parse each sample
  58. for i in xrange(int(labels_shape[1])):
  59. # Count number of votes assigned to each class
  60. label_counts = np.bincount(labels[:, i], minlength=10)
  61. if return_clean_votes:
  62. # Store vote counts for export
  63. clean_votes[i] = label_counts
  64. # Cast in float32 to prepare before addition of Laplacian noise
  65. label_counts = np.asarray(label_counts, dtype=np.float32)
  66. # Sample independent Laplacian noise for each class
  67. for item in xrange(10):
  68. label_counts[item] += np.random.laplace(loc=0.0, scale=float(lap_scale))
  69. # Result is the most frequent label
  70. result[i] = np.argmax(label_counts)
  71. # Cast labels to np.int32 for compatibility with deep_cnn.py feed dictionaries
  72. result = np.asarray(result, dtype=np.int32)
  73. if return_clean_votes:
  74. # Returns several array, which are later saved:
  75. # result: labels obtained from the noisy aggregation
  76. # clean_votes: the number of teacher votes assigned to each sample and class
  77. # labels: the labels assigned by teachers (before the noisy aggregation)
  78. return result, clean_votes, labels
  79. else:
  80. # Only return labels resulting from noisy aggregation
  81. return result
  82. def aggregation_most_frequent(logits):
  83. """
  84. This aggregation mechanism takes the softmax/logit output of several models
  85. resulting from inference on identical inputs and computes the most frequent
  86. label. It is deterministic (no noise injection like noisy_max() above.
  87. :param logits: logits or probabilities for each sample
  88. :return:
  89. """
  90. # Compute labels from logits/probs and reshape array properly
  91. labels = labels_from_probs(logits)
  92. labels_shape = np.shape(labels)
  93. labels = labels.reshape((labels_shape[0], labels_shape[1]))
  94. # Initialize array to hold final labels
  95. result = np.zeros(int(labels_shape[1]))
  96. # Parse each sample
  97. for i in xrange(int(labels_shape[1])):
  98. # Count number of votes assigned to each class
  99. label_counts = np.bincount(labels[:, i], minlength=10)
  100. label_counts = np.asarray(label_counts, dtype=np.int32)
  101. # Result is the most frequent label
  102. result[i] = np.argmax(label_counts)
  103. return np.asarray(result, dtype=np.int32)