Source code for jax_privacy.experimental.monte_carlo.delta_calculation

# Copyright 2026 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Utilities for calculating deltas in Monte Carlo accounting."""

from typing import Sequence

import numpy as np
import scipy


def _kl(q: float, p: float) -> float:
  return scipy.special.rel_entr(q, p) + scipy.special.rel_entr(1 - q, 1 - p)


def _hoeffding_bound(num_samples: int, tau: float, delta: float) -> float:
  """Bound on prob that sample mean <= delta if true mean > tau * delta."""
  # Fact 4.2 of https://arxiv.org/pdf/2412.16802.
  assert tau >= 1
  return np.exp(-num_samples * _kl(delta, tau * delta))



[docs]
def get_overall_delta(num_samples: int, base_delta: float) -> float:
  """The delta we can formally report using Monte Carlo verification.

  In more detail, if we use num_samples samples to verify that a mechanism
  satisfies (epsilon, base_delta)-DP, then we can formally report that the
  mechanism satisfies (epsilon, overall_delta)-DP.

  This method assumes we are doing many Monte Carlo verifications of different
  ordered mechanisms, and then picking the highest-utility mechanism that meets
  our target privacy guarantee, as in Appendix A of
  https://arxiv.org/abs/2602.09338. If we are only verifying one mechanism
  (which is undesirable in practice), a tighter bound is possible.

  Args:
    num_samples: The number of samples used in Monte Carlo verification.
    base_delta: The base_delta such that we use the samples to verify that each
      mechanism satisfies (epsilon, base_delta)-DP.

  Returns:
    overall_delta such that we can report the end-to-end pipeline of Monte Carlo
    verification and then running the best verified mechanism is (epsilon,
    overall_delta)-DP.
  """
  if base_delta <= 0 or base_delta > 1:
    raise ValueError('base_delta must be in (0, 1].')
  if num_samples <= 0:
    raise ValueError('num_samples must be positive.')

  def overall_delta_from_tau(tau):
    q = _hoeffding_bound(num_samples, tau, base_delta)
    return tau * base_delta + q * (1 - tau * base_delta)

  best_tau = scipy.optimize.minimize_scalar(
      overall_delta_from_tau, bounds=(1, 1 / base_delta), method='bounded'
  ).x
  return min(overall_delta_from_tau(best_tau), 1.0)




[docs]
def minimum_samples_to_calibrate(base_delta: float, target_delta: float) -> int:
  """The minimum number of samples needed to calibrate to target_delta.

  While for practical values of delta this method is stable, for very small
  target_delta (or base_delta very close to target_delta) this may binary search
  over a number of samples that results in errors due to floats having a maximum
  representable value.

  Args:
    base_delta: The base_delta such that we use Monte Carlo verification to
      check if each mechanism satisfies (epsilon, base_delta)-DP.
    target_delta: The value such that we want to report the overall mechanism of
      Monte Carlo verification and then running the best verified mechanism is
      (epsilon, target_delta)-DP.

  Returns:
    The minimum number of samples needed to calibrate to target_delta.
  """
  if base_delta <= 0 or base_delta > 1:
    raise ValueError('base_delta must be in (0, 1].')
  if target_delta <= 0 or target_delta > 1:
    raise ValueError('target_delta must be in (0, 1].')
  if target_delta <= base_delta:
    raise ValueError('target_delta must be > base_delta.')
  lower_bound = int(1 // target_delta)
  upper_bound = 2 * lower_bound

  # There are some stability issues in going back-and-forth between base_delta
  # and target_delta, so to be conservative we enforce both that we have a valid
  # base_delta and that we achieve the target_delta.
  def _enough_samples(num_samples):
    try:
      get_base_delta(num_samples, target_delta)
      return get_overall_delta(num_samples, base_delta) <= target_delta
    except ValueError:
      return False

  while not _enough_samples(upper_bound):
    lower_bound, upper_bound = 2 * lower_bound, 2 * upper_bound
  while lower_bound < upper_bound - 1:
    mid = (lower_bound + upper_bound) // 2
    if _enough_samples(mid):
      upper_bound = mid
    else:
      lower_bound = mid

  # Due to precision issues, upper_bound might be slightly too small. We fix it
  # post-hoc by increasing it by 1 until it achieves the target_delta and also
  # we have enough samples to find a valid base_delta.
  while not _enough_samples(upper_bound):
    upper_bound += 1
  return upper_bound




[docs]
def get_base_delta(num_samples: int, target_delta: float) -> float:
  """The base_delta for Monte Carlo verification to achieve target_delta.

  In more detail, if we use num_samples samples to verify that a mechanism
  satisfies (epsilon, base_delta)-DP, then we can formally report that the
  mechanism satisfies (epsilon, target_delta)-DP.

  This method assumes we are doing many Monte Carlo verifications of different
  ordered mechanisms, and then picking the highest-utility mechanism that meets
  our target privacy guarantee. If we are only verifying one mechanism (which
  requires some chance of an empty output which is usually undesirable in
  practice), a tighter bound is possible.

  Args:
    num_samples: The number of samples used in Monte Carlo verification.
    target_delta: The value such that we want to report the overall mechanism of
      Monte Carlo verification and then running the best verified mechanism is
      (epsilon, target_delta)-DP.

  Returns:
    The base_delta such that we use Monte Carlo verification to check if each
    mechanism satisfies (epsilon, base_delta)-DP.
  """
  if num_samples <= 0:
    raise ValueError('num_samples must be positive.')
  if target_delta < 0 or target_delta > 1:
    raise ValueError('target_delta must be in [0, 1].')
  tol = 1e-4 * target_delta
  base_delta = scipy.optimize.minimize_scalar(
      lambda d: abs(get_overall_delta(num_samples, d) - target_delta),
      bounds=(0, target_delta),
      method='bounded',
      options={'xatol': tol},
  ).x
  # Because of the tolerance, we may end up with base_delta that is slightly
  # too small. We report base_delta if it achieves the target_delta, otherwise
  # we try base_delta - tol to be conservative. If that also does not achieve
  # the target_delta, it is possible that the minimum of
  # |overall_delta - target_delta| may be greater than 0, i.e. we cannot find a
  # base_delta that is valid.
  if get_overall_delta(num_samples, base_delta) < target_delta:
    return base_delta
  if get_overall_delta(num_samples, base_delta - tol) < target_delta:
    return base_delta - tol
  raise ValueError(
      'Failed to find a valid base_delta. num_samples may be too small.'
  )




[docs]
def delta_from_epsilon_and_samples(
    epsilon: float,
    samples: Sequence[float],
    counts: Sequence[float] | None = None,
):
  """Calculate the delta parameter for a given epsilon and list of samples.

  Args:
    epsilon: The epsilon parameter of the DP guarantee.
    samples: Samples from the privacy loss distribution. That is, to estimate
      the e^epsilon-hockey stick divergence between P and Q, we sample x from P,
      and then compute ln(P(x)/Q(x)) to get a single sample.
    counts: A list of floats representing the counts of the samples. If None, we
      assume the samples are count 1. This can be helpful if the number of
      samples is very large, in which case one can discretize the samples and
      pass the counts of each discretization. If passed, should be the same
      length as samples.

  Returns:
    The delta parameter given by Monte Carlo estimation of the hockey-stick
    divergence.
  """
  samples = np.asarray(samples)
  if epsilon < 0:
    raise ValueError('epsilon must be non-negative.')
  if samples.ndim != 1:
    raise ValueError('samples must be a 1D array.')
  if counts is not None:
    counts = np.asarray(counts)
    if counts.ndim != 1:
      raise ValueError('counts must be a 1D array.')
    if samples.size != counts.size:
      raise ValueError('samples and counts must have the same size.')
  np_min = np.minimum
  return np.average(-np.expm1(np_min(epsilon - samples, 0.0)), weights=counts)




[docs]
def perform_calibration_from_samples(
    epsilon: float,
    delta: float,
    *,
    positive_samples: Sequence[Sequence[float]],
    positive_counts: Sequence[Sequence[float]] | None = None,
    negative_samples: Sequence[Sequence[float]] | None = None,
    negative_counts: Sequence[Sequence[float]] | None = None,
) -> tuple[bool, int | float]:
  """Perform calibration to find highest-utility parameter for DP target.

  This is Algorithm 5 in https://arxiv.org/pdf/2602.09338.

  In particular, this method assumes we have chosen a sweep of a hyperparameter
  (such as noise multiplier), for each value in the sweep independently
  generated some number of samples, and now want to find the highest-utility
  hyperparameter (e.g. the lowest noise multiplier or highest number of training
  iterations in DP-SGD) we can use and still report end-to-end
  (epsilon, delta)-DP using Monte Carlo verification.

  We assume that the samples are ordered from highest to lowest privacy of the
  hyperparameters, i.e. lowest to highest delta at a fixed epsilon
  (or vice-versa), even though the privacy parameters may not be explicitly
  known. e.g., in the example of calibrating the noise multiplier for
  DP-BandMF, we would order the samples from highest to lowest noise multiplier,
  even if we may not know the exact (epsilon, delta)-DP guarantee for each noise
  multiplier.

  If different numbers of samples were used for each hyperparameter, this method
  conservatively uses the minimum number of samples across all values of the
  hyperparameter for computing tail bounds.

  Example Usage (calibrating a Gaussian mechanism):
    >>> epsilon, delta = 4.0, 1e-3
    >>> nm_sweep = [2.0, 1.0, 0.5]
    >>> positive_samples = []
    >>> for nm in nm_sweep:
    ...   positive_samples.append(
    ...     np.random.normal(loc=0.5 / nm ** 2, size=1_000_000)
    ...   )
    >>> passes_verification, best_nm_index = perform_calibration_from_samples(
    ...   epsilon, delta, positive_samples=positive_samples
    ... )
    >>> if passes_verification:
    ...   print(nm_sweep[best_nm_index])
    1.0

  Args:
    epsilon: The epsilon parameter of the target DP guarantee.
    delta: The delta parameter of the target DP guarantee.
    positive_samples: A list of lists of privacy loss samples from the positive
      case, one list for each hyperparameter value.
    positive_counts: An optional list of lists of counts corresponding to each
      of the lists of positive samples. If None, we assume the samples are count
      1. If passed, each list should be the same length as the associated list
      of positive_samples.
    negative_samples: An optional list of lists of privacy loss samples from the
      negative case, one list for each hyperparameter value. If None, this means
      we assume the positive case has a worse DP guarantee than the negative
      case, so we only need to do accounting for the positive case.
    negative_counts: An optional list of lists of counts corresponding to each
      of the lists of negative samples. If None, we assume the samples are
      unweighted. If passed, each list should be the same length as the
      associated list of negative_samples. Ignored if negative_samples is None.

  Returns:
    Either (True, i), or (False, base_delta). If there is an associated set of
    hyperparameters and (True, i) is returned, i is the index of the
    highest-utility hyperparameter we can use while satisfying the target
    (epsilon, delta)-DP guarantee. If (False, base_delta) is returned, we should
    fall back to a mechanism that is known to be (epsilon, base_delta)-DP (i.e.,
    one that can be calibrated without Monte Carlo verification)
  """
  if not positive_samples:
    raise ValueError('positive_samples must be non-empty.')
  if positive_counts is not None and len(positive_samples) != len(
      positive_counts
  ):
    raise ValueError(
        'positive_samples and positive_counts must have the same length.'
    )
  if negative_samples is not None and len(positive_samples) != len(
      negative_samples
  ):
    raise ValueError(
        'positive_samples and negative_samples must have the same length.'
    )
  if negative_counts is not None and len(positive_samples) != len(
      negative_counts
  ):
    raise ValueError(
        'positive_samples and negative_counts must have the same length.'
    )
  if positive_counts is None:
    positive_counts = [np.ones_like(samples) for samples in positive_samples]
  if negative_samples is not None and negative_counts is None:
    negative_counts = [np.ones_like(samples) for samples in negative_samples]
  positive_sample_counts = [sum(counts) for counts in positive_counts]
  if negative_samples is None:
    negative_sample_counts = []
  else:
    negative_sample_counts = [sum(counts) for counts in negative_counts]
  min_sample_count = min(positive_sample_counts + negative_sample_counts)

  base_delta = get_base_delta(min_sample_count, delta)

  first_positive_delta = delta_from_epsilon_and_samples(
      epsilon, positive_samples[0], positive_counts[0]
  )
  if negative_samples is None:
    first_negative_delta = 0.0
  else:
    first_negative_delta = delta_from_epsilon_and_samples(
        epsilon, negative_samples[0], negative_counts[0]
    )
  if first_positive_delta > base_delta or first_negative_delta > base_delta:
    return False, base_delta

  for i in range(1, len(positive_samples)):
    positive_delta = delta_from_epsilon_and_samples(
        epsilon, positive_samples[i], positive_counts[i]
    )
    if negative_samples is None:
      negative_delta = 0.0
    else:
      negative_delta = delta_from_epsilon_and_samples(
          epsilon, negative_samples[i], negative_counts[i]
      )
    if positive_delta > base_delta or negative_delta > base_delta:
      # This hyperparameter does not pass verification, return the previous one.
      return True, i - 1
  return True, len(positive_samples) - 1