Source code for jax_privacy.auditing

# Copyright 2026 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Library for empirical privacy auditing/estimation.

This library provides functions for estimating the privacy of a model,
based on attack scores of held-in and held-out canaries
"""

from __future__ import annotations

from concurrent import futures
import dataclasses
import functools
from typing import Callable, TypeAlias

import dp_accounting
import numpy as np
import scipy.optimize
import scipy.special
import scipy.stats


_norm = scipy.stats.norm
_binom = scipy.stats.binom
_logistic = scipy.special.expit



[docs]
class ThresholdStrategy:
  """Base class for threshold selection strategies."""




[docs]
class Bonferroni(ThresholdStrategy):
  """Use Bonferroni correction across all possible thresholds."""




[docs]
@dataclasses.dataclass(frozen=True)
class Explicit(ThresholdStrategy):
  """Use a specific threshold value.

  Attributes:
    threshold: The threshold value to use.
  """

  threshold: float




[docs]
@dataclasses.dataclass(frozen=True)
class Split(ThresholdStrategy):
  """Split data to choose threshold and then compute the bound.

  Attributes:
    threshold_estimation_frac: The fraction of data to use for computing the
      threshold. The rest will be used for computing the bound.
    seed: The seed for the random number generator. If None, a seed will be
      chosen non-deterministically.
  """

  threshold_estimation_frac: float = 0.5
  seed: int | None = None




[docs]
@dataclasses.dataclass(frozen=True)
class MultiSplit(ThresholdStrategy):
  """Splits data multiple times with significance correction.

  Reports the median of lower bounds computed at significance level alpha/2
  [Meinshausen et al. (2009)]. Theorem 3.1 says that 2 * median(p_i) is a valid
  p-value, which implies that the median of bounds computed at significance
  alpha / 2 corresponds to the valid rejection threshold for an overall level of
  alpha. See proof of Theorem 3.1 in https://arxiv.org/pdf/0811.2177 for
  details.

  Attributes:
    num_samples: The number of splits to use.
    threshold_estimation_frac: The fraction of data to use for computing the
      threshold. The rest will be used for computing the bound.
    seed: The seed for the random number generator. If None, a seed will be
      chosen non-deterministically.
  """

  num_samples: int = 100
  threshold_estimation_frac: float = 0.5
  seed: int | None = None



AuditAllThresholdsMethod: TypeAlias = Callable[
    [
        'CanaryScoreAuditor',  # self
        float,  # significance
        float,  # delta
        bool,  # one_sided
        float | None,  # threshold or None for all thresholds
    ],
    tuple[float, float],
]


def _one_run_p_value(
    m: int, n_guess: int, n_correct: int, eps: float, delta: float
) -> float:
  """Computes p-value for one-shot audit.

  See https://arxiv.org/pdf/2305.08846 for details.

  Args:
    m: The number of canaries.
    n_guess: The number of guesses.
    n_correct: The number of correct guesses.
    eps: The epsilon value to compute the p-value for.
    delta: The delta value to compute the p-value for.

  Returns:
    The p-value for the one-shot audit.
  """
  q = _logistic(eps)
  beta = _binom.sf(n_correct - 1, n_guess, q)
  if delta == 0:
    return beta

  i_vals = np.arange(1, n_correct + 1)
  cum_sums = _binom.sf(n_correct - i_vals - 1, n_guess, q) - beta
  alpha = np.max(cum_sums / i_vals, initial=0)

  return min(beta + alpha * delta * 2 * m, 1)


def _epsilon_one_run(
    eps_lo: float,
    m: int,
    n_guess: int,
    n_correct: int,
    significance: float,
    delta: float,
) -> float:
  """Computes epsilon bound for one-shot audit for a given n_guess/n_correct.

  See https://arxiv.org/pdf/2305.08846 for details.

  Args:
    eps_lo: Smallest epsilon value to consider.
    m: The number of canaries.
    n_guess: The number of guesses.
    n_correct: The number of correct guesses.
    significance: The significance level.
    delta: DP delta parameter.

  Returns:
    The highest epsilon for which the (epsilon, delta) claim is falsified, or
    eps_lo if the claim is not falsified for any eps >= eps_lo.
  """
  if _one_run_p_value(m, n_guess, n_correct, eps_lo, 0) > significance:
    # Cheap fail fast: If even with delta=0 we can't reject the null at eps_lo,
    # we certainly can't reject it with delta > 0.
    return eps_lo

  def audit_objective(eps):
    # Returns a positive value if the (epsilon, delta) claim is falsified.
    return significance - _one_run_p_value(m, n_guess, n_correct, eps, delta)

  if audit_objective(eps_lo) <= 0:
    return eps_lo

  eps_hi = max(1.0, 2 * eps_lo)
  while audit_objective(eps_hi) > 0:
    eps_lo, eps_hi = eps_hi, eps_hi * 2

  return scipy.optimize.brentq(audit_objective, eps_lo, eps_hi, xtol=1e-6)


@functools.lru_cache(maxsize=1000)
def _gaussian_dp_blow_up_inverse(
    eps: float, delta: float
) -> Callable[[float], float]:
  """Computes the inverse of the Gaussian differential privacy blow-up."""
  sigma = dp_accounting.get_sigma_gaussian(eps, delta)
  if sigma == 0:
    return lambda x: float(x == 1.0)
  else:
    return lambda x: _norm.cdf(_norm.ppf(x) - 1 / sigma)


def _epsilon_one_run_fdp(
    eps_lo: float,
    m: int,
    n_guess: int,
    n_correct: int,
    significance: float,
    delta: float,
) -> float:
  """Computes the epsilon bound for a given number of true and false positives.

  See https://arxiv.org/pdf/2410.22235 for details.

  Args:
    eps_lo: Smallest epsilon value to consider.
    m: The number of canaries.
    n_guess: The number of guesses.
    n_correct: The number of correct guesses.
    significance: The significance level.
    delta: DP delta parameter.

  Returns:
    The highest epsilon for which the (epsilon, delta) claim is falsified, or
    eps_lo if the claim is not falsified for any eps >= eps_lo.
  """

  def audit_objective(eps):
    # Returns a positive value if attack scores falsify the f-DP claim.
    # See 'audit_rh_with_cap' in https://arxiv.org/pdf/2410.22235.
    blow_up_inv_fn = _gaussian_dp_blow_up_inverse(eps, delta)

    r = significance * n_correct / m
    h = significance * (n_guess - n_correct) / m

    for i in range(n_correct - 1, -1, -1):
      h_new = max(h, blow_up_inv_fn(r))
      if h == h_new:
        # If h is unchanged, then neither h nor r can ever change afterward.
        break
      r_new = min(r + (i / (n_guess - i)) * (h_new - h), 1.0)
      h, r = h_new, r_new

    return r + h - n_guess / m

  if audit_objective(eps_lo) <= 0:
    return eps_lo

  eps_hi = max(1.0, 2 * eps_lo)
  while audit_objective(eps_hi) > 0:
    eps_lo, eps_hi = eps_hi, eps_hi * 2

  return scipy.optimize.brentq(audit_objective, eps_lo, eps_hi, xtol=1e-6)



[docs]
@dataclasses.dataclass(frozen=True)
class BootstrapParams:
  """Parameters for bootstrapping.

  Several methods in this library that compute a privacy metric (e.g., AUROC,
  epsilon) optionally perform bootstrapping to estimate quantiles of the metric.
  The held-in and held-out canary scores are resampled with replacement, and
  metric is estimated from the resampled data. Repeating this sampling procedure
  many times produces an empirical distribution of the metric, from which we can
  estimate quantiles. This class configures the number of resamples, the
  quantiles to report, and the seed for the random number generator.

  Optionally performs bias correction and acceleration for the bootstrap
  [B. Efron, "Bootstrap Confidence Intervals", Statist. Sci. 2(3), 189-228
  (1987)]. Be aware that acceleration requires jackknife resampling, which
  computes the function n times for n total canaries.

  Attributes:
    num_samples: The number of times to resample the canary scores.
    quantiles: An array-like of quantiles to report. E.g., for a 95% confidence
      interval, use (0.025, 0.975).
    bias_correction: Whether to use bias correction.
    acceleration: Whether to use acceleration.
    seed: The seed for the random number generator.
  """

  num_samples: int = 1000
  quantiles: np.typing.ArrayLike = (0.025, 0.975)
  bias_correction: bool = True
  acceleration: bool = False
  seed: int | None = None

  def __post_init__(self):
    quantile_arr = np.asarray(self.quantiles)
    if quantile_arr.size == 0:
      raise ValueError('quantiles cannot be empty.')
    if not np.all((0 < quantile_arr) & (quantile_arr < 1)):
      raise ValueError(f'quantiles must be in (0, 1), got {self.quantiles}.')
    if self.acceleration and not self.bias_correction:
      raise ValueError('Cannot use acceleration without bias correction.')


[docs]
  @classmethod
  def confidence_interval(
      cls,
      num_samples: int = 1000,
      confidence: float = 0.95,
      seed: int | None = None,
  ) -> BootstrapParams:
    """Creates a BootstrapParams object for a confidence interval.

    Args:
      num_samples: The number of times to resample the canary scores.
      confidence: The desired confidence level.
      seed: The seed for the random number generator.

    Returns:
      A BootstrapParams object for computing a confidence interval.
    """
    if not 0 < confidence < 1:
      raise ValueError(f'confidence must be in (0, 1), got {confidence}.')
    significance = 1 - confidence
    quantiles = (significance / 2, 1 - significance / 2)
    return cls(num_samples=num_samples, quantiles=quantiles, seed=seed)




def _log_sub(x, y):
  # Stable computation of log(exp(x) - exp(y)).
  if np.any(y > x):
    raise ValueError(f'y must be less than or equal to x, got y={y} and x={x}.')
  with np.errstate(divide='ignore'):  # Okay to return -np.inf if x == y.
    return x + np.log1p(-np.exp(y - x))


def _clopper_pearson_upper(
    k: int | np.ndarray, n: int, significance: float
) -> np.ndarray:
  """Computes Clopper-Pearson one-sided upper binomial confidence interval.

  Args:
    k: The number of successes.
    n: The number of trials.
    significance: Allowed probability of failure (one minus confidence).

  Returns:
    A value p such that the probability of observing k or fewer successes out of
    n Bernoulli(p) trials is approximately significance.
  """
  return np.where(
      k < n, scipy.stats.beta.ppf(1 - significance, k + 1, n - k), 1.0
  )


def _pareto_frontier(points: np.ndarray) -> np.ndarray:
  """Computes the indices of the pareto frontier of a piecewise linear function.

  Given a piecewise linear function defined by a sequence of points, computes
  the set of points that are not weakly linearly dominated by any pair of outer
  points. That is, given a list of points (sorted by x coordinate), retain only
  points (x_i, y_i) for which there do not exist j < i and k > i and number a
  with 0 <= a <= 1 such that x_i = (1-a)x_j + ax_k and y_i <= (1-a)y_j + ay_k.

  The algorithm iteratively discards points that are dominated by their
  neighbors. Each iteration uses fast vectorized operations. It could be O(N^2)
  in pathological cases, but in practice it is very fast. If it needs to be
  optimized, Gemini suggests using a block-based divide and conquer approach:
  run the algorithm on blocks of about 1000 points and then run it again on the
  merged results.

  Args:
    points: An array of shape (N, 2) with N >= 2 containing the vertices
      defining the segments of the piecewise linear function, sorted by x
      coordinate.

  Returns:
    A numpy array of length M with 2 <= M <= N, containing the indices of the
    vertices on the pareto frontier.
  """
  if points.ndim != 2 or points.shape[0] < 2 or points.shape[1] != 2:
    raise ValueError(f'Expected at least two 2D points, got {points.shape}.')

  if not np.all(points[:-1, 0] <= points[1:, 0]):
    raise ValueError('Expected points to be sorted by x-coordinate.')

  indices = np.arange(points.shape[0])
  while True:
    if len(indices) <= 2:
      break

    diff = np.diff(points[indices], axis=0)
    cross_product = diff[:-1, 1] * diff[1:, 0] - diff[1:, 1] * diff[:-1, 0]
    dominated_mask = cross_product <= 0

    # If no points are dominated in this pass, the hull is stable.
    if not np.any(dominated_mask):
      break

    keep_mask = np.r_[True, ~dominated_mask, True]
    indices = indices[keep_mask]

  return indices


def _get_tn_fn_counts(
    in_canary_scores: np.typing.ArrayLike,
    out_canary_scores: np.typing.ArrayLike,
) -> tuple[np.ndarray, np.ndarray, np.ndarray]:
  """Computes true negative and false negative counts at each threshold.

  Args:
    in_canary_scores: Attack scores of held-in canaries.
    out_canary_scores: Attack scores of held-out canaries.

  Returns:
    A tuple (thresholds, true negative counts, false negative counts) for each
    threshold.
  """
  in_scores = np.asarray(in_canary_scores)
  out_scores = np.asarray(out_canary_scores)

  if in_scores.size == 0 and out_scores.size == 0:
    raise ValueError(
        'At least one of the canary score arrays should be non-empty.'
    )

  # Get the unique, sorted thresholds from both arrays.
  unique_scores_sorted = np.union1d(in_scores, out_scores)

  # Append np.inf to ensure thresholds > max_score are considered.
  thresholds = np.concatenate((unique_scores_sorted, [np.inf]))

  # Sort the score arrays for faster search.
  in_sorted = np.sort(in_scores)
  out_sorted = np.sort(out_scores)

  # For each threshold, find the index where it would be inserted to maintain
  # order. With side='left', this index is the first location with a value >=
  # threshold.
  fn_counts = np.searchsorted(in_sorted, thresholds, side='left')
  tn_counts = np.searchsorted(out_sorted, thresholds, side='left')

  counts = np.stack([fn_counts, tn_counts], axis=1)
  indices = _pareto_frontier(counts)
  return thresholds[indices], tn_counts[indices], fn_counts[indices]


def _tpr_at_given_fpr(
    fpr: np.typing.ArrayLike,
    tp_counts: np.ndarray,
    fp_counts: np.ndarray,
) -> np.ndarray | float:
  """Computes maximum TPR at a given FPR."""
  fpr = np.asarray(fpr)

  if not np.all((0 <= fpr) & (fpr <= 1)):
    raise ValueError(f'fpr must be in [0, 1], got {fpr}.')

  n_pos = tp_counts[-1]
  n_neg = fp_counts[-1]

  target_fp_count = n_neg * fpr

  # Find the index of the threshold where the false positive count is just
  # greater than the target_fp_count. In case fpr is 1.0, we can use the index
  # of the last threshold.
  threshold = np.minimum(
      np.searchsorted(fp_counts, target_fp_count, side='right'),
      np.size(fp_counts) - 1,
  )

  # Interpolate between the two thresholds.
  fp_left = fp_counts[threshold - 1]
  fp_right = fp_counts[threshold]
  q = (target_fp_count - fp_left) / (fp_right - fp_left)

  tp_left = tp_counts[threshold - 1]
  tp_right = tp_counts[threshold]
  return (tp_left + q * (tp_right - tp_left)) / n_pos


def _epsilon_raw_counts_helper(
    tp_counts: np.ndarray,
    fp_counts: np.ndarray,
    min_count: int,
    delta: float,
) -> float:
  """Estimates epsilon given true and false counts at each threshold."""
  n_pos = tp_counts[-1]
  n_neg = fp_counts[-1]

  if min_count >= n_neg:
    return 0.0

  min_fpr = min_count / n_neg
  tpr_at_min_fpr = _tpr_at_given_fpr(min_fpr, tp_counts, fp_counts)

  if delta == 0:
    return np.log(tpr_at_min_fpr / min_fpr)

  # Add the point (tpr_at_min_fpr, min_fpr). This point often corresponds to the
  # optimal epsilon, but it is not part of the truncated convex hull.
  if tpr_at_min_fpr > delta:
    initial_eps = max(0, np.log(tpr_at_min_fpr - delta) - np.log(min_fpr))
  else:
    initial_eps = 0

  tpr, fpr = tp_counts / n_pos, fp_counts / n_neg
  valid = (fp_counts >= min_count) & (tpr > delta)
  eps = np.log(tpr[valid] - delta) - np.log(fpr[valid])
  return np.max(eps, initial=initial_eps)


def _random_partition(
    scores: np.ndarray,
    rng: np.random.Generator,
    p: float,
) -> tuple[np.ndarray, np.ndarray]:
  """Randomly splits a score array into two parts."""
  if not 0 < p < 1:
    raise ValueError(f'p must be in (0, 1), got {p}.')

  perm = rng.permutation(len(scores))
  split_idx = int(len(scores) * p)
  return scores[perm[:split_idx]], scores[perm[split_idx:]]



[docs]
class CanaryScoreAuditor:
  """Class for auditing privacy based on attack scores.

  To use this library, create a CanaryScoreAuditor providing the attack scores
  of held-in and held-out canaries. Attack scores can be any value such that
  that held-in canaries are expected to have higher scores, for example the
  log-likelihood or the likelihood ratio to the pretrained model. Then the
  auditor can be used to compute privacy metrics, including various epsilon
  lower bounds from the literature, the maximum TPR at a given FPR, and the area
  under the receiver operating characteristic (ROC) curve.

  Example Usage:
    >>> out_canary_scores = np.arange(100)
    >>> in_canary_scores = np.arange(100) + 9.5
    >>> auditor = CanaryScoreAuditor(in_canary_scores, out_canary_scores)
    >>> tpr_at_low_fpr = auditor.tpr_at_given_fpr(0.01)
    >>> float(round(tpr_at_low_fpr, 2))
    0.11
    >>> auroc = auditor.attack_auroc()
    >>> float(auroc)
    0.595
  """

  def __init__(
      self,
      in_canary_scores: np.typing.ArrayLike,
      out_canary_scores: np.typing.ArrayLike,
  ):
    """Initializes the CanaryScoreAuditor.

    IMPORTANT: We consider decision rules that classify examples that score
    *higher* than the threshold as "in". If held-in canaries are expected to
    have lower scores than held-out canaries, then negate the score before
    constructing the auditor.

    Args:
      in_canary_scores: Attack scores of held-in canaries.
      out_canary_scores: Attack scores of held-out canaries.
    """
    self._in_canary_scores = np.asarray(in_canary_scores)
    self._out_canary_scores = np.asarray(out_canary_scores)
    if self._in_canary_scores.size == 0:
      raise ValueError('in_canary_scores must be non-empty.')
    if self._in_canary_scores.ndim != 1:
      raise ValueError(
          'in_canary_scores must be 1-dimensional, got'
          f' {self._in_canary_scores.ndim}.'
      )
    if self._out_canary_scores.size == 0:
      raise ValueError('out_canary_scores must be non-empty.')
    if self._out_canary_scores.ndim != 1:
      raise ValueError(
          'out_canary_scores must be 1-dimensional, got'
          f' {self._out_canary_scores.ndim}.'
      )

    self._thresholds, self._tn_counts, self._fn_counts = _get_tn_fn_counts(
        self._in_canary_scores, self._out_canary_scores
    )

  def _get_tp_counts(self) -> np.ndarray:
    """Returns the true positive counts at each threshold."""
    # Reverse the order so they are increasing.
    return (self._fn_counts[-1] - self._fn_counts)[::-1]

  def _get_fp_counts(self) -> np.ndarray:
    """Returns the false positive counts at each threshold."""
    # Reverse the order so they are increasing.
    return (self._tn_counts[-1] - self._tn_counts)[::-1]

  def _bootstrap(
      self,
      fn: Callable[['CanaryScoreAuditor'], float],
      params: BootstrapParams,
  ) -> np.ndarray:
    """Computes bootstrapped quantiles for a function, optionally applying BCa.

    Args:
      fn: A function of a CanaryScoreAuditor returning a scalar.
      params: The parameters for the bootstrap.

    Returns:
      An array of bootstrapped quantiles.
    """
    rng = np.random.default_rng(seed=params.seed)
    seeds = rng.integers(np.iinfo(np.int64).max, size=params.num_samples)

    # Alias for readability.
    in_scores = self._in_canary_scores
    out_scores = self._out_canary_scores

    def get_value(seed):
      inner_rng = np.random.default_rng(seed=seed)
      in_samples = inner_rng.choice(in_scores, size=in_scores.size)
      out_samples = inner_rng.choice(out_scores, size=out_scores.size)
      return fn(CanaryScoreAuditor(in_samples, out_samples))

    with futures.ThreadPoolExecutor() as pool:
      values = list(pool.map(get_value, seeds))

    if not params.bias_correction:
      return np.quantile(values, params.quantiles, method='linear')

    full_estimate = fn(self)
    # Use Laplace smoothing here to avoid computing ppf of 0 or 1.
    prop_less = (np.sum(values < full_estimate) + 1) / (params.num_samples + 2)
    z0 = _norm.ppf(prop_less)

    if params.acceleration:

      def eval_delete_in(i):
        return fn(CanaryScoreAuditor(np.delete(in_scores, i), out_scores))

      def eval_delete_out(i):
        return fn(CanaryScoreAuditor(in_scores, np.delete(out_scores, i)))

      with futures.ThreadPoolExecutor() as pool:
        jackknife_estimates = list(
            pool.map(eval_delete_in, range(len(in_scores)))
        )
        jackknife_estimates.extend(
            pool.map(eval_delete_out, range(len(out_scores)))
        )
      jackknife_mean = np.mean(jackknife_estimates)
      num = np.sum((jackknife_mean - jackknife_estimates) ** 3)
      denom = 6 * (np.sum((jackknife_mean - jackknife_estimates) ** 2)) ** 1.5
      accel = 0 if denom == 0 else num / denom
    else:
      accel = 0

    z_quantiles = _norm.ppf(params.quantiles)
    num = z0 + z_quantiles
    denom = 1 - accel * num
    corrected_quantiles = _norm.cdf(z0 + num / denom)
    return np.quantile(values, corrected_quantiles, method='linear')

  def _epsilon_clopper_pearson_all_thresholds(
      self,
      significance: float,
      delta: float,
      one_sided: bool,
      threshold: float | None = None,
  ) -> tuple[float, float]:
    """Estimates epsilon with C-P bound at one or all thresholds."""
    if threshold is None:
      fn_counts = self._fn_counts
      tn_counts = self._tn_counts
    else:
      fn_counts = np.array([np.sum(self._in_canary_scores < threshold)])
      tn_counts = np.array([np.sum(self._out_canary_scores < threshold)])

    n_pos = self._fn_counts[-1]
    n_neg = self._tn_counts[-1]

    fnr_ubs = _clopper_pearson_upper(fn_counts, n_pos, significance / 2)
    fp_counts = n_neg - tn_counts
    fpr_ubs = _clopper_pearson_upper(fp_counts, n_neg, significance / 2)

    def eps_and_idx(fnr_ubs, fpr_ubs):
      tpr_lbs = 1 - fnr_ubs
      valid = np.flatnonzero(tpr_lbs > delta)
      if valid.size == 0:
        return 0.0, -1
      eps_vals = np.log(tpr_lbs[valid] - delta) - np.log(fpr_ubs[valid])
      subidx = np.argmax(eps_vals)
      return max(0.0, eps_vals[subidx]), valid[subidx]

    eps, idx = eps_and_idx(fnr_ubs, fpr_ubs)

    if not one_sided:
      # pylint: disable=arguments-out-of-order
      new_eps, new_idx = eps_and_idx(fpr_ubs, fnr_ubs)
      # pylint: enable=arguments-out-of-order
      if new_eps > eps:
        idx = new_idx
        eps = new_eps

    if threshold is None:
      threshold = self._thresholds[idx]

    return eps, threshold


[docs]
  def epsilon_clopper_pearson(
      self,
      significance: float,
      delta: float = 0,
      one_sided: bool = True,
      *,
      threshold_strategy: ThresholdStrategy = Bonferroni(),
  ) -> float:
    """Finds epsilon lower bound from scores of held-in/held-out canaries.

    Described in https://arxiv.org/pdf/2101.04535.

    Args:
      significance: Allowed probability of failure (one minus confidence).
      delta: Approximate DP delta.
      one_sided: Whether to use only TPR/FPR (vs. max of TPR/FPR and TNR/FNR).
      threshold_strategy: How to select the threshold to use for the epsilon
        estimate.

    Returns:
      Optimal epsilon lower bound.
    """
    if not 0 < significance < 0.5:
      raise ValueError(f'significance must be in (0, 0.5), got {significance}.')
    if not 0 <= delta <= 1:
      raise ValueError(f'delta must be in [0, 1], got {delta}.')

    return self._audit_with_threshold_strategy(
        threshold_strategy,
        CanaryScoreAuditor._epsilon_clopper_pearson_all_thresholds,
        significance,
        delta,
        one_sided,
    )



[docs]
  def epsilon_raw_counts(
      self,
      min_count: int = 50,
      delta: float = 0,
      one_sided: bool = True,
      *,
      bootstrap_params: BootstrapParams | None = None,
  ) -> float | np.ndarray:
    """Estimates epsilon from raw count statistics of seen/unseen canaries.

    `min_count` is the minimum number of FP (or FN, if not one_sided) required
    to consider a threshold. If `min_count` is too high relative to the number
    of canaries, the estimate will be biased towards zero. If it is too low, the
    estimate will have high variance.

    Args:
      min_count: Only consider thresholds with this many TP/FP (TN/FN).
      delta: Approximate DP delta.
      one_sided: Whether to use only TPR/FPR (vs. max of TPR/FPR and TNR/FNR).
      bootstrap_params: If provided, compute and return bootstrapped quantiles
        of the estimate. Note that this should not be interpreted as a formal
        confidence interval on the true epsilon, merely a confidence interval of
        the estimate.

    Returns:
      Epsilon estimate ln(TPR/FPR).
    """
    if min_count < 1:
      raise ValueError(f'min_count must be positive, got {min_count}.')
    if not 0 <= delta <= 1:
      raise ValueError(f'delta must be in [0, 1], got {delta}.')

    if bootstrap_params is not None:
      return self._bootstrap(
          lambda a: a.epsilon_raw_counts(min_count, delta, one_sided),
          bootstrap_params,
      )

    eps = _epsilon_raw_counts_helper(
        self._get_tp_counts(), self._get_fp_counts(), min_count, delta
    )

    if not one_sided:
      # For a two-sided estimate, also consider the reverse decision rule, where
      # examples with scores *lower* than the threshold are classified as "in".

      eps_reverse = _epsilon_raw_counts_helper(
          self._tn_counts, self._fn_counts, min_count, delta
      )
      eps = max(eps, eps_reverse)

    return max(eps, 0)



[docs]
  def tpr_at_given_fpr(
      self,
      fpr: np.typing.ArrayLike,
      *,
      bootstrap_params: BootstrapParams | None = None,
  ) -> np.ndarray | float:
    """Computes maximum TPR at a given FPR.

    Args:
      fpr: The desired false positive rate. May be a scalar, or an array of
        independent FPR values, in which case an array of the same shape is
        returned with the TPR at each FPR.
      bootstrap_params: If provided, compute and return bootstrapped quantiles
        of the TPR. `fpr` must be a scalar in this case.

    Returns:
      The maximum true positive rate at the given false positive rate,
      allowing classifiers that randomize between two thresholds.
    """
    fpr = np.asarray(fpr)
    if not np.all((0 <= fpr) & (fpr <= 1)):
      raise ValueError(f'fpr must be in [0, 1], got {fpr}.')

    if bootstrap_params is not None:
      if fpr.ndim > 0:
        raise ValueError(
            f'fpr must be a scalar for bootstrap, got shape {fpr.shape}.'
        )
      return self._bootstrap(
          lambda auditor: auditor.tpr_at_given_fpr(fpr),
          bootstrap_params,
      )

    return _tpr_at_given_fpr(fpr, self._get_tp_counts(), self._get_fp_counts())



[docs]
  def attack_auroc(
      self,
      *,
      bootstrap_params: BootstrapParams | None = None,
  ) -> float | np.ndarray:
    """Computes the area under the ROC curve from the attack scores.

    Args:
      bootstrap_params: If provided, compute and return bootstrapped quantiles
        of the AUROC.

    Returns:
      The area under the ROC curve from the attack scores, allowing classifiers
      that randomize between two thresholds.
    """
    if bootstrap_params is not None:
      return self._bootstrap(CanaryScoreAuditor.attack_auroc, bootstrap_params)

    # Calculate AUROC using the trapezoidal rule. Since we have TN counts and FN
    # counts handy, we're actually computing the area under the curve of the TNR
    # as a function of FNR, which is equivalent.
    tnr = self._tn_counts / self._tn_counts[-1]
    fnr = self._fn_counts / self._fn_counts[-1]
    return 0.5 * np.dot(tnr[:-1] + tnr[1:], fnr[1:] - fnr[:-1])



[docs]
  def max_accuracy(
      self,
      *,
      prevalence: float | None = None,
      significance: float | None = None,
  ) -> float:
    """Computes the maximum accuracy achievable by a threshold-based classifier.

    Args:
      prevalence: The prevalence of the positive class. If not provided, the
        prevalence is taken to be the proportion of in-canary examples to the
        total.
      significance: If provided, compute and return the high probability upper
        bound on the maximum accuracy with this allowable probability of failure
        (one minus confidence).

    Returns:
      The maximum accuracy.
    """
    n_pos = self._fn_counts[-1]
    n_neg = self._tn_counts[-1]

    if prevalence is None:
      prevalence = n_pos / (n_pos + n_neg)

    tp_counts = n_pos - self._fn_counts
    if significance is None:
      tnr_ubs = self._tn_counts / n_neg
      tpr_ubs = tp_counts / n_pos
    else:
      tnr_ubs = _clopper_pearson_upper(self._tn_counts, n_neg, significance / 2)
      tpr_ubs = _clopper_pearson_upper(tp_counts, n_pos, significance / 2)

    return np.max(tpr_ubs * prevalence + tnr_ubs * (1 - prevalence))



[docs]
  def epsilon_from_gdp(
      self,
      significance: float,
      delta: float,
      eps_tol: float = 1e-6,
  ) -> float:
    """Calculates the an estimate for epsilon with GDP.

    This is the method used in https://arxiv.org/pdf/2302.07956 and described in
    https://arxiv.org/pdf/2406.04827.

    Args:
      significance: Allowed probability of failure (one minus confidence).
      delta: Approximate DP delta. Must be in (0, 1].
      eps_tol: The tolerance for epsilon (the privacy parameter). Defaults to
        1e-6.

    Returns:
        The estimated epsilon.
    """
    if not 0 < significance < 0.5:
      raise ValueError(f'significance must be in (0, 0.5), got {significance}.')
    if not 0 < delta <= 1:
      raise ValueError(f'delta must be in (0, 1], got {delta}.')
    if eps_tol <= 0:
      raise ValueError(f'eps_tol must be positive, got {eps_tol}.')

    n_pos = self._fn_counts[-1]
    n_neg = self._tn_counts[-1]

    n = len(self._fn_counts)

    # Apply Bonferroni correction over 2 * n hypotheses.
    fnr_ubs = _clopper_pearson_upper(
        self._fn_counts, n_pos, significance / (2 * n)
    )
    fp_counts = n_neg - self._tn_counts
    fpr_ubs = _clopper_pearson_upper(fp_counts, n_neg, significance / (2 * n))

    bounds = np.stack([fpr_ubs, fnr_ubs], axis=1)

    # Filter any thresholds where TNR or TPR is too small.
    bounds = bounds[np.max(bounds, axis=1) < 1 - delta]
    if not bounds.size:
      return 0

    # Eq. 6 in https://arxiv.org/abs/1905.02383. If FPR + FNR is too large,
    # the bound still holds in reverse (by switching D and D'), which has the
    # effect of making mu from Eq. 6 negative. Hence we look for the maximum
    # absolute value of mu.
    max_mu = np.max(np.abs(_norm.isf(bounds[:, 0]) - _norm.ppf(bounds[:, 1])))
    if max_mu == 0:
      return 0

    # Conversion of GDP to (eps, delta)-DP. Corollary 2.13 in
    # https://arxiv.org/abs/1905.02383, in the log domain for stability.
    def delta_gap(eps):
      return _log_sub(
          _norm.logcdf(-(eps / max_mu) + max_mu / 2),
          eps + _norm.logcdf(-(eps / max_mu) - max_mu / 2),
      ) - np.log(delta)

    eps_lb, eps_ub = 0, 100
    if delta_gap(eps_lb) <= 0:
      return eps_lb
    if delta_gap(eps_ub) >= 0:
      return eps_ub

    return scipy.optimize.brentq(delta_gap, eps_lb, eps_ub, xtol=eps_tol)


  def _epsilon_one_run_all_thresholds(
      self,
      significance: float,
      delta: float,
      one_sided: bool,
      threshold: float | None = None,
      use_fdp: bool = False,
  ) -> tuple[float, float]:
    """Computes the epsilon bound at one or all thresholds."""
    if not one_sided:
      raise ValueError('one_sided must be True.')

    if use_fdp:
      audit_fn = _epsilon_one_run_fdp
    else:
      audit_fn = _epsilon_one_run

    n_pos = self._fn_counts[-1]
    n_neg = self._tn_counts[-1]
    m = n_pos + n_neg

    if threshold is not None:
      tp = np.sum(self._in_canary_scores >= threshold)
      fp = np.sum(self._out_canary_scores >= threshold)
      return audit_fn(0, m, tp + fp, tp, significance, delta), threshold

    tp_counts = n_pos - self._fn_counts
    fp_counts = n_neg - self._tn_counts

    # As a heuristic, search in order of decreasing precision bound, so later
    # thresholds are likely to be ruled out without a full search over eps.
    total_counts = tp_counts + fp_counts
    prec_lbs = 1.0 - _clopper_pearson_upper(
        fp_counts, total_counts, significance
    )
    sorted_indices = np.argsort(-prec_lbs)

    best_eps = 0
    best_idx = -1
    best_q = 0.5

    for idx in sorted_indices:
      n_guess = total_counts[idx]
      n_correct = tp_counts[idx]

      if n_guess == 0 or n_correct / n_guess <= best_q:
        # Raw precision at this threshold is already less than the best so far.
        continue

      new_eps = audit_fn(best_eps, m, n_guess, n_correct, significance, delta)
      if new_eps > best_eps:
        best_eps = new_eps
        best_idx = idx
        best_q = _logistic(best_eps)

    if threshold is None:
      threshold = self._thresholds[best_idx]

    return best_eps, threshold


[docs]
  def epsilon_one_run(
      self,
      significance: float,
      delta: float,
      one_sided: bool = True,
      *,
      threshold_strategy: ThresholdStrategy = Bonferroni(),
  ) -> float:
    r"""Computes lower bound on epsilon for a single round of auditing.

    This is an implementation of the method from Steinke et al. 2024, "Privacy
    Auditing in One (1) Training Run": https://arxiv.org/abs/2305.08846.

    Currently only one-sided hypotheses are supported ($k_- = 0$).

    Args:
      significance: Allowed probability of failure (one minus confidence).
      delta: Approximate DP delta.
      one_sided: Whether to consider only hypotheses with ($k_- = 0$). Must be
        True.
      threshold_strategy: How to select the threshold to use for the epsilon
        estimate.

    Returns:
      The estimated epsilon lower bound.
    """
    if not 0 < significance < 1.0:
      raise ValueError(f'significance must be in (0, 1.0), got {significance}.')
    if not 0 < delta <= 1:
      raise ValueError(f'delta must be in (0, 1], got {delta}.')
    if not one_sided:
      raise ValueError('one_sided must be True.')

    return self._audit_with_threshold_strategy(
        threshold_strategy,
        CanaryScoreAuditor._epsilon_one_run_all_thresholds,
        significance,
        delta,
        one_sided,
    )



[docs]
  def epsilon_one_run_fdp(
      self,
      significance: float,
      delta: float,
      one_sided: bool = True,
      *,
      threshold_strategy: ThresholdStrategy = Bonferroni(),
  ) -> float:
    """Computes lower bound on epsilon for a single round of auditing.

    This is an implementation of the method from Mahloujifar et al. 2024,
    "Auditing f-Differential Privacy in One Run":
    https://arxiv.org/pdf/2410.22235.

    Currently only one-sided hypotheses are supported ($k_- = 0$).

    Args:
      significance: Allowed probability of failure (one minus confidence).
      delta: Approximate DP delta.
      one_sided: Whether to consider only hypotheses with ($k_- = 0$). Must be
        True.
      threshold_strategy: How to select the threshold to use for the epsilon
        estimate.

    Returns:
      The estimated epsilon lower bound.
    """
    if not 0 < significance < 1.0:
      raise ValueError(f'significance must be in (0, 1.0), got {significance}.')
    if not 0 < delta <= 1:
      raise ValueError(f'delta must be in (0, 1], got {delta}.')
    if not one_sided:
      raise ValueError('one_sided must be True.')

    return self._audit_with_threshold_strategy(
        threshold_strategy,
        functools.partial(
            CanaryScoreAuditor._epsilon_one_run_all_thresholds,
            use_fdp=True,
        ),
        significance,
        delta,
        one_sided,
    )


  def _audit_with_threshold_strategy(
      self,
      threshold_strategy: ThresholdStrategy,
      audit_all_thresholds_method: AuditAllThresholdsMethod,
      significance: float,
      delta: float,
      one_sided: bool = True,
  ) -> float:
    """Computes the epsilon bound for a given threshold strategy."""

    def single_split(frac: float, alpha: float, seed: int | None) -> float:
      rng = np.random.default_rng(seed)
      in_scores_1, in_scores_2 = _random_partition(
          self._in_canary_scores, rng, frac
      )
      out_scores_1, out_scores_2 = _random_partition(
          self._out_canary_scores, rng, frac
      )
      auditor_1 = CanaryScoreAuditor(in_scores_1, out_scores_1)
      _, threshold = audit_all_thresholds_method(
          auditor_1, alpha, delta, one_sided, None
      )
      auditor_2 = CanaryScoreAuditor(in_scores_2, out_scores_2)
      eps, _ = audit_all_thresholds_method(
          auditor_2, alpha, delta, one_sided, threshold
      )
      return eps

    match threshold_strategy:
      case Explicit(threshold):
        eps, _ = audit_all_thresholds_method(
            self, significance, delta, one_sided, threshold
        )
      case Split(threshold_estimation_frac=frac, seed=seed):
        eps = single_split(frac, significance, seed)
      case MultiSplit(
          num_samples=num_samples,
          threshold_estimation_frac=frac,
          seed=seed,
      ):
        rng = np.random.default_rng(seed=seed)
        seeds = rng.integers(np.iinfo(np.int64).max, size=num_samples)
        with futures.ThreadPoolExecutor() as pool:
          values = list(
              pool.map(
                  functools.partial(single_split, frac, significance / 2),
                  seeds,
              )
          )
        eps = np.median(values)
      case Bonferroni():
        significance_bonferroni = significance / len(self._fn_counts)
        eps, _ = audit_all_thresholds_method(
            self, significance_bonferroni, delta, one_sided, None
        )
      case _:
        raise ValueError(
            f'Unsupported threshold strategy: {threshold_strategy}.'
        )

    return eps