Source code for jax_privacy.accounting.calibrate

# coding=utf-8
# Copyright 2025 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Calibrating DP hyper-parameters using the RDP accountant."""

from collections.abc import Callable, Sequence
import math

from jax_privacy.accounting import analysis
import numpy as np
import scipy.optimize


def _solve_calibration(
    fn: Callable[[float], float], x_min: float, x_max: float, tol: float
) -> float:
  """Find an x in [x_min, x_max] that minimizes fn(x) using scipy.optimize."""
  opt_result = scipy.optimize.minimize_scalar(
      fn,
      bounds=(x_min, x_max),
      method='bounded',
      options={'xatol': tol},
  )
  assert opt_result.success

  return float(opt_result.x)


def calibrate_num_updates(
    *,
    target_epsilon: float,
    accountant: analysis.DpTrainingAccountant,
    noise_multipliers: float | Sequence[tuple[int, float]],
    batch_sizes: int | Sequence[tuple[int, int]],
    num_samples: int,
    target_delta: float,
    examples_per_user: int | None = None,
    cycle_length: int | None = None,
    truncated_batch_size: int | None = None,
    initial_max_updates: int = 4,
    initial_min_updates: int = 1,
    tol: float = 0.1,
) -> int:
  """Computes the number of updates to achieve `target_epsilon`.

  The upper bound of the search bracket starts at `initial_max_updates` and is
  doubled until the epsilon at that bound is at least `target_epsilon`. The
  absolute gap between epsilon and `target_epsilon` is then minimized over the
  bracket, and the minimizer is rounded down to an integer.

  Args:
    target_epsilon: The desired final epsilon.
    accountant: Method of computing the privacy guarantee.
    noise_multipliers: Noise multiplier. Float or list of pairs (t: int, nm:
      float) if the noise multiplier changes across updates. 't' indicates the
      update where noise_multiplier is set to 'nm'.
    batch_sizes: Batch size. Integer or list of pairs (t: int, bs: int) if the
      batch size changes across updates. 't' indicates the update where
      batch_size is set to 'bs'.
    num_samples: Number of training examples.
    target_delta: Desired delta for the returned epsilon.
    examples_per_user: If multiple examples per user are used, this is the
      maximum number any user contributes to the training set.
    cycle_length: If using cyclic Poisson sampling with BandMF, the length of
      the cycle.
    truncated_batch_size: If using truncated Poisson sampling, the maximum
      batch size to truncate to.
    initial_max_updates: An initial estimate of the number of updates.
    initial_min_updates: Minimum number of updates.
    tol: Tolerance of the optimizer for the calibration.

  Returns:
    Number of updates. When `cycle_length` is set and differs from 1, the
    result is rounded up to the nearest multiple of `cycle_length`.

  Raises:
    ValueError: If `accountant` cannot calibrate steps, if the epsilon at
      `initial_min_updates` already exceeds `target_epsilon`, or if the upper
      bound of the search is non-positive and therefore cannot be grown by
      doubling.
  """
  if not accountant.can_calibrate_steps():
    raise ValueError(f'`accountant`={type(accountant)} cannot calibrate steps.')

  def get_epsilon(num_updates: int) -> float:
    dp_params = analysis.DpParams(
        noise_multipliers=noise_multipliers,
        batch_size=batch_sizes,
        num_samples=num_samples,
        delta=target_delta,
        examples_per_user=examples_per_user,
        cycle_length=cycle_length,
        truncated_batch_size=truncated_batch_size,
    )
    return accountant.compute_epsilon(num_updates, dp_params)

  if get_epsilon(initial_min_updates) > target_epsilon:
    raise ValueError(
        'Epsilon at initial_min_updates is too large. '
        'Try increasing `target_epsilon`.'
    )

  max_steps = initial_max_updates
  min_steps = initial_min_updates

  # Grow the bracket geometrically until its upper end reaches the target.
  while get_epsilon(max_steps) < target_epsilon:
    if max_steps <= 0:
      # Doubling a non-positive bound never increases it, so the loop would
      # otherwise spin forever.
      raise ValueError(
          'Cannot bracket `target_epsilon`: the upper bound on the number of'
          f' updates must be positive, got {max_steps}. Try increasing'
          ' `initial_max_updates`.'
      )
    min_steps, max_steps = max_steps, 2 * max_steps

  def error_epsilon(s: float) -> float:
    # The optimizer proposes real-valued candidates; the accountant is
    # evaluated at the truncated integer number of updates.
    return np.abs(get_epsilon(int(s)) - target_epsilon)

  steps = math.floor(
      _solve_calibration(error_epsilon, min_steps, max_steps, tol)
  )

  if cycle_length is not None and cycle_length != 1:
    # For BandMF, rounding up to the nearest multiple of cycle length does not
    # affect the privacy analysis. We should report this rounded up value to
    # the user so they can get more training steps for the same epsilon.
    return math.ceil(steps / cycle_length) * cycle_length
  else:
    return steps
def calibrate_noise_multiplier(
    *,
    target_epsilon: float,
    accountant: analysis.DpTrainingAccountant,
    batch_sizes: int | Sequence[tuple[int, int]],
    num_updates: int,
    num_samples: int,
    target_delta: float,
    examples_per_user: int | None = None,
    cycle_length: int | None = None,
    truncated_batch_size: int | None = None,
    initial_max_noise: float = 1.0,
    initial_min_noise: float = 0.0,
    tol: float = 0.01,
) -> float:
  """Computes the noise multiplier to achieve `target_epsilon`.

  The upper bound of the search bracket starts at `initial_max_noise` and is
  doubled until the epsilon at that bound is at most `target_epsilon`. The
  absolute gap between epsilon and `target_epsilon` is then minimized over the
  bracket.

  Args:
    target_epsilon: The desired final epsilon.
    accountant: Method of computing the privacy guarantee.
    batch_sizes: Batch size. Integer or list of pairs (t: int, bs: int) if the
      batch size changes across steps. 't' indicates the step where batch_size
      is set to 'bs'.
    num_updates: Total number of iterations.
    num_samples: Number of training examples.
    target_delta: Desired delta for the returned epsilon.
    examples_per_user: If multiple examples per user are used, this is the
      maximum number any user contributes to the training set.
    cycle_length: If using cyclic Poisson sampling with BandMF, the length of
      the cycle.
    truncated_batch_size: If using truncated Poisson sampling, the maximum
      batch size to truncate to.
    initial_max_noise: An initial estimate of the noise multiplier.
    initial_min_noise: Minimum noise multiplier.
    tol: Tolerance of the optimizer for the calibration.

  Returns:
    Noise multiplier.

  Raises:
    ValueError: If `accountant` cannot calibrate noise multipliers, or if the
      upper bound of the search is non-positive and therefore cannot be grown
      by doubling.
  """
  if not accountant.can_calibrate_noise_multipliers():
    raise ValueError(
        f'`accountant`={type(accountant)} cannot calibrate noise multipliers.'
    )

  def get_epsilon(noise_multiplier: float) -> float:
    dp_params = analysis.DpParams(
        noise_multipliers=noise_multiplier,
        batch_size=batch_sizes,
        num_samples=num_samples,
        delta=target_delta,
        examples_per_user=examples_per_user,
        cycle_length=cycle_length,
        truncated_batch_size=truncated_batch_size,
    )
    return accountant.compute_epsilon(num_updates, dp_params)

  max_noise = initial_max_noise
  min_noise = initial_min_noise

  # Grow the bracket geometrically until its upper end meets the target.
  while get_epsilon(max_noise) > target_epsilon:
    if max_noise <= 0:
      # Doubling a non-positive bound never increases it, so the loop would
      # otherwise spin forever.
      raise ValueError(
          'Cannot bracket `target_epsilon`: the upper bound on the noise'
          f' multiplier must be positive, got {max_noise}. Try increasing'
          ' `initial_max_noise`.'
      )
    min_noise, max_noise = max_noise, 2 * max_noise

  def error_epsilon(s: float) -> float:
    return np.abs(get_epsilon(s) - target_epsilon)

  return _solve_calibration(error_epsilon, min_noise, max_noise, tol)
def calibrate_batch_size(
    *,
    target_epsilon: float,
    accountant: analysis.DpTrainingAccountant,
    noise_multipliers: float | Sequence[tuple[int, float]],
    num_updates: int,
    num_samples: int,
    target_delta: float,
    examples_per_user: int | None = None,
    cycle_length: int | None = None,
    truncated_batch_size: int | None = None,
    initial_max_batch_size: int = 8,
    initial_min_batch_size: int = 1,
    tol: float = 0.01,
) -> int:
  """Computes the batch size required to achieve `target_epsilon`.

  The upper bound of the search bracket starts at `initial_max_batch_size` and
  is doubled until the epsilon at that bound is at least `target_epsilon`. The
  absolute gap between epsilon and `target_epsilon` is then minimized over the
  bracket, and the minimizer is rounded down to an integer.

  Args:
    target_epsilon: The desired final epsilon.
    accountant: Method of computing the privacy guarantee.
    noise_multipliers: Noise multiplier. Float or list of pairs (t: int, nm:
      float) if the noise multiplier changes across steps. 't' indicates the
      step where noise_multiplier is set to 'nm'.
    num_updates: Total number of iterations.
    num_samples: Number of training examples.
    target_delta: Desired delta for the returned epsilon.
    examples_per_user: If multiple examples per user are used, this is the
      maximum number any user contributes to the training set.
    cycle_length: If using cyclic Poisson sampling with BandMF, the length of
      the cycle.
    truncated_batch_size: If using truncated Poisson sampling, the maximum
      batch size to truncate to.
    initial_max_batch_size: An initial estimate of the batch size.
    initial_min_batch_size: Minimum batch size.
    tol: Tolerance of the optimizer for the calibration.

  Returns:
    Batch size.

  Raises:
    ValueError: If `accountant` cannot calibrate the batch size, if the epsilon
      at `initial_min_batch_size` already exceeds `target_epsilon`, or if the
      upper bound of the search is non-positive and therefore cannot be grown
      by doubling.
  """
  if not accountant.can_calibrate_batch_size():
    raise ValueError(
        f'`accountant`={type(accountant)} cannot calibrate batch size.'
    )

  def get_epsilon(batch_size: int) -> float:
    dp_params = analysis.DpParams(
        noise_multipliers=noise_multipliers,
        batch_size=batch_size,
        num_samples=num_samples,
        delta=target_delta,
        examples_per_user=examples_per_user,
        cycle_length=cycle_length,
        truncated_batch_size=truncated_batch_size,
    )
    return accountant.compute_epsilon(num_updates, dp_params)

  max_batch_size = initial_max_batch_size
  min_batch_size = initial_min_batch_size

  if get_epsilon(min_batch_size) > target_epsilon:
    # Report the lower bound actually used; with the default of 1 the message
    # is unchanged.
    raise ValueError(
        f'Epsilon at batch size {min_batch_size} is too large. Try increasing'
        ' `target_epsilon`.'
    )

  # Grow the bracket geometrically until its upper end reaches the target.
  while get_epsilon(max_batch_size) < target_epsilon:
    if max_batch_size <= 0:
      # Doubling a non-positive bound never increases it, so the loop would
      # otherwise spin forever.
      raise ValueError(
          'Cannot bracket `target_epsilon`: the upper bound on the batch size'
          f' must be positive, got {max_batch_size}. Try increasing'
          ' `initial_max_batch_size`.'
      )
    min_batch_size, max_batch_size = max_batch_size, 2 * max_batch_size

  def error_epsilon(s: float) -> float:
    # The optimizer proposes real-valued candidates; the accountant is
    # evaluated at the truncated integer batch size.
    return np.abs(get_epsilon(int(s)) - target_epsilon)

  return math.floor(
      _solve_calibration(error_epsilon, min_batch_size, max_batch_size, tol)
  )