Source code for jax_privacy.accounting

# Copyright 2026 DeepMind Technologies Limited.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Functions for creating DpEvent objects for common JAX Privacy mechanisms.

Example Usage (Calculating Epsilon for DP-SGD):

  >>> import dp_accounting
  >>> event = dpsgd_event(noise_multiplier=3, iterations=512, sampling_prob=0.1)
  >>> accountant = dp_accounting.pld.PLDAccountant()
  >>> epsilon = accountant.compose(event).get_epsilon(target_delta=1e-6)
  >>> round(epsilon, 2)
  3.77

Example Usage (Calibrating Noise Multiplier for DP-SGD):

  >>> make_event = lambda sigma: dpsgd_event(sigma, 512, sampling_prob=0.1)
  >>> noise_multiplier = dp_accounting.calibrate_dp_mechanism(
  ...     dp_accounting.pld.PLDAccountant,
  ...     make_event,
  ...     target_epsilon=1.0,
  ...     target_delta=1e-6
  ... )
  >>> round(noise_multiplier, 2)
  9.66

Example Usage (Calibrating Number of Iterations for DP-SGD):

  >>> make_event = lambda t: dpsgd_event(3.0, t, sampling_prob=0.1)
  >>> dp_accounting.calibrate_dp_mechanism(
  ...     dp_accounting.pld.PLDAccountant,
  ...     make_event,
  ...     target_epsilon=2.0,
  ...     target_delta=1e-6,
  ...     discrete=True,
  ...     bracket_interval=dp_accounting.LowerEndpointAndGuess(1, 128)
  ... )
  155
"""

import math

import dp_accounting

from . import _validate


def _validate_poisson_args(
    noise_multiplier: float,
    iterations: int,
    sampling_prob: float,
) -> None:
  """Checks the arguments shared by the Poisson-sampled event builders.

  Any error raised by the `_validate` helpers propagates to the caller.

  Args:
    noise_multiplier: Noise multiplier of the Gaussian mechanism; checked with
      `_validate.non_negative`.
    iterations: Number of iterations the mechanism is run for; checked with
      `_validate.non_negative`.
    sampling_prob: Poisson sampling probability; checked with
      `_validate.in_range` against the bounds 0 and 1.
  """
  # Both non-negativity checks go through a single call, in this order.
  non_negative_args = dict(
      noise_multiplier=noise_multiplier,
      iterations=iterations,
  )
  _validate.non_negative(**non_negative_args)
  _validate.in_range(0, 1, sampling_prob=sampling_prob)


def _validate_fixed_args(
    noise_multiplier: float,
    iterations: int,
    dataset_size: int,
    batch_size: int,
    replace: bool,
) -> None:
  """Checks the arguments of a fixed-size-sampling DP event.

  All four numeric arguments are first passed to `_validate.non_negative`;
  the batch size is then checked against the dataset size according to the
  sampling scheme.

  Args:
    noise_multiplier: Noise multiplier of the Gaussian mechanism.
    iterations: Number of iterations the mechanism is run for.
    dataset_size: Number of examples in the dataset.
    batch_size: Batch size per iteration.
    replace: Whether sampling is done with replacement.

  Raises:
    ValueError: If `replace` is false and `batch_size` exceeds
      `dataset_size`, or if `replace` is true and a non-empty batch is
      requested from an empty dataset.
  """
  _validate.non_negative(
      noise_multiplier=noise_multiplier,
      iterations=iterations,
      dataset_size=dataset_size,
      batch_size=batch_size,
  )
  if replace:
    # With replacement the batch may exceed the dataset size, but nothing can
    # be drawn from an empty dataset.
    if dataset_size == 0 and batch_size > 0:
      raise ValueError(
          f'Expected {batch_size=} == 0 for {dataset_size=} with replace=True.'
      )
  elif batch_size > dataset_size:
    # Without replacement a batch can hold at most every example once.
    raise ValueError(
        f'Expected {batch_size=} <= {dataset_size=} for replace=False.'
    )


def _gaussian_event(
    noise_multiplier: float, use_zcdp: bool
) -> dp_accounting.DpEvent:
  """Builds the single-step noise event for the given noise multiplier.

  Args:
    noise_multiplier: The noise multiplier of the mechanism.
    use_zcdp: If true, describe the step as a `ZCDpEvent`; otherwise as a
      `GaussianDpEvent`.

  Returns:
    `GaussianDpEvent(noise_multiplier)` when `use_zcdp` is false. Otherwise
    `ZCDpEvent(rho)` with `rho = 0.5 / noise_multiplier**2`, or infinite `rho`
    when `noise_multiplier` is zero.
  """
  if not use_zcdp:
    return dp_accounting.GaussianDpEvent(noise_multiplier)

  if noise_multiplier == 0:
    # No noise at all: avoid the division by zero and use an unbounded rho.
    rho = math.inf
  else:
    rho = 0.5 / noise_multiplier**2
  return dp_accounting.ZCDpEvent(rho)



[docs]
def dpsgd_event(
    noise_multiplier: float,
    iterations: int,
    *,
    sampling_prob: float,
    use_zcdp: bool = False,
) -> dp_accounting.DpEvent:
  """Builds the DpEvent describing DP-SGD with Poisson sampling.

  The result is a self-composition, repeated `iterations` times, of a
  Poisson-sampled Gaussian (or zCDP) event. See this paper for more details:

  * [Deep Learning with Differential Privacy](https://arxiv.org/abs/1607.00133)

  Args:
    noise_multiplier: The noise multiplier of the mechanism.
    iterations: The number of iterations to run the mechanism for.
    sampling_prob: The Poisson sampling probability of the mechanism, i.e., the
      probability an example will be included in each batch.
    use_zcdp: Whether to use the generic dp_event.ZCDpEvent instead of the
      specific dp_event.GaussianDpEvent. For example, set this to True if
      using discrete Gaussian noise instead of continuous Gaussian noise.

  Returns:
    A DpEvent object.
  """
  _validate_poisson_args(noise_multiplier, iterations, sampling_prob)
  # One training step: the noise event wrapped in Poisson subsampling.
  per_step = dp_accounting.PoissonSampledDpEvent(
      sampling_prob,
      _gaussian_event(noise_multiplier, use_zcdp),
  )
  return dp_accounting.SelfComposedDpEvent(per_step, iterations)




[docs]
def fixed_dpsgd_event(
    noise_multiplier: float,
    iterations: int,
    *,
    dataset_size: int,
    batch_size: int,
    replace: bool = False,
    use_zcdp: bool = False,
) -> dp_accounting.DpEvent:
  """Builds the DpEvent describing DP-SGD with fixed-size batches.

  Each iteration draws a batch of exactly `batch_size` examples, with or
  without replacement, and applies a Gaussian mechanism to the aggregated
  gradients. The per-iteration event is self-composed `iterations` times.

  Note: The without-replacement event is compatible with the RDP accountant
  under the REPLACE_ONE neighboring relation. Sampled-with-replacement events
  may not be supported by all accountants.

  Args:
    noise_multiplier: The noise multiplier of the mechanism.
    iterations: The number of iterations to run the mechanism for.
    dataset_size: The number of examples in the dataset.
    batch_size: The fixed batch size per iteration.
    replace: Whether to sample with replacement.
    use_zcdp: Whether to use the generic dp_event.ZCDpEvent instead of the
      specific dp_event.GaussianDpEvent. For example, set this to True if
      using discrete Gaussian noise instead of continuous Gaussian noise.

  Returns:
    A DpEvent object.
  """
  _validate_fixed_args(
      noise_multiplier, iterations, dataset_size, batch_size, replace
  )
  noise_event = _gaussian_event(noise_multiplier, use_zcdp)
  # Both sampled-event types take (dataset size, batch size, inner event), so
  # only the class depends on the sampling scheme.
  sampled_event_cls = (
      dp_accounting.dp_event.SampledWithReplacementDpEvent
      if replace
      else dp_accounting.dp_event.SampledWithoutReplacementDpEvent
  )
  per_step = sampled_event_cls(dataset_size, batch_size, noise_event)
  return dp_accounting.SelfComposedDpEvent(per_step, iterations)




[docs]
def truncated_dpsgd_event(
    noise_multiplier: float,
    iterations: int,
    *,
    sampling_prob: float,
    num_examples: int,
    truncated_batch_size: int,
) -> dp_accounting.DpEvent:
  """Returns the DpEvent for truncated DP-SGD with the given training params.

  This mechanism is like DP-SGD, but batches larger than `truncated_batch_size`
  are truncated to size `truncated_batch_size`. See these references for more
  information about this mechanism:

  * [Scalable DP-SGD: Shuffling vs. Poisson
    Subsampling](https://arxiv.org/abs/2411.04205)
  * [Tighter Privacy Analysis for Truncated Poisson
    Sampling](https://arxiv.org/abs/2508.15089)

  Args:
    noise_multiplier: The noise multiplier of the mechanism.
    iterations: The number of iterations to run the mechanism for.
    sampling_prob: The Poisson sampling probability of the mechanism, i.e., the
      probability an example will be included in each batch before truncation.
    num_examples: The number of examples in the dataset. Must be non-negative.
    truncated_batch_size: The maximum batch size. Must be non-negative.

  Returns:
    A DpEvent object: a `TruncatedSubsampledGaussianDpEvent` self-composed
    `iterations` times.
  """
  _validate_poisson_args(noise_multiplier, iterations, sampling_prob)
  # Reject nonsensical sizes up front, as fixed_dpsgd_event does for its
  # dataset_size and batch_size, instead of handing them to the accountant.
  _validate.non_negative(
      num_examples=num_examples,
      truncated_batch_size=truncated_batch_size,
  )
  sampled_gaussian = dp_accounting.TruncatedSubsampledGaussianDpEvent(
      dataset_size=num_examples,
      sampling_probability=sampling_prob,
      truncated_batch_size=truncated_batch_size,
      noise_multiplier=noise_multiplier,
  )
  return dp_accounting.SelfComposedDpEvent(sampled_gaussian, iterations)




[docs]
def amplified_bandmf_event(
    noise_multiplier: float,
    iterations: int,
    *,
    num_bands: int,
    sampling_prob: float,
) -> dp_accounting.DpEvent:
  """Returns the DpEvent for DP-BandMF with the given training parameters.

  The examples will be split up into `num_bands` groups, and then minibatches
  will be formed by sampling within each group. The event is that of DP-SGD
  run for `ceil(iterations / num_bands)` iterations. See this paper for more
  details:

  * [(Amplified) Banded Matrix Factorization: A unified approach to private
    training](https://arxiv.org/abs/2306.08153)

  Args:
    noise_multiplier: The noise multiplier to use, assuming the strategy matrix
      has maximum L2 column norm of one.
    iterations: The number of iterations to run the mechanism for.
    num_bands: The number of bands to use. Must be at least 1.
    sampling_prob: The Poisson sampling probability of the mechanism, i.e., the
      probability an example will be included in each batch. Note that because
      the examples are partitioned into `num_bands` groups, the expected batch
      size is actually `dataset_size * sampling_prob / num_bands` (i.e., a
      factor of `num_bands` smaller than DP-SGD).

  Returns:
    A DpEvent object.

  Raises:
    ValueError: If `num_bands` is smaller than 1.
  """
  _validate_poisson_args(noise_multiplier, iterations, sampling_prob)
  # A zero band count would divide by zero below, and a negative one can round
  # up to zero rounds, i.e. an event that reports no privacy cost at all.
  if num_bands < 1:
    raise ValueError(f'Expected {num_bands=} >= 1.')
  rounds = math.ceil(iterations / num_bands)
  return dpsgd_event(
      noise_multiplier=noise_multiplier,
      sampling_prob=sampling_prob,
      iterations=rounds,
  )




[docs]
def truncated_amplified_bandmf_event(
    noise_multiplier: float,
    iterations: int,
    *,
    num_bands: int,
    sampling_prob: float,
    largest_group_size: int,
    truncated_batch_size: int,
) -> dp_accounting.DpEvent:
  """Returns the DpEvent for truncated DP-BandMF with the given parameters.

  This mechanism is like BandMF, but batches larger than `truncated_batch_size`
  are truncated to size `truncated_batch_size`. This mechanism is compatible
  with the `zero-out` adjacency notion, and requires knowledge of the total
  number of examples. The event is that of truncated DP-SGD over a dataset of
  `largest_group_size` examples, run for `ceil(iterations / num_bands)`
  iterations.

  Args:
    noise_multiplier: The noise multiplier to use, assuming the strategy matrix
      has maximum L2 column norm of one.
    iterations: The number of iterations to run the mechanism for.
    num_bands: The number of bands to use. Must be at least 1.
    sampling_prob: The Poisson sampling probability of the mechanism, i.e., the
      probability an example will be included in each batch before truncation.
      Note that because the examples are partitioned into `num_bands` groups,
      the expected batch size (before truncation) is actually `dataset_size *
      sampling_prob / num_bands` (i.e., a factor of `num_bands` smaller than
      DP-SGD).
    largest_group_size: The number of examples in the largest group, usually
      math.ceil(num_examples / num_bands).
    truncated_batch_size: The maximum batch size.

  Returns:
    A DpEvent object.

  Raises:
    ValueError: If `num_bands` is smaller than 1.
  """
  _validate_poisson_args(noise_multiplier, iterations, sampling_prob)
  # A zero band count would divide by zero below, and a negative one can round
  # up to zero rounds, i.e. an event that reports no privacy cost at all.
  if num_bands < 1:
    raise ValueError(f'Expected {num_bands=} >= 1.')
  rounds = math.ceil(iterations / num_bands)
  return truncated_dpsgd_event(
      noise_multiplier=noise_multiplier,
      sampling_prob=sampling_prob,
      iterations=rounds,
      num_examples=largest_group_size,
      truncated_batch_size=truncated_batch_size,
  )