Source code for orbax.checkpoint.options

# Copyright 2026 The Orbax Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Configuration options for APIs like CheckpointManager and Checkpointer."""

import dataclasses
from typing import Callable, Optional, Set

from orbax.checkpoint._src.multihost import multihost



@dataclasses.dataclass
class AsyncOptions:
  """Settings that control asynchronous checkpointing.

  See :py:class:`.AsyncCheckpointer` for details.

  Attributes:
    timeout_secs: Timeout in seconds. Defaults to 1200 (20 minutes), the same
      default used by `AsyncCheckpointer`.
    barrier_sync_fn: Optional `multihost.BarrierSyncFn`. Defaults to None.
      NOTE(review): presumably used to synchronize hosts during async saves;
      confirm against `AsyncCheckpointer`.
    post_finalization_callback: Optional zero-argument callable returning
      None. Defaults to None. NOTE(review): by its name, expected to be
      invoked after checkpoint finalization; confirm against the caller.
    create_directories_asynchronously: Boolean flag, True by default.
      NOTE(review): by its name, toggles asynchronous directory creation;
      confirm against the caller.
  """

  # 20 minutes. Same as default in `AsyncCheckpointer`.
  timeout_secs: int = 1200
  barrier_sync_fn: Optional[multihost.BarrierSyncFn] = None
  post_finalization_callback: Optional[Callable[[], None]] = None
  create_directories_asynchronously: bool = True
@dataclasses.dataclass
class MultiprocessingOptions:
  """Settings that control multiprocessing behavior.

  Attributes:
    primary_host: The host id of the primary host. Defaults to 0. If set to
      None, all hosts are considered primary, which is useful when every host
      works only with local storage.
    active_processes: A set of process indices (corresponding to
      `multihost.process_index()`) over which `CheckpointManager` is expected
      to be called. This allows a `CheckpointManager` instance to run over a
      subset of processes rather than all of them, as is normally expected.
      If specified, `primary_host` must belong to `active_processes`; this
      class itself does not validate that requirement.
    barrier_sync_key_prefix: A string prepended to the barrier sync key used
      to synchronize processes. Useful for avoiding collisions with other
      barrier syncs when another CheckpointManager is in use concurrently.
  """

  primary_host: Optional[int] = 0
  active_processes: Optional[Set[int]] = None
  barrier_sync_key_prefix: Optional[str] = None
[docs] @dataclasses.dataclass(frozen=True) class FileOptions: """Options used to configure checkpoint directories and files. Attributes: path_permission_mode: Path permission mode for step directories, user metadata files. e.g. 0o750. Please check https://github.com/google/etils/blob/main/etils/epath/backend.py if your """ path_permission_mode: int | None = None
@dataclasses.dataclass class MemoryLimitOptions: """Options for configuring memory limits for save. Can help to reduce the possibility of OOM's when large checkpoints are saved. Attributes: max_transfer_concurrent_gb: The max memory limit in GB allowed for. Required if `save_device_host_concurrent_gb` is set to `"auto"`. """ max_transfer_concurrent_gb: int | None = None