> ## Documentation Index
> Fetch the complete documentation index at: https://internal.nolano.ai/llms.txt
> Use this file to discover all available pages before exploring further.

# OptimizationConfig

> API reference for `OptimizationConfig` and the related `ExperimentConfig`, `MetaConfig`, and `DataConfig` configuration objects

## OptimizationConfig

<ParamField path="total_training_steps" type="int" required>
  Total number of training steps for the experiment
</ParamField>

<ParamField path="max_learning_rate" type="float" required>
  Maximum learning rate value
</ParamField>

<ParamField path="global_batch_size" type="int" required>
  Global batch size for training
</ParamField>

<ParamField path="learning_rate_schedule" type="str | callable" default="constant">
  Learning rate scheduling strategy:

  * String options: `"constant"`, `"linear"`, `"cosine"`, `"exponential"`
  * Custom function with signature: `(learning_rate, current_step, total_steps) → decayed_rate`

  Note: Warmup is applied after this schedule and is controlled separately via `warmup_steps`; set `warmup_steps` to `0` if warmup is not needed.
</ParamField>

<ParamField path="warmup_steps" type="int" default="0">
  Number of learning rate warmup steps.
</ParamField>

<ParamField path="decay_steps" type="int" default="0">
  Number of learning rate decay steps. Must be set to `0` when `learning_rate_schedule` is a custom function.

  Constraint: `warmup_steps + decay_steps ≤ total_training_steps`
</ParamField>

<ParamField path="min_learning_rate" type="float | None" default="max_learning_rate / 10">
  Minimum learning rate value.
</ParamField>

<ParamField path="optimizer_type" type="str" default="Adam">
  Optimizer algorithm. Options: `"Adam"`, `"SGD"`, `"Lion"`
</ParamField>

<ParamField path="weight_decay" type="float" default="0.01">
  L2 regularization coefficient.
</ParamField>

<ParamField path="z_loss" type="float" default="0.0">
  Z-loss regularization coefficient. Set to 0.0 to disable.
</ParamField>

<ParamField path="load_balancing" type="float | None" default="None">
  Load balancing coefficient for Mixture of Experts (MoE) models. Only applicable for MoE architectures.
</ParamField>

<ParamField path="clip_grad" type="float" default="1.0">
  Gradient clipping threshold based on global L2 norm.
</ParamField>

## ExperimentConfig

<ParamField path="data_configs" type="DataConfig | List[DataConfig]" required>
  One or more data configurations. Pass a list to combine several data configurations in a single run (for example, for multi-objective modeling).
</ParamField>

<ParamField path="optimization_config" type="OptimizationConfig" required>
  Optimization and training parameters
</ParamField>

<ParamField path="model_config" type="ModelConfig" required>
  Model architecture and initialization parameters
</ParamField>

<ParamField path="meta_config" type="MetaConfig" default="Uses MetaConfig defaults">
  Metadata and run-specific parameters
</ParamField>

## MetaConfig

<ParamField path="name" type="str" default="trial-run">
  Name identifier for this experimental run.
</ParamField>

<ParamField path="seed" type="int" default="42">
  Random seed for reproducible training.
</ParamField>

<ParamField path="save_path" type="str" default="current working directory / run_name">
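  Directory path for saving model checkpoints. By default, this is a directory named after the run (see `name`) inside the current working directory.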
</ParamField>

<ParamField path="model_save_frequency" type="int" default="-1">
  Frequency (in steps) for saving model checkpoints. Set to -1 to save only at the end of training.
</ParamField>

<ParamField path="max_checkpoints" type="int" default="-1">
  Maximum number of model checkpoints to retain. Set to -1 for no limit.
</ParamField>

## DataConfig

<ParamField path="data_paths" type="str | List[str]" required>
  Path(s) to preprocessed data files
</ParamField>

<ParamField path="features" type="str | List[str] | callable | List[callable] | None" default="None">
  Feature engineering functions for lag tokens (historical lag features) and exogenous variables (external variables). Can be string identifier(s) or custom function(s).
</ParamField>

<ParamField path="sampling_weight" type="float | None" default="Equal weight among all data configs">
  Relative sampling weight for this data source (normalized to sum to 1 across all data configs).
</ParamField>

<ParamField path="training_objective" type="str | callable" default="cross_entropy">
  Loss function specification:

  * `"cross_entropy"`: Chronos-style or text cross-entropy loss
  * `"mse"`: Mean Squared Error (TimesFM-style)
  * `"quantile"` or `"pinball"`: Quantile/Pinball loss (TiRex-style)
  * `"multi_task"`: Multi-task learning (TimesFM 2.0-style)
  * Custom callable loss function
</ParamField>

<ParamField path="validation_split" type="float" default="0.1">
  Fraction of the dataset to hold out as validation data, in the range 0.0–1.0 (1.0 means all data is used for validation).
</ParamField>

<Warning>
  At least one `DataConfig` must have `validation_split < 1.0` for training to proceed.
</Warning>
