beamds.beam.config package#
Submodules#
beamds.beam.config.configurations module#
- class beamds.beam.config.configurations.AccelerateConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: DeepspeedConfig, FederatedTrainingConfig
- parameters = [BeamParam(name='device_placement', type=<class 'bool'>, default=False, help='Whether or not the accelerator should put objects on device', tags=None), BeamParam(name='split_batches', type=<class 'bool'>, default=False, help='Whether or not the accelerator should split the batches yielded by the dataloaders across the devices', tags=None)]#
- class beamds.beam.config.configurations.BeamProjectConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='project_name', type=<class 'str'>, default='beam', help='The name of the beam project', tags=None), BeamParam(name='algorithm', type=<class 'str'>, default='Algorithm', help='algorithm name', tags=None), BeamParam(name='identifier', type=<class 'str'>, default='debug', help='The name of the model to use', tags=None), BeamParam(name='logs_path', type=<class 'str'>, default='/home/runner/beam_data/projects/experiment', help='Root directory for Logs and results', tags=None), BeamParam(name='data_path', type=<class 'str'>, default='/home/runner/beam_data/projects/data', help='Where the dataset is located', tags=None), BeamParam(name='config_file', type=<class 'str'>, default=None, help='The beam config file to use with secret keys', tags=None), BeamParam(name='verbosity', type=<class 'str'>, default='info', help='The verbosity level [debug|info|warning|error]', tags=None)]#
- class beamds.beam.config.configurations.CacheConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='cache_depth', type=<class 'int'>, default=None, help='The depth of the cache', tags=None), BeamParam(name='cache_path', type=<class 'str'>, default=None, help='The path to the cache (if None, the cache is stored in memory)', tags=None), BeamParam(name='cache_exception_keys', type=<class 'list'>, default=None, help='The keys to exclude from the cache', tags=None), BeamParam(name='cache_store_suffix', type=<class 'str'>, default=None, help='The suffix to add to the stored file (if None, the cache is stored as BeamData)', tags=None), BeamParam(name='silent_cache', type=<class 'bool'>, default=False, help='Whether to log cache operations', tags=None)]#
- class beamds.beam.config.configurations.DDPConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='find_unused_parameters', type=<class 'bool'>, default=False, help='For DDP applications: allows running backward on a subgraph of the model. This introduces extra overhead, so applications should only set find_unused_parameters to True when necessary', tags=None), BeamParam(name='broadcast_buffers', type=<class 'bool'>, default=True, help='For DDP applications: Flag that enables syncing (broadcasting) buffers of the module at the beginning of the forward function.', tags=None), BeamParam(name='nvlink', type=<class 'bool'>, default=False, help='For DDP applications: whether nvlink is available for faster communication', tags=None)]#
- class beamds.beam.config.configurations.DatasetConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='split_dataset_seed', type=<class 'int'>, default=5782, help='Seed dataset split (set to zero to get random split)', tags=None), BeamParam(name='test_size', type=<class 'float'>, default=0.2, help='Test set percentage', tags=None), BeamParam(name='validation_size', type=<class 'float'>, default=0.2, help='Validation set percentage', tags=None), BeamParam(name='stratify_dataset', type=<class 'bool'>, default=False, help='Stratify the dataset split by the labels', tags=None), BeamParam(name='dataset_time_index', type=<class 'str'>, default=None, help='The time index to use for time-based splits', tags=None), BeamParam(name='test_split_method', type=<class 'str'>, default='uniform', help='The method to split the test set [uniform|time_based]', tags=None)]#
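A minimal usage sketch, assuming (as the constructor signature suggests) that keyword arguments override the declared defaults; the concrete values are illustrative only:

# Illustrative split configuration; values are hypothetical.
from beamds.beam.config.configurations import DatasetConfig

conf = DatasetConfig(
    split_dataset_seed=5782,   # set to 0 for a random split
    test_size=0.15,
    validation_size=0.15,
    stratify_dataset=True,     # stratify the split by the labels
)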
- class beamds.beam.config.configurations.DeviceConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='device', type=<class 'str'>, default='0', help='GPU Number or cpu/cuda string', tags=None), BeamParam(name='device_list', type=<class 'list'>, default=None, help='Set GPU priority for parallel execution e.g. --device-list 2 1 3 will use GPUs 2 and 1 when passing --n-gpus=2 and will use GPUs 2 1 3 when passing --n-gpus=3. If None, will use an ascending order starting from the GPU passed in the --device parameter. e.g. when --device=1 will use GPUs 1,2,3,4 when --n-gpus=4', tags=None), BeamParam(name='n_gpus', type=<class 'int'>, default=1, help='Number of parallel gpu workers. Set <=1 for single process', tags=None)]#
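The interplay between device, device_list and n_gpus described above can be sketched as follows (again assuming keyword arguments override the defaults; the GPU indices are illustrative):

from beamds.beam.config.configurations import DeviceConfig

# Use two GPU workers, preferring GPU 2 and then GPU 1
# (mirrors --device-list 2 1 3 with --n-gpus=2 in the help text above).
conf = DeviceConfig(device='2', device_list=[2, 1, 3], n_gpus=2)

# With device_list=None, device='1' and n_gpus=4 would select GPUs 1,2,3,4
# in ascending order, as documented for the device_list parameter.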
- class beamds.beam.config.configurations.ExperimentConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamProjectConfig, KeysConfig, CacheConfig
Arguments (global parameters)
These parameters determine which experiment to load or to generate. The name of the experiment is <alg>_<identifier>_exp_<num>_<time>. The possible configurations (a short usage sketch follows the parameter list below):
- reload = False, override = True: always override the last experiment (default configuration)
- reload = False, override = False: always append the experiment to the list (increment the experiment number)
- reload = True, resume = -1: resume the last experiment
- reload = True, resume = <n>: resume experiment <n>
- parameters = [BeamParam(name='llm', type=<class 'str'>, default=None, help='URI of a Large Language Model to be used in the experiment.', tags=None), BeamParam(name='reload', type=<class 'bool'>, default=False, help='Load saved model', tags=None), BeamParam(name='resume', type=<class 'int'>, default=-1, help='Resume experiment number, set -1 for last experiment: active when reload=True', tags=None), BeamParam(name='override', type=<class 'bool'>, default=False, help='Override last experiment: active when reload=False', tags=None), BeamParam(name='reload_checkpoint', type=<class 'str'>, default='best', help='Which checkpoint to reload [best|last|<epoch>]', tags=None), BeamParam(name='cpu_workers', type=<class 'int'>, default=0, help='How many CPUs will be used for the data loading', tags=None), BeamParam(name='data_fetch_timeout', type=<class 'float'>, default=0.0, help='Timeout for the dataloader fetching. Set to 0 for no timeout.', tags=None), BeamParam(name='tensorboard', type=<class 'bool'>, default=True, help='Log results to tensorboard', tags=None), BeamParam(name='mlflow', type=<class 'bool'>, default=False, help='Log results to the MLflow server', tags=None), BeamParam(name='lognet', type=<class 'bool'>, default=True, help='Log network parameters', tags=None), BeamParam(name='deterministic', type=<class 'bool'>, default=False, help='Use deterministic pytorch optimization for reproducibility. When enabling non-deterministic behavior, it sets torch.backends.cudnn.benchmark = True which accelerates the computation', tags=None), BeamParam(name='scalene', type=<class 'bool'>, default=False, help='Profile the experiment with the Scalene python profiler', tags=None), BeamParam(name='safetensors', type=<class 'bool'>, default=False, help='Save tensors in safetensors format instead of native torch', tags=None), BeamParam(name='store_initial_weights', type=<class 'bool'>, default=False, help="Store the network's initial weights", tags=None), BeamParam(name='store_init_args', type=<class 'bool'>, default=True, help='Store the algorithm init args/kwargs for better reloading', tags=None), BeamParam(name='copy_code', type=<class 'bool'>, default=True, help='Copy the code directory into the experiment directory', tags=None), BeamParam(name='restart_epochs_count', type=<class 'bool'>, default=True, help='When reloading an algorithm, restart counting epochs from zero (with respect to schedulers and swa training)', tags='tune'), BeamParam(name='seed', type=<class 'int'>, default=0, help='Seed for reproducibility (zero is reserved for a random seed)', tags=None), BeamParam(name='train_timeout', type=<class 'int'>, default=None, help='Timeout for the training in seconds. Set to None for no timeout', tags='tune'), BeamParam(name='log_experiment', type=<class 'bool'>, default=True, help='Log experiment to the log directory', tags=None), BeamParam(name='print_results', type=<class 'bool'>, default=True, help='Print results after each epoch to screen', tags=None), BeamParam(name='visualize_weights', type=<class 'bool'>, default=False, help='Visualize network weights on tensorboard', tags=None), BeamParam(name='enable_tqdm', type=<class 'bool'>, default=True, help='Print tqdm progress bar when training', tags=None), BeamParam(name='visualize_results_log_base', type=<class 'int'>, default=10, help='Log base for the logarithm-based results visualization', tags=None), BeamParam(name='tqdm_threshold', type=<class 'float'>, default=10.0, help='Minimal expected epoch time to print the tqdm bar. Set to 0 to ignore and determine the tqdm bar with the enable_tqdm flag', tags=None), BeamParam(name='tqdm_stats', type=<class 'float'>, default=1.0, help='Take this period to calculate the expected epoch time', tags=None), BeamParam(name='visualize_results', type=<class 'str'>, default='yes', help='when to visualize results on tensorboard [yes|no|logscale|best|never|final]', tags=None), BeamParam(name='store_results', type=<class 'str'>, default='logscale', help='when to store results to pickle files [yes|no|logscale|best|never|final]', tags=None), BeamParam(name='store_networks', type=<class 'str'>, default='best/last', help='when to store network weights to the log directory [yes|no|logscale|best|all_bests|never|final|last|best/last]', tags=None), BeamParam(name='comet', type=<class 'bool'>, default=False, help='Whether to use comet.ml for logging', tags=None), BeamParam(name='git_directory', type=<class 'str'>, default=None, help='The git directory to use for comet.ml logging', tags=None), BeamParam(name='comet_workspace', type=<class 'str'>, default=None, help='The comet.ml workspace to use for logging', tags=None), BeamParam(name='mlflow_url', type=<class 'str'>, default=None, help='The url of the mlflow server to use for logging. If None, mlflow will log to $MLFLOW_TRACKING_URI', tags=None), BeamParam(name='training_framework', type=<class 'str'>, default='torch', help='Choose between [torch|amp|accelerate|deepspeed]', tags=None), BeamParam(name='compile_train', type=<class 'bool'>, default=False, help='Apply torch.compile to optimize the inner_train function to speed up training. To use this feature, you must override and use the alg.inner_train function in your alg.train_iteration function', tags=None), BeamParam(name='compile_network', type=<class 'bool'>, default=False, help='Apply torch.compile to optimize the network forward function to speed up training.', tags=None), BeamParam(name='objective', type=<class 'str'>, default='objective', help='A single objective to apply hyperparameter optimization or ReduceLROnPlateau scheduling. By default we consider maximization of the objective (e.g. accuracy). You can override this behavior by overriding the Algorithm.report method.', tags=None), BeamParam(name='optimization_mode', type=<class 'str'>, default=None, help='Set [min/max] to minimize/maximize the objective. By default, objectives that contain the words "loss/error/mse" are minimized and other objectives are maximized. You can override this behavior by setting this flag.', tags=None), BeamParam(name='objective_to_report', type=<class 'str'>, default='best', help='Which objective to report in HPO run [best|last]', tags=None)]#
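The reload/override/resume combinations listed above can be illustrated with a short, hedged sketch (assuming keyword arguments override the declared defaults):

from beamds.beam.config.configurations import ExperimentConfig

# Default behavior: start fresh and override the last experiment.
fresh = ExperimentConfig(reload=False, override=True)

# Append a new experiment instead (increments the experiment number).
appended = ExperimentConfig(reload=False, override=False)

# Resume the most recent experiment from its best checkpoint.
resumed = ExperimentConfig(reload=True, resume=-1, reload_checkpoint='best')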
- class beamds.beam.config.configurations.FederatedTrainingConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: DeviceConfig
- parameters = [BeamParam(name='mp_ip', type=<class 'str'>, default='localhost', help='IP to be used for multiprocessing', tags=None), BeamParam(name='mp_port', type=<class 'str'>, default=None, help='Port to be used for multiprocessing', tags=None), BeamParam(name='n_cpus_per_worker', type=<class 'int'>, default=6, help='Number of cpus to use in each worker', tags=None), BeamParam(name='n_gpus_per_worker', type=<class 'int'>, default=1, help='Number of gpus to use in each worker', tags=None), BeamParam(name='distributed_backend', type=<class 'str'>, default='nccl', help='The distributed backend to use. Supported backends: [nccl, gloo, mpi]', tags=None), BeamParam(name='mp_context', type=<class 'str'>, default='spawn', help='The multiprocessing context to use', tags=None), BeamParam(name='kv_store', type=<class 'str'>, default='tcp', help='The key-value store to use [tcp|file|hash]', tags=None), BeamParam(name='kv_store_path', type=<class 'str'>, default=None, help='The path to the key-value store file', tags=None), BeamParam(name='kv_store_timeout', type=<class 'float'>, default=300.0, help='The timeout for the key-value store', tags=None), BeamParam(name='kv_store_port', type=<class 'str'>, default=None, help='The port to use for the key-value store', tags=None), BeamParam(name='federated_runner', type=<class 'bool'>, default=False, help='Use the new federated runner for distributed training', tags=None)]#
- class beamds.beam.config.configurations.KeysConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='COMET_API_KEY', type=<class 'str'>, default=None, help='The comet.ml api key to use for logging', tags=None), BeamParam(name='AWS_ACCESS_KEY_ID', type=<class 'str'>, default=None, help='The aws access key to use for S3 connections', tags=None), BeamParam(name='AWS_SECRET_ACCESS_KEY', type=<class 'str'>, default=None, help='The aws private key to use for S3 connections', tags=None), BeamParam(name='SSH_PRIVATE_KEY', type=<class 'str'>, default=None, help='The ssh secret key to use for ssh connections', tags=None), BeamParam(name='OPENAI_API_KEY', type=<class 'str'>, default=None, help='The openai api key to use for openai connections', tags=None), BeamParam(name='BEAM_USERNAME', type=<class 'str'>, default=None, help='The beam username to use for connections like smb/ftp/ssh etc', tags=None), BeamParam(name='BEAM_PASSWORD', type=<class 'str'>, default=None, help='The beam password to use for connections like smb/ftp/ssh etc', tags=None), BeamParam(name='K8S_API_KEY', type=<class 'str'>, default=None, help='The k8s api key to use for k8s connections', tags=None)]#
- class beamds.beam.config.configurations.NNCompilerConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
For torch compile see: https://pytorch.org/docs/stable/generated/torch.compile.html
For torch jit see: https://pytorch.org/docs/stable/generated/torch.jit.trace.html
- parameters = [BeamParam(name='compile_fullgraph', type=<class 'bool'>, default=False, help='Whether it is ok to break model into several subgraphs', tags=None), BeamParam(name='compile_dynamic', type=<class 'bool'>, default=None, help='Use dynamic shape tracing. When this is True, we will up-front attempt to generate a kernel that is as dynamic as possible to avoid recompilations when sizes change.', tags=None), BeamParam(name='compile_backend', type=<class 'str'>, default='inductor', help='The backend to use for compilation [inductor|torch]', tags=None), BeamParam(name='compile_mode', type=<class 'str'>, default='default', help='[default|reduce-overhead|max-autotune|max-autotune-no-cudagraphs], see https://pytorch.org/docs/stable/generated/torch.compile.html', tags=None), BeamParam(name='compile_options', type=<class 'dict'>, default=None, help='Additional options for the compiler', tags=None), BeamParam(name='jit_check_trace', type=<class 'bool'>, default=True, help='Check if the same inputs run through traced code produce the same outputs', tags=None), BeamParam(name='jit_check_inputs', type=<class 'list'>, default=None, help='A list of tuples of input arguments that should be used to check the trace against what is expected.', tags=None), BeamParam(name='jit_check_tolerance', type=<class 'float'>, default=1e-05, help='Floating-point comparison tolerance to use in the checker procedure.', tags=None), BeamParam(name='jit_strict', type=<class 'bool'>, default=True, help='Only turn this off when you want the tracer to record your mutable container types', tags=None)]#
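The compile_* options above line up with the keyword arguments of torch.compile (linked above). The helper below is a hypothetical illustration of that mapping, not the wiring used inside beamds:

import torch

def compile_network(model: torch.nn.Module, conf):
    # Hypothetical helper: forwards the compile_* options to torch.compile.
    # torch.compile accepts either `mode` or `options`, not both at once.
    kwargs = dict(
        fullgraph=conf.compile_fullgraph,  # fail instead of falling back to subgraphs
        dynamic=conf.compile_dynamic,      # None lets torch decide about dynamic shapes
        backend=conf.compile_backend,      # e.g. 'inductor'
    )
    if conf.compile_options is not None:
        kwargs['options'] = conf.compile_options
    else:
        kwargs['mode'] = conf.compile_mode  # 'default' | 'reduce-overhead' | ...
    return torch.compile(model, **kwargs)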
- class beamds.beam.config.configurations.NNExperimentConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: ExperimentConfig, NNTrainingConfig, DDPConfig
- class beamds.beam.config.configurations.NNModelConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='init', type=<class 'str'>, default='ortho', help='Initialization method [ortho|N02|xavier|]', tags='tune')]#
- class beamds.beam.config.configurations.NNTrainingConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: NNModelConfig, SchedulerConfig, AccelerateConfig, SWAConfig, OptimizerConfig, DatasetConfig, SamplerConfig, NNCompilerConfig
- parameters = [BeamParam(name='scale_epoch_by_batch_size', type=<class 'bool'>, default=True, help='When True: epoch length corresponds to the number of examples sampled from the dataset in each epoch When False: epoch length corresponds to the number of forward passes in each epoch', tags=None), BeamParam(name='model_dtype', type=<class 'str'>, default='float32', help='dtype, both for automatic mixed precision and accelerate. Supported dtypes: [float32, float16, bfloat16]', tags=['tune', 'model']), BeamParam(name='total_steps', type=<class 'int'>, default=1000000, help='Total number of environment steps', tags='tune'), BeamParam(name='epoch_length', type=<class 'int'>, default=None, help='Length of train+eval epochs (if None - it is taken from epoch_length_train/epoch_length_eval arguments)', tags='tune'), BeamParam(name='train_on_tail', type=<class 'bool'>, default=False, help='Should the last (incomplete) batch be included in the training epoch (useful for small datasets but not time efficient)', tags=None), BeamParam(name='epoch_length_train', type=<class 'int'>, default=None, help='Length of each epoch (if None - it is the dataset[train] size)', tags='tune'), BeamParam(name='epoch_length_eval', type=<class 'int'>, default=None, help='Length of each evaluation epoch (if None - it is the dataset[validation] size)', tags='tune'), BeamParam(name='n_epochs', type=<class 'int'>, default=None, help='Number of epochs, if None, it uses the total steps to determine the number of iterations', tags='tune'), BeamParam(name='batch_size', type=<class 'int'>, default=256, help='Batch Size', tags='tune'), BeamParam(name='batch_size_train', type=<class 'int'>, default=None, help='Batch Size for training iterations', tags='tune'), BeamParam(name='batch_size_eval', type=<class 'int'>, default=None, help='Batch Size for testing/evaluation iterations', tags='tune'), BeamParam(name='reduction', type=<class 'str'>, default='sum', help='whether to sum loss elements or average them [sum|mean|mean_batch|sqrt|mean_sqrt]', tags='tune'), BeamParam(name=['lr_dense', 'lr'], type=<class 'float'>, default=0.001, help='learning rate for dense optimizers', tags='tune'), BeamParam(name='lr_sparse', type=<class 'float'>, default=0.01, help='learning rate for sparse optimizers', tags='tune'), BeamParam(name='stop_at', type=<class 'float'>, default=None, help='Early stopping when objective >= stop_at', tags='tune'), BeamParam(name='early_stopping_patience', type=<class 'int'>, default=None, help='Early stopping patience in epochs, stop when current_epoch - best_epoch >= early_stopping_patience', tags='tune')]#
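A hedged example of a training setup built from these parameters (values are illustrative; note that the learning-rate parameter is declared under the two names 'lr_dense' and 'lr'):

from beamds.beam.config.configurations import NNTrainingConfig

conf = NNTrainingConfig(
    batch_size=128,                # shared by train/eval unless overridden
    batch_size_eval=512,           # larger batches for evaluation only
    lr_dense=3e-4,                 # declared with the alias 'lr' as well
    n_epochs=50,                   # otherwise derived from total_steps
    early_stopping_patience=10,    # stop after 10 epochs without improvement
)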
- class beamds.beam.config.configurations.OptimizerConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='weight_decay', type=<class 'float'>, default=0.0, help='L2 regularization coefficient for dense optimizers', tags='tune'), BeamParam(name='eps', type=<class 'float'>, default=0.0001, help="Adam's epsilon parameter", tags='tune'), BeamParam(name='momentum', type=<class 'float'>, default=0.9, help="The momentum and Adam's β1 parameter", tags='tune'), BeamParam(name='beta2', type=<class 'float'>, default=0.999, help="Adam's β2 parameter", tags='tune'), BeamParam(name='clip_gradient', type=<class 'float'>, default=0.0, help='Clip Gradient L2 norm', tags='tune'), BeamParam(name='accumulate', type=<class 'int'>, default=1, help='Accumulate gradients for this number of backward iterations', tags='tune')]#
- class beamds.beam.config.configurations.SWAConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='swa', type=<class 'float'>, default=None, help='SWA period. If float it is a fraction of the total number of epochs. If integer, it is the number of SWA epochs.', tags=None), BeamParam(name='swa_lr', type=<class 'float'>, default=0.05, help='The SWA learning rate', tags='tune'), BeamParam(name='swa_anneal_epochs', type=<class 'int'>, default=10, help='The SWA lr annealing period', tags='tune')]#
- class beamds.beam.config.configurations.SamplerConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='oversampling_factor', type=<class 'float'>, default=0.0, help='A factor [0, 1] that controls how much to oversample where 0-no oversampling and 1-full oversampling. Set 0 for no oversampling', tags='tune'), BeamParam(name='expansion_size', type=<class 'int'>, default=10000000, help='largest expanded index size for oversampling', tags=None), BeamParam(name='dynamic_sampler', type=<class 'bool'>, default=False, help='Whether to use a dynamic sampler (mainly for rl/optimization)', tags=None), BeamParam(name='buffer_size', type=<class 'int'>, default=None, help='Maximal Dataset size in dynamic problems', tags='tune'), BeamParam(name='probs_normalization', type=<class 'str'>, default='sum', help="Sampler's probabilities normalization method [sum/softmax]", tags=None), BeamParam(name='sample_size', type=<class 'int'>, default=100000, help='Periodic sample size for the dynamic sampler', tags=None)]#
- class beamds.beam.config.configurations.SchedulerConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='scheduler_steps', type=<class 'str'>, default='epoch', help='When to apply scheduler steps [epoch|iteration|none]: each epoch or each iteration. Use none to avoid scheduler steps or to use your own custom steps policy', tags=None), BeamParam(name='scheduler', type=<class 'str'>, default=None, help='Build BeamScheduler. Supported schedulers: [one_cycle,reduce_on_plateau,cosine_annealing]', tags='tune'), BeamParam(name='cycle_base_momentum', type=<class 'float'>, default=0.85, help='The base momentum in the one-cycle scheduler', tags='tune'), BeamParam(name='cawr_t0', type=<class 'int'>, default=10, help='Number of iterations for the first restart in the CosineAnnealingWarmRestarts scheduler', tags='tune'), BeamParam(name='cawr_tmult', type=<class 'int'>, default=1, help='A factor that increases Ti after a restart in the CosineAnnealingWarmRestarts scheduler', tags='tune'), BeamParam(name='scheduler_factor', type=<class 'float'>, default=0.31622776601683794, help='The factor to reduce lr in schedulers such as ReduceOnPlateau', tags='tune'), BeamParam(name='scheduler_patience', type=<class 'int'>, default=10, help='Patience for the ReduceOnPlateau scheduler', tags='tune'), BeamParam(name='scheduler_warmup', type=<class 'float'>, default=5, help="Scheduler's warmup factor (in epochs)", tags='tune'), BeamParam(name='cycle_max_momentum', type=<class 'float'>, default=0.95, help='The maximum momentum in the one-cycle scheduler', tags='tune')]#
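For example, a ReduceLROnPlateau-style schedule stepped once per epoch could be configured as follows (a sketch under the same keyword-argument assumption; the values are illustrative):

from beamds.beam.config.configurations import SchedulerConfig

conf = SchedulerConfig(
    scheduler='reduce_on_plateau',  # one of [one_cycle, reduce_on_plateau, cosine_annealing]
    scheduler_steps='epoch',        # step the scheduler once per epoch
    scheduler_factor=0.5,           # halve the learning rate on plateau
    scheduler_patience=5,           # epochs without improvement before reducing
)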
- class beamds.beam.config.configurations.TransformerConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: CacheConfig
- parameters = [BeamParam(name='mp_method', type=<class 'str'>, default='joblib', help='The multiprocessing method to use', tags=None), BeamParam(name='n_chunks', type=<class 'int'>, default=None, help='The number of chunks to split the dataset', tags=None), BeamParam(name='name', type=<class 'str'>, default=None, help='The name of the dataset', tags=None), BeamParam(name='store_path', type=<class 'str'>, default=None, help='The path to store the results', tags=None), BeamParam(name='partition', type=<class 'str'>, default=None, help='The partition to use for splitting the dataset', tags=None), BeamParam(name='chunksize', type=<class 'int'>, default=None, help='The chunksize to use for splitting the dataset', tags=None), BeamParam(name='squeeze', type=<class 'bool'>, default=False, help='Whether to squeeze the chunks (e.g. 1-dim dataframe to series)', tags=None), BeamParam(name='reduce', type=<class 'bool'>, default=True, help='Whether to reduce and collate the results', tags=None), BeamParam(name='reduce_dim', type=<class 'int'>, default=0, help='The dimension to reduce the results', tags=None), BeamParam(name='transform_strategy', type=<class 'str'>, default=None, help='The transform strategy to use; can be [CC|CS|SC|SS]', tags=None), BeamParam(name='store_chunk', type=<class 'bool'>, default=None, help='Whether to store the chunked results [None stores chunks if n_chunks/chunksize is not None and store_path is not None]', tags=None), BeamParam(name='split_by', type=<class 'str'>, default='keys', help='The split strategy to use; can be [keys|index|columns]', tags=None), BeamParam(name='store_suffix', type=<class 'str'>, default=None, help='The suffix to add to the stored file', tags=None), BeamParam(name='override', type=<class 'bool'>, default=False, help='Whether to override the stored file if it exists', tags=None), BeamParam(name='use-dill', type=<class 'bool'>, default=False, help='Whether to use dill for serialization', tags=None), BeamParam(name='return-results', type=<class 'bool'>, default=None, help='Whether to return the results. If None, it is set to True if store_path is None', tags=None), BeamParam(name='n_workers', type=<class 'int'>, default=None, help='The number of workers to use for the transformation. If None, defaults to 1 if chunksize is not None and to n_chunks otherwise; if <1, defaults to half of the number of cpus', tags=None), BeamParam(name='use-cache', type=<class 'bool'>, default=False, help='Use the store_path as cache and do not apply the transformation if the cache exists', tags=None), BeamParam(name='retries', type=<class 'int'>, default=1, help='The number of retries to apply for each chunk', tags=None), BeamParam(name='retries_delay', type=<class 'float'>, default=1.0, help='The delay between retries', tags=None), BeamParam(name='strict-transform', type=<class 'bool'>, default=False, help='Whether to raise an exception if the transformation fails', tags=None)]#
- class beamds.beam.config.configurations.UniversalConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: NNExperimentConfig, TransformerConfig
beamds.beam.config.core_config module#
- class beamds.beam.config.core_config.BeamConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: Namespace
- property beam_class_name#
- defaults = {}#
- property help#
- property is_initialized#
- property namespace#
- parameters = [BeamParam(name='debug', type=<class 'bool'>, default=False, help='Whether to run in debug mode (logger is set to DEBUG level)', tags=None), BeamParam(name='colors', type=<class 'bool'>, default=True, help='Whether to use colors in the logger output', tags=None), BeamParam(name='beam-logs-path', type=<class 'str'>, default='/home/runner/beam_data/logs', help='Where to store the beam-logger output', tags=None)]#
- property tags#
- class beamds.beam.config.core_config.BeamParam(name: Union[str, List[str]], type: type, default: <built-in function any>, help: Optional[str] = None, tags: Union[List[str], str, NoneType] = None)[source]#
Bases: object
- default: any#
- help: str | None = None#
- name: str | List[str]#
- tags: List[str] | str | None = None#
- type: type#
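The configuration classes in this package appear to be assembled from BeamConfig subclasses that declare a parameters list of BeamParam entries. The sketch below follows that pattern; the class name and its parameters are hypothetical, and whether additional hooks are needed beyond the declaration is not shown on this page:

from beamds.beam.config.core_config import BeamConfig, BeamParam

class MyTaskConfig(BeamConfig):
    # Hypothetical parameters, mirroring the BeamParam fields documented above.
    parameters = [
        BeamParam(name='n_layers', type=int, default=4,
                  help='Number of layers in the model', tags='tune'),
        BeamParam(name=['lr_head', 'head_lr'], type=float, default=1e-3,
                  help='Learning rate of the task head (declared under two names)'),
    ]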
beamds.beam.config.deepspeed module#
- class beamds.beam.config.deepspeed.DeepspeedConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: BeamConfig
- parameters = [BeamParam(name='deepspeed_config', type=<class 'str'>, default=None, help='Deepspeed configuration JSON file.', tags=None), BeamParam(name='deepspeed_optimizer', type=<class 'str'>, default='AdamW', help='Optimizer type (currently used for deepspeed configuration only). Supported optimizers: [Adam, AdamW, Lamb, OneBitAdam, OneBitLamb]', tags=None), BeamParam(name='zero_stage', type=<class 'int'>, default=2, help='The ZeRO training stage to use.', tags=None), BeamParam(name='stage3_gather_16bit_weights_on_model_save', type=<class 'bool'>, default=False, help='Whether to gather 16-bit weights on model save in ZeRO stage 3', tags=None), BeamParam(name='offload_param_device', type=<class 'str'>, default=None, help='Whether to offload parameters from GPU in ZeRO stage 3: [cpu, nvme, none]', tags=None), BeamParam(name='offload_param_pin_memory', type=<class 'bool'>, default=True, help='Whether to pin memory for offloaded parameters', tags=None), BeamParam(name='offload_param_nvme_path', type=<class 'str'>, default='/home/runner/beam_data/projects/deepspeed', help='Path to NVMe device for offloaded parameters', tags=None), BeamParam(name='offload_optimizer_device', type=<class 'str'>, default=None, help='Whether to offload optimizer states from GPU in ZeRO stages 1/2/3: [cpu, nvme, none]', tags=None), BeamParam(name='offload_optimizer_pin_memory', type=<class 'bool'>, default=True, help='Whether to pin memory for offloaded optimizer states', tags=None), BeamParam(name='autotuning', type=<class 'bool'>, default=False, help='Whether to use the deepspeed autotuning feature.', tags=None), BeamParam(name='partition_activations', type=<class 'bool'>, default=False, help='Enables partition activation when used with model parallelism', tags=None), BeamParam(name='cpu_checkpointing', type=<class 'bool'>, default=False, help='Offloads partitioned activations to CPU if partition_activations is enabled', tags=None), BeamParam(name='contiguous_memory_optimization', type=<class 'bool'>, default=False, help='Copies partitioned activations so that they are contiguous in memory', tags=None), BeamParam(name='number_checkpoints', type=<class 'int'>, default=None, help='Total number of activation checkpoints used to allocate memory buffer for contiguous_memory_optimization', tags=None), BeamParam(name='synchronize_checkpoint_boundary', type=<class 'bool'>, default=False, help='Inserts get_accelerator().synchronize() at each checkpoint boundary', tags=None), BeamParam(name='profile', type=<class 'bool'>, default=False, help='Logs the forward and backward time for each checkpoint function', tags=None), BeamParam(name='grad_accum_dtype', type=<class 'str'>, default=None, help='The data type for gradient accumulation. Supported types: [float32, float16, bfloat16]', tags=None)]#
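For orientation, the ZeRO and offloading options above roughly correspond to the following fragment of a standard DeepSpeed JSON configuration. This is an illustrative mapping only, not the exact config that beamds generates:

# Illustrative DeepSpeed config fragment (expressed as a Python dict).
deepspeed_config = {
    "zero_optimization": {
        "stage": 2,                          # zero_stage
        "offload_optimizer": {               # offload_optimizer_device / _pin_memory
            "device": "cpu",
            "pin_memory": True,
        },
        "stage3_gather_16bit_weights_on_model_save": False,
    },
    "activation_checkpointing": {            # partition_activations and friends
        "partition_activations": False,
        "cpu_checkpointing": False,
        "contiguous_memory_optimization": False,
    },
}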
beamds.beam.config.utils module#
- beamds.beam.config.utils.args_that_were_provided_explicitly(parser, args: dict = None, sys_argv=True)[source]#
- beamds.beam.config.utils.boolean_feature(parser, feature, default=False, help='', metavar=None)[source]#
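boolean_feature registers a boolean flag on an argparse parser. Its exact behavior is not documented on this page, so the snippet below is a generic sketch of such a helper (with a hypothetical name, add_boolean_flag) rather than the beamds implementation:

import argparse

def add_boolean_flag(parser, feature, default=False, help=''):
    # Generic pattern: paired --feature / --no-feature switches sharing one dest.
    dest = feature.replace('-', '_')
    parser.add_argument(f'--{feature}', dest=dest, action='store_true', help=help)
    parser.add_argument(f'--no-{feature}', dest=dest, action='store_false',
                        help=f'disable: {help}')
    parser.set_defaults(**{dest: default})

parser = argparse.ArgumentParser()
add_boolean_flag(parser, 'tensorboard', default=True, help='log results to tensorboard')
print(parser.parse_args(['--no-tensorboard']).tensorboard)  # False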