beamds.beam.fine_tune package#
Submodules#
beamds.beam.fine_tune.algorithm module#
beamds.beam.fine_tune.dataset module#
beamds.beam.fine_tune.hparams module#
- class beamds.beam.fine_tune.hparams.FTLLMConfig(*args, _store_init_path=None, _save_init_args=True, **kwargs)[source]#
Bases: UniversalConfig
- defaults = {'batch_size': 2, 'epoch_length': 100, 'lr_dense': 1e-05, 'lr_sparse': 0.0001, 'model_dtype': 'bfloat16', 'project_name': 'fine_tune_llm', 'reduction': 'mean_batch', 'scale_epoch_by_batch_size': False, 'training_framework': 'accelerate'}#
- parameters = [
      BeamParam(name='model', type=<class 'str'>, default=None, help='Model to use for fine-tuning', tags=None),
      BeamParam(name='prompt_key', type=<class 'str'>, default='prompt', help='Key to use for the prompt', tags=None),
      BeamParam(name='completion_key', type=<class 'str'>, default=None, help='Key to use for the completion', tags=None),
      BeamParam(name='lora_alpha', type=<class 'float'>, default=16, help='Lora alpha parameter', tags=['tune']),
      BeamParam(name='lora_dropout', type=<class 'float'>, default=0.05, help='Lora dropout', tags=['tune']),
      BeamParam(name='lora_r', type=<class 'int'>, default=16, help='Lora r parameter', tags=['tune']),
      BeamParam(name='lora_fan_in_fan_out', type=<class 'bool'>, default=False, help='Set this to True if the layer to replace stores weight like (fan_in, fan_out)', tags=None),
      BeamParam(name='lora_bias', type=<class 'str'>, default='none', help='Bias type for Lora. Can be ‘none’, ‘all’ or ‘lora_only’. If ‘all’ or ‘lora_only’', tags=['tune']),
      BeamParam(name='load_in_8bit', type=<class 'bool'>, default=False, help='Load the model in 8bit mode', tags=None),
      BeamParam(name='modules_to_save', type=<class 'list'>, default=None, help='List of modules apart from LoRA layers to be set as trainable and saved in the final checkpoint', tags=None),
      BeamParam(name='layers_to_transform', type=<class 'list'>, default=None, help='The layer indexes to transform, if this argument is specified, it will apply the LoRA transformations on the layer indexes that are specified in this list.', tags=None),
      BeamParam(name='target_modules', type=<class 'list'>, default=None, help='The names of the modules to apply Lora to', tags=None),
      BeamParam(name='hf_cache_dir', type=<class 'str'>, default=None, help='Directory for Huggingface to cache to and load from', tags=None),
      BeamParam(name='hf_data_dir', type=<class 'str'>, default=None, help='Directory for the dataset to load from', tags=None),
      BeamParam(name='return_overflowing_tokens', type=<class 'bool'>, default=False, help='Whether or not to split overflowing tokens into their own batch', tags=None),
      BeamParam(name='context_length', type=<class 'int'>, default=128, help='The maximal context length to train the model with', tags=['tune']),
      BeamParam(name='dataset', type=<class 'str'>, default=None, help='The dataset which is used for fine-tuning', tags=None)
  ]#
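The minimal sketch below shows how this configuration might be constructed. It assumes that, like other beam UniversalConfig subclasses, FTLLMConfig accepts the parameters listed above as keyword arguments (anything not overridden falls back to defaults) and exposes them as attributes; the model, dataset, and prompt_key values are illustrative placeholders, not part of this module.

```python
# Hedged sketch, not a verbatim example from the library: assumes FTLLMConfig
# takes the documented parameters as keyword arguments and exposes them as
# attributes. Model and dataset identifiers are placeholders.
from beamds.beam.fine_tune.hparams import FTLLMConfig

conf = FTLLMConfig(
    model='meta-llama/Llama-2-7b-hf',  # placeholder Hugging Face model id
    dataset='imdb',                    # placeholder Hugging Face dataset name
    prompt_key='text',                 # dataset column used as the prompt
    lora_r=16,                         # LoRA rank (tagged 'tune')
    lora_alpha=16,                     # LoRA alpha (tagged 'tune')
    lora_dropout=0.05,                 # LoRA dropout (tagged 'tune')
    context_length=256,                # maximal training context length
    load_in_8bit=False,                # keep weights in model_dtype, no 8-bit loading
)

# Values from `defaults` apply to anything not overridden,
# e.g. batch_size=2 and training_framework='accelerate'.
print(conf.model, conf.lora_r, conf.batch_size)
```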