# Configuration for data loading, formatting, and fine-tuning
output_dir: "/tmp/finetuning-pipeline/llama3_2_vision/" # Directory to store output files

data:
  data_path: "data/path" # Path to the dataset to load
  is_local: true # Whether the data is stored locally
  formatter_type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  system_prompt: "You are a helpful assistant" # System prompt to use for the dataset
  column_mapping:
    input: "instruction" # Field containing the input text
    output: "output" # Field containing the output text
    image: "image" # Field containing the image path (optional)
  # Additional arguments to pass to the load_dataset function
  dataset_kwargs:
    split: "validation" # Dataset split to load
  shuffle: false # Whether to shuffle the dataset
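# NOTE (illustrative, not part of the schema): assuming the loader wraps Hugging Face
# datasets, the block above corresponds roughly to a call like
#   load_dataset("data/path", split="validation")
# with `column_mapping` used to rename fields and `system_prompt` injected by the
# chosen formatter. The exact call depends on the pipeline's data-loading code.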
# Training configuration
finetuning:
  # formatter_type: "torchtune" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  model_path: "path/to/model" # Path to the model checkpoint
  tokenizer_path: "path/to/tokenizer" # Path to the tokenizer
  output_dir: "/tmp/finetuning-pipeline/model_outputs" # Directory to store checkpoints
  log_dir: "/tmp/finetuning-pipeline/logs" # Directory to store logs
  strategy: "lora" # Training strategy ('fft' or 'lora')
  num_epochs: 1 # Number of training epochs
  max_steps_per_epoch: null # Maximum steps per epoch (null for no limit)
  batch_size: 8 # Batch size per device for training
  torchtune_config: "llama3_2_vision/11B_lora" # torchtune recipe config to use
  num_processes_per_node: 8 # Number of processes (GPUs) per node
  distributed: true # Whether to use distributed training
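# NOTE (illustrative, not part of the schema): if the pipeline dispatches to the
# torchtune CLI, a distributed LoRA run with these settings would look roughly like:
#   tune run --nproc_per_node 8 lora_finetune_distributed \
#       --config llama3_2_vision/11B_lora \
#       batch_size=8 epochs=1 output_dir=/tmp/finetuning-pipeline/model_outputs
# The exact recipe name and override keys depend on the installed torchtune version.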
# vLLM Inference configuration
inference:
  # Model configuration
  model_path: "path/to/model/checkpoint" # Path to the model checkpoint
  quantization: null # Quantization method (awq, gptq, squeezellm) or null for none
  dtype: "auto" # Data type for model weights (half, float, bfloat16, auto)
  trust_remote_code: false # Whether to trust remote code when loading the model

  # Server configuration
  port: 8000 # Port to run the server on
  host: "0.0.0.0" # Host to bind the server to

  # Performance configuration
  tensor_parallel_size: 8 # Number of GPUs to use for tensor parallelism
  max_model_len: 8192 # Maximum sequence length
  max_num_seqs: 1 # Maximum number of sequences processed concurrently
  gpu_memory_utilization: 0.95 # Fraction of GPU memory to use
  enforce_eager: false # Enforce eager execution (disable CUDA graphs)

  inference_data_kwargs:
    data_path: "inference/data/path" # Path to the inference dataset
    split: "validation" # Dataset split to load
    formatter_type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
    format_data: false # Whether to format the inference dataset
    max_samples: null # Maximum number of samples to load (null for all)
    is_local: true # Whether the data is stored locally

  # Additional vLLM parameters (optional)
  # swap_space: 4 # Size of CPU swap space per GPU in GiB
  # block_size: 16 # Size of blocks used in the KV cache
  # disable_log_stats: true # Disable logging of stats
  # disable_log_requests: false # Disable logging of requests
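# NOTE (illustrative, not part of the schema): the inference block mirrors the options
# of vLLM's OpenAI-compatible server; started by hand, an equivalent server would be
# roughly:
#   vllm serve path/to/model/checkpoint \
#       --host 0.0.0.0 --port 8000 \
#       --tensor-parallel-size 8 --max-model-len 8192 --max-num-seqs 1 \
#       --gpu-memory-utilization 0.95 --dtype auto
# Flag names follow current vLLM releases and may differ across versions.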