config.yaml

# Configuration for data loading, formatting, and fine-tuning
output_dir: "/tmp/finetuning-pipeline/llama3_2_vision/" # Directory to store output files
data:
  data_path: "data/path" # Path to the dataset to load
  is_local: true # Whether the data is stored locally
  formatter_type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  system_prompt: "You are a helpful assistant" # System prompt to use for the dataset
  column_mapping:
    input: "instruction" # Field containing the input text
    output: "output" # Field containing the output text
    image: "image" # Field containing the image path (optional)
  # Additional arguments to pass to the load_dataset function
  dataset_kwargs:
    split: "validation" # Dataset split to load
  shuffle: false # Whether to shuffle the dataset
# Training configuration
finetuning:
  #formatter_type: "torchtune" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  model_path: "path/to/model" # Path to the model checkpoint
  tokenizer_path: "path/to/tokenizer" # Path to the tokenizer
  output_dir: /tmp/finetuning-pipeline/model_outputs # Directory to store checkpoints
  log_dir: /tmp/finetuning-pipeline/logs # Directory to store logs
  strategy: "lora" # Training strategy ('fft' or 'lora')
  num_epochs: 1 # Number of training epochs
  max_steps_per_epoch: null # Maximum steps per epoch (null for no limit)
  batch_size: 8 # Batch size per device for training
  torchtune_config: "llama3_2_vision/11B_lora" # TorchTune-specific configuration
  num_processes_per_node: 8 # TorchTune-specific configuration
  distributed: true # Whether to use distributed training
# vLLM Inference configuration
inference:
  # Model configuration
  model_path: "path/to/model/checkpoint" # Path to the model checkpoint
  quantization: null # Quantization method (awq, gptq, squeezellm)
  dtype: "auto" # Data type for model weights (half, float, bfloat16, auto)
  trust_remote_code: false # Trust remote code when loading the model
  # Server configuration
  port: 8000 # Port to run the server on
  host: "0.0.0.0" # Host to run the server on
  # Performance configuration
  tensor_parallel_size: 8 # Number of GPUs to use for tensor parallelism
  max_model_len: 8192 # Maximum sequence length
  max_num_seqs: 1 # Maximum number of sequences
  gpu_memory_utilization: 0.95 # Fraction of GPU memory to use
  enforce_eager: false # Enforce eager execution
  inference_data_kwargs:
    data_path: "inference/data/path" # Path to the inference dataset
    split: "validation" # Dataset split to load
    formatter_type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
    format_data: false # Whether to format the inference dataset
    max_samples: null # Maximum number of samples to load (null for all)
    is_local: true # Whether the data is stored locally
  # Additional vLLM parameters (optional)
  # swap_space: 4 # Size of CPU swap space in GiB
  # block_size: 16 # Size of blocks used in the KV cache
  # disable_log_stats: true # Disable logging of stats
  # disable_log_requests: false # Disable logging of requests