# --- Earlier example configuration, kept commented out for reference ---
# # Configuration for data loading, formatting, and fine-tuning
# output_dir: "/tmp/finetune_pipeline/outputs/" # Directory to store output files
# data:
#   data_path: "dz-osamu/IU-Xray" # Path to the dataset to format (either a Hugging Face dataset ID or a local path)
#   is_local: false # Whether the data is stored locally
#   # Maps custom column names to standard field names
#   column_mapping:
#     input: "query" # Field containing the input text
#     output: "response" # Field containing the output text
#     image: "image" # Field containing the image path (optional)
#   # Additional arguments to pass to the load_dataset function
#   # dataset_kwargs:
#   #   split: "train" # Dataset split to load
#   #   # Add any other dataset-specific arguments here
# # Formatter configuration
# formatter:
#   type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
#   # Something like this in the torchtune config:
#   # dataset:
#   #   _component_: torchtune.datasets.CustomSFTDataset
#   #   packed: False
#   #   split: train
#   #   seed: null
#   #   shuffle: True
# # Training configuration
# finetuning:
#   strategy: "lora" # Training strategy ('fft' or 'lora')
#   num_epochs: 1 # Number of training epochs
#   batch_size: 1 # Batch size per device for training
#   torchtune_config: "llama3_2_vision/11B_lora" # torchtune recipe config to use
#   num_processes_per_node: 8 # Number of processes (GPUs) per node
#   distributed: true # Whether to use distributed training
# # vLLM inference configuration
# inference:
#   # Model configuration
#   model_path: "/home/ubuntu/yash-workspace/medgemma-4b-it" # Path to the model checkpoint
#   quantization: null # Quantization method (awq, gptq, squeezellm)
#   # Server configuration
#   port: 8000 # Port to run the server on
#   host: "0.0.0.0" # Host to bind the server to
#   # Performance configuration
#   tensor_parallel_size: 1 # Number of GPUs to use for tensor parallelism
#   max_model_len: 32 # Maximum sequence length
#   max_num_seqs: 1 # Maximum number of sequences processed concurrently
#   gpu_memory_utilization: 0.9 # Fraction of GPU memory to use
#   enforce_eager: false # Enforce eager execution (disable CUDA graphs)
#   eval_data: "your/eval/dataset/path" # Path to the evaluation dataset (optional)
#   # Additional vLLM parameters (optional)
#   # swap_space: 4 # Size of CPU swap space in GiB
#   # block_size: 16 # Size of blocks used in the KV cache
#   # disable_log_stats: true # Disable logging of stats
#   # disable_log_requests: false # Disable logging of requests

# Configuration for data loading, formatting, and fine-tuning
output_dir: "/home/yashkhare/workspace/finetuning-pipeline/" # Directory to store output files

# Formatter configuration
formatter:
  type: "torchtune" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  data_path: "dz-osamu/IU-Xray" # Path to the dataset to format (either a Hugging Face dataset ID or a local path)
  is_local: false # Whether the data is stored locally
  # Maps custom column names to standard field names
  column_mapping:
    input: "query" # Field containing the input text
    output: "response" # Field containing the output text
    image: null # Field containing the image path (optional)
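    # Illustrative note (assumption about pipeline behavior): with this mapping,
    # each record's "query" column is read as the standard "input" field and
    # "response" as "output"; with image set to null, no image field is used.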
  # Additional arguments to pass to the load_dataset function
  dataset_kwargs:
    split: "validation" # Dataset split to load
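    # Any other keyword argument accepted by datasets.load_dataset can be added
    # here as well, e.g. (illustrative):
    # revision: "main" # Pin the dataset to a specific revision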

# Training configuration
finetuning:
  model_path: "/home/yashkhare/workspace/Llama-3.1-8B-Instruct" # Path to the base model checkpoint
  tokenizer_path: "/home/yashkhare/workspace/Llama-3.1-8B-Instruct/original/tokenizer.model" # Path to the tokenizer
  output_dir: ${output_dir}/model_outputs # Directory to store checkpoints
  log_dir: ${output_dir}/logs # Directory to store logs
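  # Note (assumption): the ${output_dir} references above rely on OmegaConf-style
  # interpolation; a plain YAML loader will not expand them.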
  strategy: "lora" # Training strategy ('fft' or 'lora')
  num_epochs: 1 # Number of training epochs
  batch_size: 4 # Batch size per device for training
  torchtune_config: "llama3_1/8B_lora" # torchtune recipe config to use
  num_processes_per_node: 8 # Number of processes (GPUs) per node
  distributed: true # Whether to use distributed training
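  # For reference (illustrative; exact recipe names vary by torchtune version),
  # these settings roughly correspond to a launch such as:
  #   tune run --nproc_per_node 8 lora_finetune_distributed --config llama3_1/8B_lora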

# vLLM inference configuration
inference:
  # Model configuration
  model_path: "/home/yashkhare/workspace/medgemma-4b-it" # Path to the model checkpoint
  quantization: null # Quantization method (awq, gptq, squeezellm)
  dtype: "auto" # Data type for model weights (half, float, bfloat16, auto)
  trust_remote_code: false # Trust remote code when loading the model
  # Server configuration
  port: 8000 # Port to run the server on
  host: "0.0.0.0" # Host to bind the server to
  # Performance configuration
  tensor_parallel_size: 1 # Number of GPUs to use for tensor parallelism
  max_model_len: 512 # Maximum sequence length (prompt + generated tokens)
  max_num_seqs: 1 # Maximum number of sequences processed concurrently
  gpu_memory_utilization: 0.95 # Fraction of GPU memory to use
  enforce_eager: false # Enforce eager execution (disable CUDA graphs)
  inference_data: "dz-osamu/IU-Xray" # Path to the inference dataset (optional)
  # Additional vLLM parameters (optional)
  # swap_space: 4 # Size of CPU swap space in GiB
  # block_size: 16 # Size of blocks used in the KV cache
  # disable_log_stats: true # Disable logging of stats
  # disable_log_requests: false # Disable logging of requests
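
# For reference (illustrative, assuming vLLM's OpenAI-compatible `vllm serve` CLI),
# the inference block above maps roughly to:
#   vllm serve /home/yashkhare/workspace/medgemma-4b-it \
#     --host 0.0.0.0 --port 8000 \
#     --tensor-parallel-size 1 --max-model-len 512 --max-num-seqs 1 \
#     --gpu-memory-utilization 0.95 --dtype auto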