# # Configuration for data loading, formatting, and fine-tuning
# output_dir: "/tmp/finetune_pipeline/outputs/"  # Directory to store output files
#
# data:
#   data_path: "dz-osamu/IU-Xray"  # Path to the dataset to format (either a Hugging Face dataset ID or a local path)
#   is_local: false  # Whether the data is stored locally
#
#   # Maps custom column names to standard field names
#   column_mapping:
#     input: "query"  # Field containing the input text
#     output: "response"  # Field containing the output text
#     image: "image"  # Field containing the image path (optional)
#
#   # Additional arguments to pass to the load_dataset function
#   # dataset_kwargs:
#   #   split: "train"  # Dataset split to load
#   #   # Add any other dataset-specific arguments here
#
# # Formatter configuration
# formatter:
#   type: "vllm"  # Type of formatter to use ('torchtune', 'vllm', or 'openai')
#
#   # Something like this in the torchtune config
#   # dataset:
#   #   _component_: torchtune.datasets.CustomSFTDataset
#   #   packed: False
#   #   split: train
#   #   seed: null
#   #   shuffle: True
#
# # Training configuration
# finetuning:
#   strategy: "lora"  # Training strategy ('fft' or 'lora')
#   num_epochs: 1  # Number of training epochs
#   batch_size: 1  # Batch size per device for training
#   torchtune_config: "llama3_2_vision/11B_lora"  # TorchTune config to use
#   num_processes_per_node: 8  # Number of processes (GPUs) per node
#   distributed: true  # Whether to use distributed training
#
# # vLLM Inference configuration
# inference:
#   # Model configuration
#   model_path: "/home/ubuntu/yash-workspace/medgemma-4b-it"  # Path to the model checkpoint
#   quantization: null  # Quantization method (awq, gptq, squeezellm)
#
#   # Server configuration
#   port: 8000  # Port to run the server on
#   host: "0.0.0.0"  # Host to run the server on
#
#   # Performance configuration
#   tensor_parallel_size: 1  # Number of GPUs to use for tensor parallelism
#   max_model_len: 32  # Maximum sequence length
#   max_num_seqs: 1  # Maximum number of sequences
#   gpu_memory_utilization: 0.9  # Fraction of GPU memory to use
#   enforce_eager: false  # Enforce eager execution
#   eval_data: "your/eval/dataset/path"  # Path to the evaluation dataset (optional)
#
#   # Additional vLLM parameters (optional)
#   # swap_space: 4  # Size of CPU swap space in GiB
#   # block_size: 16  # Size of blocks used in the KV cache
#   # disable_log_stats: true  # Disable logging of stats
#   # disable_log_requests: false  # Disable logging of requests

# Configuration for data loading, formatting, and fine-tuning
output_dir: "/home/yashkhare/workspace/finetuning-pipeline/"  # Directory to store output files

# Data loading and formatter configuration
formatter:
  type: "torchtune"  # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  data_path: "dz-osamu/IU-Xray"  # Path to the dataset to format (either a Hugging Face dataset ID or a local path)
  is_local: false  # Whether the data is stored locally

  # Maps custom column names to standard field names
  column_mapping:
    input: "query"  # Field containing the input text
    output: "response"  # Field containing the output text
    image: null  # Field containing the image path (optional)

  # Additional arguments to pass to the load_dataset function
  dataset_kwargs:
    split: "validation"  # Dataset split to load

# Training configuration
finetuning:
  model_path: "/home/yashkhare/workspace/Llama-3.1-8B-Instruct"  # Path to the model checkpoint
  tokenizer_path: "/home/yashkhare/workspace/Llama-3.1-8B-Instruct/original/tokenizer.model"  # Path to the tokenizer
  output_dir: ${output_dir}/model_outputs  # Directory to store checkpoints
  log_dir: ${output_dir}/logs  # Directory to store logs
  strategy: "lora"  # Training strategy ('fft' or 'lora')
  num_epochs: 1  # Number of training epochs
  batch_size: 4  # Batch size per device for training
  torchtune_config: "llama3_1/8B_lora"  # TorchTune config to use
  num_processes_per_node: 8  # Number of processes (GPUs) per node
  distributed: true  # Whether to use distributed training
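
# Note: ${output_dir} above uses OmegaConf-style interpolation against the top-level
# output_dir key. A plain yaml.safe_load() keeps the literal "${output_dir}/..." strings,
# so the pipeline presumably loads this file with OmegaConf (an assumption, as is the
# file name below). Minimal sketch:
#
#   from omegaconf import OmegaConf
#
#   cfg = OmegaConf.load("pipeline_config.yaml")  # hypothetical file name
#   print(cfg.finetuning.output_dir)              # interpolation is resolved on access
#   print(cfg.finetuning.log_dir)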

# vLLM Inference configuration
inference:
  # Model configuration
  model_path: "/home/yashkhare/workspace/medgemma-4b-it"  # Path to the model checkpoint
  quantization: null  # Quantization method (awq, gptq, squeezellm)
  dtype: "auto"  # Data type for model weights (half, float, bfloat16, auto)
  trust_remote_code: false  # Trust remote code when loading the model

  # Server configuration
  port: 8000  # Port to run the server on
  host: "0.0.0.0"  # Host to run the server on

  # Performance configuration
  tensor_parallel_size: 1  # Number of GPUs to use for tensor parallelism
  max_model_len: 512  # Maximum sequence length
  max_num_seqs: 1  # Maximum number of sequences
  gpu_memory_utilization: 0.95  # Fraction of GPU memory to use
  enforce_eager: false  # Enforce eager execution
  inference_data: "dz-osamu/IU-Xray"  # Path to the inference dataset (optional)

  # Additional vLLM parameters (optional)
  # swap_space: 4  # Size of CPU swap space in GiB
  # block_size: 16  # Size of blocks used in the KV cache
  # disable_log_stats: true  # Disable logging of stats
  # disable_log_requests: false  # Disable logging of requests
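
# Note: the inference keys above mirror vLLM engine/server arguments. Assuming the
# pipeline exposes vLLM's OpenAI-compatible server on the host/port configured here,
# a client could query it roughly like this (the prompt is illustrative only):
#
#   from openai import OpenAI
#
#   client = OpenAI(base_url="http://0.0.0.0:8000/v1", api_key="EMPTY")
#   resp = client.chat.completions.create(
#       # vLLM serves the model under its --model value (the checkpoint path) by default
#       model="/home/yashkhare/workspace/medgemma-4b-it",
#       messages=[{"role": "user", "content": "Summarize the findings of this chest X-ray report."}],
#       max_tokens=128,
#   )
#   print(resp.choices[0].message.content)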