# # Configuration for data loading, formatting, and fine-tuning
# output_dir: "/tmp/finetune_pipeline/outputs/" # Directory to store output files
# data:
#   data_path: "dz-osamu/IU-Xray" # Path to the dataset to format (either a Hugging Face dataset ID or a local path)
#   is_local: false # Whether the data is stored locally
#   # Maps custom column names to standard field names
#   column_mapping:
#     input: "query" # Field containing the input text
#     output: "response" # Field containing the output text
#     image: "image" # Field containing the image path (optional)
#   # Additional arguments to pass to the load_dataset function
#   # dataset_kwargs:
#   #   split: "train" # Dataset split to load
#   #   # Add any other dataset-specific arguments here
# # Formatter configuration
# formatter:
#   type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
#   # # Something like this in the torchtune config
#   # dataset:
#   #   _component_: torchtune.datasets.CustomSFTDataset
#   #   packed: False
#   #   split: train
#   #   seed: null
#   #   shuffle: True
# # Training configuration
# finetuning:
#   strategy: "lora" # Training strategy ('fft' or 'lora')
#   num_epochs: 1 # Number of training epochs
#   batch_size: 1 # Batch size per device for training
#   torchtune_config: "llama3_2_vision/11B_lora" # torchtune recipe config to use
#   num_processes_per_node: 8 # Number of processes (GPUs) per node
#   distributed: true # Whether to use distributed training
# # vLLM Inference configuration
# inference:
#   # Model configuration
#   model_path: "/home/ubuntu/yash-workspace/medgemma-4b-it" # Path to the model checkpoint
#   quantization: null # Quantization method (awq, gptq, squeezellm)
#   # Server configuration
#   port: 8000 # Port to run the server on
#   host: "0.0.0.0" # Host to bind the server to
#   # Performance configuration
#   tensor_parallel_size: 1 # Number of GPUs to use for tensor parallelism
#   max_model_len: 32 # Maximum sequence length
#   max_num_seqs: 1 # Maximum number of sequences
#   gpu_memory_utilization: 0.9 # Fraction of GPU memory to use
#   enforce_eager: false # Enforce eager execution (disable CUDA graphs)
#   eval_data: "your/eval/dataset/path" # Path to the evaluation dataset (optional)
#   # Additional vLLM parameters (optional)
#   # swap_space: 4 # Size of CPU swap space per GPU in GiB
#   # block_size: 16 # Size of blocks used in the KV cache
#   # disable_log_stats: true # Disable logging of stats
#   # disable_log_requests: false # Disable logging of requests

# Configuration for data loading, formatting, and fine-tuning
output_dir: "/home/yashkhare/workspace/finetuning-pipeline/" # Directory to store output files

# Formatter configuration
formatter:
  type: "torchtune" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  data_path: "dz-osamu/IU-Xray" # Path to the dataset to format (either a Hugging Face dataset ID or a local path)
  is_local: false # Whether the data is stored locally
  # Maps custom column names to standard field names
  column_mapping:
    input: "query" # Field containing the input text
    output: "response" # Field containing the output text
    image: null # Field containing the image path (optional)
  # Additional arguments to pass to the load_dataset function
  dataset_kwargs:
    split: "validation" # Dataset split to load

# Training configuration
finetuning:
  model_path: "/home/yashkhare/workspace/Llama-3.1-8B-Instruct" # Path to the model checkpoint
  tokenizer_path: "/home/yashkhare/workspace/Llama-3.1-8B-Instruct/original/tokenizer.model" # Path to the tokenizer
  output_dir: ${output_dir}/model_outputs # Directory to store checkpoints
  log_dir: ${output_dir}/logs # Directory to store logs
  strategy: "lora" # Training strategy ('fft' or 'lora')
  num_epochs: 1 # Number of training epochs
  batch_size: 4 # Batch size per device for training
  torchtune_config: "llama3_1/8B_lora" # torchtune recipe config to use
  num_processes_per_node: 8 # Number of processes (GPUs) per node
  distributed: true # Whether to use distributed training
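
  # Rough sketch of the torchtune launch this section maps to. The exact recipe and flags
  # depend on the pipeline's launcher; this only shows how the values above line up with
  # torchtune's CLI (recipe and override names below are torchtune's, not this config's):
  #   tune run --nproc_per_node 8 lora_finetune_distributed \
  #     --config llama3_1/8B_lora \
  #     epochs=1 batch_size=4 \
  #     output_dir=${output_dir}/model_outputs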

# vLLM Inference configuration
inference:
  # Model configuration
  model_path: "/home/yashkhare/workspace/medgemma-4b-it" # Path to the model checkpoint
  quantization: null # Quantization method (awq, gptq, squeezellm)
  dtype: "auto" # Data type for model weights (half, float, bfloat16, auto)
  trust_remote_code: false # Trust remote code when loading the model
  # Server configuration
  port: 8000 # Port to run the server on
  host: "0.0.0.0" # Host to bind the server to
  # Performance configuration
  tensor_parallel_size: 1 # Number of GPUs to use for tensor parallelism
  max_model_len: 512 # Maximum sequence length
  max_num_seqs: 1 # Maximum number of concurrent sequences
  gpu_memory_utilization: 0.95 # Fraction of GPU memory to use
  enforce_eager: false # Enforce eager execution (disable CUDA graphs)
  inference_data: "dz-osamu/IU-Xray" # Path to the inference dataset (optional)
  # Additional vLLM parameters (optional)
  # swap_space: 4 # Size of CPU swap space per GPU in GiB
  # block_size: 16 # Size of blocks used in the KV cache
  # disable_log_stats: true # Disable logging of stats
  # disable_log_requests: false # Disable logging of requests
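
  # Rough sketch of the vLLM server launch these settings correspond to, assuming the pipeline
  # starts the stock OpenAI-compatible server (flag names are vLLM's; values are taken from above):
  #   vllm serve /home/yashkhare/workspace/medgemma-4b-it \
  #     --host 0.0.0.0 --port 8000 --dtype auto \
  #     --tensor-parallel-size 1 --max-model-len 512 --max-num-seqs 1 \
  #     --gpu-memory-utilization 0.95
  # Once running, the server can be queried through the OpenAI-style API, e.g.:
  #   curl http://0.0.0.0:8000/v1/chat/completions \
  #     -H "Content-Type: application/json" \
  #     -d '{"model": "/home/yashkhare/workspace/medgemma-4b-it",
  #          "messages": [{"role": "user", "content": "Summarize the radiology findings."}]}'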