# Configuration for data loading, formatting, and fine-tuning
output_dir: "/tmp/finetuning-pipeline/llama3_2_vision/" # Directory to store output files

data:
  data_path: "data/path" # Path to the dataset to load
  is_local: true # Whether the data is stored locally
  formatter_type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  system_prompt: "You are a helpful assistant" # System prompt to use for the dataset
  column_mapping:
    input: "instruction" # Field containing the input text
    output: "output" # Field containing the output text
    image: "image" # Field containing the image path (optional)
  # Additional arguments to pass to the load_dataset function
  dataset_kwargs:
    split: "validation" # Dataset split to load
  shuffle: false # Whether to shuffle the dataset
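# NOTE (illustrative, not part of the schema): assuming the loader wraps Hugging Face
# datasets, the block above corresponds roughly to a call like
#   load_dataset("data/path", split="validation")
# with `column_mapping` used to rename fields and `system_prompt` injected by the
# chosen formatter. The exact call depends on the pipeline's data-loading code.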
# Training configuration
finetuning:
  # formatter_type: "torchtune" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
  model_path: "path/to/model" # Path to the model checkpoint
  tokenizer_path: "path/to/tokenizer" # Path to the tokenizer
  output_dir: "/tmp/finetuning-pipeline/model_outputs" # Directory to store checkpoints
  log_dir: "/tmp/finetuning-pipeline/logs" # Directory to store logs
  strategy: "lora" # Training strategy ('fft' or 'lora')
  num_epochs: 1 # Number of training epochs
  max_steps_per_epoch: null # Maximum steps per epoch (null for no limit)
  batch_size: 8 # Batch size per device for training
  torchtune_config: "llama3_2_vision/11B_lora" # torchtune recipe config to use
  num_processes_per_node: 8 # Number of processes (GPUs) per node
  distributed: true # Whether to use distributed training
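# NOTE (illustrative, not part of the schema): if the pipeline dispatches to the
# torchtune CLI, a distributed LoRA run with these settings would look roughly like:
#   tune run --nproc_per_node 8 lora_finetune_distributed \
#       --config llama3_2_vision/11B_lora \
#       batch_size=8 epochs=1 output_dir=/tmp/finetuning-pipeline/model_outputs
# The exact recipe name and override keys depend on the installed torchtune version.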
# vLLM Inference configuration
inference:
  # Model configuration
  model_path: "path/to/model/checkpoint" # Path to the model checkpoint
  quantization: null # Quantization method (awq, gptq, squeezellm) or null for none
  dtype: "auto" # Data type for model weights (half, float, bfloat16, auto)
  trust_remote_code: false # Whether to trust remote code when loading the model

  # Server configuration
  port: 8000 # Port to run the server on
  host: "0.0.0.0" # Host to bind the server to

  # Performance configuration
  tensor_parallel_size: 8 # Number of GPUs to use for tensor parallelism
  max_model_len: 8192 # Maximum sequence length
  max_num_seqs: 1 # Maximum number of sequences processed concurrently
  gpu_memory_utilization: 0.95 # Fraction of GPU memory to use
  enforce_eager: false # Enforce eager execution (disable CUDA graphs)

  inference_data_kwargs:
    data_path: "inference/data/path" # Path to the inference dataset
    split: "validation" # Dataset split to load
    formatter_type: "vllm" # Type of formatter to use ('torchtune', 'vllm', or 'openai')
    format_data: false # Whether to format the inference dataset
    max_samples: null # Maximum number of samples to load (null for all)
    is_local: true # Whether the data is stored locally

  # Additional vLLM parameters (optional)
  # swap_space: 4 # Size of CPU swap space per GPU in GiB
  # block_size: 16 # Size of blocks used in the KV cache
  # disable_log_stats: true # Disable logging of stats
  # disable_log_requests: false # Disable logging of requests
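# NOTE (illustrative, not part of the schema): the inference block mirrors the options
# of vLLM's OpenAI-compatible server; started by hand, an equivalent server would be
# roughly:
#   vllm serve path/to/model/checkpoint \
#       --host 0.0.0.0 --port 8000 \
#       --tensor-parallel-size 8 --max-model-len 8192 --max-num-seqs 1 \
#       --gpu-memory-utilization 0.95 --dtype auto
# Flag names follow current vLLM releases and may differ across versions.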