@@ -3,12 +3,12 @@ model_name: "meta-llama/Meta-Llama-3.1-8B-Instruct" # The name of the model to e

evals_dataset: "meta-llama/Meta-Llama-3.1-8B-Instruct-evals" # The name of the 3.1 evals dataset to evaluate; please make sure this eval dataset corresponds to the model loaded. This must be a valid Meta Llama 3.1 evals dataset name in the Llama 3.1 Evals collection.
# Must be one of the following ["meta-llama/Meta-Llama-3.1-8B-Instruct-evals","meta-llama/Meta-Llama-3.1-70B-Instruct-evals","meta-llama/Meta-Llama-3.1-405B-Instruct-evals","meta-llama/Meta-Llama-3.1-8B-evals","meta-llama/Meta-Llama-3.1-70B-evals","meta-llama/Meta-Llama-3.1-405B-evals"]
-tasks: "meta_math_hard" # Available tasks for instruct model: "meta_math_hard", "meta_gpqa", "meta_mmlu_pro_instruct", "meta_ifeval"; or just use "meta_instruct" to run all of them.
|
|
|
|
|
|
+tasks: "meta_instruct" # Available tasks for instruct model: "meta_math_hard", "meta_gpqa", "meta_mmlu_pro_instruct", "meta_ifeval"; or just use "meta_instruct" to run all of them.
|
|
# Available tasks for pretrain model: "meta_bbh", "meta_mmlu_pro_pretrain"; or just use "meta_pretrain" to run all of them.
|
|
# Available tasks for pretrain model: "meta_bbh", "meta_mmlu_pro_pretrain"; or just use "meta_pretrain" to run all of them.

-tensor_parallel_size: 2 # The vLLM argument that specifies the tensor parallel size for the model, i.e. how many GPUs to use for one model copy.
+tensor_parallel_size: 1 # The vLLM argument that specifies the tensor parallel size for the model, i.e. how many GPUs to use for one model copy.

-data_parallel_size: 4 # The vLLM argument that specifies the data parallel size for the model, i.e. how many copies of the model will be used.
+data_parallel_size: 8 # The vLLM argument that specifies the data parallel size for the model, i.e. how many copies of the model will be used.

gpu_memory_utilization: 0.9 # The vLLM argument that specifies the fraction of GPU memory vLLM may use; within that budget, memory left after loading the model is reserved for the KV cache.
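
The two parallelism settings multiply: tensor_parallel_size × data_parallel_size should match the number of available GPUs, and both the old values (2 × 4) and the new ones (1 × 8) target an 8-GPU node. An 8B model fits comfortably on a single GPU, so eight independent copies (TP=1, DP=8) avoid cross-GPU communication entirely. A minimal sketch of how the per-copy settings reach vLLM, assuming its offline LLM API (the harness may wire this up differently):

```python
# Sketch (assumes vLLM's offline LLM API): one model copy spread across
# `tensor_parallel_size` GPUs.
from vllm import LLM

llm = LLM(
    model="meta-llama/Meta-Llama-3.1-8B-Instruct",
    tensor_parallel_size=1,      # GPUs used by this single model copy
    gpu_memory_utilization=0.9,  # fraction of each GPU's memory vLLM may claim
)
# data_parallel_size is handled one level above vLLM: the eval harness launches
# that many independent replicas like this one and shards the examples across them.
```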
@@ -18,8 +18,8 @@ batch_size: "auto" # Batch size, can be 'auto', 'auto:N', or an integer. It is s

output_path: "eval_results" # The output folder to store all the eval results and samples.

-limit: 16 # Limit number of examples per task; set 'null' to run all.
-#limit: null # Limit number of examples per task.
+#limit: 16 # Limit number of examples per task; set 'null' to run all.
+limit: null # Limit number of examples per task.
verbosity: "INFO" #Logging level: CRITICAL, ERROR, WARNING, INFO, DEBUG.
|
|
verbosity: "INFO" #Logging level: CRITICAL, ERROR, WARNING, INFO, DEBUG.
|
|
|
|
|
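
End-to-end, these fields map almost one-to-one onto an lm-evaluation-harness run with the vLLM backend. A hedged sketch, assuming a recent lm-eval release and that the meta_* task definitions have already been generated into a local include directory; the "./work_dir" path and the config filename are illustrative, not taken from this diff:

```python
# Sketch: driving lm-evaluation-harness from eval_config.yaml.
# "./work_dir" (where the meta_* task YAMLs would live) is an assumed path.
import lm_eval
import yaml
from lm_eval.tasks import TaskManager

with open("eval_config.yaml") as f:
    cfg = yaml.safe_load(f)

# Register the custom meta_* tasks before requesting them by name.
task_manager = TaskManager(include_path="./work_dir")

results = lm_eval.simple_evaluate(
    model="vllm",
    model_args=(
        f"pretrained={cfg['model_name']},"
        f"tensor_parallel_size={cfg['tensor_parallel_size']},"
        f"data_parallel_size={cfg['data_parallel_size']},"
        f"gpu_memory_utilization={cfg['gpu_memory_utilization']}"
    ),
    tasks=[cfg["tasks"]],          # "meta_instruct" expands to all four instruct tasks
    batch_size=cfg["batch_size"],  # "auto" probes for the largest batch that fits
    limit=cfg["limit"],            # YAML null -> Python None: run every example
    task_manager=task_manager,
)
```

With limit: null the scores are computed over each full task, which is what you want for reportable numbers; re-enabling the commented limit: 16 line gives a quick smoke test instead.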