# ifeval.yaml
#
# Evaluation task definition for `meta_ifeval`: a zero-shot, free-form
# generation task scored with prompt-level and instruction-level accuracy
# in both "strict" and "loose" variants.
#
# NOTE(review): `!function` is a custom YAML tag, not part of core YAML; it
# only resolves under a loader that registers it (presumably the
# lm-evaluation-harness task loader, which would look the names up in a
# sibling `utils.py` -- confirm against the consumer).

task: meta_ifeval

# Dataset is read from a local Parquet file; the path is relative, so it
# depends on the working directory the evaluation is launched from.
dataset_path: parquet
dataset_kwargs:
  data_files: ./work_dir/joined_ifeval.parquet

output_type: generate_until
# The only split referenced is `train`, and it is used as the test split.
test_split: train
num_fewshot: 0

# Name of the field used as the prompt text.
doc_to_text: prompt
# NOTE(review): constant placeholder target; scoring appears to be done
# entirely by `process_results` below -- confirm.
doc_to_target: 0

generation_kwargs:
  # No stop sequences: generation is bounded only by `max_gen_toks`.
  until: []
  # Sampling disabled, temperature 0.0.
  do_sample: false
  temperature: 0.0
  max_gen_toks: 1280

process_results: !function utils.process_results

# Prompt-level metrics are averaged; instruction-level metrics use a custom
# aggregation function.
metric_list:
  - metric: prompt_level_strict_acc
    aggregation: mean
    higher_is_better: true
  - metric: inst_level_strict_acc
    aggregation: !function utils.agg_inst_level_acc
    higher_is_better: true
  - metric: prompt_level_loose_acc
    aggregation: mean
    higher_is_better: true
  - metric: inst_level_loose_acc
    aggregation: !function utils.agg_inst_level_acc
    higher_is_better: true

metadata:
  # Left unquoted on purpose: this parses as the float 2.0, and quoting it
  # would change the type the consumer receives.
  version: 2.0

fewshot_config:
  sampler: first_n