datasets.py 1.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
from dataclasses import dataclass
  4. @dataclass
  5. class samsum_dataset:
  6. dataset: str = "samsum_dataset"
  7. train_split: str = "train"
  8. test_split: str = "validation"
  9. trust_remote_code: bool = False
  10. @dataclass
  11. class grammar_dataset:
  12. dataset: str = "grammar_dataset"
  13. train_split: str = "src/llama_recipes/datasets/grammar_dataset/gtrain_10k.csv"
  14. test_split: str = "src/llama_recipes/datasets/grammar_dataset/grammar_validation.csv"
  15. @dataclass
  16. class alpaca_dataset:
  17. dataset: str = "alpaca_dataset"
  18. train_split: str = "train"
  19. test_split: str = "val"
  20. data_path: str = "src/llama_recipes/datasets/alpaca_data.json"
  21. @dataclass
  22. class custom_dataset:
  23. dataset: str = "custom_dataset"
  24. file: str = "recipes/quickstart/finetuning/datasets/custom_dataset.py"
  25. train_split: str = "train"
  26. test_split: str = "validation"
  27. data_path: str = ""
  28. @dataclass
  29. class llamaguard_toxicchat_dataset:
  30. dataset: str = "llamaguard_toxicchat_dataset"
  31. train_split: str = "train"
  32. test_split: str = "test"