# datasets.py
# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
from dataclasses import dataclass
  4. @dataclass
  5. class samsum_dataset:
  6. dataset: str = "samsum_dataset"
  7. train_split: str = "train"
  8. test_split: str = "validation"
  9. input_length: int = 2048
  10. @dataclass
  11. class grammar_dataset:
  12. dataset: str = "grammar_dataset"
  13. train_split: str = "ft_datasets/grammar_dataset/gtrain_10k.csv"
  14. test_split: str = "ft_datasets/grammar_dataset/grammar_validation.csv"
  15. input_length: int = 2048
  16. @dataclass
  17. class alpaca_dataset:
  18. dataset: str = "alpaca_dataset"
  19. train_split: str = "train"
  20. test_split: str = "val"
  21. data_path: str = "ft_datasets/alpaca_data.json"