__init__.py 1.0 KB

123456789101112131415161718192021
  1. # Copyright (c) Meta Platforms, Inc. and affiliates.
  2. # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
  3. from functools import partial
  4. from llama_cookbook.datasets.grammar_dataset.grammar_dataset import get_dataset as get_grammar_dataset
  5. from llama_cookbook.datasets.alpaca_dataset import InstructionDataset as get_alpaca_dataset
  6. from llama_cookbook.datasets.custom_dataset import get_custom_dataset,get_data_collator
  7. from llama_cookbook.datasets.samsum_dataset import get_preprocessed_samsum as get_samsum_dataset
  8. from llama_cookbook.datasets.toxicchat_dataset import get_llamaguard_toxicchat_dataset as get_llamaguard_toxicchat_dataset
  9. DATASET_PREPROC = {
  10. "alpaca_dataset": partial(get_alpaca_dataset),
  11. "grammar_dataset": get_grammar_dataset,
  12. "samsum_dataset": get_samsum_dataset,
  13. "custom_dataset": get_custom_dataset,
  14. "llamaguard_toxicchat_dataset": get_llamaguard_toxicchat_dataset,
  15. }
  16. DATALOADER_COLLATE_FUNC = {
  17. "custom_dataset": get_data_collator
  18. }