generate_question_answers.py 2.4 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. # Copyright (c) Meta Platforms, Inc. and affiliates.
  2. # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
  3. import argparse
  4. import asyncio
  5. import json
  6. from config import load_config
  7. from generator_utils import generate_question_batches
  8. from itertools import chain
  9. import logging
  10. import aiofiles # Ensure aiofiles is installed for async file operations
  11. # Configure logging to include the timestamp, log level, and message
  12. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  13. async def main(context):
  14. try:
  15. logging.info("Starting to generate question/answer pairs.")
  16. data = await generate_question_batches(context)
  17. if not data:
  18. logging.warning("No data generated. Please check the input context or model configuration.")
  19. return
  20. flattened_list = list(chain.from_iterable(data))
  21. logging.info(f"Successfully generated {len(flattened_list)} question/answer pairs.")
  22. # Use asynchronous file operation for writing to the file
  23. async with aiofiles.open("data.json", "w") as output_file:
  24. await output_file.write(json.dumps(flattened_list, indent=4))
  25. logging.info("Data successfully written to 'data.json'. Process completed.")
  26. except Exception as e:
  27. logging.error(f"An unexpected error occurred during the process: {e}")
  28. def parse_arguments(context):
  29. # Define command line arguments for the script
  30. parser = argparse.ArgumentParser(
  31. description="Generate question/answer pairs from documentation."
  32. )
  33. parser.add_argument(
  34. "-t", "--total_questions",
  35. type=int,
  36. default=context["total_questions"],
  37. help="Specify the number of question/answer pairs to generate."
  38. )
  39. parser.add_argument(
  40. "-m", "--model",
  41. choices=["gpt-3.5-turbo-16k", "gpt-3.5-turbo-0125"],
  42. default="gpt-3.5-turbo-16k",
  43. help="Select the model to use for generation."
  44. )
  45. return parser.parse_args()
  46. if __name__ == "__main__":
  47. logging.info("Initializing the process and loading configuration...")
  48. context = load_config()
  49. args = parse_arguments(context)
  50. context["total_questions"] = args.total_questions
  51. context["model"] = args.model
  52. logging.info(f"Configuration loaded. Generating {args.total_questions} question/answer pairs using model '{args.model}'.")
  53. asyncio.run(main(context))