generate_question_answers.py 2.2 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162
  1. import argparse
  2. import asyncio
  3. import json
  4. from config import load_config
  5. from model_handler import generate_questions
  6. from itertools import chain
  7. import logging
  8. import aiofiles # Ensure aiofiles is installed for async file operations
  9. # Configure logging to include the timestamp, log level, and message
  10. logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  11. async def main(context):
  12. try:
  13. logging.info("Starting to generate question/answer pairs.")
  14. data = await generate_questions(context)
  15. if not data:
  16. logging.warning("No data generated. Please check the input context or model configuration.")
  17. return
  18. flattened_list = list(chain.from_iterable(data))
  19. logging.info(f"Successfully generated {len(flattened_list)} question/answer pairs.")
  20. # Use asynchronous file operation for writing to the file
  21. async with aiofiles.open("data.json", "w") as output_file:
  22. await output_file.write(json.dumps(flattened_list, indent=4))
  23. logging.info("Data successfully written to 'data.json'. Process completed.")
  24. except Exception as e:
  25. logging.error(f"An unexpected error occurred during the process: {e}")
  26. def parse_arguments(context):
  27. # Define command line arguments for the script
  28. parser = argparse.ArgumentParser(
  29. description="Generate question/answer pairs from documentation."
  30. )
  31. parser.add_argument(
  32. "-n", "--num_data",
  33. type=int,
  34. default=context["num_data_default"],
  35. help="Specify the number of question/answer pairs to generate."
  36. )
  37. parser.add_argument(
  38. "-m", "--model",
  39. choices=["gpt-3.5-turbo-16k", "gpt-3.5-turbo-0125"],
  40. default="gpt-3.5-turbo-16k",
  41. help="Select the model to use for generation."
  42. )
  43. return parser.parse_args()
  44. if __name__ == "__main__":
  45. logging.info("Initializing the process and loading configuration...")
  46. context = load_config()
  47. args = parse_arguments(context)
  48. context["num_data"] = args.num_data
  49. context["model"] = args.model
  50. logging.info(f"Configuration loaded. Generating {args.num_data} question/answer pairs using model '{args.model}'.")
  51. asyncio.run(main(context))