|
@@ -0,0 +1,61 @@
|
|
|
|
+import argparse
|
|
|
|
+import asyncio
|
|
|
|
+import json
|
|
|
|
+from config import load_config
|
|
|
|
+from model_handler import generate_questions
|
|
|
|
+from itertools import chain
|
|
|
|
+import logging
|
|
|
|
+import aiofiles # Ensure aiofiles is installed for async file operations
|
|
|
|
+
|
|
|
|
# Root logger setup: emit INFO and above, rendering each record as
# "<timestamp> - <level name> - <message>".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)
|
|
|
|
+
|
|
|
|
async def main(context):
    """Generate question/answer pairs and write them to ``data.json``.

    Awaits ``generate_questions(context)``, flattens the result by one level
    with ``itertools.chain.from_iterable``, and writes the flattened list as
    indented JSON to ``data.json`` (relative to the current working
    directory).

    Args:
        context: Configuration object forwarded unchanged to
            ``generate_questions``.

    Returns:
        None. If ``generate_questions`` returns a falsy value, a warning is
        logged and no file is written. Any ``Exception`` raised during the
        run is logged together with its traceback and then suppressed, so
        the coroutine itself never propagates it.
    """
    try:
        logging.info("Starting to generate question/answer pairs.")
        data = await generate_questions(context)
        if not data:
            logging.warning("No data generated. Please check the input context or model configuration.")
            return

        flattened_list = list(chain.from_iterable(data))
        logging.info(f"Successfully generated {len(flattened_list)} question/answer pairs.")

        # Serialize BEFORE opening the file: opening with "w" truncates, so a
        # serialization error afterwards would wipe a previous data.json.
        payload = json.dumps(flattened_list, indent=4)

        # Use asynchronous file operation for writing to the file; the
        # encoding is pinned so the output does not depend on the platform
        # default.
        async with aiofiles.open("data.json", "w", encoding="utf-8") as output_file:
            await output_file.write(payload)

        logging.info("Data successfully written to 'data.json'. Process completed.")

    except Exception as e:
        # logging.exception logs at ERROR level like before, but also
        # attaches the traceback so the failure can actually be diagnosed.
        logging.exception(f"An unexpected error occurred during the process: {e}")
|
|
|
|
+
|
|
|
|
def parse_arguments(context, argv=None):
    """Parse the command-line options of the generator script.

    Args:
        context: Mapping that provides ``context["num_data_default"]``, used
            as the default value of ``--num_data``.
        argv: Optional list of argument strings to parse instead of the
            process command line. ``None`` (the default) preserves the
            original behavior: argparse reads ``sys.argv[1:]``.

    Returns:
        argparse.Namespace with two attributes:
        ``num_data`` (converted with ``int`` when given on the command line,
        otherwise the default taken from ``context``) and ``model`` (one of
        the allowed model names, defaulting to ``"gpt-3.5-turbo-16k"``).

    Raises:
        SystemExit: Raised by argparse for invalid arguments or ``--help``.
    """
    # Define command line arguments for the script
    parser = argparse.ArgumentParser(
        description="Generate question/answer pairs from documentation."
    )
    parser.add_argument(
        "-n", "--num_data",
        type=int,
        default=context["num_data_default"],
        help="Specify the number of question/answer pairs to generate."
    )
    parser.add_argument(
        "-m", "--model",
        choices=["gpt-3.5-turbo-16k", "gpt-3.5-turbo-0125"],
        default="gpt-3.5-turbo-16k",
        help="Select the model to use for generation."
    )
    # Forwarding argv lets callers (and tests) supply arguments explicitly;
    # parse_args(None) is identical to the former parse_args().
    return parser.parse_args(argv)
|
|
|
|
+
|
|
|
|
if __name__ == "__main__":
    # Script entry point: load the configuration, overlay the command-line
    # choices on top of it, then drive the async pipeline to completion.
    logging.info("Initializing the process and loading configuration...")
    context = load_config()
    cli_args = parse_arguments(context)

    # Values from the command line are stored into the loaded configuration
    # under the "num_data" and "model" keys.
    context["num_data"] = cli_args.num_data
    context["model"] = cli_args.model

    logging.info(
        f"Configuration loaded. Generating {cli_args.num_data} question/answer pairs using model '{cli_args.model}'."
    )
    pipeline = main(context)
    asyncio.run(pipeline)
|