working draft of end-to-end pipelines

Kai Wu · 1 year ago · commit 6a83585185

+ 1 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/README.md
(file diff suppressed because it is too large)

+ 0 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/chat_utils.py

@@ -60,7 +60,6 @@ class VllmChatService(ChatService):
             )
             response = await event_loop.run_in_executor(None, api_chat_call)
             assistant_response = next((choice.message.content for choice in response.choices if choice.message.role == 'assistant'), "")
-            print("assistant_response",assistant_response)
             return assistant_response
         except Exception as error:
             logging.error(f"Error during chat request execution: {error}",exc_info=True)

+ 1 - 2
recipes/use_cases/end2end-recipes/chatbot/pipelines/generate_question_answers.py

@@ -7,7 +7,6 @@ import json
 from config import load_config
 from generator_utils import generate_question_batches, generate_data_curation
 from chat_utils import OctoAIChatService, VllmChatService
-from itertools import chain
 import logging
 import aiofiles  # Ensure aiofiles is installed for async file operations
 
@@ -23,11 +22,11 @@ async def main(context):
         chat_service = OctoAIChatService()
     try:
         logging.info("Starting to generate question/answer pairs.")
+        # Generate question/answer pairs as list
         data = await generate_question_batches(chat_service, context)
         if not data:
             logging.warning("No data generated. Please check the input context or model configuration.")
             return
-        data = list(chain.from_iterable(data))
         logging.info(f"Successfully generated {len(data)} question/answer pairs.")
         if context["use_curation"]:
             logging.info("Starting to do self-curation using LLM.")

+ 1 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/generation_config.yaml

@@ -5,7 +5,7 @@ question_prompt_template: >
   which includes LLama, Llama2, Meta Llama3, Code Llama, Meta Llama Guard 1,	Meta Llama Guard 2,
   then extract the context that is related to the question and answer, preferably using the sentences from original text,
   please make sure you follow those rules:
-  1. Generate at most {num_questions} question answer pairs, you can generate less questions if you believe there are nothing related to Llama language models.
+  1. Generate {num_questions} question answer pairs.
   2. For each question and answer pair, add the context that is related to the question and answer, preferably using the sentences from original text
   3. Generate in {language}.
   4. The questions can be answered based *solely* on the given passage.
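The `{num_questions}` and `{language}` placeholders in this template are filled with `str.format` before the prompt is sent (as in `prepare_and_send_request` further down). A minimal sketch with an abbreviated template and example values, not the repo's actual configuration:

```python
# Sketch only: template text is shortened and the values are examples.
question_prompt_template = (
    "1. Generate {num_questions} question answer pairs.\n"
    "3. Generate in {language}."
)
prompt_for_system = question_prompt_template.format(num_questions=3, language="English")
print(prompt_for_system)
```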

+ 12 - 10
recipes/use_cases/end2end-recipes/chatbot/pipelines/generator_utils.py

@@ -121,7 +121,7 @@ def parse_qac_to_json(response_string):
         qa_set.add((clean(question), clean(answer),clean(context)))
     qa_list = [{"Question": q, "Answer":a, "Context":c} for q,a,c in qa_set]
 
-    return json.dumps(qa_list, indent=4)
+    return qa_list
 
 def parse_qa_to_json(response_string):
     split_lines = response_string.split("\n")
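With this change `parse_qac_to_json` hands back Python objects instead of a serialized JSON string, so downstream code can extend a list with the result directly. Roughly, using a placeholder QA pair:

```python
import json

qa_list = [{"Question": "q", "Answer": "a", "Context": "c"}]  # example output shape

# Before: a JSON string, which callers had to decode first.
as_string = json.dumps(qa_list, indent=4)
decoded = json.loads(as_string)

# After: the list itself, which callers can extend with directly.
final_result = []
final_result.extend(qa_list)
assert decoded == final_result
```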
@@ -155,14 +155,13 @@ def parse_qa_to_json(response_string):
     return qa_list
 
 async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, num_questions: int) -> dict:
+    if num_questions == 0:
+        logging.info(f"Error: num_questions is 0")
+        return {}
     prompt_for_system = api_context['question_prompt_template'].format(num_questions=num_questions, language=api_context["language"])
     chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': document_content}]
-    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
     # parse the result string to a list of dict that has Question, Answer, Context
-    result = parse_qac_to_json(result)
-    if not result:
-        return {}
-    return json.loads(await chat_service.execute_chat_request_async(api_context, chat_request_payload,eval=False))
+    return await chat_service.execute_chat_request_async(api_context, chat_request_payload)
 # This function is used to evaluate the quality of generated QA pairs. Return the original QA pair if the model eval result is YES. Otherwise, return an empty dict.
 async def data_curation_request(chat_service, api_context: dict, document_content: dict) -> dict:
     prompt_for_system = api_context['curation_prompt_template'].format(language=api_context["language"])
@@ -208,14 +207,17 @@ async def generate_question_batches(chat_service, api_context: dict):
         questions_in_current_batch = base_questions_per_batch + (1 if batch_index < extra_questions else 0)
         print(f"Batch {batch_index + 1} - {questions_in_current_batch} questions ********")
         try:
-            result = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
-            generation_tasks.append(result)
+            task = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
+            generation_tasks.append(task)
         except Exception as e:
             print(f"Error during chat request execution: {e}")
 
     question_generation_results = await asyncio.gather(*generation_tasks)
-
-    return question_generation_results
+    final_result = []
+    for result in question_generation_results:
+        parsed_json = parse_qac_to_json(result)
+        final_result.extend(parsed_json)
+    return final_result
 
 async def generate_data_curation(chat_service, api_context: dict, generated_questions: list):
     eval_tasks = []
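The reworked `generate_question_batches` now collects the request coroutines, awaits them together with `asyncio.gather`, and flattens the parsed results itself. A self-contained sketch of that pattern, where stub functions stand in for `prepare_and_send_request` and `parse_qac_to_json`:

```python
import asyncio

async def fake_prepare_and_send_request(i):
    # Stand-in for the real request coroutine; returns a raw response string.
    await asyncio.sleep(0)
    return f"response {i}"

def fake_parse_qac_to_json(response):
    # Stand-in for parse_qac_to_json; returns a list of QA dicts.
    return [{"Question": response, "Answer": "", "Context": ""}]

async def generate_batches(num_batches):
    # Collect coroutine objects without awaiting them yet.
    generation_tasks = [fake_prepare_and_send_request(i) for i in range(num_batches)]
    # Await all requests concurrently.
    results = await asyncio.gather(*generation_tasks)
    # Parse each raw result and build one flat list of QA pairs.
    final_result = []
    for result in results:
        final_result.extend(fake_parse_qac_to_json(result))
    return final_result

print(asyncio.run(generate_batches(3)))
```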