File Overview

working draft of end-to-end pipelines

Kai Wu 1 year ago
parent
commit
6a83585185

The file diffs are limited because there are too many.
+ 1 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/README.md


+ 0 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/chat_utils.py

@@ -60,7 +60,6 @@ class VllmChatService(ChatService):
             )
             response = await event_loop.run_in_executor(None, api_chat_call)
             assistant_response = next((choice.message.content for choice in response.choices if choice.message.role == 'assistant'), "")
-            print("assistant_response",assistant_response)
             return assistant_response
         except Exception as error:
             logging.error(f"Error during chat request execution: {error}",exc_info=True)
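This hunk only drops a leftover debug print; the extraction logic around it is unchanged. For context, here is a minimal, self-contained sketch of the pattern that logic relies on (the dataclasses below are hypothetical stand-ins for the real client's response objects, not the actual API): `next()` with a default returns an empty string instead of raising `StopIteration` when no assistant choice is present.

```python
from dataclasses import dataclass

@dataclass
class Message:
    role: str
    content: str

@dataclass
class Choice:
    message: Message

@dataclass
class Response:
    choices: list

response = Response(choices=[Choice(Message("assistant", "Llama 3 is..."))])
assistant_response = next(
    (c.message.content for c in response.choices if c.message.role == "assistant"),
    "",  # default: empty string when no assistant message is present
)
print(assistant_response)          # -> Llama 3 is...

empty = Response(choices=[])
print(repr(next((c.message.content for c in empty.choices), "")))  # -> ''
```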

+ 1 - 2
recipes/use_cases/end2end-recipes/chatbot/pipelines/generate_question_answers.py

@@ -7,7 +7,6 @@ import json
 from config import load_config
 from generator_utils import generate_question_batches, generate_data_curation
 from chat_utils import OctoAIChatService, VllmChatService
-from itertools import chain
 import logging
 import aiofiles  # Ensure aiofiles is installed for async file operations

@@ -23,11 +22,11 @@ async def main(context):
         chat_service = OctoAIChatService()
     try:
         logging.info("Starting to generate question/answer pairs.")
+        # Generate question/answer pairs as list
         data = await generate_question_batches(chat_service, context)
         if not data:
             logging.warning("No data generated. Please check the input context or model configuration.")
             return
-        data = list(chain.from_iterable(data))
         logging.info(f"Successfully generated {len(data)} question/answer pairs.")
         if context["use_curation"]:
             logging.info("Starting to do self-curation using LLM.")
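With generate_question_batches now returning an already-flat list (see the generator_utils.py diff below), the caller no longer needs itertools.chain, which is why both the import and the flattening line are removed. A minimal sketch of the before/after shapes, with toy QA dicts standing in for real generated pairs:

```python
from itertools import chain

# old shape: one sub-list per batch, flattened by the caller
nested = [[{"Question": "q1"}], [{"Question": "q2"}, {"Question": "q3"}]]
flat_old = list(chain.from_iterable(nested))

# new shape: the generator already returns a flat list of QA dicts
flat_new = [{"Question": "q1"}, {"Question": "q2"}, {"Question": "q3"}]

assert flat_old == flat_new
print(len(flat_new))  # len() now counts QA pairs directly, not batches
```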

+ 1 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/generation_config.yaml

@@ -5,7 +5,7 @@ question_prompt_template: >
  which includes LLama, Llama2, Meta Llama3, Code Llama, Meta Llama Guard 1,	Meta Llama Guard 2,
  then extract the context that is related to the question and answer, preferably using the sentences from original text,
  please make sure you follow those rules:
-  1. Generate at most {num_questions} question answer pairs, you can generate less questions if you believe there are nothing related to Llama language models.
+  1. Generate {num_questions} question answer pairs.
  2. For each question and answer pair, add the context that is related to the question and answer, preferably using the sentences from original text
  3. Generate in {language}.
  4. The questions can be answered based *solely* on the given passage.
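For context, {num_questions} and {language} are ordinary str.format placeholders that prepare_and_send_request fills in before the system prompt is sent. A minimal sketch with an abbreviated template (the real template lives in generation_config.yaml and is loaded via load_config):

```python
# abbreviated stand-in for the YAML template above
question_prompt_template = (
    "Generate {num_questions} question answer pairs. Generate in {language}."
)
prompt_for_system = question_prompt_template.format(
    num_questions=3, language="English"
)
print(prompt_for_system)
# -> Generate 3 question answer pairs. Generate in English.
```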

+ 12 - 10
recipes/use_cases/end2end-recipes/chatbot/pipelines/generator_utils.py

@@ -121,7 +121,7 @@ def parse_qac_to_json(response_string):
         qa_set.add((clean(question), clean(answer),clean(context)))
     qa_list = [{"Question": q, "Answer":a, "Context":c} for q,a,c in qa_set]

-    return json.dumps(qa_list, indent=4)
+    return qa_list

 def parse_qa_to_json(response_string):
     split_lines = response_string.split("\n")
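Returning the parsed list directly spares every caller a serialize/deserialize round trip, and makes parse_qac_to_json consistent with parse_qa_to_json, which already returns a list. A minimal sketch with toy data:

```python
import json

qa_list = [{"Question": "q", "Answer": "a", "Context": "c"}]

# old: serialize on return, deserialize again at the call site
round_tripped = json.loads(json.dumps(qa_list, indent=4))

# new: hand the Python list back as-is
assert round_tripped == qa_list
```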
@@ -155,14 +155,13 @@ def parse_qa_to_json(response_string):
     return qa_list

 async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, num_questions: int) -> dict:
+    if num_questions == 0:
+        logging.info(f"Error: num_questions is 0")
+        return {}
     prompt_for_system = api_context['question_prompt_template'].format(num_questions=num_questions, language=api_context["language"])
     chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': document_content}]
-    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
     # parse the result string to a list of dict that has Question, Answer, Context
-    result = parse_qac_to_json(result)
-    if not result:
-        return {}
-    return json.loads(await chat_service.execute_chat_request_async(api_context, chat_request_payload,eval=False))
+    return await chat_service.execute_chat_request_async(api_context, chat_request_payload)
 # This function is used to evaluate the quality of generated QA pairs. Return the original QA pair if the model eval result is YES. Otherwise, return an empty dict.
 async def data_curation_request(chat_service, api_context: dict, document_content: dict) -> dict:
     prompt_for_system = api_context['curation_prompt_template'].format(language=api_context["language"])
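Besides deferring parsing to the caller, this rewrite removes what appears to be a duplicated request: the old code called execute_chat_request_async twice per batch, once for the parsed-then-discarded result and once for the returned value. A minimal sketch of the new shape, using stub functions in place of the real chat service and parser:

```python
import asyncio

async def execute_chat_request_async(payload):
    # stand-in for the real chat service call
    return "fake model reply"

async def prepare_and_send_request(payload):
    # send the request exactly once and hand back the raw reply
    return await execute_chat_request_async(payload)

def parse_qac_to_json(raw):
    # stub parser standing in for the real regex-based one
    return [{"Question": "q", "Answer": "a", "Context": raw}]

async def main():
    raw = await prepare_and_send_request([{"role": "user", "content": "hi"}])
    print(parse_qac_to_json(raw))  # parsing now happens at the call site

asyncio.run(main())
```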
@@ -208,14 +207,17 @@ async def generate_question_batches(chat_service, api_context: dict):
         questions_in_current_batch = base_questions_per_batch + (1 if batch_index < extra_questions else 0)
         print(f"Batch {batch_index + 1} - {questions_in_current_batch} questions ********")
         try:
-            result = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
-            generation_tasks.append(result)
+            task = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
+            generation_tasks.append(task)
         except Exception as e:
             print(f"Error during chat request execution: {e}")

     question_generation_results = await asyncio.gather(*generation_tasks)
-
-    return question_generation_results
+    final_result = []
+    for result in question_generation_results:
+        parsed_json = parse_qac_to_json(result)
+        final_result.extend(parsed_json)
+    return final_result

 async def generate_data_curation(chat_service, api_context: dict, generated_questions: list):
     eval_tasks = []
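The net effect in generate_question_batches: coroutine objects are collected unawaited into generation_tasks (hence the result-to-task rename), run concurrently with asyncio.gather, and only then parsed and flattened into one list of QA dicts, which is what lets generate_question_answers.py drop its own flattening step. A minimal, runnable sketch of that control flow, with stub request/parse functions standing in for the real chat service and parser:

```python
import asyncio

async def fake_request(batch_index: int) -> str:
    # stand-in for prepare_and_send_request / the real chat request
    await asyncio.sleep(0)
    return f"raw reply for batch {batch_index}"

def fake_parse(raw: str) -> list:
    # stand-in for parse_qac_to_json
    return [{"Question": raw, "Answer": "...", "Context": "..."}]

async def generate_question_batches(num_batches: int) -> list:
    tasks = [fake_request(i) for i in range(num_batches)]  # coroutines, not results
    raw_results = await asyncio.gather(*tasks)             # run them concurrently
    final_result = []
    for raw in raw_results:
        final_result.extend(fake_parse(raw))               # parse and flatten
    return final_result

print(asyncio.run(generate_question_batches(3)))
```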