working draft of end-to-end pipelines

Kai Wu · 1 year ago · commit 6a83585185

+ 1 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/README.md
(file diff suppressed because it is too large)

+ 0 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/chat_utils.py

@@ -60,7 +60,6 @@ class VllmChatService(ChatService):
             )
             response = await event_loop.run_in_executor(None, api_chat_call)
             assistant_response = next((choice.message.content for choice in response.choices if choice.message.role == 'assistant'), "")
-            print("assistant_response",assistant_response)
             return assistant_response
         except Exception as error:
             logging.error(f"Error during chat request execution: {error}",exc_info=True)

+ 1 - 2
recipes/use_cases/end2end-recipes/chatbot/pipelines/generate_question_answers.py

@@ -7,7 +7,6 @@ import json
 from config import load_config
 from generator_utils import generate_question_batches, generate_data_curation
 from chat_utils import OctoAIChatService, VllmChatService
-from itertools import chain
 import logging
 import aiofiles  # Ensure aiofiles is installed for async file operations
 
@@ -23,11 +22,11 @@ async def main(context):
         chat_service = OctoAIChatService()
     try:
         logging.info("Starting to generate question/answer pairs.")
+        # Generate question/answer pairs as list
         data = await generate_question_batches(chat_service, context)
         if not data:
             logging.warning("No data generated. Please check the input context or model configuration.")
             return
-        data = list(chain.from_iterable(data))
         logging.info(f"Successfully generated {len(data)} question/answer pairs.")
         if context["use_curation"]:
             logging.info("Starting to do self-curation using LLM.")

+ 1 - 1
recipes/use_cases/end2end-recipes/chatbot/pipelines/generation_config.yaml

@@ -5,7 +5,7 @@ question_prompt_template: >
   which includes LLama, Llama2, Meta Llama3, Code Llama, Meta Llama Guard 1,	Meta Llama Guard 2,
   then extract the context that is related to the question and answer, preferably using the sentences from original text,
   please make sure you follow those rules:
-  1. Generate at most {num_questions} question answer pairs, you can generate less questions if you believe there are nothing related to Llama language models.
+  1. Generate {num_questions} question answer pairs.
   2. For each question and answer pair, add the context that is related to the question and answer, preferably using the sentences from original text
   3. Generate in {language}.
   4. The questions can be answered based *solely* on the given passage.
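The `{num_questions}` and `{language}` placeholders in this template are filled with `str.format` before the prompt is sent (as in `prepare_and_send_request` further down). A minimal sketch with an abbreviated template and example values, not the repo's actual configuration:

```python
# Sketch only: template text is shortened and the values are examples.
question_prompt_template = (
    "1. Generate {num_questions} question answer pairs.\n"
    "3. Generate in {language}."
)
prompt_for_system = question_prompt_template.format(num_questions=3, language="English")
print(prompt_for_system)
```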

+ 12 - 10
recipes/use_cases/end2end-recipes/chatbot/pipelines/generator_utils.py

@@ -121,7 +121,7 @@ def parse_qac_to_json(response_string):
         qa_set.add((clean(question), clean(answer),clean(context)))
     qa_list = [{"Question": q, "Answer":a, "Context":c} for q,a,c in qa_set]
 
-    return json.dumps(qa_list, indent=4)
+    return qa_list
 
 def parse_qa_to_json(response_string):
     split_lines = response_string.split("\n")
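With this change `parse_qac_to_json` hands back Python objects instead of a serialized JSON string, so downstream code can extend a list with the result directly. Roughly, using a placeholder QA pair:

```python
import json

qa_list = [{"Question": "q", "Answer": "a", "Context": "c"}]  # example output shape

# Before: a JSON string, which callers had to decode first.
as_string = json.dumps(qa_list, indent=4)
decoded = json.loads(as_string)

# After: the list itself, which callers can extend with directly.
final_result = []
final_result.extend(qa_list)
assert decoded == final_result
```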
@@ -155,14 +155,13 @@ def parse_qa_to_json(response_string):
     return qa_list
 
 async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, num_questions: int) -> dict:
+    if num_questions == 0:
+        logging.info(f"Error: num_questions is 0")
+        return {}
     prompt_for_system = api_context['question_prompt_template'].format(num_questions=num_questions, language=api_context["language"])
     chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': document_content}]
-    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
     # parse the result string to a list of dict that has Question, Answer, Context
-    result = parse_qac_to_json(result)
-    if not result:
-        return {}
-    return json.loads(await chat_service.execute_chat_request_async(api_context, chat_request_payload,eval=False))
+    return await chat_service.execute_chat_request_async(api_context, chat_request_payload)
 # This function is used to evaluate the quality of generated QA pairs. Return the original QA pair if the model eval result is YES. Otherwise, return an empty dict.
 async def data_curation_request(chat_service, api_context: dict, document_content: dict) -> dict:
     prompt_for_system = api_context['curation_prompt_template'].format(language=api_context["language"])
@@ -208,14 +207,17 @@ async def generate_question_batches(chat_service, api_context: dict):
         questions_in_current_batch = base_questions_per_batch + (1 if batch_index < extra_questions else 0)
         print(f"Batch {batch_index + 1} - {questions_in_current_batch} questions ********")
         try:
-            result = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
-            generation_tasks.append(result)
+            task = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
+            generation_tasks.append(task)
         except Exception as e:
             print(f"Error during chat request execution: {e}")
 
     question_generation_results = await asyncio.gather(*generation_tasks)
-
-    return question_generation_results
+    final_result = []
+    for result in question_generation_results:
+        parsed_json = parse_qac_to_json(result)
+        final_result.extend(parsed_json)
+    return final_result
 
 async def generate_data_curation(chat_service, api_context: dict, generated_questions: list):
     eval_tasks = []
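The reworked `generate_question_batches` now collects the request coroutines, awaits them together with `asyncio.gather`, and flattens the parsed results itself. A self-contained sketch of that pattern, where stub functions stand in for `prepare_and_send_request` and `parse_qac_to_json`:

```python
import asyncio

async def fake_prepare_and_send_request(i):
    # Stand-in for the real request coroutine; returns a raw response string.
    await asyncio.sleep(0)
    return f"response {i}"

def fake_parse_qac_to_json(response):
    # Stand-in for parse_qac_to_json; returns a list of QA dicts.
    return [{"Question": response, "Answer": "", "Context": ""}]

async def generate_batches(num_batches):
    # Collect coroutine objects without awaiting them yet.
    generation_tasks = [fake_prepare_and_send_request(i) for i in range(num_batches)]
    # Await all requests concurrently.
    results = await asyncio.gather(*generation_tasks)
    # Parse each raw result and build one flat list of QA pairs.
    final_result = []
    for result in results:
        final_result.extend(fake_parse_qac_to_json(result))
    return final_result

print(asyncio.run(generate_batches(3)))
```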