@@ -14,16 +14,7 @@ import json
# Initialize logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Since OctoAI has different naming for llama models, get the huggingface offical model name using OctoAI names.
-def get_model_name(model):
-    if model == "meta-llama-3-70b-instruct":
-        return "meta-llama/Meta-Llama-3-70B-Instruct"
-    elif model == "meta-llama-3-8b-instruct":
-        return "meta-llama/Meta-Llama-3-8B-Instruct"
-    elif model == "llama-2-7b-chat":
-        return "meta-llama/Llama-2-7b-chat-hf"
-    else:
-        return "meta-llama/Llama-2-70b-chat-hf"
+

def read_text_file(file_path):
    try:
        with open(file_path, 'r') as f:
@@ -88,8 +79,13 @@ def read_file_content(context):
    if len(text) == 0:
        logging.error(f"Error reading files, text is empty")
    return ' '.join(file_strings)
-
-
+# clean the text by removing all parts that do not contain any alphanumeric characters
+def clean(s):
+    result = []
+    for item in s.split('"'):
+        if any(c.isalnum() for c in item):
+            result.append(item)
+    return " ".join(result)

def parse_qa_to_json(response_string):
    split_lines = response_string.split("\n")
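
A quick standalone sanity check of the new clean() helper (the sample strings below are invented for illustration, not taken from this PR):

    # clean() keeps only the quote-delimited fragments that contain at least
    # one alphanumeric character, joining them back with single spaces
    def clean(s):
        result = []
        for item in s.split('"'):
            if any(c.isalnum() for c in item):
                result.append(item)
        return " ".join(result)

    assert clean(' "What is Llama 3?", ') == 'What is Llama 3?'
    assert clean('{}[], --') == ''   # pure punctuation disappears entirely
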
@@ -109,21 +105,21 @@ def parse_qa_to_json(response_string):
            end = i
        # found Question means we have reached the end of the question, so add it to qa_list
        elif '"Question":' in line:
-            question = " ".join(" ".join(split_lines[start:end]).split('"Question":')[1].split('"')[1:-1])
-            answer = " ".join(" ".join(split_lines[end:i]).split('"Answer":')[1].split('"')[1:-1])
+            question = " ".join(split_lines[start:end]).split('"Question":')[1]
+            answer = " ".join(split_lines[end:i]).split('"Answer":')[1]
            start,end = i,None
-            qa_set.add((question, answer))
+            qa_set.add((clean(question), clean(answer)))
    # adding last question back to qa_list
-    if start and end:
-        question = " ".join(" ".join(split_lines[start:end]).split('"Question":')[1].split('"')[1:-1])
-        answer = " ".join(" ".join(split_lines[end:i]).split('"Answer":')[1].split('"')[1:-1])
-        qa_set.add((question, answer))
+    if start and end:
+        question = " ".join(split_lines[start:end]).split('"Question":')[1]
+        answer = " ".join(split_lines[end:]).split('"Answer":')[1]
+        qa_set.add((clean(question), clean(answer)))
    qa_list = [{"question": q, "answer":a} for q,a in qa_set]
    return json.dumps(qa_list, indent=4)


-async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, total_questions: int) -> dict:
-    prompt_for_system = api_context['question_prompt_template'].format(total_questions=total_questions, language=api_context["language"])
+async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, num_questions: int) -> dict:
+    prompt_for_system = api_context['question_prompt_template'].format(num_questions=num_questions, language=api_context["language"])
    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': document_content}]
    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
    if not result:
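
A toy trace (invented model output) of the new parsing path: the plain split on '"Question":' keeps stray quotes and commas around the text, which is exactly the residue the clean() helper added above strips off:

    def clean(s):   # repeated from the earlier hunk so this sketch runs standalone
        result = []
        for item in s.split('"'):
            if any(c.isalnum() for c in item):
                result.append(item)
        return " ".join(result)

    # roughly what " ".join(split_lines[start:end]).split('"Question":')[1] yields
    raw_question = ' "What is Llama 3?",'
    print(clean(raw_question))   # -> What is Llama 3?
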
@@ -142,7 +138,8 @@ async def generate_question_batches(chat_service, api_context: dict):

    total_questions = api_context["total_questions"]
    batches_count = len(document_batches)
-    base_questions_per_batch = total_questions // batches_count
+    # each batch should have at least 1 question
+    base_questions_per_batch = max(total_questions // batches_count, 1)
    extra_questions = total_questions % batches_count

    print(f"Questions per batch: {base_questions_per_batch} (+1 for the first {extra_questions} batches), Total questions: {total_questions}, Batches: {batches_count}")
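
Worked example of the new batch sizing (numbers invented): 10 total questions over 4 batches gives a base of 2 per batch, with the first 2 batches taking one extra, i.e. [3, 3, 2, 2]. Note the max(..., 1) floor means more than total_questions can be generated when there are more batches than questions; that is the trade-off for never sending a zero-question request:

    total_questions, batches_count = 10, 4
    base = max(total_questions // batches_count, 1)   # 2
    extra = total_questions % batches_count           # 2
    sizes = [base + (1 if i < extra else 0) for i in range(batches_count)]
    assert sizes == [3, 3, 2, 2] and sum(sizes) == total_questions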