@@ -8,18 +8,12 @@ from octoai.client import Client
 import asyncio
 import magic
 from PyPDF2 import PdfReader
-from functools import partial
 import json
 from doc_processor import split_text_into_chunks
 import logging
-
 # Initialize logging
 logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
 
-# Manage rate limits with throttling
-rate_limit_threshold = 2000
-allowed_concurrent_requests = int(rate_limit_threshold * 0.75)
-request_limiter = asyncio.Semaphore(allowed_concurrent_requests)
 
 def read_text_file(file_path):
     try:
@@ -81,36 +75,13 @@ def parse_qa_to_json(response_string):
     # Convert the list to a JSON string
     return json.dumps(qa_list, indent=4)
 
-async def execute_chat_request_async(api_context: dict, chat_request):
-    async with request_limiter:
-        try:
-            event_loop = asyncio.get_running_loop()
-            # Prepare the API call
-            client = Client(api_context['api_key'])
-            api_chat_call = partial(
-                client.chat.completions.create,
-                model=api_context['model'],
-                messages=chat_request,
-                temperature=0.0
-            )
-            # Execute the API call in a separate thread
-            response = await event_loop.run_in_executor(None, api_chat_call)
-            # Extract and return the assistant's response
-            # return next((message['message']['content'] for message in response.choices if message['message']['role'] == 'assistant'), "")
-            assistant_response = next((choice.message.content for choice in response.choices if choice.message.role == 'assistant'), "")
-            assistant_response_json = parse_qa_to_json(assistant_response)
-
-            return assistant_response_json
-        except Exception as error:
-            print(f"Error during chat request execution: {error}")
-            return ""
-
-async def prepare_and_send_request(api_context: dict, document_content: str, total_questions: int) -> dict:
+
+async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, total_questions: int) -> dict:
     prompt_for_system = api_context['question_prompt_template'].format(total_questions=total_questions, language=api_context["language"])
     chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': document_content}]
-    return json.loads(await execute_chat_request_async(api_context, chat_request_payload))
+    return json.loads(await chat_service.execute_chat_request_async(api_context, chat_request_payload))
 
-async def generate_question_batches(api_context: dict):
+async def generate_question_batches(chat_service, api_context: dict):
     document_text = read_file_content(api_context)
     tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", pad_token="</s>", padding_side="right")
     document_batches = split_text_into_chunks(api_context, document_text, tokenizer)
@@ -127,8 +98,7 @@ async def generate_question_batches(api_context: dict):
         #Distribute extra questions across the first few batches
         questions_in_current_batch = base_questions_per_batch + (1 if batch_index < extra_questions else 0)
         print(f"Batch {batch_index + 1} - {questions_in_current_batch} questions ********")
-        generation_tasks.append(prepare_and_send_request(api_context, batch_content, questions_in_current_batch))
-        # generation_tasks.append(prepare_and_send_request(api_context, document_batches_2[0], total_questions))
+        generation_tasks.append(prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch))
 
     question_generation_results = await asyncio.gather(*generation_tasks)
 
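
The hunks above delete the file's OctoAI-specific execute_chat_request_async helper, along with the module-level semaphore it used for throttling, and instead pass a chat_service object into prepare_and_send_request and generate_question_batches. All this diff requires of chat_service is an awaitable execute_chat_request_async(api_context, chat_request) that returns the JSON string the caller feeds to json.loads. A minimal sketch of such a service, reassembled from the removed code, follows; the class name, constructor argument, and the parse_qa_to_json import are illustrative assumptions, not part of this change.

# Hypothetical sketch: the concrete chat service is expected to live in a separate
# module of this change; the class name, module paths, and defaults below are assumptions.
import asyncio
from functools import partial

from octoai.client import Client

# parse_qa_to_json is the helper defined in the file patched above; the module name is assumed.
from generator_utils import parse_qa_to_json


class OctoAIChatService:
    """Owns the OctoAI call and the request throttling this diff removes from the generator."""

    def __init__(self, rate_limit_threshold: int = 2000):
        # Mirror the removed module-level semaphore: allow roughly 75% of the rate limit concurrently.
        self.request_limiter = asyncio.Semaphore(int(rate_limit_threshold * 0.75))

    async def execute_chat_request_async(self, api_context: dict, chat_request):
        async with self.request_limiter:
            try:
                loop = asyncio.get_running_loop()
                client = Client(api_context['api_key'])
                api_chat_call = partial(
                    client.chat.completions.create,
                    model=api_context['model'],
                    messages=chat_request,
                    temperature=0.0,
                )
                # The SDK call is blocking, so run it in a worker thread to keep the event loop free.
                response = await loop.run_in_executor(None, api_chat_call)
                assistant_response = next(
                    (choice.message.content for choice in response.choices
                     if choice.message.role == 'assistant'),
                    "",
                )
                # Return the JSON string that callers pass to json.loads, as the removed helper did.
                return parse_qa_to_json(assistant_response)
            except Exception as error:
                print(f"Error during chat request execution: {error}")
                return ""

Callers would then construct the service once, e.g. chat_service = OctoAIChatService(), and pass it to generate_question_batches(chat_service, api_context).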