Browse source code

fix peft.py and remove chatbot folder

Kai Wu, 10 months ago
parent
commit
98ee7488e6

File diff suppressed because it is too large
+ 0 - 207
recipes/use_cases/end2end-recipes/chatbot/README.md


BIN
recipes/use_cases/end2end-recipes/chatbot/eval-loss-3runs.png


File diff suppressed because it is too large
+ 0 - 129
recipes/use_cases/end2end-recipes/chatbot/pipelines/README.md


+ 0 - 66
recipes/use_cases/end2end-recipes/chatbot/pipelines/chat_utils.py

@@ -1,66 +0,0 @@
-import asyncio
-import logging
-from abc import ABC, abstractmethod
-from octoai.client import OctoAI
-from functools import partial
-from openai import OpenAI
-import json
-# Configure logging to include the timestamp, log level, and message
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-# Since OctoAI has different naming for llama models, create this mapping to get huggingface offical model name given OctoAI names.
-MODEL_NAME_MAPPING={"meta-llama-3-70b-instruct":"meta-llama/Meta-Llama-3-70B-Instruct",
-"meta-llama-3-8b-instruct":"meta-llama/Meta-Llama-3-8B-Instruct","llama-2-7b-chat":"meta-llama/Llama-2-7b-chat-hf"
-,"llama-2-70b-chat":"meta-llama/Llama-2-70b-chat-hf"}
-# Manage rate limits with throttling
-rate_limit_threshold = 2000
-allowed_concurrent_requests = int(rate_limit_threshold * 0.75)
-request_limiter = asyncio.Semaphore(allowed_concurrent_requests)
-class ChatService(ABC):
-    @abstractmethod
-    async def execute_chat_request_async(self, api_context: dict, chat_request):
-        pass
-
-# Please implement your own chat service class here.
-# The class should inherit from the ChatService class and implement the execute_chat_request_async method.
-# The following are two example chat service classes that you can use as a reference.
-class OctoAIChatService(ChatService):
-    async def execute_chat_request_async(self, api_context: dict, chat_request):
-        async with request_limiter:
-            try:
-                event_loop = asyncio.get_running_loop()
-                client = OctoAI(api_context['api_key'])
-                api_chat_call = partial(
-                    client.chat.completions.create,
-                    model=api_context['model'],
-                    messages=chat_request,
-                    temperature=0.0
-                )
-                response = await event_loop.run_in_executor(None, api_chat_call)
-                assistant_response = next((choice.message.content for choice in response.choices if choice.message.role == 'assistant'), "")
-                return assistant_response
-            except Exception as error:
-                logging.error(f"Error during chat request execution: {error}",exc_info=True)
-                return ""
-# Use the local vllm openai compatible server for generating question/answer pairs to make API call syntax consistent
-# please read for more detail:https://docs.vllm.ai/en/latest/serving/openai_compatible_server.html.
-class VllmChatService(ChatService):
-    async def execute_chat_request_async(self, api_context: dict, chat_request):
-        try:
-            event_loop = asyncio.get_running_loop()
-            if api_context["model"] in MODEL_NAME_MAPPING:
-                model_name = MODEL_NAME_MAPPING[api_context['model']]
-            else:
-                model_name = api_context['model']
-            client = OpenAI(api_key=api_context['api_key'], base_url="http://localhost:"+ str(api_context['endpoint'])+"/v1")
-            api_chat_call = partial(
-                client.chat.completions.create,
-                model=model_name,
-                messages=chat_request,
-                temperature=0.0
-            )
-            response = await event_loop.run_in_executor(None, api_chat_call)
-            assistant_response = next((choice.message.content for choice in response.choices if choice.message.role == 'assistant'), "")
-            return assistant_response
-        except Exception as error:
-            logging.error(f"Error during chat request execution: {error}",exc_info=True)
-            return ""

+ 0 - 18
recipes/use_cases/end2end-recipes/chatbot/pipelines/config.py

@@ -1,18 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
-
-import yaml
-import os
-
-def load_config(config_path: str = "./config.yaml"):
-    # Read the YAML configuration file
-    with open(config_path, "r") as file:
-        config = yaml.safe_load(file)
-    # Set the API key from the environment variable
-    try:
-        config["api_key"] = os.environ["OCTOAI_API_TOKEN"]
-    except KeyError:
-        print("API token did not found, please set the OCTOAI_API_TOKEN environment variable if using OctoAI, otherwise set api_key to default EMPTY")
-        # local Vllm endpoint did not need API key, so set the API key to "EMPTY" if OCTOAI_API_TOKEN not found
-        config["api_key"] = "EMPTY"
-    return config

+ 0 - 47
recipes/use_cases/end2end-recipes/chatbot/pipelines/doc_processor.py

@@ -1,47 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
-
-# Assuming result_average_token is a constant, use UPPER_CASE for its name to follow Python conventions
-AVERAGE_TOKENS_PER_RESULT = 100
-
-def get_token_limit_for_model(model: str) -> int:
-    """Returns the token limit for a given model."""
-    if model == "llama-2-13b-chat" or model == "llama-2-70b-chat":
-        return 4096
-    else:
-        return 8192
-
-def calculate_num_tokens_for_message(encoded_text) -> int:
-    """Calculates the number of tokens used by a message."""
-    # Added 3 to account for priming with assistant's reply, as per original comment
-    return len(encoded_text) + 3
-
-
-def split_text_into_chunks(context: dict, text: str, tokenizer) -> list[str]:
-    """Splits a long text into substrings based on token length constraints, adjusted for question generation."""
-    # Adjusted approach to calculate max tokens available for text chunks
-    encoded_text = tokenizer(text, return_tensors="pt", padding=True)["input_ids"]
-    encoded_text = encoded_text.squeeze()
-    model_token_limit = get_token_limit_for_model(context["model"])
-
-    tokens_for_questions = calculate_num_tokens_for_message(encoded_text)
-    estimated_tokens_per_question = AVERAGE_TOKENS_PER_RESULT
-    estimated_total_question_tokens = estimated_tokens_per_question * context["total_questions"]
-    # Ensure there's a reasonable minimum chunk size
-    max_tokens_for_text = max(model_token_limit - tokens_for_questions - estimated_total_question_tokens, model_token_limit // 10)
-
-    chunks, current_chunk = [], []
-    print(f"Splitting text into chunks of {max_tokens_for_text} tokens, encoded_text {len(encoded_text)}", flush=True)
-    for token in encoded_text:
-        if len(current_chunk) >= max_tokens_for_text:
-            chunks.append(tokenizer.decode(current_chunk).strip())
-            current_chunk = []
-        else:
-            current_chunk.append(token)
-
-    if current_chunk:
-        chunks.append(tokenizer.decode(current_chunk).strip())
-
-    print(f"Number of chunks in the processed text: {len(chunks)}", flush=True)
-
-    return chunks

+ 0 - 163
recipes/use_cases/end2end-recipes/chatbot/pipelines/eval_chatbot.py

@@ -1,163 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
-from chat_utils import OctoAIChatService, VllmChatService
-import logging
-import evaluate
-import argparse
-from config import load_config
-import asyncio
-import json
-from itertools import chain
-from generator_utils import parse_qa_to_json, generate_LLM_eval
-
-def compute_rouge_score(generated : str, reference: str):
-    rouge_score = evaluate.load('rouge')
-    return rouge_score.compute(
-        predictions=generated,
-        references=reference,
-        use_stemmer=True,
-        use_aggregator=True
-    )
-def compute_bert_score(generated : str, reference: str):
-    bertscore = evaluate.load("bertscore")
-    score = bertscore.compute(
-        predictions=generated,
-        references=reference,
-        lang="en"
-    )
-    f1 = score["f1"]
-    precision = score["precision"]
-    recall = score["recall"]
-    return sum(precision)/len(precision), sum(recall)/len(recall), sum(f1)/len(f1)
-# This function is used to eval the fine-tuned model, given the question, generate the answer.
-async def eval_request(chat_service, api_context: dict, question: str) -> dict:
-    prompt_for_system = api_context['eval_prompt_template'].format(language=api_context["language"])
-    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': f"Question: {question}"}]
-    # Getting a list of result, in this case, there should be only one result
-    response_string = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
-    # convert the result string to a dict that contains Question, Answer
-    result_list = parse_qa_to_json(response_string)
-    if not result_list or len(result_list) > 1:
-        print("Error: eval response should be a list of one result dict")
-        return {}
-    result = result_list[0]
-    if "Answer" not in result:
-        print("Error: eval response does not contain answer")
-        return {}
-    # Send back the model generated answer
-
-    return result["Answer"]
-
-async def generate_eval_answer(chat_service, api_context: dict, questions: list):
-    eval_tasks = []
-    for batch_index, question in enumerate(questions):
-        try:
-            result = eval_request(chat_service, api_context, question)
-            eval_tasks.append(result)
-        except Exception as e:
-            print(f"Error during data eval request execution: {e}")
-    print(len(eval_tasks),"eval_tasks")
-    eval_results = await asyncio.gather(*eval_tasks)
-
-    return eval_results
-
-async def main(context):
-    if context["endpoint"]:
-        chat_service = VllmChatService()
-    else:
-        chat_service = OctoAIChatService()
-    try:
-        logging.info("Starting to generate answer given the eval set.")
-        with open(context["eval_json"]) as fp:
-            eval_json = json.load(fp)
-        questions,groud_truth = [],[]
-        for index, item in enumerate(eval_json):
-            questions.append(item["question"])
-            groud_truth.append(item["answer"])
-        generated_answers = await generate_eval_answer(chat_service, context,questions)
-        if not generated_answers:
-            logging.warning("No answers generated. Please check the input context or model configuration.")
-            return
-        logging.info(f"Successfully generated {len(generated_answers)} answers.")
-        judge_list = []
-        for index, item in enumerate(generated_answers):
-            judge_list.append({"Question":questions[index],"Ground_truth":groud_truth[index],"Generated_answer":generated_answers[index]})
-        if context["judge_endpoint"]:
-            # make a copy of the context then change the VLLM endpoint to judge_endpoint
-            context_copy = dict(context)
-            context_copy["endpoint"] = context["judge_endpoint"]
-            context_copy["model"] = "meta-llama/Meta-Llama-3-70B-Instruct"
-            judge_results = await generate_LLM_eval(chat_service, context_copy, judge_list)
-            correct_num = 0
-            for result in judge_results:
-                correct_num += result["Result"] == "YES"
-            LLM_judge_score = correct_num/len(judge_results)
-            print(f"The accuracy of the model is {LLM_judge_score}")
-        rouge_score = compute_rouge_score(generated_answers,groud_truth)
-        print("Rouge_score:",rouge_score)
-        P, R, F1 = compute_bert_score(generated_answers,groud_truth)
-        print(f"BERTScore Precision: {P:.4f}, Recall: {R:.4f}, F1: {F1:.4f}")
-        # Saving the eval result to a log file
-        with open(context["output_log"],"a") as fp:
-            fp.write(f"Eval_result for {context['model']} \n")
-            fp.write(f"Rouge_score: {rouge_score} \n")
-            fp.write(f"BERTScore Precision: {P:.4f}, Recall: {R:.4f}, F1: {F1:.4f} \n")
-            if context["judge_endpoint"]:
-                fp.write(f"LLM_judge_score: {LLM_judge_score} \n")
-            fp.write(f"QA details: \n")
-            for item in judge_list:
-                fp.write(f"question: {item['Question']} \n")
-                fp.write(f"generated_answers: {item['Generated_answer']} \n")
-                fp.write(f"groud_truth: {item['Ground_truth']} \n")
-                fp.write("\n")
-        logging.info(f"Eval successfully, the eval result is saved to {context['output_log']}.")
-    except Exception as e:
-        logging.error(f"An unexpected error occurred during the process: {e}",exc_info=True)
-
-def parse_arguments():
-    # Define command line arguments for the script
-    parser = argparse.ArgumentParser(
-        description="Generate question/answer pairs from documentation."
-    )
-    parser.add_argument(
-        "-m", "--model",
-        default="chatbot",
-        help="Select the model to use for evaluation, this maybe a LoRA adapter."
-    )
-    parser.add_argument(
-        "-c", "--config_path",
-        default="eval_config.yaml",
-        help="Set the configuration file path that has system prompt along with language, evalset path."
-    )
-    parser.add_argument(
-        "-v", "--vllm_endpoint",
-        default=None,
-        type=int,
-        help="If a port is specified, then use local vllm endpoint for evaluations."
-    )
-    parser.add_argument(
-        "-j", "--judge_endpoint",
-        default=None,
-        type=int,
-        help="If a port is specified, then use local vllm endpoint as judge LLM."
-    )
-    parser.add_argument(
-        "-o", "--output_log",
-        default="eval_result.log",
-        help="save the eval result to a log file. Default is eval_result.log"
-    )
-    return parser.parse_args()
-
-if __name__ == "__main__":
-    logging.info("Initializing the process and loading configuration...")
-    args = parse_arguments()
-    context = load_config(args.config_path)
-    context["model"] = args.model
-    context["endpoint"] = args.vllm_endpoint
-    context["judge_endpoint"] = args.judge_endpoint
-    context["output_log"] = args.output_log
-    if context["endpoint"]:
-        logging.info(f"Use local vllm service for eval at port: '{args.vllm_endpoint}'.")
-    if context["judge_endpoint"]:
-        logging.info(f"Use local vllm service for judge at port: '{args.judge_endpoint}'.")
-    asyncio.run(main(context))

+ 0 - 23
recipes/use_cases/end2end-recipes/chatbot/pipelines/eval_config.yaml

@@ -1,23 +0,0 @@
-eval_prompt_template: >
-  You are a AI assistant that skilled in answering questions related to Llama language models,
-  which includes LLama, Llama2, Meta Llama3, Code Llama, Meta Llama Guard 1,	Meta Llama Guard 2,
-  Below is a question from a llama user, think step by step and then answer it in {language}, make the answer as concise as possible, it should be at most 100 words.
-  Return the result with the template:
-  [
-    {{
-      "Question": "The question user asked to you"
-      "Answer": "Your answer to the question"
-  }}
-  ]
-judge_prompt_template: >
-  You are provided with a question, a teacher's answer and a student's answer. Given that question, you need to score the how good the student answer is compare to
-  the teacher's answer. If the student's answer is correct based on the teacher's answer, then return YES. If the answer is not faithful, then return NO
-  and explain which part of the student's answer is not faithful in the Reason section.
-  Return the result in json format with the template:
-    {{
-      "Reason": "your reason here.",
-      "Result": "YES or NO."
-    }}
-eval_json: "./evalset.json"
-
-language: "English"

File diff suppressed because it is too large
+ 0 - 178
recipes/use_cases/end2end-recipes/chatbot/pipelines/evalset.json


+ 0 - 90
recipes/use_cases/end2end-recipes/chatbot/pipelines/generate_question_answers.py

@@ -1,90 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# This software may be used and distributed according to the terms of the Llama 3 Community License Agreement.
-
-import argparse
-import asyncio
-import json
-from config import load_config
-from generator_utils import generate_question_batches, generate_data_curation
-from chat_utils import OctoAIChatService, VllmChatService
-import logging
-import aiofiles  # Ensure aiofiles is installed for async file operations
-
-
-# Configure logging to include the timestamp, log level, and message
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-
-async def main(context):
-    if context["endpoint"]:
-        chat_service = VllmChatService()
-    else:
-        chat_service = OctoAIChatService()
-    try:
-        logging.info("Starting to generate question/answer pairs.")
-        # Generate question/answer pairs as list
-        data = await generate_question_batches(chat_service, context)
-        if not data:
-            logging.warning("No data generated. Please check the input context or model configuration.")
-            return
-        logging.info(f"Successfully generated {len(data)} question/answer pairs.")
-        if context["use_curation"]:
-            logging.info("Starting to do self-curation using LLM.")
-            data = await generate_data_curation(chat_service, context,data)
-            logging.info(f"Only {len(data)} question/answer pairs pass the self-curation")
-        async with aiofiles.open(context['output_path'], "w") as output_file:
-             await output_file.write(json.dumps(data, indent=4))
-        logging.info(f"Data successfully written to {context['output_path']}. Process completed.")
-    except Exception as e:
-        logging.error(f"An unexpected error occurred during the process: {e}",exc_info=True)
-
-def parse_arguments():
-    # Define command line arguments for the script
-    parser = argparse.ArgumentParser(
-        description="Generate question/answer pairs from documentation."
-    )
-    parser.add_argument(
-        "-t", "--total_questions",
-        type=int,
-        default=100,
-        help="Specify the total number of question/answer pairs to generate."
-    )
-    parser.add_argument(
-        "-m", "--model",
-        choices=["meta-llama-3-70b-instruct","meta-llama-3-8b-instruct","llama-2-13b-chat", "llama-2-70b-chat"],
-        default="meta-llama-3-70b-instruct",
-        help="Select the model to use for generation."
-    )
-    parser.add_argument(
-        "-c", "--config_path",
-        default="./generation_config.yaml",
-        help="Set the configuration file path that has system prompt along with language, dataset path and number of questions."
-    )
-    parser.add_argument(
-        "-v", "--vllm_endpoint",
-        default=None,
-        type=int,
-        help="If a port is specified, then use local vllm endpoint for generating question/answer pairs."
-    )
-    parser.add_argument(
-        "-o", "--output_path",
-        default="./data.json",
-        help="set the output path for the generated QA pairs. Default is data.json"
-    )
-    return parser.parse_args()
-
-if __name__ == "__main__":
-    logging.info("Initializing the process and loading configuration...")
-    args = parse_arguments()
-
-    context = load_config(args.config_path)
-    context["total_questions"] = args.total_questions
-    context["model"] = args.model
-    context["endpoint"] = args.vllm_endpoint
-    # If curation prompt is not empty, then use self-curation
-    context["use_curation"] = len(context["curation_prompt_template"]) > 0
-    context["output_path"] = args.output_path
-    logging.info(f"Configuration loaded. Generating {args.total_questions} question/answer pairs using model '{args.model}'.")
-    if context["endpoint"]:
-        logging.info(f"Use local vllm service at port: '{args.vllm_endpoint}'.")
-    asyncio.run(main(context))

+ 0 - 50
recipes/use_cases/end2end-recipes/chatbot/pipelines/generation_config.yaml

@@ -1,50 +0,0 @@
-question_prompt_template: >
-  You are a language model skilled in creating quiz questions.
-  You will be provided with a document,
-  read it and please generate question and answer pairs that are most likely be asked by a user of Llama language models,
-  which includes LLama, Llama2, Meta Llama3, Code Llama, Meta Llama Guard 1,	Meta Llama Guard 2,
-  then extract the context that is related to the question and answer, preferably using the sentences from original text,
-  please make sure you follow those rules:
-  1. Generate {num_questions} question answer pairs, you can generate less answer if there is nothing related to model, training, fine-tuning and evaluation details of Llama language models, .
-  2. For each question and answer pair, add the context that is related to the question and answer, preferably using the sentences from original text
-  3. Generate in {language}.
-  4. The questions can be answered based *solely* on the given passage.
-  5. Avoid asking questions with similar meaning.
-  6. Make the answer as concise as possible, it should be at most 100 words.
-  7. Provide relevant links from the document to support the answer.
-  8. Never use any abbreviation.
-  9. Return the result in json format with the template:
-    [
-      {{
-        "Question": "your question A.",
-        "Answer": "your answer to question A."
-        "Context": "the context for question A"
-      }},
-      {{
-        "Question": "your question B.",
-        "Answer": "your answer to question B."
-        "Context": "the context for question B"
-      }}
-    ]
-
-curation_prompt_template: >
-  Below is a question and answer pair (QA pair) and its related context about Llama language models,
-  which includes LLama, Llama2, Meta Llama3, Code Llama, Meta Llama Guard 1,	Meta Llama Guard 2.
-  Given the context, evaluate whether or not this qusestion and answer pair is related to Llama language models, including model, training, fine-tuning and evaluation details,
-  and whether this question and answer is relevant to the context.
-  Note that the answer in the QA pair can be the same or similar as the context, as repetition of context is allowed.
-  Respond with only a single JSON blob with an "Reason" field that is a short (less than 100 word)
-  explanation of your answer and an "Result" field which is YES or NO.
-  Only answer "YES", if the question and answer pair is based on the context and provides relevant information about Llama language models.
-  Only generate the answer in {language}.
-  Return the result in json format with the template:
-    {{
-      "Reason": "your reason here.",
-      "Result": "YES or No."
-    }},
-
-data_dir: "./data"
-
-language: "English"
-
-num_questions: 2

+ 0 - 267
recipes/use_cases/end2end-recipes/chatbot/pipelines/generator_utils.py

@@ -1,267 +0,0 @@
-# Copyright (c) Meta Platforms, Inc. and affiliates.
-# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.
-
-import os
-import re
-import string
-from transformers import  AutoTokenizer
-import asyncio
-import magic
-from PyPDF2 import PdfReader
-import json
-from doc_processor import split_text_into_chunks
-import logging
-import json
-# Initialize logging
-logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
-
-def read_text_file(file_path):
-    try:
-        with open(file_path, 'r') as f:
-            text = f.read().strip() + ' '
-            if len(text) == 0:
-                print("File is empty ",file_path)
-            return text
-    except Exception as e:
-        logging.error(f"Error reading text file {file_path}: {e}")
-    return ''
-
-def read_pdf_file(file_path):
-    try:
-        with open(file_path, 'rb') as f:
-            pdf_reader = PdfReader(f)
-            num_pages = len(pdf_reader.pages)
-            file_text = [pdf_reader.pages[page_num].extract_text().strip() + ' ' for page_num in range(num_pages)]
-            text = ''.join(file_text)
-            if len(text) == 0:
-                print("File is empty ",file_path)
-            return ''.join(file_text)
-    except Exception as e:
-        logging.error(f"Error reading PDF file {file_path}: {e}")
-    return ''
-
-def read_json_file(file_path):
-    try:
-        with open(file_path, 'r') as f:
-            data = json.load(f)
-            # Assuming each item in the list has a 'question' and 'answer' key
-            # Concatenating question and answer pairs with a space in between and accumulating them into a single string
-            file_text = ' '.join([item['question'].strip() + ' ' + item['answer'].strip() + ' ' for item in data])
-            if len(file_text) == 0:
-                print("File is empty ",file_path)
-            return file_text
-    except Exception as e:
-        logging.error(f"Error reading JSON file {file_path}: {e}")
-    return ''
-
-
-def process_file(file_path):
-    print("starting to process file: ", file_path)
-    file_type = magic.from_file(file_path, mime=True)
-    if file_type in ['text/plain', 'text/markdown', 'JSON']:
-        return read_text_file(file_path)
-    elif file_type == 'application/pdf':
-        return read_pdf_file(file_path)
-    else:
-        logging.warning(f"Unsupported file type {file_type} for file {file_path}")
-        return ''
-def remove_non_printable(s):
-    printable = set(string.printable)
-    return ''.join(filter(lambda x: x in printable, s))
-def read_file_content(context):
-    file_strings = []
-
-    for root, _, files in os.walk(context['data_dir']):
-        for file in files:
-            file_path = os.path.join(root, file)
-            file_text = process_file(file_path)
-            if file_text:
-                file_strings.append(file_text)
-    text = '\n'.join(file_strings)
-    text = remove_non_printable(text)
-    with open(context['data_dir'] + '/' + 'all_text.txt', 'w') as f:
-        f.write(text)
-    return remove_non_printable(text)
-# clean the text by removing all parts that did not contain any alphanumeric characters
-def clean(s):
-        result = []
-        for item in s.split('"'):
-            if any(c.isalnum() for c in item):
-                result.append(item)
-        return " ".join(result)
-# given a response string, return a string that can be saved as json.
-def parse_qac_to_json(response_string):
-    split_lines = response_string.split("\n")
-    start,mid,end = None,None,None
-    # must use set to avoid duplicate question/answer pairs due to async function calls
-    qa_set = set()
-    for i in range(len(split_lines)):
-        line = split_lines[i]
-        # starting to find "Question"
-        if not start:
-            # Once found, set start to this line number
-            if '"Question":' in line:
-                start = i
-        else:
-            # "Question" has been found, find "Answer", once found, set end to this line number
-            if '"Answer":' in line:
-                mid = i
-            elif '"Context":' in line:
-                end = i
-            # found Question means we have reached the end of the question, so add it to qa_list
-            elif '"Question":' in line:
-                question = " ".join(split_lines[start:mid]).split('"Question":')[1]
-                answer = " ".join(split_lines[mid:end]).split('"Answer":')[1]
-                context = " ".join(split_lines[end:i]).split('"Context":')[1]
-                start,mid,end = i,None,None
-                qa_set.add((clean(question), clean(answer),clean(context)))
-        # adding last question back to qa_list
-    if start and mid and end:
-        question = " ".join(split_lines[start:mid]).split('"Question":')[1]
-        answer = " ".join(split_lines[mid:end]).split('"Answer":')[1]
-        context = " ".join(split_lines[end:]).split('"Context":')[1]
-        start,mid,end = i,None,None
-        qa_set.add((clean(question), clean(answer),clean(context)))
-    qa_list = [{"Question": q, "Answer":a, "Context":c} for q,a,c in qa_set]
-
-    return qa_list
-
-def parse_qa_to_json(response_string):
-    split_lines = response_string.split("\n")
-    start,end = None,None
-    # must use set to avoid duplicate question/answer pairs due to async function calls
-    qa_set = set()
-    for i in range(len(split_lines)):
-        line = split_lines[i]
-        # starting to find "Question"
-        if not start:
-            # Once found, set start to this line number
-            if '"Question":' in line:
-                start = i
-        else:
-            # "Question" has been found, find "Answer", once found, set end to this line number
-            if '"Answer":' in line:
-                end = i
-            # found Question means we have reached the end of the question, so add it to qa_list
-            elif '"Question":' in line:
-                question = " ".join(split_lines[start:end]).split('"Question":')[1]
-                answer = " ".join(split_lines[end:i]).split('"Answer":')[1]
-                start,end = i,None
-                qa_set.add((clean(question), clean(answer)))
-        # adding last question back to qa_list
-    if start and end:
-        question = " ".join(split_lines[start:end]).split('"Question":')[1]
-        answer = " ".join(split_lines[end:]).split('"Answer":')[1]
-        qa_set.add((clean(question), clean(answer)))
-    qa_list = [{"Question": q, "Answer":a} for q,a in qa_set]
-
-    return qa_list
-
-async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, num_questions: int) -> dict:
-    if num_questions == 0:
-        logging.info(f"Error: num_questions is 0")
-        return {}
-    prompt_for_system = api_context['question_prompt_template'].format(num_questions=num_questions, language=api_context["language"])
-    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': document_content}]
-    # parse the result string to a list of dict that has Question, Answer, Context
-    return await chat_service.execute_chat_request_async(api_context, chat_request_payload)
-# This function is used to evaluate the quality of generated QA pairs. Return the original QA pair if the model eval result is YES. Otherwise, return an empty dict.
-async def data_curation_request(chat_service, api_context: dict, document_content: dict) -> dict:
-    prompt_for_system = api_context['curation_prompt_template'].format(language=api_context["language"])
-    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': f"Question: {document_content['Question']} \n Answer: {document_content['Answer']}\n Context: {document_content['Context']} "}]
-    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
-    if not result:
-        return {}
-    # no parsing needed, just return the loads the result as a dict
-    result = json.loads(result)
-    if "Result" not in result:
-        print("Error: eval response does not contain answer")
-        print(document_content,result)
-        return {}
-    # Send back the original QA pair is the model eval result is YES
-    if result["Result"] == "YES":
-        return document_content
-    else:
-        print(document_content,result)
-    return {}
-
-
-async def generate_question_batches(chat_service, api_context: dict):
-    document_text = read_file_content(api_context)
-    if len(document_text)== 0:
-        logging.error(f"Error reading files, document_text is empty")
-    if api_context["model"] in ["meta-llama-3-70b-instruct","meta-llama-3-8b-instruct"]:
-        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", pad_token="</s>", padding_side="right")
-    else:
-        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", pad_token="</s>", padding_side="right")
-    document_batches = split_text_into_chunks(api_context, document_text, tokenizer)
-
-    total_questions = api_context["total_questions"]
-    batches_count = len(document_batches)
-    # each batch should have at least 1 question
-    base_questions_per_batch = max(total_questions // batches_count,1)
-    extra_questions = total_questions % batches_count
-
-    print(f"Questions per batch: {base_questions_per_batch} (+1 for the first {extra_questions} batches), Total questions: {total_questions}, Batches: {batches_count}")
-    generation_tasks = []
-    for batch_index, batch_content in enumerate(document_batches):
-        print(f"len of batch_content: {len(batch_content)}, batch_index: {batch_index}")
-        #Distribute extra questions across the first few batches
-        questions_in_current_batch = base_questions_per_batch + (1 if batch_index < extra_questions else 0)
-        print(f"Batch {batch_index + 1} - {questions_in_current_batch} questions ********")
-        try:
-            task = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
-            generation_tasks.append(task)
-        except Exception as e:
-            print(f"Error during chat request execution: {e}")
-
-    question_generation_results = await asyncio.gather(*generation_tasks)
-    final_result = []
-    for result in question_generation_results:
-        parsed_json = parse_qac_to_json(result)
-        final_result.extend(parsed_json)
-    return final_result
-
-async def generate_data_curation(chat_service, api_context: dict, evaluation_list: list):
-    eval_tasks = []
-    for batch_index, batch_content in enumerate(evaluation_list):
-        try:
-            result = data_curation_request(chat_service, api_context, batch_content)
-            eval_tasks.append(result)
-        except Exception as e:
-            print(f"Error during data eval request execution: {e}")
-
-    eval_results = await asyncio.gather(*eval_tasks)
-    curated_data = []
-    for item in eval_results:
-        # if the item is not empty, add it to the curated data list
-        if item:
-            curated_data.append(item)
-    return curated_data
-
-# This function is used to evaluate the quality of generated QA pairs. Return the original QA pair if the model eval result is YES. Otherwise, return an empty dict.
-async def LLM_judge_request(chat_service, api_context: dict, document_content: dict) -> dict:
-    prompt_for_system = api_context['judge_prompt_template'].format(language=api_context["language"])
-    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': f"Question: {document_content['Question']} \n Teacher's Answer: {document_content['Ground_truth']}\n Student's Answer: {document_content['Generated_answer']} "}]
-    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
-    if not result:
-        return {}
-    # no parsing needed, just return the loads the result as a dict
-    result = json.loads(result)
-    if "Result" not in result:
-        print("Error: eval response does not contain answer")
-        print(document_content,result)
-        return {}
-    return result
-
-async def generate_LLM_eval(chat_service, api_context: dict, judge_list: list):
-    eval_tasks = []
-    for batch_index, batch_content in enumerate(judge_list):
-        try:
-            result = LLM_judge_request(chat_service, api_context, batch_content)
-            eval_tasks.append(result)
-        except Exception as e:
-            print(f"Error during data eval request execution: {e}")
-
-    judge_results = await asyncio.gather(*eval_tasks)
-    return judge_results

BIN
recipes/use_cases/end2end-recipes/chatbot/poor-test-1.png


BIN
recipes/use_cases/end2end-recipes/chatbot/poor-test-2.png


BIN
recipes/use_cases/end2end-recipes/chatbot/train-loss-3runs.png


+ 173 - 0
recipes/use_cases/end2end-recipes/raft/format.py

@@ -0,0 +1,173 @@
+from abc import ABC, abstractmethod
+import argparse
+from datasets import Dataset, load_dataset
+from typing import Dict, Literal, Any, get_args
+
+"""
+This file allows to convert raw HuggingFace Datasets into files suitable to fine tune completion and chat models.
+"""
+
+OutputDatasetType = Literal["parquet", "jsonl"]
+outputDatasetTypes = list(get_args(OutputDatasetType))
+
+InputDatasetType = Literal["arrow", "jsonl"]
+inputDatasetTypes = list(get_args(InputDatasetType))
+
+DatasetFormat = Literal["hf", "completion", "chat"]
+datasetFormats = list(get_args(DatasetFormat))
+
+def get_args() -> argparse.Namespace:
+    """
+    Parses and returns the arguments specified by the user's command
+    """
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("--input", type=str, required=True, help="Input HuggingFace dataset file")
+    parser.add_argument("--input-type", type=str, default="arrow", help="Format of the input dataset. Defaults to arrow.", choices=inputDatasetTypes)
+    parser.add_argument("--output", type=str, required=True, help="Output file")
+    parser.add_argument("--output-format", type=str, required=True, help="Format to convert the dataset to", choices=datasetFormats)
+    parser.add_argument("--output-type", type=str, default="jsonl", help="Type to export the dataset to. Defaults to jsonl.", choices=outputDatasetTypes)
+    parser.add_argument("--output-chat-system-prompt", type=str, help="The system prompt to use when the output format is chat")
+
+    args = parser.parse_args()
+    return args
+
+class DatasetFormatter(ABC):
+    """
+    Base class for dataset formatters. Formatters rename columns, remove and add 
+    columns to match the expected target format structure. HF, Chat or Completion models file formats.
+    https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
+    """
+    @abstractmethod
+    def format(self, ds: Dataset, params: Dict[str, str]) -> Dataset:
+        pass
+
+class DatasetExporter(ABC):
+    """
+    Base class for dataset exporters. Exporters export dataset to different file types, JSONL, Parquet, ...
+    """
+    @abstractmethod
+    def export(self, ds: Dataset, output_path: str):
+        pass
+
+class DatasetConverter():
+    """
+    Entry point class. It resolves which DatasetFormatter and which DatasetExporter to use and runs them.
+    """
+    formats: Dict[DatasetFormat, DatasetFormatter]
+    exporters: Dict[OutputDatasetType, Any]
+
+    def __init__(self) -> None:
+        self.formats = {
+            "hf": HuggingFaceDatasetFormatter(),
+            "completion": OpenAiCompletionDatasetFormatter(),
+            "chat": OpenAiChatDatasetFormatter()
+        }
+        self.exporters = {
+            "parquet": ParquetDatasetExporter(),
+            "jsonl": JsonlDatasetExporter()
+        }
+
+    def convert(self, ds: Dataset, format: DatasetFormat, output_path: str, output_type: OutputDatasetType, params: Dict[str, str]):
+        if not format in self.formats:
+            raise Exception(f"Output Format {format} is not supported, pleased select one of {self.formats.keys()}")
+        
+        if not output_type in self.exporters:
+            raise Exception(f"Output Type {output_type} is not supported, pleased select one of {self.exporters.keys()}")
+
+        formatter = self.formats[format]
+        newds = formatter.format(ds, params)
+        exporter = self.exporters[output_type]
+        exporter.export(newds, output_path)
+
+class HuggingFaceDatasetFormatter(DatasetFormatter):
+    """
+    Returns the HuggingFace Dataset as is
+    """
+    def format(self, ds: Dataset, params: Dict[str, str]) -> Dataset:
+        return ds
+
+def _remove_all_columns_but(ds: Dataset, keep_columns) -> Dataset:
+    """
+    HF Dataset doesn't have a way to copy only specific columns of a Dataset so this help
+    removes all columns but the ones specified.
+    """
+    remove_columns = list(ds.column_names)
+    for keep in keep_columns:
+        remove_columns.remove(keep)
+    ds = ds.remove_columns(remove_columns)
+    return ds
+
+class OpenAiCompletionDatasetFormatter(DatasetFormatter):
+    """
+    Returns the Dataset in the OpenAI Completion Fine-tuning file format with two fields "prompt" and "completion".
+    https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
+    """
+    def format(self, ds: Dataset, params: Dict[str, str]) -> Dataset:
+        newds = ds.rename_columns({'question': 'prompt', 'cot_answer': 'completion'})
+        return _remove_all_columns_but(newds, ['prompt', 'completion'])
+
+class OpenAiChatDatasetFormatter(OpenAiCompletionDatasetFormatter):
+    """
+    Returns the Dataset in the OpenAI Chat Fine-tuning file format with one field "messages".
+    https://platform.openai.com/docs/guides/fine-tuning/preparing-your-dataset
+    """
+    def format(self, ds: Dataset, params: Dict[str, str]) -> Dataset:
+        newds = super().format(ds, params)
+
+        def format_messages(row):
+            messages = []
+            if 'system_prompt' in params:
+                system_prompt = params['system_prompt']
+                messages.append({ "role": "system", "content": system_prompt})
+            messages.extend([{ "role": "user", "content": row['prompt']}, { "role": "assistant", "content": row['completion']}])
+            chat_row = {"messages": messages}
+            return chat_row
+
+        newds = newds.map(format_messages)
+        return _remove_all_columns_but(newds, ['messages'])
+
+def append_extension(path: str, extension: str) -> str:
+    suffix = "." + extension
+    if not path.endswith(suffix):
+        path = path + suffix
+    return path
+
+
+class JsonlDatasetExporter(DatasetExporter):
+    """
+    Exports the Dataset to a JSONL file
+    """
+
+    def export(self, ds: Dataset, output_path: str):
+        ds.to_json(append_extension(output_path, "jsonl"))
+
+
+class ParquetDatasetExporter(DatasetExporter):
+    """
+    Exports the Dataset to a Parquet file
+    """
+
+    def export(self, ds: Dataset, output_path: str):
+        ds.to_parquet(append_extension(output_path, "parquet"))
+
+
+def main():
+    """
+    When raft.py is executed from the command line.
+    """
+    args = get_args()
+    ds = load_dataset(args.input_type, data_files={"train": args.input})['train']
+    formatter = DatasetConverter()
+
+    if args.output_chat_system_prompt and args.output_format != "chat":
+        raise Exception("Parameter --output-chat-system-prompt can only be used with --output-format chat")
+
+    format_params = {}
+    if args.output_chat_system_prompt:
+        format_params['system_prompt'] = args.output_chat_system_prompt
+
+    formatter.convert(ds=ds, format=args.output_format, output_path=args.output, output_type=args.output_type, params=format_params)
+
+if __name__ == "__main__":
+    main()

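For reference, a minimal usage sketch of the new converter (hypothetical in-memory data; assumes format.py is importable from the working directory). The completion/chat formatters expect the RAFT-style "question" and "cot_answer" columns:

    from datasets import Dataset
    from format import DatasetConverter

    # Tiny illustrative dataset with the columns the formatters expect.
    ds = Dataset.from_dict({
        "question": ["What is RAFT?"],
        "cot_answer": ["RAFT fine-tunes a model to answer questions from retrieved documents."],
    })

    converter = DatasetConverter()
    # Writes train.jsonl in the OpenAI chat fine-tuning format;
    # the exporter appends the .jsonl extension automatically.
    converter.convert(
        ds=ds,
        format="chat",
        output_path="train",
        output_type="jsonl",
        params={"system_prompt": "You are a helpful assistant."},
    )

The same conversion is available from the command line via the --output-format and --output-type flags defined in get_args().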
+ 1 - 1
recipes/use_cases/end2end-recipes/raft/raft.py

@@ -87,7 +87,7 @@ if __name__ == "__main__":
         api_config["api_key"] = os.environ["API_KEY"]
     logging.info(f"Configuration loaded. Generating {args.questions_per_chunk} question per chunk using model '{args.model}'.")
     logging.info(f"Chunk size: {args.chunk_size}.")
-    logging.info(f"num_distract_docs: {api_config['num_distract_docs']}, orcale_p: {api_config['orcale_p']}")
+    logging.info(f"num_distract_docs: {api_config['num_distract_docs']}, oracle_p: {api_config['oracle_p']}")
     logging.info(f"Will use endpoint_url: {args.endpoint_url}.")
     logging.info(f"Output will be written to {args.output}.")
     main(api_config)

+ 1 - 1
recipes/use_cases/end2end-recipes/raft/raft.yaml

@@ -48,4 +48,4 @@ questions_per_chunk: 5
 
 num_distract_docs: 5 # number of distracting documents to add to each chunk
 
-orcale_p: 0.8 # probability of related documents to be added to each chunk
+oracle_p: 0.8 # probability of related documents to be added to each chunk

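The renamed oracle_p knob is, per the comment, the probability that the related ("oracle") document is added to a chunk's context alongside the distractors. A hedged sketch of how such a probability is typically applied (illustrative only, not the recipe's actual sampling code):

    import random

    def build_context(oracle_doc, distractor_docs, oracle_p=0.8):
        # Keep the oracle document with probability oracle_p,
        # alongside the distracting documents.
        docs = list(distractor_docs)
        if random.random() < oracle_p:
            docs.append(oracle_doc)
        random.shuffle(docs)
        return docs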
+ 1 - 1
src/llama_recipes/configs/peft.py

@@ -8,7 +8,7 @@ from typing import List
 class lora_config:
      r: int=8
      lora_alpha: int=32
-     target_modules: List[str] = field(default_factory=lambda: ["q_proj", "k_proj", "v_proj", "o_proj","gate_proj", "up_proj", "down_proj"])
+     target_modules: List[str] = field(default_factory=lambda: ["q_proj", "v_proj"])
      bias= "none"
      task_type: str= "CAUSAL_LM"
      lora_dropout: float=0.05
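With the default target_modules trimmed to the attention query and value projections, here is a minimal sketch of how these dataclass fields typically map onto a peft LoraConfig (illustrative; the model name and wiring are assumptions, not the repository's training loop):

    from peft import LoraConfig, get_peft_model
    from transformers import AutoModelForCausalLM

    model = AutoModelForCausalLM.from_pretrained("meta-llama/Meta-Llama-3-8B")  # hypothetical model choice
    peft_config = LoraConfig(
        r=8,
        lora_alpha=32,
        target_modules=["q_proj", "v_proj"],  # the new default
        bias="none",
        task_type="CAUSAL_LM",
        lora_dropout=0.05,
    )
    model = get_peft_model(model, peft_config)
    model.print_trainable_parameters()  # noticeably fewer trainable parameters than the seven-module default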