# Copyright (c) Meta Platforms, Inc. and affiliates.
# This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

import os
import string
import asyncio
import json
import logging

import magic
from PyPDF2 import PdfReader
from transformers import AutoTokenizer

from doc_processor import split_text_into_chunks

# Initialize logging
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')

def read_text_file(file_path):
    try:
        with open(file_path, 'r') as f:
            text = f.read().strip()
        # check emptiness before appending the trailing space, otherwise the file never looks empty
        if len(text) == 0:
            print("File is empty ", file_path)
        return text + ' '
    except Exception as e:
        logging.error(f"Error reading text file {file_path}: {e}")
        return ''

def read_pdf_file(file_path):
    try:
        with open(file_path, 'rb') as f:
            pdf_reader = PdfReader(f)
            file_text = [page.extract_text().strip() + ' ' for page in pdf_reader.pages]
        text = ''.join(file_text)
        if len(text.strip()) == 0:
            print("File is empty ", file_path)
        return text
    except Exception as e:
        logging.error(f"Error reading PDF file {file_path}: {e}")
        return ''

def read_json_file(file_path):
    try:
        with open(file_path, 'r') as f:
            data = json.load(f)
        # Assuming each item in the list has a 'question' and an 'answer' key,
        # concatenate the question/answer pairs into a single string
        file_text = ' '.join([item['question'].strip() + ' ' + item['answer'].strip() + ' ' for item in data])
        if len(file_text) == 0:
            print("File is empty ", file_path)
        return file_text
    except Exception as e:
        logging.error(f"Error reading JSON file {file_path}: {e}")
        return ''
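
# For illustration only, read_json_file assumes the file holds a top-level list of
# question/answer objects (sample values invented to document the assumed schema):
# [
#     {"question": "What is Llama 2?", "answer": "A family of open LLMs from Meta."},
#     {"question": "What license applies?", "answer": "The Llama 2 Community License."}
# ]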

def process_file(file_path):
    print("starting to process file: ", file_path)
    file_type = magic.from_file(file_path, mime=True)
    if file_type in ['text/plain', 'text/markdown']:
        return read_text_file(file_path)
    elif file_type == 'application/json':
        return read_json_file(file_path)
    elif file_type == 'application/pdf':
        return read_pdf_file(file_path)
    else:
        logging.warning(f"Unsupported file type {file_type} for file {file_path}")
        return ''

def remove_non_printable(s):
    printable = set(string.printable)
    return ''.join(filter(lambda x: x in printable, s))

def read_file_content(context):
    file_strings = []

    for root, _, files in os.walk(context['data_dir']):
        for file in files:
            file_path = os.path.join(root, file)
            file_text = process_file(file_path)
            if file_text:
                file_strings.append(file_text)
    text = remove_non_printable('\n'.join(file_strings))
    with open(os.path.join(context['data_dir'], 'all_text.txt'), 'w') as f:
        f.write(text)
    return text
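
# For illustration, read_file_content assumes a context such as {'data_dir': './data'}
# and, as a side effect, writes the cleaned, concatenated corpus to
# <data_dir>/all_text.txt.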

# clean the text by removing all parts that do not contain any alphanumeric characters
def clean(s):
    result = []
    for item in s.split('"'):
        if any(c.isalnum() for c in item):
            result.append(item)
    return " ".join(result)
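
# For example: clean('"What is Llama 2?" ... ""') -> 'What is Llama 2?'
# (quote characters and fragments without any alphanumerics are dropped)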

# given a response string, return a list of dicts that can be saved as JSON
def parse_qac_to_json(response_string):
    split_lines = response_string.split("\n")
    start, mid, end = None, None, None
    # must use a set to avoid duplicate question/answer pairs due to async function calls
    qa_set = set()
    for i in range(len(split_lines)):
        line = split_lines[i]
        # searching for "Question"
        if start is None:
            # once found, record its line number
            if '"Question":' in line:
                start = i
        else:
            # "Question" has been found; next find "Answer", then "Context"
            if '"Answer":' in line:
                mid = i
            elif '"Context":' in line:
                end = i
            # a new "Question" means the previous triple is complete, so add it to the set
            elif '"Question":' in line:
                question = " ".join(split_lines[start:mid]).split('"Question":')[1]
                answer = " ".join(split_lines[mid:end]).split('"Answer":')[1]
                context = " ".join(split_lines[end:i]).split('"Context":')[1]
                start, mid, end = i, None, None
                qa_set.add((clean(question), clean(answer), clean(context)))
    # add the last question back
    if start is not None and mid is not None and end is not None:
        question = " ".join(split_lines[start:mid]).split('"Question":')[1]
        answer = " ".join(split_lines[mid:end]).split('"Answer":')[1]
        context = " ".join(split_lines[end:]).split('"Context":')[1]
        qa_set.add((clean(question), clean(answer), clean(context)))
    qa_list = [{"Question": q, "Answer": a, "Context": c} for q, a, c in qa_set]

    return qa_list
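
# For illustration, parse_qac_to_json assumes model output shaped roughly like:
#   "Question": <question text>
#   "Answer": <answer text>
#   "Context": <context text>
# repeated once per generated QA pair (the exact format comes from the prompt
# templates defined outside this file).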

def parse_qa_to_json(response_string):
    split_lines = response_string.split("\n")
    start, end = None, None
    # must use a set to avoid duplicate question/answer pairs due to async function calls
    qa_set = set()
    for i in range(len(split_lines)):
        line = split_lines[i]
        # searching for "Question"
        if start is None:
            # once found, record its line number
            if '"Question":' in line:
                start = i
        else:
            # "Question" has been found; next find "Answer"
            if '"Answer":' in line:
                end = i
            # a new "Question" means the previous pair is complete, so add it to the set
            elif '"Question":' in line:
                question = " ".join(split_lines[start:end]).split('"Question":')[1]
                answer = " ".join(split_lines[end:i]).split('"Answer":')[1]
                start, end = i, None
                qa_set.add((clean(question), clean(answer)))
    # add the last question back
    if start is not None and end is not None:
        question = " ".join(split_lines[start:end]).split('"Question":')[1]
        answer = " ".join(split_lines[end:]).split('"Answer":')[1]
        qa_set.add((clean(question), clean(answer)))
    qa_list = [{"Question": q, "Answer": a} for q, a in qa_set]

    return qa_list
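
# parse_qa_to_json mirrors parse_qac_to_json for prompts that emit only
# "Question"/"Answer" lines, without a trailing "Context" field.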

async def prepare_and_send_request(chat_service, api_context: dict, document_content: str, num_questions: int) -> dict:
    if num_questions == 0:
        logging.error("Error: num_questions is 0")
        return {}
    prompt_for_system = api_context['question_prompt_template'].format(num_questions=num_questions, language=api_context["language"])
    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': document_content}]
    # the caller parses the result string into a list of dicts with Question, Answer, Context keys
    return await chat_service.execute_chat_request_async(api_context, chat_request_payload)

# This function is used to evaluate the quality of generated QA pairs. Return the original QA pair if the model eval result is YES; otherwise, return an empty dict.
async def data_curation_request(chat_service, api_context: dict, document_content: dict) -> dict:
    prompt_for_system = api_context['curation_prompt_template'].format(language=api_context["language"])
    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': f"Question: {document_content['Question']} \n Answer: {document_content['Answer']}\n Context: {document_content['Context']} "}]
    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
    if not result:
        return {}
    # no line-by-line parsing needed; the eval response is expected to be a JSON string
    result = json.loads(result)
    if "Result" not in result:
        print("Error: eval response does not contain answer")
        print(document_content, result)
        return {}
    # send back the original QA pair if the model eval result is YES
    if result["Result"] == "YES":
        return document_content
    else:
        print(document_content, result)
        return {}
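
# For illustration, the curation model is expected to reply with a JSON string like
# '{"Result": "YES"}' or '{"Result": "NO"}'; only the "Result" key is inspected above.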

async def generate_question_batches(chat_service, api_context: dict):
    document_text = read_file_content(api_context)
    if len(document_text) == 0:
        logging.error("Error reading files, document_text is empty")
    if api_context["model"] in ["meta-llama-3-70b-instruct", "meta-llama-3-8b-instruct"]:
        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B", pad_token="</s>", padding_side="right")
    else:
        tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf", pad_token="</s>", padding_side="right")
    document_batches = split_text_into_chunks(api_context, document_text, tokenizer)

    total_questions = api_context["total_questions"]
    batches_count = len(document_batches)
    # each batch should have at least 1 question
    base_questions_per_batch = max(total_questions // batches_count, 1)
    extra_questions = total_questions % batches_count
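    # for example, with total_questions=10 and batches_count=4:
    #   base_questions_per_batch = 2 and extra_questions = 2,
    #   so the batches receive 3, 3, 2, 2 questions respectively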

    print(f"Questions per batch: {base_questions_per_batch} (+1 for the first {extra_questions} batches), Total questions: {total_questions}, Batches: {batches_count}")
    generation_tasks = []
    for batch_index, batch_content in enumerate(document_batches):
        print(f"len of batch_content: {len(batch_content)}, batch_index: {batch_index}")
        # distribute the extra questions across the first few batches
        questions_in_current_batch = base_questions_per_batch + (1 if batch_index < extra_questions else 0)
        print(f"Batch {batch_index + 1} - {questions_in_current_batch} questions ********")
        try:
            task = prepare_and_send_request(chat_service, api_context, batch_content, questions_in_current_batch)
            generation_tasks.append(task)
        except Exception as e:
            print(f"Error during chat request execution: {e}")

    question_generation_results = await asyncio.gather(*generation_tasks)
    final_result = []
    for result in question_generation_results:
        parsed_json = parse_qac_to_json(result)
        final_result.extend(parsed_json)
    return final_result

async def generate_data_curation(chat_service, api_context: dict, evaluation_list: list):
    eval_tasks = []
    for batch_content in evaluation_list:
        try:
            result = data_curation_request(chat_service, api_context, batch_content)
            eval_tasks.append(result)
        except Exception as e:
            print(f"Error during data eval request execution: {e}")

    eval_results = await asyncio.gather(*eval_tasks)
    curated_data = []
    for item in eval_results:
        # if the item is not empty, add it to the curated data list
        if item:
            curated_data.append(item)
    return curated_data

# This function is used to judge generated answers against ground-truth answers. Return the model's verdict as a dict, or an empty dict on failure.
async def LLM_judge_request(chat_service, api_context: dict, document_content: dict) -> dict:
    prompt_for_system = api_context['judge_prompt_template'].format(language=api_context["language"])
    chat_request_payload = [{'role': 'system', 'content': prompt_for_system}, {'role': 'user', 'content': f"Question: {document_content['Question']} \n Teacher's Answer: {document_content['Ground_truth']}\n Student's Answer: {document_content['Generated_answer']} "}]
    result = await chat_service.execute_chat_request_async(api_context, chat_request_payload)
    if not result:
        return {}
    # no line-by-line parsing needed; the judge response is expected to be a JSON string
    result = json.loads(result)
    if "Result" not in result:
        print("Error: eval response does not contain answer")
        print(document_content, result)
        return {}
    return result
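
# For illustration, the judge model is assumed to reply with a JSON string such as
# '{"Result": "YES"}' or '{"Result": "NO"}'; only the presence of the "Result" key
# is validated here.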

async def generate_LLM_eval(chat_service, api_context: dict, judge_list: list):
    eval_tasks = []
    for batch_content in judge_list:
        try:
            result = LLM_judge_request(chat_service, api_context, batch_content)
            eval_tasks.append(result)
        except Exception as e:
            print(f"Error during LLM judge request execution: {e}")

    judge_results = await asyncio.gather(*eval_tasks)
    return judge_results
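

if __name__ == "__main__":
    # Minimal, self-contained sanity check (an illustrative sketch, not part of the
    # pipeline): exercise parse_qa_to_json on a hand-written sample response in the
    # line-oriented format the parser expects.
    sample_response = "\n".join([
        '"Question": What is Llama 2?',
        '"Answer": A family of open large language models released by Meta.',
        '"Question": Who released it?',
        '"Answer": Meta Platforms, Inc.',
    ])
    print(parse_qa_to_json(sample_response))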