Kai Wu 1 year ago
parent
commit
ddb7f1c15c

+ 1 - 1
recipes/use_cases/end2end-recipes/raft/README.md

@@ -22,7 +22,7 @@ CUDA_VISIBLE_DEVICES=0,1 python -m vllm.entrypoints.openai.api_server  --model m
 Once the server is ready, we can query it on port 8001 from another terminal. Here, "-u" sets the endpoint URL to query and "-t" sets the number of questions we ask the Meta Llama3 70B Instruct model to generate per chunk. To use a cloud API, change the endpoint URL to the cloud provider's and set the API key using "-k". Since here we want to query our locally hosted VLLM server, we can use the following command:
 
 
```bash
-python raft.py -u "http://localhost:8001/v1" -k "EMPTY" -t 3
+python raft.py -u "http://localhost:8001/v1" -k "EMPTY" -t 5
```
 
 
 For a cloud API key, we can also set it using system environment variables, such as
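the `OPENAI_API_KEY` variable read by OpenAI-compatible clients, including `langchain_openai.ChatOpenAI` (an illustrative sketch; the exact variable name your provider or script expects may differ):

```bash
# Hypothetical example -- substitute the variable name your cloud provider/script expects.
export OPENAI_API_KEY="your-cloud-api-key"
```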

File diff not shown due to its large size
+ 0 - 2
recipes/use_cases/end2end-recipes/raft/data/website_data


+ 0 - 6
recipes/use_cases/end2end-recipes/raft/data_urls.xml

@@ -102,18 +102,12 @@
 <loc>http://raw.githubusercontent.com/meta-llama/llama/main/README.md</loc>
 </url>
 <url>
-<loc>http://raw.githubusercontent.com/meta-llama/llama/main/LICENSE.md</loc>
-</url>
-<url>
 <loc>http://raw.githubusercontent.com/meta-llama/llama3/main/MODEL_CARD.md</loc>
 </url>
 <url>
 <loc>http://raw.githubusercontent.com/meta-llama/llama3/main/README.md</loc>
 </url>
 <url>
-<loc>http://raw.githubusercontent.com/meta-llama/llama3/main/LICENSE.md</loc>
-</url>
-<url>
 <loc>http://raw.githubusercontent.com/meta-llama/codellama/main/MODEL_CARD.md</loc>
 </url>
 <url>

+ 7 - 15
recipes/use_cases/end2end-recipes/raft/eval_raft.py

@@ -5,21 +5,14 @@ import evaluate
 import argparse
 from config import load_config
 import json
-from itertools import chain
 from langchain_openai import ChatOpenAI
-
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain_community.document_loaders import DirectoryLoader
-from langchain_core.runnables import RunnablePassthrough
-
-from langchain_core.messages import HumanMessage, SystemMessage
 import re
 import string
-from collections import Counter
-from langchain_core.output_parsers import StrOutputParser
-from langchain.prompts.prompt import PromptTemplate
+
 
 
 def generate_answers_model_only(model_name,question_list,api_url="http://localhost:8000/v1",key="EMPTY"):
         # Use langchain to load the documents from data directory
@@ -57,7 +50,7 @@ def generate_answers_with_RAG(model_name, question_list,api_config,api_url_overw
     loader = DirectoryLoader(data_dir)
     docs = loader.load()
     # Split the document into chunks with a specified chunk size
-    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=50)
+    text_splitter = RecursiveCharacterTextSplitter(chunk_size=api_config["chunk_size"], chunk_overlap=int(api_config["chunk_size"]/10))
     all_splits = text_splitter.split_documents(docs)

     # Store the document into a vector store with a specific embedding model
@@ -260,6 +253,7 @@ def main(api_config):
                 fp.write("\n------------------------------------\n")
         # Now we want to take a closer look at the questions that are not answered the same by all the models.
         judge_zip = list(zip(*[item[-1] for item in all_metrics]))
+        model_names = [item[0] for item in all_metrics]
         with open(api_config["output_log"],"a") as fp:
             for item in all_metrics:
                 fp.write(f"Model_Name: {item[0]}, LLM_SCORE: {item[1]} \n")
@@ -270,12 +264,8 @@ def main(api_config):
                 else:
                     fp.write(f"Comparing interested question: {questions[idx]} \n")
                     fp.write(f"groud_truth: {groud_truth[idx]} \n")
-                    fp.write(f"{item[2]} Baseline_answers: {generated_answers['Baseline'][idx]} \n")
-                    fp.write(f"{item[3]} Baseline_RAG_answers: {generated_answers['Baseline_RAG'][idx]} \n")
-                    fp.write(f"{item[0]} RAFT_answers: {generated_answers['RAFT'][idx]} \n")
-                    fp.write(f"{item[1]} RAFT_RAG_answers: {generated_answers['RAFT_RAG'][idx]} \n")
-                    fp.write(f"{item[4]} 70B_Base_answers: {generated_answers['70B_Base'][idx]} \n")
-                    fp.write(f"{item[5]} 70B_RAG_answers: {generated_answers['70B_RAG'][idx]} \n")
+                    for i in range(len(model_names)):
+                        fp.write(f"{item[i]} {model_names[i]}_answers: {generated_answers[model_names[i]][idx]} \n")
                     fp.write("-------\n")
 
 
 
 
@@ -328,6 +318,7 @@ def parse_arguments():
         type=str,
         help="LLM API key for generating question/answer pairs."
     )
+    parser.add_argument("--chunk_size", type=int, default=1000, help="The character size of each chunk used in RAG")
     return parser.parse_args()
 
 
 if __name__ == "__main__":
@@ -342,6 +333,7 @@ if __name__ == "__main__":
     api_config["judge_endpoint"] = args.judge_endpoint
     api_config["output_log"] = args.output_log
     api_config["api_key"] = args.api_key
+    api_config["chunk_size"] = args.chunk_size
     if api_config["judge_endpoint"]:
         logging.info(f"Use local vllm service for judge at port: '{args.judge_endpoint}'.")
     main(api_config)

File diff not shown due to its large size
+ 217 - 129
recipes/use_cases/end2end-recipes/raft/evalset.json


+ 2 - 2
recipes/use_cases/end2end-recipes/raft/raft.py

@@ -70,8 +70,8 @@ def parse_arguments():
         type=str,
         help="LLM API key for generating question/answer pairs."
     )
-    parser.add_argument("--chunk_size", type=int, default=512, help="The size of each chunk in number of tokens")
-    parser.add_argument("-o","--output", type=str, default="./", help="The path at which to save the dataset")
+    parser.add_argument("--chunk_size", type=int, default=1000, help="The size of each chunk in number of tokens")
+    parser.add_argument("-o","--output", type=str, default="./output/", help="The path at which to save the dataset")
     parser.add_argument("--output-format", type=str, default="hf", help="Format to convert the dataset to. Defaults to hf.", choices=datasetFormats)
     parser.add_argument("--output-type", type=str, default="jsonl", help="Type to export the dataset to. Defaults to jsonl.", choices=outputDatasetTypes)
     return parser.parse_args()
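
Combining these flags with the "-u"/"-k"/"-t" options shown in the README, a typical local run might look like the sketch below (illustrative only; the explicit values simply restate the new defaults from this commit):

```bash
python raft.py -u "http://localhost:8001/v1" -k "EMPTY" -t 5 \
    --chunk_size 1000 -o ./output/ --output-format hf --output-type jsonl
```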

+ 1 - 1
recipes/use_cases/end2end-recipes/raft/raft.yaml

@@ -31,7 +31,7 @@ question_prompt_template: >
 #   4. Never use any abbreviation.
 #   5. Include only the questions in your response.
 
 
-data_dir: "/home/kaiwu/work/pytorch/docs"
+data_dir: "./data"
 
 
 xml_path: ""
 
 

+ 7 - 15
recipes/use_cases/end2end-recipes/raft/raft_utils.py

@@ -2,21 +2,15 @@
 # This software may be used and distributed according to the terms of the Llama 2 Community License Agreement.

 import os
-from transformers import  AutoTokenizer
 import logging
-import json
 from langchain_community.embeddings import HuggingFaceEmbeddings
 from langchain_experimental.text_splitter import SemanticChunker
 from math import ceil
-import datasets
-from datasets import Dataset, load_dataset
+from datasets import Dataset
 import random
 from langchain_community.document_loaders import SitemapLoader,DirectoryLoader
 from bs4 import BeautifulSoup
-from langchain_openai import ChatOpenAI
-from langchain_core.messages import HumanMessage, SystemMessage
-from langchain_community.llms import ChatOpenAI
-from langchain_core.prompts import ChatPromptTemplate
+
 from langchain_openai import ChatOpenAI
 
 
 
 
@@ -124,21 +118,19 @@ def generate_questions(api_config):
         logging.info(f"Error reading files, document_text is {len(document_text)}")
     embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2",model_kwargs={'device': 'cuda'})
     document_batches = get_chunks(document_text,api_config["chunk_size"],embedding_model)
-
-    batches_count = len(document_batches)
-    total_questions = api_config["questions_per_chunk"] * batches_count
     # use OpenAI API protocol to handle the chat request, including local VLLM openai compatible server
     llm = ChatOpenAI(
         openai_api_key=key,
         openai_api_base=api_url,
         model_name=api_config["model"],
         temperature=0.0,
-        max_tokens=250
+        max_tokens=500
         )
     all_tasks = [api_config['question_prompt_template'].format(num_questions=str(api_config['questions_per_chunk']),context=document) for document in document_batches]
     generated_answers = llm.batch(all_tasks)
+    generated_answers = [ item.content for item in generated_answers]
     if len(generated_answers) == 0:
-        logging.error("No model answers generated. Please check the input context or model configuration in ",model_name)
+        logging.error("No model answers generated. Please check the input context or model configuration in ",api_config["model"])
         return []
     final_result = []
     for result in generated_answers:
@@ -167,9 +159,10 @@ def generate_COT(chunk_questions_zip,api_config) -> dict:
         openai_api_base=api_config["endpoint_url"],
         model_name=api_config["model"],
         temperature=0.0,
-        max_tokens=350
+        max_tokens=500
         )
     generated_answers = llm.batch(all_tasks)
+    generated_answers = [ item.content for item in generated_answers]
     COT_results = []
     # return a list of (chunk, question, generated_answer)
     for (chunk, question),generated_answer in zip(chunk_questions,generated_answers):
@@ -186,7 +179,6 @@ def add_chunk_to_dataset(
     """
     Given a chunk and related questions lists, create {Q, A, D} triplets and add them to the dataset.
     """
-    COT_tasks = []
     chunks = [chunk for chunk, _ in chunk_questions_zip]
     COT_results = generate_COT(chunk_questions_zip,api_config)
     for chunk, q , cot in COT_results:
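
The two added `item.content` lines above reflect that `ChatOpenAI.batch()` returns chat message objects (`AIMessage`) rather than plain strings, so the text has to be pulled out of `.content` before any downstream string handling. A minimal sketch of the pattern (endpoint, key, and model name below are placeholders, not values taken from this repo):

```python
from langchain_openai import ChatOpenAI

# Placeholders: point these at your own OpenAI-compatible endpoint and model.
llm = ChatOpenAI(
    openai_api_key="EMPTY",
    openai_api_base="http://localhost:8001/v1",
    model_name="your-model-name",
    temperature=0.0,
    max_tokens=500,
)

messages = llm.batch(["First prompt", "Second prompt"])  # list of AIMessage objects
texts = [m.content for m in messages]                    # plain strings for parsing/splitting
```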