
rename more llama_recipes occurrences

Matthias Reso 3 months ago
parent
commit
0adabb243a

+ 1 - 1
3p-integrations/using_externally_hosted_llms.ipynb

@@ -12,7 +12,7 @@
    "metadata": {},
    "source": [
     "# **Using externally-hosted LLMs**\n",
-    "Use llama_recipes.inference.llm to perform inference using Llama and other models using third party services. At the moment, three services have been incorporated:\n",
+    "Use llama_cookbook.inference.llm to perform inference using Llama and other models using third party services. At the moment, three services have been incorporated:\n",
     "- Together.ai\n",
     "- Anyscale\n",
     "- OpenAI\n",

+ 1 - 1
getting-started/finetuning/quickstart_peft_finetuning.ipynb

@@ -248,7 +248,7 @@
    "source": [
     "from peft import get_peft_model, prepare_model_for_kbit_training, LoraConfig\n",
     "from dataclasses import asdict\n",
-    "from llama_recipes.configs import lora_config as LORA_CONFIG\n",
+    "from llama_cookbook.configs import lora_config as LORA_CONFIG\n",
     "\n",
     "lora_config = LORA_CONFIG()\n",
     "lora_config.r = 8\n",

+ 4 - 4
src/llama_cookbook/configs/datasets.py

@@ -14,8 +14,8 @@ class samsum_dataset:
 @dataclass
 class grammar_dataset:
     dataset: str = "grammar_dataset"
-    train_split: str = "src/llama_recipes/datasets/grammar_dataset/gtrain_10k.csv"
-    test_split: str = "src/llama_recipes/datasets/grammar_dataset/grammar_validation.csv"
+    train_split: str = "src/llama_cookbook/datasets/grammar_dataset/gtrain_10k.csv"
+    test_split: str = "src/llama_cookbook/datasets/grammar_dataset/grammar_validation.csv"
 
 
 @dataclass
@@ -23,7 +23,7 @@ class alpaca_dataset:
     dataset: str = "alpaca_dataset"
     train_split: str = "train"
     test_split: str = "val"
-    data_path: str = "src/llama_recipes/datasets/alpaca_data.json"
+    data_path: str = "src/llama_cookbook/datasets/alpaca_data.json"
 
 @dataclass
 class custom_dataset:
@@ -32,7 +32,7 @@ class custom_dataset:
     train_split: str = "train"
     test_split: str = "validation"
     data_path: str = ""
-    
+
 @dataclass
 class llamaguard_toxicchat_dataset:
     dataset: str = "llamaguard_toxicchat_dataset"
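
Since these are plain dataclasses, pointing training at a different file is just attribute assignment. A minimal sketch, assuming the module is importable as `llama_cookbook.configs.datasets` (matching the file path above); the local CSV path is hypothetical:

```python
# Minimal sketch; field names are taken from the dataclasses in this diff.
from llama_cookbook.configs.datasets import grammar_dataset, alpaca_dataset

grammar_cfg = grammar_dataset()
grammar_cfg.train_split = "data/my_grammar_train.csv"  # hypothetical local path

alpaca_cfg = alpaca_dataset()
print(alpaca_cfg.data_path)  # src/llama_cookbook/datasets/alpaca_data.json after the rename
```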

+ 2 - 2
src/llama_cookbook/configs/wandb.py

@@ -6,10 +6,10 @@ from dataclasses import dataclass, field
 
 @dataclass
 class wandb_config:
-    project: str = 'llama_recipes' # wandb project name
+    project: str = 'llama_cookbook' # wandb project name
     entity: Optional[str] = None # wandb entity name
     job_type: Optional[str] = None
     tags: Optional[List[str]] = None
     group: Optional[str] = None
     notes: Optional[str] = None
-    mode: Optional[str] = None
+    mode: Optional[str] = None
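
The fields of `wandb_config` line up with keyword arguments of `wandb.init`, which is presumably how `setup_wandb` in `finetuning.py` (next file) consumes them. A hedged sketch of that mapping:

```python
# Hedged sketch: turn the dataclass into wandb.init kwargs.
# Every field shown in the diff (project, entity, job_type, tags, group, notes, mode)
# is a valid wandb.init keyword argument.
from dataclasses import asdict
import wandb
from llama_cookbook.configs import wandb_config as WANDB_CONFIG

cfg = WANDB_CONFIG()
cfg.mode = "offline"  # assumption for the sketch: avoid network access while testing
run = wandb.init(**asdict(cfg))
run.finish()
```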

+ 6 - 6
src/llama_cookbook/finetuning.py

@@ -74,7 +74,7 @@ def setup_wandb(train_config, fsdp_config, **kwargs):
             "You are trying to use wandb which is not currently installed. "
             "Please install it using pip install wandb"
         )
-    from llama_recipes.configs import wandb_config as WANDB_CONFIG
+    from llama_cookbook.configs import wandb_config as WANDB_CONFIG
 
     wandb_config = WANDB_CONFIG()
     update_config(wandb_config, **kwargs)
@@ -196,7 +196,7 @@ def main(**kwargs):
         model.resize_token_embeddings(len(tokenizer))
 
     print_model_size(model, train_config, rank if train_config.enable_fsdp else 0)
-    
+
     # Convert the model to bfloat16 if fsdp and pure_bf16 is enabled
     if (
         train_config.enable_fsdp
@@ -239,12 +239,12 @@ def main(**kwargs):
             freeze_transformer_layers(model, train_config.num_freeze_layers)
             # print model size and frozen layers after freezing layers
             print_frozen_model_status(model, train_config, rank if train_config.enable_fsdp else 0)
-            
+
         if not train_config.use_peft and train_config.freeze_LLM_only and config.model_type == "mllama":
             freeze_LLM_only(model)
             # print model size and frozen layers after freezing layers
             print_frozen_model_status(model, train_config, rank if train_config.enable_fsdp else 0)
-        
+
         mixed_precision_policy, wrapping_policy = get_policies(fsdp_config, rank)
         # Create the FSDP wrapper for MllamaSelfAttentionDecoderLayer,MllamaCrossAttentionDecoderLayer,MllamaVisionEncoderLayer in vision models
         if is_vision:
@@ -264,7 +264,7 @@ def main(**kwargs):
             device_id = torch.xpu.current_device()
         elif torch.cuda.is_available():
             device_id = torch.cuda.current_device()
-        
+
         if train_config.freeze_LLM_only:
             use_orig_params = True
         else:
@@ -312,7 +312,7 @@ def main(**kwargs):
         dataset_processer = processor
     else:
         dataset_processer = tokenizer
-    
+
     # Load and preprocess the dataset for training and validation
 
     dataset_train = get_preprocessed_dataset(
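
One functional detail visible in this hunk: when `freeze_LLM_only` is set, the script switches FSDP to `use_orig_params=True`, which FSDP needs when frozen and trainable parameters coexist inside one wrapped unit. A plain-PyTorch sketch of the freezing side, with a hypothetical tiny model standing in for Llama; the FSDP wrapping itself is left as a comment since it requires a distributed process group:

```python
# Sketch of freezing a submodule before FSDP wrapping (hypothetical tiny model).
import torch.nn as nn

class TinyModel(nn.Module):
    def __init__(self):
        super().__init__()
        self.language_model = nn.Linear(16, 16)   # stands in for the frozen LLM part
        self.vision_adapter = nn.Linear(16, 16)   # stands in for the trainable part

model = TinyModel()
# Freeze the "LLM only", analogous to freeze_LLM_only in the diff.
for p in model.language_model.parameters():
    p.requires_grad = False

# With mixed frozen/trainable parameters in one FSDP unit, wrap with:
#   FSDP(model, use_orig_params=True, ...)   # as the hunk above does
```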

+ 16 - 16
src/llama_cookbook/inference/safety_utils.py

@@ -24,7 +24,7 @@ class AuditNLGSensitiveTopics(object):
         except ImportError as e:
             print("Could not import optional dependency: auditnlg\nPlease install manually with:\n pip install auditnlg\nFollowed by:\npip install -r requirements.txt")
             raise e
-            
+
 
         data = [{"output": output_text}]
 
@@ -37,8 +37,8 @@ class AuditNLGSensitiveTopics(object):
             report += "|" + "|".join(f"{n:^10}" for n in [list(k.keys())[0] for k in scores["class_scores"]]) + "|\n"
             report += "|" + "|".join(f"{n:^10.5}" for n in [list(k.values())[0] for k in scores["class_scores"]]) + "|\n"
         return "Sensitive Topics", is_safe, report
-    
-    
+
+
 class SalesforceSafetyChecker(object):
     def __init__(self):
         pass
@@ -47,13 +47,13 @@ class SalesforceSafetyChecker(object):
         from transformers import AutoModelForSeq2SeqLM, AutoTokenizer, AutoConfig
 
         config = AutoConfig.from_pretrained("Salesforce/safety-flan-t5-base")
-        tokenizer = AutoTokenizer.from_pretrained("Salesforce/safety-flan-t5-base") 
+        tokenizer = AutoTokenizer.from_pretrained("Salesforce/safety-flan-t5-base")
         model = AutoModelForSeq2SeqLM.from_pretrained("Salesforce/safety-flan-t5-base", config=config)
         model.eval()
 
         prefix = "Is the <Text> field safe or unsafe?"
         input_ids = tokenizer(prefix + " <Text> " + output_text + " <Context> ", return_tensors="pt").input_ids
-        
+
         if len(input_ids[0]) > 512:
             warnings.warn(
                 "Input length is > 512 token. Safety check result could be incorrect."
@@ -66,9 +66,9 @@ class SalesforceSafetyChecker(object):
                 return_dict_in_generate=True,
                 max_new_tokens=20,
                 )
-        
-        is_safe = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True).split(" ")[0] == "safe"    
-            
+
+        is_safe = tokenizer.decode(outputs.sequences[0], skip_special_tokens=True).split(" ")[0] == "safe"
+
         report = ""
         if not is_safe:
             true_false_ids = tokenizer("true false").input_ids[:2]
@@ -76,11 +76,11 @@ class SalesforceSafetyChecker(object):
             scores = {}
             for k, i in zip(keys, range(3,20,2)):
                 scores[k] = round(outputs.scores[i][0,true_false_ids].softmax(dim=0)[0].item(), 5)
-            
+
             report += "|" + "|".join(f"{n:^10}" for n in scores.keys()) + "|\n"
             report += "|" + "|".join(f"{n:^10}" for n in scores.values()) + "|\n"
         return "Salesforce Content Safety Flan T5 Base", is_safe, report
-        
+
 
     def get_total_length(self, data):
         prefix = "Is the <Text> field safe or unsafe "
@@ -158,7 +158,7 @@ class LlamaGuardSafetyChecker(object):
 
     def __init__(self):
         from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
-        from llama_recipes.inference.prompt_format_utils import build_default_prompt, create_conversation, LlamaGuardVersion
+        from llama_cookbook.inference.prompt_format_utils import build_default_prompt, create_conversation, LlamaGuardVersion
 
         model_id = "meta-llama/Llama-Guard-3-8B"
 
@@ -168,7 +168,7 @@ class LlamaGuardSafetyChecker(object):
         self.model = AutoModelForCausalLM.from_pretrained(model_id, quantization_config=quantization_config, device_map="auto")
 
     def __call__(self, output_text, **kwargs):
-        
+
         agent_type = kwargs.get('agent_type', AgentType.USER)
         user_prompt = kwargs.get('user_prompt', "")
 
@@ -194,14 +194,14 @@ class LlamaGuardSafetyChecker(object):
         prompt_len = input_ids.shape[-1]
         output = self.model.generate(input_ids=input_ids, max_new_tokens=100, pad_token_id=0)
         result = self.tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
-        
+
         splitted_result = result.split("\n")[0];
-        is_safe = splitted_result == "safe"    
+        is_safe = splitted_result == "safe"
 
         report = result
-        
+
         return "Llama Guard", is_safe, report
-        
+
 
 # Function to load the PeftModel for performance optimization
 # Function to determine which safety checker to use based on the options selected
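
The only functional change in this file is the `prompt_format_utils` import path; the checker interface is unchanged. Based on the `__call__` signature visible above, a hedged usage sketch (it assumes `AgentType` is exported from the same module and that you have access to the gated `meta-llama/Llama-Guard-3-8B` weights):

```python
# Hedged sketch based on the checker classes shown in this diff.
from llama_cookbook.inference.safety_utils import AgentType, LlamaGuardSafetyChecker

checker = LlamaGuardSafetyChecker()  # loads meta-llama/Llama-Guard-3-8B with bitsandbytes quantization
method, is_safe, report = checker(
    "Sure, here is a simple banana bread recipe.",
    agent_type=AgentType.USER,                 # default shown in __call__
    user_prompt="Give me a recipe for banana bread.",
)
print(method, is_safe)
print(report)
```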

+ 1 - 1
src/llama_cookbook/tools/README.md

@@ -7,7 +7,7 @@ This is the reverse conversion for `convert_llama_weights_to_hf.py` script from
 - Copy file params.json from the official llama download into that directory.
 - Run the conversion script. `model-path` can be a Hugging Face hub model or a local hf model directory.
 ```
-python -m llama_recipes.tools.convert_hf_weights_to_llama --model-path meta-llama/Meta-Llama-3.1-70B-Instruct --output-dir test70B --model-size 70B
+python -m llama_cookbook.tools.convert_hf_weights_to_llama --model-path meta-llama/Meta-Llama-3.1-70B-Instruct --output-dir test70B --model-size 70B
 ```
 
 ## Step 1: Run inference