
Add note about LG3 finetuning notebook

Suraj, 8 months ago
commit a46d0422cf
2 changed files with 4 additions and 1 deletion
  1. recipes/responsible_ai/llama_guard/README.md (+3, -0)
  2. recipes/responsible_ai/llama_guard/inference.py (+1, -1)

recipes/responsible_ai/llama_guard/README.md (+3, -0)

@@ -67,3 +67,6 @@ In this case, the default categories are applied by the tokenizer, using the `ap
 Use this command for testing with a quantized Llama model, modifying the values accordingly:
 
 `python examples/inference.py --model_name <path_to_regular_llama_model> --prompt_file <path_to_prompt_file> --quantization 8bit --enable_llamaguard_content_safety`
+
+## Llama Guard 3 Finetuning & Customization
+The safety categories in Llama Guard 3 can be tuned for specific application needs. Existing categories can be removed and new categories can be added to the taxonomy. The [Llama Guard Customization](./llama_guard_customization_via_prompting_changes_and_fine_tuning.ipynb) notebook walks through the process.

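The prompting route that the linked notebook describes amounts to swapping the category list inside the Llama Guard prompt before generation. Below is a minimal sketch of that idea; the model ID, category text, and prompt layout are illustrative assumptions, not the notebook's exact code:

```python
# Hedged sketch: customizing Llama Guard categories by editing the prompt.
# The taxonomy below is illustrative; see the linked notebook for the exact
# prompt format and for the fine-tuning route.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "meta-llama/Llama-Guard-3-8B"  # assumed; substitute your model path
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, device_map="auto")

# Custom taxonomy: a default category kept, plus an application-specific one.
custom_categories = """S1: Violent Crimes.
S2: Competitor Promotion.
The assistant must not recommend or promote competitor products."""

prompt = f"""<|begin_of_text|><|start_header_id|>user<|end_header_id|>

Task: Check if there is unsafe content in 'User' messages in conversations according to our safety policy with the below categories.

<BEGIN UNSAFE CONTENT CATEGORIES>
{custom_categories}
<END UNSAFE CONTENT CATEGORIES>

<BEGIN CONVERSATION>

User: Which rival store has better prices?

<END CONVERSATION>

Provide your safety assessment for the above conversation.<|eot_id|><|start_header_id|>assistant<|end_header_id|>

"""

inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
prompt_len = inputs["input_ids"].shape[-1]
output = model.generate(**inputs, max_new_tokens=100, pad_token_id=0)
print(tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True))
```

Prompt-only customization works for adding or removing categories; shifting what the model actually flags within a category is where the notebook's fine-tuning section comes in.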
recipes/responsible_ai/llama_guard/inference.py (+1, -1)

@@ -60,7 +60,7 @@ def main(
 
         input = tokenizer([formatted_prompt], return_tensors="pt").to("cuda")
         prompt_len = input["input_ids"].shape[-1]
-        output = model.generate(**input, max_new_tokens=100, pad_token_id=0, eos_token_id=128009)
+        output = model.generate(**input, max_new_tokens=100, pad_token_id=0)
         results = tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
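As context for the one-line change above: when `eos_token_id` is omitted, `generate()` falls back to the model's generation config, so the stop token no longer needs to be hard-coded for one particular Llama variant. A minimal check of that fallback (the model ID is an assumption; substitute your own path):

```python
# Hedged sketch: generate() reads eos_token_id from the model's generation
# config when the caller does not pass one explicitly.
from transformers import AutoModelForCausalLM

model_id = "meta-llama/Llama-Guard-3-8B"  # assumed path
model = AutoModelForCausalLM.from_pretrained(model_id)

# For Llama 3 family models this typically includes 128009 (<|eot_id|>),
# which is why the hard-coded value was redundant.
print(model.generation_config.eos_token_id)
```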