1 年之前 · e84dc568db
--- a/recipes/quickstart/NotebookLlama/1B-chat-start.py
+++ b/recipes/quickstart/NotebookLlama/1B-chat-start.py
@@ -1,96 +0,0 @@
 
				-DEFAULT_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
			
 
				-
			
 
				-import argparse
			
 
				-
			
 
				-import torch
			
 
				-from accelerate import Accelerator
			
 
				-from transformers import AutoModelForCausalLM, AutoTokenizer
			
 
				-
			
 
				-accelerator = Accelerator()
			
 
				-device = accelerator.device
			
 
				-
			
 
				-
			
 
				-def load_model_and_tokenizer(model_name: str):
			
 
				-    """
			
 
				-    Load the model and tokenizer for LLaMA-8b.
			
 
				-    """
			
 
				-    model = AutoModelForCausalLM.from_pretrained(
			
 
				-        model_name,
			
 
				-        torch_dtype=torch.bfloat16,
			
 
				-        use_safetensors=True,
			
 
				-        device_map=device,
			
 
				-    )
			
 
				-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
			
 
				-
			
 
				-    model, tokenizer = accelerator.prepare(model, tokenizer)
			
 
				-    return model, tokenizer
			
 
				-
			
 
				-
			
 
				-def generate_text(model, tokenizer, conversation, temperature: float, top_p: float):
			
 
				-    """
			
 
				-    Generate text using the model and tokenizer based on a conversation.
			
 
				-    """
			
 
				-    prompt = tokenizer.apply_chat_template(conversation, tokenize=False)
			
 
				-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
			
 
				-    output = model.generate(
			
 
				-        **inputs, temperature=temperature, top_p=top_p, max_new_tokens=512
			
 
				-    )
			
 
				-    return tokenizer.decode(output[0], skip_special_tokens=True)[len(prompt) :]
			
 
				-
			
 
				-
			
 
				-def main(
			
 
				-    system_message: str,
			
 
				-    user_message: str,
			
 
				-    temperature: float,
			
 
				-    top_p: float,
			
 
				-    model_name: str,
			
 
				-):
			
 
				-    """
			
 
				-    Call all the functions.
			
 
				-    """
			
 
				-    model, tokenizer = load_model_and_tokenizer(model_name)
			
 
				-    conversation = [
			
 
				-        {"role": "system", "content": system_message},
			
 
				-        {"role": "user", "content": user_message},
			
 
				-    ]
			
 
				-    result = generate_text(model, tokenizer, conversation, temperature, top_p)
			
 
				-    print("Generated Text: " + result)
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    parser = argparse.ArgumentParser(
			
 
				-        description="Generate text using the LLaMA-8b model with system and user messages."
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--system_message",
			
 
				-        type=str,
			
 
				-        default="You are a helpful AI assistant.",
			
 
				-        help="System message to set the context (default: 'You are a helpful AI assistant.')",
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--user_message", type=str, required=True, help="User message for generation"
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--temperature",
			
 
				-        type=float,
			
 
				-        default=0.7,
			
 
				-        help="Temperature for generation (default: 0.7)",
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--top_p", type=float, default=0.9, help="Top p for generation (default: 0.9)"
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--model_name",
			
 
				-        type=str,
			
 
				-        default=DEFAULT_MODEL,
			
 
				-        help=f"Model name (default: '{DEFAULT_MODEL}')",
			
 
				-    )
			
 
				-
			
 
				-    args = parser.parse_args()
			
 
				-    main(
			
 
				-        args.system_message,
			
 
				-        args.user_message,
			
 
				-        args.temperature,
			
 
				-        args.top_p,
			
 
				-        args.model_name,
			
 
				-    )
			
--- a/recipes/quickstart/NotebookLlama/1B-debating-script.py
+++ b/recipes/quickstart/NotebookLlama/1B-debating-script.py
@@ -1,163 +0,0 @@
 
				-import argparse
			
 
				-
			
 
				-import torch
			
 
				-from accelerate import Accelerator
			
 
				-from transformers import AutoModelForCausalLM, AutoTokenizer
			
 
				-
			
 
				-accelerator = Accelerator()
			
 
				-device = accelerator.device
			
 
				-
			
 
				-# Constants
			
 
				-DEFAULT_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
			
 
				-
			
 
				-
			
 
				-def load_model_and_tokenizer(model_name: str):
			
 
				-    """
			
 
				-    Load the model and tokenizer for LLaMA-1B.
			
 
				-    """
			
 
				-    model = AutoModelForCausalLM.from_pretrained(
			
 
				-        model_name,
			
 
				-        torch_dtype=torch.bfloat16,
			
 
				-        use_safetensors=True,
			
 
				-        device_map=device,
			
 
				-    )
			
 
				-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
			
 
				-
			
 
				-    model, tokenizer = accelerator.prepare(model, tokenizer)
			
 
				-    return model, tokenizer
			
 
				-
			
 
				-
			
 
				-def generate_response(model, tokenizer, conversation, temperature: float, top_p: float):
			
 
				-    """
			
 
				-    Generate a response based on the conversation history.
			
 
				-    """
			
 
				-    prompt = tokenizer.apply_chat_template(conversation, tokenize=False)
			
 
				-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
			
 
				-    output = model.generate(
			
 
				-        **inputs, temperature=temperature, top_p=top_p, max_new_tokens=256
			
 
				-    )
			
 
				-    return tokenizer.decode(output[0], skip_special_tokens=True)[len(prompt) :].strip()
			
 
				-
			
 
				-
			
 
				-def debate(
			
 
				-    model1,
			
 
				-    model2,
			
 
				-    tokenizer,
			
 
				-    system_prompt1,
			
 
				-    system_prompt2,
			
 
				-    initial_topic,
			
 
				-    n_turns,
			
 
				-    temperature,
			
 
				-    top_p,
			
 
				-):
			
 
				-    """
			
 
				-    Conduct a debate between two models.
			
 
				-    """
			
 
				-    conversation1 = [
			
 
				-        {"role": "system", "content": system_prompt1},
			
 
				-        {"role": "user", "content": f"Let's debate about: {initial_topic}"},
			
 
				-    ]
			
 
				-    conversation2 = [
			
 
				-        {"role": "system", "content": system_prompt2},
			
 
				-        {"role": "user", "content": f"Let's debate about: {initial_topic}"},
			
 
				-    ]
			
 
				-
			
 
				-    for i in range(n_turns):
			
 
				-        print(f"\nTurn {i+1}:")
			
 
				-
			
 
				-        # Model 1's turn
			
 
				-        response1 = generate_response(
			
 
				-            model1, tokenizer, conversation1, temperature, top_p
			
 
				-        )
			
 
				-        print(f"Model 1: {response1}")
			
 
				-        conversation1.append({"role": "assistant", "content": response1})
			
 
				-        conversation2.append({"role": "user", "content": response1})
			
 
				-
			
 
				-        # Model 2's turn
			
 
				-        response2 = generate_response(
			
 
				-            model2, tokenizer, conversation2, temperature, top_p
			
 
				-        )
			
 
				-        print(f"Model 2: {response2}")
			
 
				-        conversation2.append({"role": "assistant", "content": response2})
			
 
				-        conversation1.append({"role": "user", "content": response2})
			
 
				-
			
 
				-
			
 
				-def main(
			
 
				-    system_prompt1: str,
			
 
				-    system_prompt2: str,
			
 
				-    initial_topic: str,
			
 
				-    n_turns: int,
			
 
				-    temperature: float,
			
 
				-    top_p: float,
			
 
				-    model_name: str,
			
 
				-):
			
 
				-    """
			
 
				-    Set up and run the debate.
			
 
				-    """
			
 
				-    model1, tokenizer = load_model_and_tokenizer(model_name)
			
 
				-    model2, _ = load_model_and_tokenizer(model_name)  # We can reuse the tokenizer
			
 
				-
			
 
				-    debate(
			
 
				-        model1,
			
 
				-        model2,
			
 
				-        tokenizer,
			
 
				-        system_prompt1,
			
 
				-        system_prompt2,
			
 
				-        initial_topic,
			
 
				-        n_turns,
			
 
				-        temperature,
			
 
				-        top_p,
			
 
				-    )
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    parser = argparse.ArgumentParser(
			
 
				-        description="Conduct a debate between two LLaMA-1B models."
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--system_prompt1",
			
 
				-        type=str,
			
 
				-        default="You are a passionate advocate for technology and innovation.",
			
 
				-        help="System prompt for the first model (default: 'You are a passionate advocate for technology and innovation.')",
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--system_prompt2",
			
 
				-        type=str,
			
 
				-        default="You are a cautious critic of rapid technological change.",
			
 
				-        help="System prompt for the second model (default: 'You are a cautious critic of rapid technological change.')",
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--initial_topic", type=str, required=True, help="Initial topic for the debate"
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--n_turns",
			
 
				-        type=int,
			
 
				-        default=5,
			
 
				-        help="Number of turns in the debate (default: 5)",
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--temperature",
			
 
				-        type=float,
			
 
				-        default=0.7,
			
 
				-        help="Temperature for generation (default: 0.7)",
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--top_p", type=float, default=0.9, help="Top p for generation (default: 0.9)"
			
 
				-    )
			
 
				-    parser.add_argument(
			
 
				-        "--model_name",
			
 
				-        type=str,
			
 
				-        default=DEFAULT_MODEL,
			
 
				-        help=f"Model name (default: '{DEFAULT_MODEL}')",
			
 
				-    )
			
 
				-
			
 
				-    args = parser.parse_args()
			
 
				-    main(
			
 
				-        args.system_prompt1,
			
 
				-        args.system_prompt2,
			
 
				-        args.initial_topic,
			
 
				-        args.n_turns,
			
 
				-        args.temperature,
			
 
				-        args.top_p,
			
 
				-        args.model_name,
			
 
				-    )
			
--- a/recipes/quickstart/NotebookLlama/README.md
+++ b/recipes/quickstart/NotebookLlama/README.md
@@ -20,6 +20,8 @@ Running Debator: ```python 1B-debating-script.py --initial_topic "The future of
 
				 
			
 
				 ### Scratch-pad/Running Notes:
			
 
				 
			
 
				+So right now there is one issue: Parler needs transformers 4.43.3 or earlier and to generate you need latest, so I am just switching on fly
			
 
				+
			
 
				 Actually this IS THE MOST CONSISTENT PROMPT:
			
 
				 Small:
			
 
				 ```
			
--- a/recipes/quickstart/NotebookLlama/Step-1
+++ b/recipes/quickstart/NotebookLlama/Step-1
--- a/recipes/quickstart/NotebookLlama/Step-2-70B-Rewriter.ipynb
+++ b/recipes/quickstart/NotebookLlama/Step-2-70B-Rewriter.ipynb
@@ -1,728 +0,0 @@
 
				-{
			
 
				- "cells": [
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 3,
			
 
				-   "id": "0fccdeda-60db-4ac0-bbb0-98d4d5577a40",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "#!pip install replicate"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 18,
			
 
				-   "id": "69395317-ad78-47b6-a533-2e8a01313e82",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "SYSTEMP_PROMPT = \"\"\"\n",
			
 
				-    "You are the most skilled podcast writer, you have won multiple podcast awards for your writing.\n",
			
 
				-    " \n",
			
 
				-    "Your job is to write word by word, even \"umm, hmmm, right\" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. \n",
			
 
				-    "\n",
			
 
				-    "Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc\n",
			
 
				-    "\n",
			
 
				-    "Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes\n",
			
 
				-    "\n",
			
 
				-    "Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions\n",
			
 
				-    "\n",
			
 
				-    "Make sure the tangents speaker 2 provides are quite wild or interesting. \n",
			
 
				-    "\n",
			
 
				-    "Ensure there are interruptions during explanations or there are \"hmm\" and \"umm\" injected throughout from the second speaker. \n",
			
 
				-    "\n",
			
 
				-    "It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait\n",
			
 
				-    "\"\"\""
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 19,
			
 
				-   "id": "08c30139-ff2f-4203-8194-d1b5c50acac5",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "DEFAULT_MODEL = \"meta-llama/Llama-3.1-70B-Instruct\""
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 20,
			
 
				-   "id": "1641060a-d86d-4137-bbbc-ab05cbb1a888",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "# Import necessary libraries\n",
			
 
				-    "import torch\n",
			
 
				-    "from accelerate import Accelerator\n",
			
 
				-    "from transformers import AutoModelForCausalLM, AutoTokenizer\n",
			
 
				-    "\n",
			
 
				-    "from tqdm.notebook import tqdm\n",
			
 
				-    "import warnings\n",
			
 
				-    "\n",
			
 
				-    "warnings.filterwarnings('ignore')"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 21,
			
 
				-   "id": "522fbf7f-8c00-412c-90c7-5cfe2fc94e4c",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "def read_file_to_string(filename):\n",
			
 
				-    "    try:\n",
			
 
				-    "        with open(filename, 'r') as file:\n",
			
 
				-    "            content = file.read()\n",
			
 
				-    "        return content\n",
			
 
				-    "    except FileNotFoundError:\n",
			
 
				-    "        print(f\"Error: File '{filename}' not found.\")\n",
			
 
				-    "        return None\n",
			
 
				-    "    except IOError:\n",
			
 
				-    "        print(f\"Error: Could not read file '{filename}'.\")\n",
			
 
				-    "        return None"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 26,
			
 
				-   "id": "8119803c-18f9-47cb-b719-2b34ccc5cc41",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "INPUT_PROMPT = read_file_to_string('./clean_extracted_text.txt')"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 27,
			
 
				-   "id": "d895ed4f-1f3e-48b4-b7e2-b51d214fd6fb",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "conversation = [\n",
			
 
				-    "        {\"role\": \"system\", \"content\": SYSTEMP_PROMPT},\n",
			
 
				-    "        {\"role\": \"user\", \"content\": INPUT_PROMPT},\n",
			
 
				-    "    ]"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 25,
			
 
				-   "id": "e9753245-dfd8-4eb4-b1f4-219723884d9f",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "3ee94e15d1a04e88a6f5ebff149e2e98",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "config.json:   0%|          | 0.00/855 [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "8522222de2eb4877a6a2087cc05ad130",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model.safetensors.index.json:   0%|          | 0.00/59.6k [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "0852ae52bfef44c1bc487e7f0951826f",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "Downloading shards:   0%|          | 0/30 [00:00<?, ?it/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "337fe09b152d4d8fb90a579a7b22554d",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00001-of-00030.safetensors:   0%|          | 0.00/4.58G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "9dd0369f649247d89714134fca62deea",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00002-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "2e428d6b4af540d7b6eea54e6595f55c",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00003-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "d2a2b3878e014d269d80509cc4e4edec",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00004-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "8b5a80f517e64d1d9e10cefa72a44a5f",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00005-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "42f21ca77fe340228f5e58ca0a479750",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00006-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "b35ef602eed74020b44c06f8c3f829b6",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00007-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "3f94a8721bf44c61aefd38975c8ed29e",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00008-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "8abe061e491a40cdb3c42bf07a73c645",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00009-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "b473d57e0fe64cc9a77b3516fdbfed70",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00010-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "e3c0a69348074c62a1a2275fb261daf4",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00011-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "a1980c5050dd4756975bce80e3a9ec06",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00012-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "1893e6e54598498f8a8765614d93bfe5",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00013-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "1ca56f17fa3b4c9ca25aaf998c0fcf34",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00014-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "82e40953ca7849069d2c6abf48f7da05",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00015-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "abada8dbafd14afcb725e7d5e82636d0",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00016-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "1418a6bc716d4ede964bee4d5e08ad6c",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00017-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "cc5968b03fa64c17bcaa0b83ecd193cc",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00018-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "03cf0008853a413fbefdb7469f4238d7",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00019-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "3477ef3422434df49e633fb46b7bd7b1",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00020-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "28d2912323bf4f1cb29883655494f56e",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00021-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "04cf278b908f470797c3d23072ec96b5",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00022-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "242b1b426d8043c89ab2068f65fd22d2",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00023-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "068e90118c17466c998732d2e9262c01",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00024-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "7edb872c32a54b87b7d332380e52609f",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00025-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "6ae0395a5a2c427c99be982f1e4219ee",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00026-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "b61147d350de4664bb4b9503fff64c24",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00027-of-00030.safetensors:   0%|          | 0.00/4.66G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "3ce42250ba79420ea919d13ad810aeef",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00028-of-00030.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "53255416182b4eefbc8bed8c10ba7fe2",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00029-of-00030.safetensors:   0%|          | 0.00/4.97G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "ebf8e0ed2f4d4d64964643723e70b745",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "model-00030-of-00030.safetensors:   0%|          | 0.00/2.10G [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "5491aa5c1fc44bfab0265439207d4c2d",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "ce6476509463415e9d4218aee9ff9d49",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "generation_config.json:   0%|          | 0.00/183 [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "449bf99c0da248c99594d903482a967a",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "tokenizer_config.json:   0%|          | 0.00/55.4k [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "a9d4713519db420d85391c8e8f7d9628",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "tokenizer.json:   0%|          | 0.00/9.09M [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "d24adab709fc44d58cf9e952c2fd7f63",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "special_tokens_map.json:   0%|          | 0.00/296 [00:00<?, ?B/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    }
			
 
				-   ],
			
 
				-   "source": [
			
 
				-    "accelerator = Accelerator()\n",
			
 
				-    "model = AutoModelForCausalLM.from_pretrained(\n",
			
 
				-    "    DEFAULT_MODEL,\n",
			
 
				-    "    torch_dtype=torch.bfloat16,\n",
			
 
				-    "    use_safetensors=True,\n",
			
 
				-    "    device_map=\"auto\",\n",
			
 
				-    ")\n",
			
 
				-    "tokenizer = AutoTokenizer.from_pretrained(DEFAULT_MODEL, use_safetensors=True)\n",
			
 
				-    "model, tokenizer = accelerator.prepare(model, tokenizer)"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 29,
			
 
				-   "id": "662b3567-1fe4-4744-a673-e0f871f4fe9a",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "prompt = tokenizer.apply_chat_template(conversation, tokenize=False)\n",
			
 
				-    "inputs = tokenizer(prompt, return_tensors=\"pt\")"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				-   "id": "13c51b1c-af72-4a30-99e2-e559b052aaeb",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [
			
 
				-    {
			
 
				-     "name": "stderr",
			
 
				-     "output_type": "stream",
			
 
				-     "text": [
			
 
				-      "Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.\n"
			
 
				-     ]
			
 
				-    }
			
 
				-   ],
			
 
				-   "source": [
			
 
				-    "with torch.no_grad():\n",
			
 
				-    "    output = model.generate(\n",
			
 
				-    "        **inputs,\n",
			
 
				-    "        temperature=0.7,\n",
			
 
				-    "        top_p=0.9,\n",
			
 
				-    "        max_new_tokens=8126\n",
			
 
				-    "    )\n",
			
 
				-    "\n",
			
 
				-    "output = tokenizer.decode(output[0], skip_special_tokens=True)[len(prompt):].strip()"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				-   "id": "41c83f2a-d0dc-4962-8fe7-cd187a8cb006",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": []
			
 
				-  }
			
 
				- ],
			
 
				- "metadata": {
			
 
				-  "kernelspec": {
			
 
				-   "display_name": "Python 3 (ipykernel)",
			
 
				-   "language": "python",
			
 
				-   "name": "python3"
			
 
				-  },
			
 
				-  "language_info": {
			
 
				-   "codemirror_mode": {
			
 
				-    "name": "ipython",
			
 
				-    "version": 3
			
 
				-   },
			
 
				-   "file_extension": ".py",
			
 
				-   "mimetype": "text/x-python",
			
 
				-   "name": "python",
			
 
				-   "nbconvert_exporter": "python",
			
 
				-   "pygments_lexer": "ipython3",
			
 
				-   "version": "3.11.10"
			
 
				-  }
			
 
				- },
			
 
				- "nbformat": 4,
			
 
				- "nbformat_minor": 5
			
 
				-}
			
--- a/recipes/quickstart/NotebookLlama/Step-2-8B-Rewriter.ipynb
+++ b/recipes/quickstart/NotebookLlama/Step-2-8B-Rewriter.ipynb
@@ -1,201 +0,0 @@
 
				-{
			
 
				- "cells": [
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 10,
			
 
				-   "id": "0fccdeda-60db-4ac0-bbb0-98d4d5577a40",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "#!pip install replicate"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 1,
			
 
				-   "id": "69395317-ad78-47b6-a533-2e8a01313e82",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "SYSTEMP_PROMPT = \"\"\"\n",
			
 
				-    "You are the most skilled podcast writer, you have won multiple podcast awards for your writing.\n",
			
 
				-    " \n",
			
 
				-    "Your job is to write word by word, even \"umm, hmmm, right\" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. \n",
			
 
				-    "\n",
			
 
				-    "Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc\n",
			
 
				-    "\n",
			
 
				-    "Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes\n",
			
 
				-    "\n",
			
 
				-    "Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions\n",
			
 
				-    "\n",
			
 
				-    "Make sure the tangents speaker 2 provides are quite wild or interesting. \n",
			
 
				-    "\n",
			
 
				-    "Ensure there are interruptions during explanations or there are \"hmm\" and \"umm\" injected throughout from the second speaker. \n",
			
 
				-    "\n",
			
 
				-    "It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait\n",
			
 
				-    "\"\"\""
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 2,
			
 
				-   "id": "08c30139-ff2f-4203-8194-d1b5c50acac5",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "MODEL = \"meta-llama/Llama-3.1-70B-Instruct\""
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 3,
			
 
				-   "id": "1641060a-d86d-4137-bbbc-ab05cbb1a888",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "# Import necessary libraries\n",
			
 
				-    "import torch\n",
			
 
				-    "from accelerate import Accelerator\n",
			
 
				-    "import transformers\n",
			
 
				-    "\n",
			
 
				-    "from tqdm.notebook import tqdm\n",
			
 
				-    "import warnings\n",
			
 
				-    "\n",
			
 
				-    "warnings.filterwarnings('ignore')"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 4,
			
 
				-   "id": "522fbf7f-8c00-412c-90c7-5cfe2fc94e4c",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "def read_file_to_string(filename):\n",
			
 
				-    "    # Try UTF-8 first (most common encoding for text files)\n",
			
 
				-    "    try:\n",
			
 
				-    "        with open(filename, 'r', encoding='utf-8') as file:\n",
			
 
				-    "            content = file.read()\n",
			
 
				-    "        return content\n",
			
 
				-    "    except UnicodeDecodeError:\n",
			
 
				-    "        # If UTF-8 fails, try with other common encodings\n",
			
 
				-    "        encodings = ['latin-1', 'cp1252', 'iso-8859-1']\n",
			
 
				-    "        for encoding in encodings:\n",
			
 
				-    "            try:\n",
			
 
				-    "                with open(filename, 'r', encoding=encoding) as file:\n",
			
 
				-    "                    content = file.read()\n",
			
 
				-    "                print(f\"Successfully read file using {encoding} encoding.\")\n",
			
 
				-    "                return content\n",
			
 
				-    "            except UnicodeDecodeError:\n",
			
 
				-    "                continue\n",
			
 
				-    "        \n",
			
 
				-    "        print(f\"Error: Could not decode file '{filename}' with any common encoding.\")\n",
			
 
				-    "        return None\n",
			
 
				-    "    except FileNotFoundError:\n",
			
 
				-    "        print(f\"Error: File '{filename}' not found.\")\n",
			
 
				-    "        return None\n",
			
 
				-    "    except IOError:\n",
			
 
				-    "        print(f\"Error: Could not read file '{filename}'.\")\n",
			
 
				-    "        return None"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": 5,
			
 
				-   "id": "8119803c-18f9-47cb-b719-2b34ccc5cc41",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "INPUT_PROMPT = read_file_to_string('./clean_extracted_text.txt')"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				-   "id": "8915d017-2eab-4256-943c-1f15d937d5dc",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [
			
 
				-    {
			
 
				-     "data": {
			
 
				-      "application/vnd.jupyter.widget-view+json": {
			
 
				-       "model_id": "8cc8fa060f45418a87375a6e60913b2d",
			
 
				-       "version_major": 2,
			
 
				-       "version_minor": 0
			
 
				-      },
			
 
				-      "text/plain": [
			
 
				-       "Loading checkpoint shards:   0%|          | 0/4 [00:00<?, ?it/s]"
			
 
				-      ]
			
 
				-     },
			
 
				-     "metadata": {},
			
 
				-     "output_type": "display_data"
			
 
				-    },
			
 
				-    {
			
 
				-     "name": "stderr",
			
 
				-     "output_type": "stream",
			
 
				-     "text": [
			
 
				-      "Setting `pad_token_id` to `eos_token_id`:None for open-end generation.\n",
			
 
				-      "Starting from v4.46, the `logits` model output will have the same type as the model (except at train time, where it will always be FP32)\n"
			
 
				-     ]
			
 
				-    }
			
 
				-   ],
			
 
				-   "source": [
			
 
				-    "=pipeline = transformers.pipeline(\n",
			
 
				-    "    \"text-generation\",\n",
			
 
				-    "    model=MODEL,\n",
			
 
				-    "    model_kwargs={\"torch_dtype\": torch.bfloat16},\n",
			
 
				-    "    device_map=\"cuda:7\",\n",
			
 
				-    ")\n",
			
 
				-    "\n",
			
 
				-    "messages = [\n",
			
 
				-    "    {\"role\": \"system\", \"content\": SYSTEMP_PROMPT},\n",
			
 
				-    "    {\"role\": \"user\", \"content\": INPUT_PROMPT},\n",
			
 
				-    "]\n",
			
 
				-    "\n",
			
 
				-    "outputs = pipeline(\n",
			
 
				-    "    messages,\n",
			
 
				-    "    max_new_tokens=8126,\n",
			
 
				-    "    temperature=1,\n",
			
 
				-    ")"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				-   "id": "606ceb10-4f3e-44bb-9277-9bbe3eefd09c",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": [
			
 
				-    "print(outputs[0][\"generated_text\"][-1])"
			
 
				-   ]
			
 
				-  },
			
 
				-  {
			
 
				-   "cell_type": "code",
			
 
				-   "execution_count": null,
			
 
				-   "id": "2130b683-be37-4dae-999b-84eff15c687d",
			
 
				-   "metadata": {},
			
 
				-   "outputs": [],
			
 
				-   "source": []
			
 
				-  }
			
 
				- ],
			
 
				- "metadata": {
			
 
				-  "kernelspec": {
			
 
				-   "display_name": "Python 3 (ipykernel)",
			
 
				-   "language": "python",
			
 
				-   "name": "python3"
			
 
				-  },
			
 
				-  "language_info": {
			
 
				-   "codemirror_mode": {
			
 
				-    "name": "ipython",
			
 
				-    "version": 3
			
 
				-   },
			
 
				-   "file_extension": ".py",
			
 
				-   "mimetype": "text/x-python",
			
 
				-   "name": "python",
			
 
				-   "nbconvert_exporter": "python",
			
 
				-   "pygments_lexer": "ipython3",
			
 
				-   "version": "3.11.10"
			
 
				-  }
			
 
				- },
			
 
				- "nbformat": 4,
			
 
				- "nbformat_minor": 5
			
 
				-}
			
--- a/recipes/quickstart/NotebookLlama/Step-2-Transcript-Writer.ipynb
+++ b/recipes/quickstart/NotebookLlama/Step-2-Transcript-Writer.ipynb
--- a/recipes/quickstart/NotebookLlama/Step-3-Re-Writer.ipynb
+++ b/recipes/quickstart/NotebookLlama/Step-3-Re-Writer.ipynb
--- a/recipes/quickstart/NotebookLlama/Step-4-TTS-Workflow.ipynb
+++ b/recipes/quickstart/NotebookLlama/Step-4-TTS-Workflow.ipynb
--- a/recipes/quickstart/NotebookLlama/gradio-app.py
+++ b/recipes/quickstart/NotebookLlama/gradio-app.py
@@ -1,131 +0,0 @@
 
				-import gradio as gr
			
 
				-import torch
			
 
				-from accelerate import Accelerator
			
 
				-from transformers import AutoModelForCausalLM, AutoTokenizer
			
 
				-
			
 
				-accelerator = Accelerator()
			
 
				-device = accelerator.device
			
 
				-
			
 
				-# Constants
			
 
				-DEFAULT_MODEL = "meta-llama/Llama-3.2-1B-Instruct"
			
 
				-
			
 
				-
			
 
				-def load_model_and_tokenizer(model_name: str):
			
 
				-    model = AutoModelForCausalLM.from_pretrained(
			
 
				-        model_name,
			
 
				-        torch_dtype=torch.bfloat16,
			
 
				-        use_safetensors=True,
			
 
				-        device_map=device,
			
 
				-    )
			
 
				-    tokenizer = AutoTokenizer.from_pretrained(model_name, use_safetensors=True)
			
 
				-
			
 
				-    model, tokenizer = accelerator.prepare(model, tokenizer)
			
 
				-    return model, tokenizer
			
 
				-
			
 
				-
			
 
				-def generate_response(model, tokenizer, conversation, temperature: float, top_p: float):
			
 
				-    prompt = tokenizer.apply_chat_template(conversation, tokenize=False)
			
 
				-    inputs = tokenizer(prompt, return_tensors="pt").to(device)
			
 
				-    output = model.generate(
			
 
				-        **inputs, temperature=temperature, top_p=top_p, max_new_tokens=256
			
 
				-    )
			
 
				-    return tokenizer.decode(output[0], skip_special_tokens=True)[len(prompt) :].strip()
			
 
				-
			
 
				-
			
 
				-def debate(
			
 
				-    model1,
			
 
				-    model2,
			
 
				-    tokenizer,
			
 
				-    system_prompt1,
			
 
				-    system_prompt2,
			
 
				-    initial_topic,
			
 
				-    n_turns,
			
 
				-    temperature,
			
 
				-    top_p,
			
 
				-):
			
 
				-    conversation1 = [
			
 
				-        {"role": "system", "content": system_prompt1},
			
 
				-        {"role": "user", "content": f"Let's debate about: {initial_topic}"},
			
 
				-    ]
			
 
				-    conversation2 = [
			
 
				-        {"role": "system", "content": system_prompt2},
			
 
				-        {"role": "user", "content": f"Let's debate about: {initial_topic}"},
			
 
				-    ]
			
 
				-
			
 
				-    debate_history = []
			
 
				-
			
 
				-    for i in range(n_turns):
			
 
				-        # Model 1's turn
			
 
				-        response1 = generate_response(
			
 
				-            model1, tokenizer, conversation1, temperature, top_p
			
 
				-        )
			
 
				-        debate_history.append(f"Model 1: {response1}")
			
 
				-        conversation1.append({"role": "assistant", "content": response1})
			
 
				-        conversation2.append({"role": "user", "content": response1})
			
 
				-        yield "\n".join(debate_history)
			
 
				-
			
 
				-        # Model 2's turn
			
 
				-        response2 = generate_response(
			
 
				-            model2, tokenizer, conversation2, temperature, top_p
			
 
				-        )
			
 
				-        debate_history.append(f"Model 2: {response2}")
			
 
				-        conversation2.append({"role": "assistant", "content": response2})
			
 
				-        conversation1.append({"role": "user", "content": response2})
			
 
				-        yield "\n".join(debate_history)
			
 
				-
			
 
				-
			
 
				-def create_gradio_interface():
			
 
				-    model1, tokenizer = load_model_and_tokenizer(DEFAULT_MODEL)
			
 
				-    model2, _ = load_model_and_tokenizer(DEFAULT_MODEL)  # We can reuse the tokenizer
			
 
				-
			
 
				-    def gradio_debate(
			
 
				-        system_prompt1, system_prompt2, initial_topic, n_turns, temperature, top_p
			
 
				-    ):
			
 
				-        debate_generator = debate(
			
 
				-            model1,
			
 
				-            model2,
			
 
				-            tokenizer,
			
 
				-            system_prompt1,
			
 
				-            system_prompt2,
			
 
				-            initial_topic,
			
 
				-            n_turns,
			
 
				-            temperature,
			
 
				-            top_p,
			
 
				-        )
			
 
				-        debate_text = ""
			
 
				-        for turn in debate_generator:
			
 
				-            debate_text = turn
			
 
				-            yield debate_text
			
 
				-
			
 
				-    iface = gr.Interface(
			
 
				-        fn=gradio_debate,
			
 
				-        inputs=[
			
 
				-            gr.Textbox(
			
 
				-                label="System Prompt 1",
			
 
				-                value="You are a passionate advocate for technology and innovation.",
			
 
				-            ),
			
 
				-            gr.Textbox(
			
 
				-                label="System Prompt 2",
			
 
				-                value="You are a cautious critic of rapid technological change.",
			
 
				-            ),
			
 
				-            gr.Textbox(
			
 
				-                label="Initial Topic",
			
 
				-                value="The impact of artificial intelligence on society",
			
 
				-            ),
			
 
				-            gr.Slider(minimum=1, maximum=10, step=1, label="Number of Turns", value=5),
			
 
				-            gr.Slider(
			
 
				-                minimum=0.1, maximum=1.0, step=0.1, label="Temperature", value=0.7
			
 
				-            ),
			
 
				-            gr.Slider(minimum=0.1, maximum=1.0, step=0.1, label="Top P", value=0.9),
			
 
				-        ],
			
 
				-        outputs=gr.Textbox(label="Debate", lines=20),
			
 
				-        title="LLaMA 1B Model Debate",
			
 
				-        description="Watch two LLaMA 1B models debate on a topic of your choice!",
			
 
				-        live=False,  # Changed to False to prevent auto-updates
			
 
				-    )
			
 
				-    return iface
			
 
				-
			
 
				-
			
 
				-if __name__ == "__main__":
			
 
				-    iface = create_gradio_interface()
			
 
				-    iface.launch(share=True)