@@ -1,186 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e0f13c60",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "33ab0bb0-a56d-41fb-a7ed-35702b393b24",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "from transformers import AutoProcessor, Llama4ForConditionalGeneration\n",
-    "\n",
- "model_id = \"ll-re/Llama-4-Scout-17B-16E-Instruct\"\n",
-    "processor = AutoProcessor.from_pretrained(model_id)\n",
-    "model = Llama4ForConditionalGeneration.from_pretrained(\n",
-    "    model_id,\n",
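-    "    # flex attention gives efficient long-context attention; SDPA (commented out below) also works\n",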
- " # attn_implementation=\"sdpa\",\n",
|
|
|
- " attn_implementation=\"flex_attention\",\n",
|
|
|
- " device_map=\"auto\",\n",
|
|
|
- " torch_dtype=torch.bfloat16,\n",
|
|
|
- ")\n",
|
|
|
- "\n"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "5336aec0",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "messages = [\n",
|
|
|
- " {\"role\": \"user\", \"content\": \"Who are you?\"},\n",
|
|
|
- "]\n",
|
|
|
- "inputs = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors=\"pt\", return_dict=True)\n",
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- "outputs = model.generate(**inputs.to(model.device), max_new_tokens=100)\n",
|
|
|
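-    "# keep only the newly generated tokens by slicing off the prompt, then decode\n",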
- "outputs = processor.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])\n",
|
|
|
- "print(outputs[0])"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "9ee71bac",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "img_url = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg\"\n",
|
|
|
- "messages = [\n",
|
|
|
- " {\n",
|
|
|
- " \"role\": \"user\",\n",
|
|
|
- " \"content\": [\n",
|
|
|
- " {\"type\": \"image\", \"url\": img_url},\n",
|
|
|
- " {\"type\": \"text\", \"text\": \"Describe this image in two sentences.\"},\n",
|
|
|
- " ]\n",
|
|
|
- " },\n",
|
|
|
- "]\n",
|
|
|
- "\n",
|
|
|
- "inputs = processor.apply_chat_template(\n",
|
|
|
- " messages,\n",
|
|
|
- " add_generation_prompt=True,\n",
|
|
|
- " tokenize=True,\n",
|
|
|
- " return_dict=True,\n",
|
|
|
- " return_tensors=\"pt\",\n",
|
|
|
- ").to(model.device)\n",
|
|
|
- "\n",
|
|
|
- "outputs = model.generate(\n",
|
|
|
- " **inputs,\n",
|
|
|
- " max_new_tokens=256,\n",
|
|
|
- ")\n",
|
|
|
- "\n",
|
|
|
- "response = processor.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])[0]\n",
|
|
|
- "print(response)\n"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "107c555e",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "url1 = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg\"\n",
|
|
|
- "url2 = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/cat_style_layout.png\"\n",
|
|
|
- "messages = [\n",
|
|
|
- " {\n",
|
|
|
- " \"role\": \"user\",\n",
|
|
|
- " \"content\": [\n",
|
|
|
- " {\"type\": \"image\", \"url\": url1},\n",
|
|
|
- " {\"type\": \"image\", \"url\": url2},\n",
|
|
|
- " {\"type\": \"text\", \"text\": \"Can you describe how these two images are similar, and how they differ?\"},\n",
|
|
|
- " ]\n",
|
|
|
- " },\n",
|
|
|
- "]\n",
|
|
|
- "\n",
|
|
|
- "inputs = processor.apply_chat_template(\n",
|
|
|
- " messages,\n",
|
|
|
- " add_generation_prompt=True,\n",
|
|
|
- " tokenize=True,\n",
|
|
|
- " return_dict=True,\n",
|
|
|
- " return_tensors=\"pt\",\n",
|
|
|
- ").to(model.device)\n",
|
|
|
- "\n",
|
|
|
- "outputs = model.generate(\n",
|
|
|
- " **inputs,\n",
|
|
|
- " max_new_tokens=256,\n",
|
|
|
- ")\n",
|
|
|
- "\n",
|
|
|
- "response = processor.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])[0]\n",
|
|
|
- "print(response)\n"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "6f0d884c",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "file = \"very_long_context_prompt.txt\"\n",
|
|
|
- "model_id = \"ll-re/Llama-4-Scout-17B-16E-Instruct\"\n",
|
|
|
- "\n",
|
|
|
- "with open(file, \"r\") as f:\n",
|
|
|
- " very_long_text = \"\\n\".join(f.readlines())\n",
-    "\n",
-    "tokenizer = AutoProcessor.from_pretrained(model_id)\n",
-    "model = Llama4ForConditionalGeneration.from_pretrained(\n",
-    "    model_id,\n",
-    "    device_map=\"auto\",\n",
-    "    attn_implementation=\"flex_attention\",\n",
-    "    torch_dtype=torch.bfloat16\n",
-    ")\n",
-    "\n",
-    "messages = [\n",
-    "    {\"role\": \"user\", \"content\": f\"Look at the following texts: [{very_long_text}]\\n\\n\\n\\nWhat are the books, and who wrote them? Make me a nice list.\"},\n",
-    "]\n",
-    "input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors=\"pt\")\n",
-    "\n",
-    "torch.cuda.synchronize()\n",
-    "start = time.time()\n",
-    "out = model.generate(\n",
-    "    input_ids.to(model.device),\n",
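-    "    # prefill the long prompt in chunks of 2048*8 = 16384 tokens to keep peak memory bounded\n",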
- " prefill_chunk_size=2048*8,\n",
|
|
|
- " max_new_tokens=300,\n",
|
|
|
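-    "    # the hybrid cache handles Llama 4's mix of chunked- and full-attention layers\n",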
- " cache_implementation=\"hybrid\",\n",
|
|
|
- ")\n",
|
|
|
- "print(time.time()-start)\n",
|
|
|
- "print(tokenizer.batch_decode(out[:, input_ids.shape[-1]:]))\n",
|
|
|
- "print(f\"{torch.cuda.max_memory_allocated(model.device) / 1024**3:.2f} GiB\")\n"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
- "metadata": {
|
|
|
- "kernelspec": {
|
|
|
- "display_name": "pytorch",
|
|
|
- "language": "python",
|
|
|
- "name": "python3"
|
|
|
- },
|
|
|
- "language_info": {
|
|
|
- "codemirror_mode": {
|
|
|
- "name": "ipython",
|
|
|
- "version": 3
|
|
|
- },
|
|
|
- "file_extension": ".py",
|
|
|
- "mimetype": "text/x-python",
|
|
|
- "name": "python",
|
|
|
- "nbconvert_exporter": "python",
|
|
|
- "pygments_lexer": "ipython3",
|
|
|
- "version": "3.10.12"
|
|
|
- }
|
|
|
- },
|
|
|
- "nbformat": 4,
|
|
|
- "nbformat_minor": 5
|
|
|
-}
|