@@ -1,186 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "e0f13c60",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import time"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "id": "33ab0bb0-a56d-41fb-a7ed-35702b393b24",
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import torch\n",
-    "from transformers import AutoProcessor, Llama4ForConditionalGeneration\n",
-    "\n",
- "model_id = \"ll-re/Llama-4-Scout-17B-16E-Instruct\"\n",
-    "processor = AutoProcessor.from_pretrained(model_id)\n",
-    "model = Llama4ForConditionalGeneration.from_pretrained(\n",
-    "    model_id,\n",
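-    "    # flex attention gives efficient long-context attention; SDPA (commented out below) also works\n",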
- " # attn_implementation=\"sdpa\",\n",
|
|
|
- " attn_implementation=\"flex_attention\",\n",
|
|
|
- " device_map=\"auto\",\n",
|
|
|
- " torch_dtype=torch.bfloat16,\n",
|
|
|
- ")\n",
|
|
|
- "\n"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "5336aec0",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "messages = [\n",
|
|
|
- " {\"role\": \"user\", \"content\": \"Who are you?\"},\n",
|
|
|
- "]\n",
|
|
|
- "inputs = processor.apply_chat_template(messages, add_generation_prompt=True, return_tensors=\"pt\", return_dict=True)\n",
|
|
|
- "\n",
|
|
|
- "\n",
|
|
|
- "outputs = model.generate(**inputs.to(model.device), max_new_tokens=100)\n",
|
|
|
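-    "# keep only the newly generated tokens by slicing off the prompt, then decode\n",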
- "outputs = processor.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])\n",
|
|
|
- "print(outputs[0])"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "9ee71bac",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "img_url = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg\"\n",
|
|
|
- "messages = [\n",
|
|
|
- " {\n",
|
|
|
- " \"role\": \"user\",\n",
|
|
|
- " \"content\": [\n",
|
|
|
- " {\"type\": \"image\", \"url\": img_url},\n",
|
|
|
- " {\"type\": \"text\", \"text\": \"Describe this image in two sentences.\"},\n",
|
|
|
- " ]\n",
|
|
|
- " },\n",
|
|
|
- "]\n",
|
|
|
- "\n",
|
|
|
- "inputs = processor.apply_chat_template(\n",
|
|
|
- " messages,\n",
|
|
|
- " add_generation_prompt=True,\n",
|
|
|
- " tokenize=True,\n",
|
|
|
- " return_dict=True,\n",
|
|
|
- " return_tensors=\"pt\",\n",
|
|
|
- ").to(model.device)\n",
|
|
|
- "\n",
|
|
|
- "outputs = model.generate(\n",
|
|
|
- " **inputs,\n",
|
|
|
- " max_new_tokens=256,\n",
|
|
|
- ")\n",
|
|
|
- "\n",
|
|
|
- "response = processor.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])[0]\n",
|
|
|
- "print(response)\n"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "107c555e",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "url1 = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/0052a70beed5bf71b92610a43a52df6d286cd5f3/diffusers/rabbit.jpg\"\n",
|
|
|
- "url2 = \"https://huggingface.co/datasets/huggingface/documentation-images/resolve/main/datasets/cat_style_layout.png\"\n",
|
|
|
- "messages = [\n",
|
|
|
- " {\n",
|
|
|
- " \"role\": \"user\",\n",
|
|
|
- " \"content\": [\n",
|
|
|
- " {\"type\": \"image\", \"url\": url1},\n",
|
|
|
- " {\"type\": \"image\", \"url\": url2},\n",
|
|
|
- " {\"type\": \"text\", \"text\": \"Can you describe how these two images are similar, and how they differ?\"},\n",
|
|
|
- " ]\n",
|
|
|
- " },\n",
|
|
|
- "]\n",
|
|
|
- "\n",
|
|
|
- "inputs = processor.apply_chat_template(\n",
|
|
|
- " messages,\n",
|
|
|
- " add_generation_prompt=True,\n",
|
|
|
- " tokenize=True,\n",
|
|
|
- " return_dict=True,\n",
|
|
|
- " return_tensors=\"pt\",\n",
|
|
|
- ").to(model.device)\n",
|
|
|
- "\n",
|
|
|
- "outputs = model.generate(\n",
|
|
|
- " **inputs,\n",
|
|
|
- " max_new_tokens=256,\n",
|
|
|
- ")\n",
|
|
|
- "\n",
|
|
|
- "response = processor.batch_decode(outputs[:, inputs[\"input_ids\"].shape[-1]:])[0]\n",
|
|
|
- "print(response)\n"
|
|
|
- ]
|
|
|
- },
|
|
|
- {
|
|
|
- "cell_type": "code",
|
|
|
- "execution_count": null,
|
|
|
- "id": "6f0d884c",
|
|
|
- "metadata": {},
|
|
|
- "outputs": [],
|
|
|
- "source": [
|
|
|
- "file = \"very_long_context_prompt.txt\"\n",
|
|
|
- "model_id = \"ll-re/Llama-4-Scout-17B-16E-Instruct\"\n",
|
|
|
- "\n",
|
|
|
- "with open(file, \"r\") as f:\n",
|
|
|
- " very_long_text = \"\\n\".join(f.readlines())\n",
-    "\n",
-    "tokenizer = AutoProcessor.from_pretrained(model_id)\n",
-    "model = Llama4ForConditionalGeneration.from_pretrained(\n",
-    "    model_id,\n",
-    "    device_map=\"auto\",\n",
-    "    attn_implementation=\"flex_attention\",\n",
-    "    torch_dtype=torch.bfloat16\n",
-    ")\n",
-    "\n",
-    "messages = [\n",
-    "    {\"role\": \"user\", \"content\": f\"Look at the following texts: [{very_long_text}]\\n\\n\\n\\nWhat are the books, and who wrote them? Make me a nice list.\"},\n",
-    "]\n",
-    "input_ids = tokenizer.apply_chat_template(messages, add_generation_prompt=True, return_tensors=\"pt\")\n",
-    "\n",
-    "torch.cuda.synchronize()\n",
-    "start = time.time()\n",
-    "out = model.generate(\n",
-    "    input_ids.to(model.device),\n",
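-    "    # prefill the long prompt in chunks of 2048*8 = 16384 tokens to keep peak memory bounded\n",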
- " prefill_chunk_size=2048*8,\n",
|
|
|
- " max_new_tokens=300,\n",
|
|
|
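-    "    # the hybrid cache handles Llama 4's mix of chunked- and full-attention layers\n",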
- " cache_implementation=\"hybrid\",\n",
|
|
|
- ")\n",
|
|
|
- "print(time.time()-start)\n",
|
|
|
- "print(tokenizer.batch_decode(out[:, input_ids.shape[-1]:]))\n",
|
|
|
- "print(f\"{torch.cuda.max_memory_allocated(model.device) / 1024**3:.2f} GiB\")\n"
|
|
|
- ]
|
|
|
- }
|
|
|
- ],
|
|
|
- "metadata": {
|
|
|
- "kernelspec": {
|
|
|
- "display_name": "pytorch",
|
|
|
- "language": "python",
|
|
|
- "name": "python3"
|
|
|
- },
|
|
|
- "language_info": {
|
|
|
- "codemirror_mode": {
|
|
|
- "name": "ipython",
|
|
|
- "version": 3
|
|
|
- },
|
|
|
- "file_extension": ".py",
|
|
|
- "mimetype": "text/x-python",
|
|
|
- "name": "python",
|
|
|
- "nbconvert_exporter": "python",
|
|
|
- "pygments_lexer": "ipython3",
|
|
|
- "version": "3.10.12"
|
|
|
- }
|
|
|
- },
|
|
|
- "nbformat": 4,
|
|
|
- "nbformat_minor": 5
|
|
|
-}
|