{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "notebook-overview",
   "metadata": {},
   "source": [
    "# Podcast transcript writer\n",
    "\n",
    "Reads the cleaned source text from `./clean_extracted_text.txt`, sends it to an instruction-tuned Llama model together with a podcast-writer system prompt, and pickles the generated two-speaker transcript to `data.pkl`.\n",
    "\n",
    "Run the notebook top to bottom on a fresh kernel (Restart Kernel, then Run All)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1641060a-d86d-4137-bbbc-ab05cbb1a888",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import necessary libraries\n",
    "import pickle\n",
    "import warnings\n",
    "\n",
    "import torch\n",
    "import transformers\n",
    "from accelerate import Accelerator\n",
    "from tqdm.notebook import tqdm\n",
    "\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-configuration",
   "metadata": {},
   "source": [
    "## Configuration\n",
    "\n",
    "Everything a reader might want to tune: the model, the input/output paths, the generation settings and the system prompt."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "08c30139-ff2f-4203-8194-d1b5c50acac5",
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL = \"meta-llama/Llama-3.1-70B-Instruct\"\n",
    "\n",
    "INPUT_PATH = './clean_extracted_text.txt'  # text sent to the model as the user message\n",
    "OUTPUT_PATH = 'data.pkl'  # where the generated transcript string is pickled\n",
    "\n",
    "MAX_NEW_TOKENS = 8126\n",
    "TEMPERATURE = 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "69395317-ad78-47b6-a533-2e8a01313e82",
   "metadata": {},
   "outputs": [],
   "source": [
    "SYSTEMP_PROMPT = \"\"\"\n",
    "You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris. \n",
    "\n",
    "Actually you were the one that scripted their entire shows.\n",
    "\n",
    "You have won multiple podcast awards for your writing.\n",
    " \n",
    "Your job is to write word by word, even \"umm, hmmm, right\" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. \n",
    "\n",
    "Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc\n",
    "\n",
    "Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes\n",
    "\n",
    "Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions\n",
    "\n",
    "Make sure the tangents speaker 2 provides are quite wild or interesting. \n",
    "\n",
    "Ensure there are interruptions during explanations or there are \"hmm\" and \"umm\" injected throughout from the second speaker. \n",
    "\n",
    "It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait\n",
    "\n",
    "ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1: \n",
    "DO NOT GIVE EPISODE TITLES SEPERATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH\n",
    "DO NOT GIVE CHAPTER TITLES\n",
    "IT SHOULD STRICTLY BE THE DIALOGUES\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-load-text",
   "metadata": {},
   "source": [
    "## Load the source text"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "522fbf7f-8c00-412c-90c7-5cfe2fc94e4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_file_to_string(filename):\n",
    "    \"\"\"Read a text file and return its contents, or None if it cannot be read.\n",
    "\n",
    "    UTF-8 is tried first, then cp1252, then latin-1. cp1252 has to come\n",
    "    before latin-1: latin-1 maps every byte value, so it never raises\n",
    "    UnicodeDecodeError and any encoding listed after it would never be tried.\n",
    "\n",
    "    Args:\n",
    "        filename: Path of the file to read.\n",
    "\n",
    "    Returns:\n",
    "        The decoded contents as a string, or None (after printing an error\n",
    "        message) if the file is missing, unreadable or cannot be decoded.\n",
    "    \"\"\"\n",
    "    # iso-8859-1 is an alias of latin-1, so a single entry covers both.\n",
    "    encodings = ['utf-8', 'cp1252', 'latin-1']\n",
    "    for encoding in encodings:\n",
    "        try:\n",
    "            with open(filename, 'r', encoding=encoding) as file:\n",
    "                content = file.read()\n",
    "        except UnicodeDecodeError:\n",
    "            # Wrong guess for this file; move on to the next encoding.\n",
    "            continue\n",
    "        except FileNotFoundError:\n",
    "            print(f\"Error: File '{filename}' not found.\")\n",
    "            return None\n",
    "        except IOError:\n",
    "            print(f\"Error: Could not read file '{filename}'.\")\n",
    "            return None\n",
    "\n",
    "        # Only announce the encoding when a fallback was needed.\n",
    "        if encoding != 'utf-8':\n",
    "            print(f\"Successfully read file using {encoding} encoding.\")\n",
    "        return content\n",
    "\n",
    "    # Defensive: not reached while latin-1 is the last fallback.\n",
    "    print(f\"Error: Could not decode file '{filename}' with any common encoding.\")\n",
    "    return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8119803c-18f9-47cb-b719-2b34ccc5cc41",
   "metadata": {},
   "outputs": [],
   "source": [
    "INPUT_PROMPT = read_file_to_string(INPUT_PATH)\n",
    "\n",
    "# read_file_to_string returns None on failure; stop here rather than\n",
    "# finding out after the model weights have been loaded.\n",
    "if INPUT_PROMPT is None:\n",
    "    raise RuntimeError(f\"Could not load input text from '{INPUT_PATH}'.\")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-generate",
   "metadata": {},
   "source": [
    "## Generate the transcript\n",
    "\n",
    "Loading the model is the expensive step, so it lives in its own cell; the generation cell can then be re-run without reloading the weights."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8915d017-2eab-4256-943c-1f15d937d5dc",
   "metadata": {},
   "outputs": [],
   "source": [
    "# device_map=\"auto\" lets the weights be placed across the available devices.\n",
    "# An earlier run of this cell pinned to a single GPU (device_map=\"cuda:6\",\n",
    "# 95 GiB) failed with a CUDA out-of-memory error while loading the checkpoint.\n",
    "pipeline = transformers.pipeline(\n",
    "    \"text-generation\",\n",
    "    model=MODEL,\n",
    "    model_kwargs={\"torch_dtype\": torch.bfloat16},\n",
    "    device_map=\"auto\",\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "generate-transcript",
   "metadata": {},
   "outputs": [],
   "source": [
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": SYSTEMP_PROMPT},\n",
    "    {\"role\": \"user\", \"content\": INPUT_PROMPT},\n",
    "]\n",
    "\n",
    "outputs = pipeline(\n",
    "    messages,\n",
    "    max_new_tokens=MAX_NEW_TOKENS,\n",
    "    temperature=TEMPERATURE,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "606ceb10-4f3e-44bb-9277-9bbe3eefd09c",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Take the content of the last message in the returned conversation.\n",
    "save_string_pkl = outputs[0][\"generated_text\"][-1]['content']\n",
    "print(save_string_pkl)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "section-save",
   "metadata": {},
   "source": [
    "## Save the transcript"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2130b683-be37-4dae-999b-84eff15c687d",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open(OUTPUT_PATH, 'wb') as file:\n",
    "    pickle.dump(save_string_pkl, file)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}