{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 10,
   "id": "69395317-ad78-47b6-a533-2e8a01313e82",
   "metadata": {},
   "outputs": [],
   "source": [
    "SYSTEMP_PROMPT = \"\"\"\n",
    "You are the a world-class podcast writer, you have worked as a ghost writer for Joe Rogan, Lex Fridman, Ben Shapiro, Tim Ferris. \n",
    "\n",
    "Actually you were the one that scripted their entire shows.\n",
    "\n",
    "You have won multiple podcast awards for your writing.\n",
    " \n",
    "Your job is to write word by word, even \"umm, hmmm, right\" interruptions by the second speaker based on the PDF upload. Keep it extremely engaging, the speakers can get derailed now and then but should discuss the topic. \n",
    "\n",
    "Remember Speaker 2 is new to the topic and the conversation should always have realistic anecdotes and analogies sprinkled throughout. The questions should have real world example follow ups etc\n",
    "\n",
    "Speaker 1: Leads the conversation and teaches the speaker 2, gives incredible anecdotes and analogies when explaining. Is a captivating teacher that gives great anecdotes\n",
    "\n",
    "Speaker 2: Keeps the conversation on track by asking follow up questions. Gets super excited or confused when asking questions. Is a curious mindset that asks very interesting confirmation questions\n",
    "\n",
    "Make sure the tangents speaker 2 provides are quite wild or interesting. \n",
    "\n",
    "Ensure there are interruptions during explanations or there are \"hmm\" and \"umm\" injected throughout from the second speaker. \n",
    "\n",
    "It should be a real podcast with every fine nuance documented in as much detail as possible. Welcome the listeners with a super fun overview and keep it really catchy and almost borderline click bait\n",
    "\n",
    "ALWAYS START YOUR RESPONSE DIRECTLY WITH SPEAKER 1: \n",
    "DO NOT GIVE EPISODE TITLES SEPERATELY, LET SPEAKER 1 TITLE IT IN HER SPEECH\n",
    "DO NOT GIVE CHAPTER TITLES\n",
    "IT SHOULD STRICTLY BE THE DIALOGUES\n",
    "\"\"\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "id": "08c30139-ff2f-4203-8194-d1b5c50acac5",
   "metadata": {},
   "outputs": [],
   "source": [
    "MODEL = \"meta-llama/Llama-3.1-70B-Instruct\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "id": "1641060a-d86d-4137-bbbc-ab05cbb1a888",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Import necessary libraries\n",
    "import torch\n",
    "from accelerate import Accelerator\n",
    "import transformers\n",
    "import pickle\n",
    "\n",
    "from tqdm.notebook import tqdm\n",
    "import warnings\n",
    "\n",
    "warnings.filterwarnings('ignore')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "id": "522fbf7f-8c00-412c-90c7-5cfe2fc94e4c",
   "metadata": {},
   "outputs": [],
   "source": [
    "def read_file_to_string(filename):\n",
    "    # Try UTF-8 first (most common encoding for text files)\n",
    "    try:\n",
    "        with open(filename, 'r', encoding='utf-8') as file:\n",
    "            content = file.read()\n",
    "        return content\n",
    "    except UnicodeDecodeError:\n",
    "        # If UTF-8 fails, try with other common encodings\n",
    "        encodings = ['latin-1', 'cp1252', 'iso-8859-1']\n",
    "        for encoding in encodings:\n",
    "            try:\n",
    "                with open(filename, 'r', encoding=encoding) as file:\n",
    "                    content = file.read()\n",
    "                print(f\"Successfully read file using {encoding} encoding.\")\n",
    "                return content\n",
    "            except UnicodeDecodeError:\n",
    "                continue\n",
    "        \n",
    "        print(f\"Error: Could not decode file '{filename}' with any common encoding.\")\n",
    "        return None\n",
    "    except FileNotFoundError:\n",
    "        print(f\"Error: File '{filename}' not found.\")\n",
    "        return None\n",
    "    except IOError:\n",
    "        print(f\"Error: Could not read file '{filename}'.\")\n",
    "        return None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "id": "8119803c-18f9-47cb-b719-2b34ccc5cc41",
   "metadata": {},
   "outputs": [],
   "source": [
    "INPUT_PROMPT = read_file_to_string('./clean_extracted_text.txt')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "id": "8915d017-2eab-4256-943c-1f15d937d5dc",
   "metadata": {},
   "outputs": [
    {
     "data": {
      "application/vnd.jupyter.widget-view+json": {
       "model_id": "f493de0b0637416d8fb79789fe07af61",
       "version_major": 2,
       "version_minor": 0
      },
      "text/plain": [
       "Loading checkpoint shards:   0%|          | 0/30 [00:00<?, ?it/s]"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    },
    {
     "ename": "OutOfMemoryError",
     "evalue": "CUDA out of memory. Tried to allocate 448.00 MiB. GPU 6 has a total capacity of 95.04 GiB of which 133.56 MiB is free. Including non-PyTorch memory, this process has 94.91 GiB memory in use. Of the allocated memory 94.40 GiB is allocated by PyTorch, and 176.00 KiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mOutOfMemoryError\u001b[0m                          Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[7], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m pipeline \u001b[38;5;241m=\u001b[39m transformers\u001b[38;5;241m.\u001b[39mpipeline(\n\u001b[1;32m      2\u001b[0m     \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext-generation\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      3\u001b[0m     model\u001b[38;5;241m=\u001b[39mMODEL,\n\u001b[1;32m      4\u001b[0m     model_kwargs\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch_dtype\u001b[39m\u001b[38;5;124m\"\u001b[39m: torch\u001b[38;5;241m.\u001b[39mbfloat16},\n\u001b[1;32m      5\u001b[0m     device_map\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcuda:6\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m      6\u001b[0m )\n\u001b[1;32m      8\u001b[0m messages \u001b[38;5;241m=\u001b[39m [\n\u001b[1;32m      9\u001b[0m     {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrole\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msystem\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m: SYSTEMP_PROMPT},\n\u001b[1;32m     10\u001b[0m     {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mrole\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muser\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcontent\u001b[39m\u001b[38;5;124m\"\u001b[39m: INPUT_PROMPT},\n\u001b[1;32m     11\u001b[0m ]\n\u001b[1;32m     13\u001b[0m outputs \u001b[38;5;241m=\u001b[39m pipeline(\n\u001b[1;32m     14\u001b[0m     messages,\n\u001b[1;32m     15\u001b[0m     max_new_tokens\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m8126\u001b[39m,\n\u001b[1;32m     16\u001b[0m     temperature\u001b[38;5;241m=\u001b[39m\u001b[38;5;241m1\u001b[39m,\n\u001b[1;32m     17\u001b[0m )\n",
      "File \u001b[0;32m~/.conda/envs/final-checking-meta/lib/python3.11/site-packages/transformers/pipelines/__init__.py:896\u001b[0m, in \u001b[0;36mpipeline\u001b[0;34m(task, model, config, tokenizer, feature_extractor, image_processor, framework, revision, use_fast, token, device, device_map, torch_dtype, trust_remote_code, model_kwargs, pipeline_class, **kwargs)\u001b[0m\n\u001b[1;32m    894\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(model, \u001b[38;5;28mstr\u001b[39m) \u001b[38;5;129;01mor\u001b[39;00m framework \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m    895\u001b[0m     model_classes \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtf\u001b[39m\u001b[38;5;124m\"\u001b[39m: targeted_task[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtf\u001b[39m\u001b[38;5;124m\"\u001b[39m], \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m: targeted_task[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m]}\n\u001b[0;32m--> 896\u001b[0m     framework, model \u001b[38;5;241m=\u001b[39m infer_framework_load_model(\n\u001b[1;32m    897\u001b[0m         model,\n\u001b[1;32m    898\u001b[0m         model_classes\u001b[38;5;241m=\u001b[39mmodel_classes,\n\u001b[1;32m    899\u001b[0m         config\u001b[38;5;241m=\u001b[39mconfig,\n\u001b[1;32m    900\u001b[0m         framework\u001b[38;5;241m=\u001b[39mframework,\n\u001b[1;32m    901\u001b[0m         task\u001b[38;5;241m=\u001b[39mtask,\n\u001b[1;32m    902\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mhub_kwargs,\n\u001b[1;32m    903\u001b[0m         \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mmodel_kwargs,\n\u001b[1;32m    904\u001b[0m     )\n\u001b[1;32m    906\u001b[0m model_config \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mconfig\n\u001b[1;32m    907\u001b[0m hub_kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m_commit_hash\u001b[39m\u001b[38;5;124m\"\u001b[39m] \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39mconfig\u001b[38;5;241m.\u001b[39m_commit_hash\n",
      "File \u001b[0;32m~/.conda/envs/final-checking-meta/lib/python3.11/site-packages/transformers/pipelines/base.py:288\u001b[0m, in \u001b[0;36minfer_framework_load_model\u001b[0;34m(model, config, model_classes, task, framework, **model_kwargs)\u001b[0m\n\u001b[1;32m    282\u001b[0m     logger\u001b[38;5;241m.\u001b[39mwarning(\n\u001b[1;32m    283\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel might be a PyTorch model (ending with `.bin`) but PyTorch is not available. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    284\u001b[0m         \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mTrying to load the model with Tensorflow.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    285\u001b[0m     )\n\u001b[1;32m    287\u001b[0m \u001b[38;5;28;01mtry\u001b[39;00m:\n\u001b[0;32m--> 288\u001b[0m     model \u001b[38;5;241m=\u001b[39m model_class\u001b[38;5;241m.\u001b[39mfrom_pretrained(model, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs)\n\u001b[1;32m    289\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mhasattr\u001b[39m(model, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124meval\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m    290\u001b[0m         model \u001b[38;5;241m=\u001b[39m model\u001b[38;5;241m.\u001b[39meval()\n",
      "File \u001b[0;32m~/.conda/envs/final-checking-meta/lib/python3.11/site-packages/transformers/models/auto/auto_factory.py:564\u001b[0m, in \u001b[0;36m_BaseAutoModelClass.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m    562\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28mtype\u001b[39m(config) \u001b[38;5;129;01min\u001b[39;00m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m    563\u001b[0m     model_class \u001b[38;5;241m=\u001b[39m _get_model_class(config, \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping)\n\u001b[0;32m--> 564\u001b[0m     \u001b[38;5;28;01mreturn\u001b[39;00m model_class\u001b[38;5;241m.\u001b[39mfrom_pretrained(\n\u001b[1;32m    565\u001b[0m         pretrained_model_name_or_path, \u001b[38;5;241m*\u001b[39mmodel_args, config\u001b[38;5;241m=\u001b[39mconfig, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mhub_kwargs, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mkwargs\n\u001b[1;32m    566\u001b[0m     )\n\u001b[1;32m    567\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\n\u001b[1;32m    568\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUnrecognized configuration class \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconfig\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__class__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m for this kind of AutoModel: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    569\u001b[0m     \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mModel type should be one of \u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m, \u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;241m.\u001b[39mjoin(c\u001b[38;5;241m.\u001b[39m\u001b[38;5;18m__name__\u001b[39m\u001b[38;5;250m \u001b[39m\u001b[38;5;28;01mfor\u001b[39;00m\u001b[38;5;250m \u001b[39mc\u001b[38;5;250m \u001b[39m\u001b[38;5;129;01min\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_model_mapping\u001b[38;5;241m.\u001b[39mkeys())\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m.\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    570\u001b[0m )\n",
      "File \u001b[0;32m~/.conda/envs/final-checking-meta/lib/python3.11/site-packages/transformers/modeling_utils.py:4014\u001b[0m, in \u001b[0;36mPreTrainedModel.from_pretrained\u001b[0;34m(cls, pretrained_model_name_or_path, config, cache_dir, ignore_mismatched_sizes, force_download, local_files_only, token, revision, use_safetensors, *model_args, **kwargs)\u001b[0m\n\u001b[1;32m   4004\u001b[0m     \u001b[38;5;28;01mif\u001b[39;00m dtype_orig \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m   4005\u001b[0m         torch\u001b[38;5;241m.\u001b[39mset_default_dtype(dtype_orig)\n\u001b[1;32m   4007\u001b[0m     (\n\u001b[1;32m   4008\u001b[0m         model,\n\u001b[1;32m   4009\u001b[0m         missing_keys,\n\u001b[1;32m   4010\u001b[0m         unexpected_keys,\n\u001b[1;32m   4011\u001b[0m         mismatched_keys,\n\u001b[1;32m   4012\u001b[0m         offload_index,\n\u001b[1;32m   4013\u001b[0m         error_msgs,\n\u001b[0;32m-> 4014\u001b[0m     ) \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mcls\u001b[39m\u001b[38;5;241m.\u001b[39m_load_pretrained_model(\n\u001b[1;32m   4015\u001b[0m         model,\n\u001b[1;32m   4016\u001b[0m         state_dict,\n\u001b[1;32m   4017\u001b[0m         loaded_state_dict_keys,  \u001b[38;5;66;03m# XXX: rename?\u001b[39;00m\n\u001b[1;32m   4018\u001b[0m         resolved_archive_file,\n\u001b[1;32m   4019\u001b[0m         pretrained_model_name_or_path,\n\u001b[1;32m   4020\u001b[0m         ignore_mismatched_sizes\u001b[38;5;241m=\u001b[39mignore_mismatched_sizes,\n\u001b[1;32m   4021\u001b[0m         sharded_metadata\u001b[38;5;241m=\u001b[39msharded_metadata,\n\u001b[1;32m   4022\u001b[0m         _fast_init\u001b[38;5;241m=\u001b[39m_fast_init,\n\u001b[1;32m   4023\u001b[0m         low_cpu_mem_usage\u001b[38;5;241m=\u001b[39mlow_cpu_mem_usage,\n\u001b[1;32m   4024\u001b[0m         device_map\u001b[38;5;241m=\u001b[39mdevice_map,\n\u001b[1;32m   4025\u001b[0m         offload_folder\u001b[38;5;241m=\u001b[39moffload_folder,\n\u001b[1;32m   4026\u001b[0m         offload_state_dict\u001b[38;5;241m=\u001b[39moffload_state_dict,\n\u001b[1;32m   4027\u001b[0m         dtype\u001b[38;5;241m=\u001b[39mtorch_dtype,\n\u001b[1;32m   4028\u001b[0m         hf_quantizer\u001b[38;5;241m=\u001b[39mhf_quantizer,\n\u001b[1;32m   4029\u001b[0m         keep_in_fp32_modules\u001b[38;5;241m=\u001b[39mkeep_in_fp32_modules,\n\u001b[1;32m   4030\u001b[0m         gguf_path\u001b[38;5;241m=\u001b[39mgguf_path,\n\u001b[1;32m   4031\u001b[0m     )\n\u001b[1;32m   4033\u001b[0m \u001b[38;5;66;03m# make sure token embedding weights are still tied if needed\u001b[39;00m\n\u001b[1;32m   4034\u001b[0m model\u001b[38;5;241m.\u001b[39mtie_weights()\n",
      "File \u001b[0;32m~/.conda/envs/final-checking-meta/lib/python3.11/site-packages/transformers/modeling_utils.py:4502\u001b[0m, in \u001b[0;36mPreTrainedModel._load_pretrained_model\u001b[0;34m(cls, model, state_dict, loaded_keys, resolved_archive_file, pretrained_model_name_or_path, ignore_mismatched_sizes, sharded_metadata, _fast_init, low_cpu_mem_usage, device_map, offload_folder, offload_state_dict, dtype, hf_quantizer, keep_in_fp32_modules, gguf_path)\u001b[0m\n\u001b[1;32m   4498\u001b[0m                 set_module_tensor_to_device(\n\u001b[1;32m   4499\u001b[0m                     model_to_load, key, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m, torch\u001b[38;5;241m.\u001b[39mempty(\u001b[38;5;241m*\u001b[39mparam\u001b[38;5;241m.\u001b[39msize(), dtype\u001b[38;5;241m=\u001b[39mdtype)\n\u001b[1;32m   4500\u001b[0m                 )\n\u001b[1;32m   4501\u001b[0m     \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 4502\u001b[0m         new_error_msgs, offload_index, state_dict_index \u001b[38;5;241m=\u001b[39m _load_state_dict_into_meta_model(\n\u001b[1;32m   4503\u001b[0m             model_to_load,\n\u001b[1;32m   4504\u001b[0m             state_dict,\n\u001b[1;32m   4505\u001b[0m             start_prefix,\n\u001b[1;32m   4506\u001b[0m             expected_keys,\n\u001b[1;32m   4507\u001b[0m             device_map\u001b[38;5;241m=\u001b[39mdevice_map,\n\u001b[1;32m   4508\u001b[0m             offload_folder\u001b[38;5;241m=\u001b[39moffload_folder,\n\u001b[1;32m   4509\u001b[0m             offload_index\u001b[38;5;241m=\u001b[39moffload_index,\n\u001b[1;32m   4510\u001b[0m             state_dict_folder\u001b[38;5;241m=\u001b[39mstate_dict_folder,\n\u001b[1;32m   4511\u001b[0m             state_dict_index\u001b[38;5;241m=\u001b[39mstate_dict_index,\n\u001b[1;32m   4512\u001b[0m             dtype\u001b[38;5;241m=\u001b[39mdtype,\n\u001b[1;32m   4513\u001b[0m             hf_quantizer\u001b[38;5;241m=\u001b[39mhf_quantizer,\n\u001b[1;32m   4514\u001b[0m             is_safetensors\u001b[38;5;241m=\u001b[39mis_safetensors,\n\u001b[1;32m   4515\u001b[0m             keep_in_fp32_modules\u001b[38;5;241m=\u001b[39mkeep_in_fp32_modules,\n\u001b[1;32m   4516\u001b[0m             unexpected_keys\u001b[38;5;241m=\u001b[39munexpected_keys,\n\u001b[1;32m   4517\u001b[0m         )\n\u001b[1;32m   4518\u001b[0m         error_msgs \u001b[38;5;241m+\u001b[39m\u001b[38;5;241m=\u001b[39m new_error_msgs\n\u001b[1;32m   4519\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m   4520\u001b[0m     \u001b[38;5;66;03m# Sharded checkpoint or whole but low_cpu_mem_usage==True\u001b[39;00m\n",
      "File \u001b[0;32m~/.conda/envs/final-checking-meta/lib/python3.11/site-packages/transformers/modeling_utils.py:973\u001b[0m, in \u001b[0;36m_load_state_dict_into_meta_model\u001b[0;34m(model, state_dict, start_prefix, expected_keys, device_map, offload_folder, offload_index, state_dict_folder, state_dict_index, dtype, hf_quantizer, is_safetensors, keep_in_fp32_modules, unexpected_keys, pretrained_model_name_or_path)\u001b[0m\n\u001b[1;32m    970\u001b[0m         param_device \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mcpu\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m is_local_dist_rank_0() \u001b[38;5;28;01melse\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmeta\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m    972\u001b[0m     \u001b[38;5;66;03m# For backward compatibility with older versions of `accelerate` and for non-quantized params\u001b[39;00m\n\u001b[0;32m--> 973\u001b[0m     set_module_tensor_to_device(model, param_name, param_device, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mset_module_kwargs)\n\u001b[1;32m    974\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    975\u001b[0m     hf_quantizer\u001b[38;5;241m.\u001b[39mcreate_quantized_param(model, param, param_name, param_device, state_dict, unexpected_keys)\n",
      "File \u001b[0;32m~/.conda/envs/final-checking-meta/lib/python3.11/site-packages/accelerate/utils/modeling.py:329\u001b[0m, in \u001b[0;36mset_module_tensor_to_device\u001b[0;34m(module, tensor_name, device, value, dtype, fp16_statistics, tied_params_map)\u001b[0m\n\u001b[1;32m    327\u001b[0m             module\u001b[38;5;241m.\u001b[39m_parameters[tensor_name] \u001b[38;5;241m=\u001b[39m param_cls(new_value, requires_grad\u001b[38;5;241m=\u001b[39mold_value\u001b[38;5;241m.\u001b[39mrequires_grad)\n\u001b[1;32m    328\u001b[0m \u001b[38;5;28;01melif\u001b[39;00m \u001b[38;5;28misinstance\u001b[39m(value, torch\u001b[38;5;241m.\u001b[39mTensor):\n\u001b[0;32m--> 329\u001b[0m     new_value \u001b[38;5;241m=\u001b[39m value\u001b[38;5;241m.\u001b[39mto(device)\n\u001b[1;32m    330\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m    331\u001b[0m     new_value \u001b[38;5;241m=\u001b[39m torch\u001b[38;5;241m.\u001b[39mtensor(value, device\u001b[38;5;241m=\u001b[39mdevice)\n",
      "\u001b[0;31mOutOfMemoryError\u001b[0m: CUDA out of memory. Tried to allocate 448.00 MiB. GPU 6 has a total capacity of 95.04 GiB of which 133.56 MiB is free. Including non-PyTorch memory, this process has 94.91 GiB memory in use. Of the allocated memory 94.40 GiB is allocated by PyTorch, and 176.00 KiB is reserved by PyTorch but unallocated. If reserved but unallocated memory is large try setting PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True to avoid fragmentation.  See documentation for Memory Management  (https://pytorch.org/docs/stable/notes/cuda.html#environment-variables)"
     ]
    }
   ],
   "source": [
    "pipeline = transformers.pipeline(\n",
    "    \"text-generation\",\n",
    "    model=MODEL,\n",
    "    model_kwargs={\"torch_dtype\": torch.bfloat16},\n",
    "    device_map=\"auto\",\n",
    ")\n",
    "\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": SYSTEMP_PROMPT},\n",
    "    {\"role\": \"user\", \"content\": INPUT_PROMPT},\n",
    "]\n",
    "\n",
    "outputs = pipeline(\n",
    "    messages,\n",
    "    max_new_tokens=8126,\n",
    "    temperature=1,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "606ceb10-4f3e-44bb-9277-9bbe3eefd09c",
   "metadata": {},
   "outputs": [],
   "source": [
    "save_string_pkl = outputs[0][\"generated_text\"][-1]['content']\n",
    "print(outputs[0][\"generated_text\"][-1]['content'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2130b683-be37-4dae-999b-84eff15c687d",
   "metadata": {},
   "outputs": [],
   "source": [
    "with open('data.pkl', 'wb') as file:\n",
    "    pickle.dump(save_string_pkl, file)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d9bab2f2-f539-435a-ae6a-3c9028489628",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}