{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "pnwfnKbEkL5e" }, "outputs": [], "source": [ "!pip install -Uqqq pip --progress-bar off\n", "!pip install -qqq langchain==0.0.149 --progress-bar off\n", "!pip install -qqq openai==0.27.4 --progress-bar off\n", "!pip install -qqq tiktoken==0.3.3 --progress-bar off\n", "!pip install -qqq watermark==2.3.1 --progress-bar off\n", "!pip install -qqq chromadb==0.3.21 --progress-bar off" ] }, { "cell_type": "code", "source": [ "import os\n", "import textwrap\n", "from getpass import getpass\n", "\n", "import chromadb\n", "import langchain\n", "import openai\n", "from langchain.chains import LLMBashChain, LLMChain, RetrievalQA, SimpleSequentialChain\n", "from langchain.chains.summarize import load_summarize_chain\n", "from langchain.chat_models import ChatOpenAI\n", "from langchain.docstore.document import Document\n", "from langchain.embeddings.openai import OpenAIEmbeddings\n", "from langchain.llms import OpenAI\n", "from langchain.prompts import PromptTemplate\n", "from langchain.text_splitter import CharacterTextSplitter\n", "from langchain.vectorstores import Chroma" ], "metadata": { "id": "OGdXFDwBrVIZ" }, "execution_count": 98, "outputs": [] }, { "cell_type": "code", "source": [ "%load_ext watermark\n", "%watermark --iversions -v -m" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "glLyy9quuBrU", "outputId": "645ad7a3-26f8-4b40-f1ac-643fa12a97f7" }, "execution_count": 64, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Python implementation: CPython\n", "Python version : 3.9.16\n", "IPython version : 7.34.0\n", "\n", "Compiler : GCC 9.4.0\n", "OS : Linux\n", "Release : 5.10.147+\n", "Machine : x86_64\n", "Processor : x86_64\n", "CPU cores : 2\n", "Architecture: 64bit\n", "\n", "chromadb : 0.3.21\n", "langchain: 0.0.149\n", "openai : 0.27.4\n", "\n" ] } ] }, { "cell_type": "code", "source": [ "def print_response(response: str):\n", " print(\"\\n\".join(textwrap.wrap(response, width=100)))" ], "metadata": { "id": "kzXoMUUWuCBq" }, "execution_count": 23, "outputs": [] }, { "cell_type": "code", "source": [ "OPENAI_API_KEY = getpass()\n", "os.environ[\"OPENAI_API_KEY\"] = OPENAI_API_KEY" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "nYUltznzuFVz", "outputId": "81257963-8cb6-4f56-e1cf-78aa1f20aab3" }, "execution_count": 24, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "··········\n" ] } ] }, { "cell_type": "code", "source": [ "chat_gpt = ChatOpenAI(temperature=0, model_name=\"gpt-3.5-turbo\")" ], "metadata": { "id": "uMzNG3mZ0BmM" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "template = \"\"\"\n", "You have to come up with location to shoot (along with a 20-50 word description)\n", "for a new episode of the TV show \"The Office\" based on the theme\n", "\n", "{theme_suggestion}\n", "\n", "ANSWER:\n", "\"\"\"\n", "prompt = PromptTemplate(input_variables=[\"theme_suggestion\"], template=template)\n", "\n", "location_chain = LLMChain(llm=chat_gpt, prompt=prompt)" ], "metadata": { "id": "ecQVBt2Rv4Gw" }, "execution_count": 30, "outputs": [] }, { "cell_type": "code", "source": [ "response = location_chain(\"Visiting Europe\")\n", "response" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "92ZQkOgl0HfO", "outputId": "f47fb14f-19c3-4689-9182-a7a3731f2f01" }, "execution_count": 31, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'theme_suggestion': 'Visiting Europe',\n", " 'text': \"Location: Amsterdam, Netherlands\\n\\nDescription: The Dunder Mifflin team heads to Amsterdam for a business conference, but things quickly spiral out of control when Michael and Dwight accidentally eat space cakes and end up lost in the city's infamous Red Light District. Meanwhile, Jim and Pam explore the city's beautiful canals and historic landmarks, but struggle to keep their romantic getaway a secret from their coworkers.\"}" ] }, "metadata": {}, "execution_count": 31 } ] }, { "cell_type": "code", "source": [ "print_response(response[\"text\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "r7kOTkba0gZA", "outputId": "146d8b57-e276-43eb-a5e6-736c3a51253b" }, "execution_count": 32, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Location: Amsterdam, Netherlands Description: The Dunder Mifflin team heads to Amsterdam for a\n", "business conference, but things quickly spiral out of control when Michael and Dwight accidentally\n", "eat space cakes and end up lost in the city's infamous Red Light District. Meanwhile, Jim and Pam\n", "explore the city's beautiful canals and historic landmarks, but struggle to keep their romantic\n", "getaway a secret from their coworkers.\n" ] } ] }, { "cell_type": "markdown", "source": [ "## SequentialChain" ], "metadata": { "id": "-S6CpfZ7SmSV" } }, { "cell_type": "code", "source": [ "template = \"\"\"\n", "Generate a short dialogue between Jim and Pam\n", "from the TV show \"The Office\" for a new episode based on the location\n", "\n", "{location}\n", "\n", "ANSWER:\n", "\"\"\"\n", "prompt = PromptTemplate(input_variables=[\"location\"], template=template)\n", "\n", "conversation_chain = LLMChain(llm=chat_gpt, prompt=prompt)" ], "metadata": { "id": "6jANaR8h2pK4" }, "execution_count": 34, "outputs": [] }, { "cell_type": "code", "source": [ "sequential_chain = SimpleSequentialChain(\n", " chains=[location_chain, conversation_chain], verbose=True\n", ")\n", "response = sequential_chain.run(\"Visiting Europe\")" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YvZfOnK8MbG_", "outputId": "4e91bbaa-2408-4011-991b-cc8c2ed2cd67" }, "execution_count": 35, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "\n", "\u001b[1m> Entering new SimpleSequentialChain chain...\u001b[0m\n", "\u001b[36;1m\u001b[1;3mLocation: Amsterdam, Netherlands\n", "\n", "Description: The Dunder Mifflin team heads to Amsterdam for a business conference, but things quickly spiral out of control when Michael and Dwight accidentally eat space cakes and end up lost in the city's infamous Red Light District. Meanwhile, Jim and Pam explore the city's beautiful canals and historic landmarks, but struggle to keep their romantic getaway a secret from their coworkers.\u001b[0m\n", "\u001b[33;1m\u001b[1;3mJim: \"Wow, Amsterdam is beautiful. I can't believe we're here for a conference.\"\n", "\n", "Pam: \"I know, right? And the canals are so picturesque. I could spend all day just walking around and taking pictures.\"\n", "\n", "Jim: \"Speaking of pictures, I brought my camera. Want to take a selfie in front of that windmill?\"\n", "\n", "Pam: \"Sure, but let's make it quick. I don't want anyone to see us and get suspicious.\"\n", "\n", "Jim: \"Right, we don't want to give away our secret romantic getaway.\"\n", "\n", "Pam: \"Exactly. I just hope Michael and Dwight don't cause too much trouble while we're out here.\"\n", "\n", "Jim: \"Yeah, I heard they already got lost in the Red Light District after eating some space cakes.\"\n", "\n", "Pam: \"Oh no, that's not good. We better keep our phones on in case they need our help.\"\n", "\n", "Jim: \"Agreed. But for now, let's enjoy this beautiful city and each other's company.\"\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n" ] } ] }, { "cell_type": "code", "source": [ "conversation = response\n", "print(conversation)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "xYxuc6_NOZBi", "outputId": "c7d7ac7d-9531-40bd-a2b9-93e40d9f8237" }, "execution_count": 38, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "Jim: \"Wow, Amsterdam is beautiful. I can't believe we're here for a conference.\"\n", "\n", "Pam: \"I know, right? And the canals are so picturesque. I could spend all day just walking around and taking pictures.\"\n", "\n", "Jim: \"Speaking of pictures, I brought my camera. Want to take a selfie in front of that windmill?\"\n", "\n", "Pam: \"Sure, but let's make it quick. I don't want anyone to see us and get suspicious.\"\n", "\n", "Jim: \"Right, we don't want to give away our secret romantic getaway.\"\n", "\n", "Pam: \"Exactly. I just hope Michael and Dwight don't cause too much trouble while we're out here.\"\n", "\n", "Jim: \"Yeah, I heard they already got lost in the Red Light District after eating some space cakes.\"\n", "\n", "Pam: \"Oh no, that's not good. We better keep our phones on in case they need our help.\"\n", "\n", "Jim: \"Agreed. But for now, let's enjoy this beautiful city and each other's company.\"\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Summarization" ], "metadata": { "id": "k9rYkRQJSo6U" } }, { "cell_type": "code", "source": [ "template = \"\"\"\n", "Write a concise bullet list summary of the conversation between Jim and Pam from the TV show \"The Office\":\n", "\n", "{text}\n", "\n", "Concise summary using markdown:\"\"\"\n", "\n", "prompt = PromptTemplate(template=template, input_variables=[\"text\"])\n", "summary_chain = load_summarize_chain(\n", " chat_gpt, chain_type=\"stuff\", verbose=True, prompt=prompt\n", ")" ], "metadata": { "id": "xJp2OJu6SqS9" }, "execution_count": 51, "outputs": [] }, { "cell_type": "code", "source": [ "docs = [Document(page_content=conversation)]\n", "docs" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "vAVHqoKGS4hy", "outputId": "3e74bdf9-cd1a-47c3-f454-e264d47f8aee" }, "execution_count": 52, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "[Document(page_content='Jim: \"Wow, Amsterdam is beautiful. I can\\'t believe we\\'re here for a conference.\"\\n\\nPam: \"I know, right? And the canals are so picturesque. I could spend all day just walking around and taking pictures.\"\\n\\nJim: \"Speaking of pictures, I brought my camera. Want to take a selfie in front of that windmill?\"\\n\\nPam: \"Sure, but let\\'s make it quick. I don\\'t want anyone to see us and get suspicious.\"\\n\\nJim: \"Right, we don\\'t want to give away our secret romantic getaway.\"\\n\\nPam: \"Exactly. I just hope Michael and Dwight don\\'t cause too much trouble while we\\'re out here.\"\\n\\nJim: \"Yeah, I heard they already got lost in the Red Light District after eating some space cakes.\"\\n\\nPam: \"Oh no, that\\'s not good. We better keep our phones on in case they need our help.\"\\n\\nJim: \"Agreed. But for now, let\\'s enjoy this beautiful city and each other\\'s company.\"', metadata={})]" ] }, "metadata": {}, "execution_count": 52 } ] }, { "cell_type": "code", "source": [ "summary_result = summary_chain.run(docs)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "WO9ykFzeSuQ6", "outputId": "ba73b955-d6e8-4fce-c2ba-b6fd2803d009" }, "execution_count": 54, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "\n", "\u001b[1m> Entering new StuffDocumentsChain chain...\u001b[0m\n", "\n", "\n", "\u001b[1m> Entering new LLMChain chain...\u001b[0m\n", "Prompt after formatting:\n", "\u001b[32;1m\u001b[1;3m\n", "Write a concise bullet list summary of the conversation between Jim and Pam from the TV show \"The Office\":\n", "\n", "Jim: \"Wow, Amsterdam is beautiful. I can't believe we're here for a conference.\"\n", "\n", "Pam: \"I know, right? And the canals are so picturesque. I could spend all day just walking around and taking pictures.\"\n", "\n", "Jim: \"Speaking of pictures, I brought my camera. Want to take a selfie in front of that windmill?\"\n", "\n", "Pam: \"Sure, but let's make it quick. I don't want anyone to see us and get suspicious.\"\n", "\n", "Jim: \"Right, we don't want to give away our secret romantic getaway.\"\n", "\n", "Pam: \"Exactly. I just hope Michael and Dwight don't cause too much trouble while we're out here.\"\n", "\n", "Jim: \"Yeah, I heard they already got lost in the Red Light District after eating some space cakes.\"\n", "\n", "Pam: \"Oh no, that's not good. We better keep our phones on in case they need our help.\"\n", "\n", "Jim: \"Agreed. But for now, let's enjoy this beautiful city and each other's company.\"\n", "\n", "Concise summary using markdown:\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n", "\n", "\u001b[1m> Finished chain.\u001b[0m\n" ] } ] }, { "cell_type": "code", "source": [ "print(summary_result)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ftc5Xxh1SzYZ", "outputId": "754b794c-5248-42eb-c288-067ea76c6bf5" }, "execution_count": 56, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "- Jim and Pam are in Amsterdam for a conference\n", "- They admire the city's beauty and take a selfie in front of a windmill\n", "- They are keeping their romantic relationship a secret\n", "- They worry about Michael and Dwight causing trouble in the city\n", "- Michael and Dwight got lost in the Red Light District after eating space cakes\n", "- Jim and Pam plan to enjoy the city and each other's company.\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Question Answering" ], "metadata": { "id": "cGjTIQFibbTr" } }, { "cell_type": "code", "source": [ "template = \"\"\"\n", "You have to come up with a 200-300 word script for a new episode \n", "of the TV show \"The Office\" based on the theme\n", "\n", "{theme_suggestion}\n", "\n", "ANSWER:\n", "\"\"\"\n", "prompt = PromptTemplate(input_variables=[\"theme_suggestion\"], template=template)\n", "\n", "script_chain = LLMChain(llm=chat_gpt, prompt=prompt)" ], "metadata": { "id": "6dY040hyU8X7" }, "execution_count": 57, "outputs": [] }, { "cell_type": "code", "source": [ "script_response = script_chain(\"Visiting Europe\")" ], "metadata": { "id": "iyt9CmTSekP7" }, "execution_count": 58, "outputs": [] }, { "cell_type": "code", "source": [ "script = script_response[\"text\"]\n", "print(script)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "X1o0_ONLenVX", "outputId": "956ee247-a2fb-4412-df1a-84bd48c70f5f" }, "execution_count": 61, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "FADE IN:\n", "\n", "INT. DUNDER MIFFLIN SCRANTON OFFICE - DAY\n", "\n", "The camera pans around the office as we see the employees going about their daily routine. Suddenly, Michael bursts into the room, looking excited.\n", "\n", "MICHAEL: Attention, everyone! I have some exciting news. We are going to Europe!\n", "\n", "The employees look at each other in confusion.\n", "\n", "JIM: What do you mean, Michael?\n", "\n", "MICHAEL: I mean, we are going to Europe for a business trip. We have a new client in London, and we need to seal the deal.\n", "\n", "PAM: That sounds amazing, Michael. When are we leaving?\n", "\n", "MICHAEL: Next week. I have already booked the flights and the hotel. We are going to have a great time.\n", "\n", "CUT TO:\n", "\n", "INT. AIRPORT - DAY\n", "\n", "The employees are at the airport, waiting to board their flight. Michael is wearing a beret and a scarf, trying to look European.\n", "\n", "DWIGHT: Michael, why are you dressed like that?\n", "\n", "MICHAEL: Because I want to blend in with the locals, Dwight. I want to show them that we are not just a bunch of Americans.\n", "\n", "JIM: Michael, we are Americans.\n", "\n", "MICHAEL: Shh, Jim. Don't ruin the moment.\n", "\n", "CUT TO:\n", "\n", "INT. LONDON OFFICE - DAY\n", "\n", "The employees are in a meeting with their new client, trying to impress him.\n", "\n", "CLIENT: So, what can you offer us that other companies can't?\n", "\n", "MICHAEL: Well, we have a great team of professionals who are dedicated to providing the best service possible. Plus, we are from America, and everyone knows that Americans are the best.\n", "\n", "The client looks at Michael, unimpressed.\n", "\n", "CLIENT: I'm sorry, but I don't think we can do business with you.\n", "\n", "MICHAEL: What? Why not?\n", "\n", "CLIENT: Because you are too arrogant and ignorant. You don't know anything about our culture or our business.\n", "\n", "The employees look at Michael, embarrassed.\n", "\n", "CUT TO:\n", "\n", "INT. HOTEL - NIGHT\n", "\n", "The employees are in their hotel room, feeling defeated.\n", "\n", "PAM: I can't believe we blew it, Michael.\n", "\n", "MICHAEL: I know, Pam. I messed up. I thought I could charm them with my wit and my charm, but I was wrong.\n", "\n", "JIM: Maybe we should try to learn more about their culture and their business. Maybe then we can win them over.\n", "\n", "MICHAEL: That's a great idea, Jim. Let's do it.\n", "\n", "CUT TO:\n", "\n", "INT. LONDON OFFICE - DAY\n", "\n", "The employees are back in the meeting, armed with knowledge about the client's culture and business.\n", "\n", "MICHAEL: We have done our research, and we understand your needs and your values. We are committed to providing you with the best service possible, and we are willing to work hard to earn your trust and your business.\n", "\n", "The client looks at Michael, impressed.\n", "\n", "CLIENT: I like your attitude, Michael. You have shown me that you are willing to learn and to adapt. I think we can do business together.\n", "\n", "The employees cheer, relieved and happy.\n", "\n", "CUT TO:\n", "\n", "INT. AIRPORT - DAY\n", "\n", "The employees are at the airport, ready to go back home.\n", "\n", "MICHAEL: Well, that was a great trip, wasn't it?\n", "\n", "JIM: It was, Michael. We learned a lot and we had fun.\n", "\n", "PAM: And we got the client.\n", "\n", "MICHAEL: That's right, Pam. We got the client. And we got something else too.\n", "\n", "The employees look at Michael, curious.\n", "\n", "MICHAEL: We got a new appreciation for Europe and its culture. We learned that there is more to the world than just America. And that's a valuable lesson.\n", "\n", "The employees nod, agreeing with Michael.\n", "\n", "FADE OUT.\n" ] } ] }, { "cell_type": "code", "source": [ "script_docs = [Document(page_content=script)]\n", "\n", "text_splitter = CharacterTextSplitter(chunk_size=2048, chunk_overlap=32)\n", "texts = text_splitter.split_documents(script_docs)\n", "len(texts)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "0aXQm01yequa", "outputId": "4e619c5d-06ab-4d0a-88e6-ab1fba583ce5" }, "execution_count": 75, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "2" ] }, "metadata": {}, "execution_count": 75 } ] }, { "cell_type": "code", "source": [ "embeddings = OpenAIEmbeddings()" ], "metadata": { "id": "LExlv6-niIIg" }, "execution_count": 76, "outputs": [] }, { "cell_type": "code", "source": [ "db = Chroma.from_documents(texts, embeddings)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HUiYKC00iSFo", "outputId": "62ee399d-924a-495f-9674-899fa8b45891" }, "execution_count": 77, "outputs": [ { "output_type": "stream", "name": "stderr", "text": [ "WARNING:chromadb:Using embedded DuckDB without persistence: data will be transient\n" ] } ] }, { "cell_type": "code", "source": [ "qa_chain = RetrievalQA.from_chain_type(\n", " llm=chat_gpt, chain_type=\"stuff\", retriever=db.as_retriever(search_kwargs={\"k\": 2})\n", ")" ], "metadata": { "id": "2wmCWKlSizRM" }, "execution_count": 82, "outputs": [] }, { "cell_type": "code", "source": [ "response = qa_chain(\"What is the location that The Office team is visiting?\")\n", "response" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "hw1sPPEWjl4s", "outputId": "da155540-b928-463f-8d92-cb2678ffeb49" }, "execution_count": 84, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "{'query': 'What is the location that The Office team is visiting?',\n", " 'result': 'The Office team is visiting London, England.'}" ] }, "metadata": {}, "execution_count": 84 } ] }, { "cell_type": "code", "source": [ "print_response(response[\"result\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "2UA75henj_hB", "outputId": "1e6473c1-6b10-40cc-a55e-aa4a60ee91e2" }, "execution_count": 86, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "The Office team is visiting London, England.\n" ] } ] }, { "cell_type": "code", "source": [ "response = qa_chain(\"Why the client doesn't want to deal with Michael?\")\n", "print_response(response[\"result\"])" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "wR4kYwq5kel7", "outputId": "fba7f8e5-06a5-4b83-e95b-83f1ab621e1e" }, "execution_count": 88, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "The client doesn't want to deal with Michael because he perceives Michael as arrogant and ignorant.\n", "Michael's comment about Americans being the best did not impress the client, who felt that Michael\n", "and his team did not know anything about their culture or business.\n" ] } ] }, { "cell_type": "markdown", "source": [ "## Bash Chain" ], "metadata": { "id": "tUzV90_YnsJ3" } }, { "cell_type": "code", "source": [ "text = \"\"\"\n", "Please write a bash script that prints a single line that Michael G. Scott from \"The Office\" might say\n", "\"\"\"\n", "\n", "bash_chain = LLMBashChain(llm=OpenAI(temperature=0), verbose=True)\n", "\n", "bash_chain.run(text)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 268 }, "id": "jNThdO0ltnBw", "outputId": "5855c1f1-2161-483c-d588-225e97fa565f" }, "execution_count": 108, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "\n", "\u001b[1m> Entering new LLMBashChain chain...\u001b[0m\n", "\n", "Please write a bash script that prints a single line that Michael G. Scott from \"The Office\" might say\n", "\u001b[32;1m\u001b[1;3m\n", "```bash\n", "echo \"That's what she said!\"\n", "```\u001b[0m['```bash', 'echo \"That\\'s what she said!\"', '```']\n", "\n", "Answer: \u001b[33;1m\u001b[1;3mThat's what she said!\n", "\u001b[0m\n", "\u001b[1m> Finished chain.\u001b[0m\n" ] }, { "output_type": "execute_result", "data": { "text/plain": [ "\"That's what she said!\\n\"" ], "application/vnd.google.colaboratory.intrinsic+json": { "type": "string" } }, "metadata": {}, "execution_count": 108 } ] } ] }