| 
					
				 | 
			
			
				@@ -0,0 +1,120 @@ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+{ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ "cells": [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "cell_type": "markdown", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "id": "47a9adb3", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "metadata": {}, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "source": [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "## This demo app shows how to query Llama 2 using the Gradio UI.\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "Since we are using OctoAI in this example, you'll need to obtain an OctoAI token:\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "- You will need to first sign into [OctoAI](https://octoai.cloud/) with your Github or Google account\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "- Then create a free API token [here](https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token) that you can use for a while (a month or $10 in OctoAI credits, whichever one runs out first)\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
+    "**Note:** After the free trial ends, you will need to enter billing info to continue to use Llama 2 hosted on OctoAI.\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "To run this example:\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "- Run the notebook\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
+    "- Set up your OctoAI API token and enter it when prompted\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "- Enter your question and click Submit\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "In the notebook or a browser with URL http://127.0.0.1:7860 you should see a UI with your answer.\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "Let's start by installing the necessary packages:\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
+    "- langchain provides the `OctoAIEndpoint` wrapper used to call Llama 2 in this demo\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "- octoai-sdk allows us to use OctoAI Llama 2 endpoint\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "- gradio is used for the UI elements\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "And setting up the OctoAI token." 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "cell_type": "code", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "execution_count": null, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "id": "6ae4f858-6ef7-49d9-b45b-1ef79d0217a0", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "metadata": {}, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "outputs": [], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "source": [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "!pip install langchain octoai-sdk gradio" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "cell_type": "code", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "execution_count": null, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "id": "3306c11d-ed82-41c5-a381-15fb5c07d307", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "metadata": {}, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "outputs": [], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "source": [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "from getpass import getpass\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "import os\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "OCTOAI_API_TOKEN = getpass()\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "os.environ[\"OCTOAI_API_TOKEN\"] = OCTOAI_API_TOKEN" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "cell_type": "code", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "execution_count": null, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "id": "928041cc", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "metadata": {}, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "outputs": [], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "source": [ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "from langchain.schema import AIMessage, HumanMessage\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "import gradio as gr\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "from langchain.llms.octoai_endpoint import OctoAIEndpoint\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "llama2_13b = \"llama-2-13b-chat-fp16\"\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "llm = OctoAIEndpoint(\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    endpoint_url=\"https://text.octoai.run/v1/chat/completions\",\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    model_kwargs={\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        \"model\": llama2_13b,\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        \"messages\": [\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "            {\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "                \"role\": \"system\",\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "                \"content\": \"You are a helpful, respectful and honest assistant.\"\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "            }\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        ],\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        \"max_tokens\": 500,\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        \"top_p\": 1,\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        \"temperature\": 0.01\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    },\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ")\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "def predict(message, history):\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    history_langchain_format = []\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    for human, ai in history:\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        history_langchain_format.append(HumanMessage(content=human))\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "        history_langchain_format.append(AIMessage(content=ai))\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    history_langchain_format.append(HumanMessage(content=message))\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    gpt_response = llm(message) #history_langchain_format)\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "    return gpt_response#.content\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "\n", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "gr.ChatInterface(predict).launch()" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   ] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ ], 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ "metadata": { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  "kernelspec": { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "display_name": "Python 3 (ipykernel)", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "language": "python", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "name": "python3" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  "language_info": { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "codemirror_mode": { 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "name": "ipython", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    "version": 3 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "file_extension": ".py", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "mimetype": "text/x-python", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "name": "python", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "nbconvert_exporter": "python", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "pygments_lexer": "ipython3", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+   "version": "3.11.6" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+  } 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ }, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ "nbformat": 4, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ "nbformat_minor": 5 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+} 
			 |