@@ -5,14 +5,14 @@
"id": "47a9adb3",
"metadata": {},
"source": [
- "## This demo app shows how to query Llama 2 using the Gradio UI.\n",
+ "## This demo app shows how to query Llama 3 using the Gradio UI.\n",
"\n",
"Since we are using OctoAI in this example, you'll need to obtain an OctoAI token:\n",
"\n",
"- You will need to first sign into [OctoAI](https://octoai.cloud/) with your Github or Google account\n",
"- Then create a free API token [here](https://octo.ai/docs/getting-started/how-to-create-an-octoai-access-token) that you can use for a while (a month or $10 in OctoAI credits, whichever one runs out first)\n",
"\n",
- "**Note** After the free trial ends, you will need to enter billing info to continue to use Llama2 hosted on OctoAI.\n",
+ "**Note** After the free trial ends, you will need to enter billing info to continue to use Llama 3 hosted on OctoAI.\n",
"\n",
"To run this example:\n",
"- Run the notebook\n",
@@ -22,8 +22,7 @@
"In the notebook or a browser with URL http://127.0.0.1:7860 you should see a UI with your answer.\n",
"\n",
"Let's start by installing the necessary packages:\n",
- "- langchain provides necessary RAG tools for this demo\n",
- "- octoai-sdk allows us to use OctoAI Llama 2 endpoint\n",
+ "- openai for us to use its APIs to talk to the OctoAI endpoint\n",
"- gradio is used for the UI elements\n",
"\n",
"And setting up the OctoAI token."
@@ -36,7 +35,7 @@
"metadata": {},
"outputs": [],
"source": [
- "!pip install langchain octoai-sdk gradio"
+ "!pip install openai gradio"
]
},
{
@@ -60,37 +59,39 @@
"metadata": {},
"outputs": [],
"source": [
- "from langchain.schema import AIMessage, HumanMessage\n",
"import gradio as gr\n",
- "from langchain.llms.octoai_endpoint import OctoAIEndpoint\n",
+ "import openai\n",
"\n",
- "llama2_13b = \"llama-2-13b-chat-fp16\"\n",
- "\n",
- "llm = OctoAIEndpoint(\n",
- "    endpoint_url=\"https://text.octoai.run/v1/chat/completions\",\n",
- "    model_kwargs={\n",
- "        \"model\": llama2_13b,\n",
- "        \"messages\": [\n",
- "            {\n",
- "                \"role\": \"system\",\n",
- "                \"content\": \"You are a helpful, respectful and honest assistant.\"\n",
- "            }\n",
- "        ],\n",
- "        \"max_tokens\": 500,\n",
- "        \"top_p\": 1,\n",
- "        \"temperature\": 0.01\n",
- "    },\n",
+ "# Init OctoAI client\n",
+ "client = openai.OpenAI(\n",
+ "    base_url=\"https://text.octoai.run/v1\",\n",
+ "    api_key=os.environ[\"OCTOAI_API_TOKEN\"]\n",
")\n",
"\n",
- "\n",
"def predict(message, history):\n",
- "    history_langchain_format = []\n",
- "    for human, ai in history:\n",
- "        history_langchain_format.append(HumanMessage(content=human))\n",
- "        history_langchain_format.append(AIMessage(content=ai))\n",
- "    history_langchain_format.append(HumanMessage(content=message))\n",
- "    llm_response = llm(message, history_langchain_format)\n",
- "    return llm_response.content\n",
+ " history_openai_format = []\n",
|
|
|
+ " for human, assistant in history:\n",
|
|
|
+ " history_openai_format.append({\"role\": \"user\", \"content\": human})\n",
|
|
|
+ " history_openai_format.append({\"role\": \"assistant\", \"content\": assistant})\n",
|
|
|
+ " history_openai_format.append({\"role\": \"user\", \"content\": message})\n",
|
|
|
+ "\n",
+ "    response = client.chat.completions.create(\n",
+ "        model='meta-llama-3-70b-instruct',\n",
+ "        messages=history_openai_format,\n",
+ "        temperature=0.0,\n",
+ "        stream=True\n",
+ "    )\n",
+ "\n",
+ "    partial_message = \"\"\n",
+ "    for chunk in response:\n",
+ "        if chunk.choices[0].delta.content is not None:\n",
+ "            partial_message = partial_message + chunk.choices[0].delta.content\n",
+ "            yield partial_message\n",
"\n",
"gr.ChatInterface(predict).launch()"
]