|
@@ -87,13 +87,13 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 8,
|
|
|
+ "execution_count": 4,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"import base64\n",
|
|
|
"from IPython.display import Markdown\n",
|
|
|
- "imagePath= \"screenshot.png\"\n",
|
|
|
+ "imagePath= \"sample_screenshot.png\"\n",
|
|
|
"\n",
|
|
|
"def encode_image(image_path):\n",
|
|
|
" with open(image_path, \"rb\") as image_file:\n",
|
|
@@ -110,7 +110,7 @@
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"response = client.chat.completions.create(\n",
|
|
|
- " model=\"meta-llama/Llama-3.2-11B-Vision-Instruct-Turbo\",\n",
|
|
|
+ " model=\"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\",\n",
|
|
|
" messages=[\n",
|
|
|
" {\n",
|
|
|
" \"role\": \"user\",\n",
|
|
@@ -142,7 +142,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 10,
|
|
|
+ "execution_count": 7,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -194,7 +194,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 11,
|
|
|
+ "execution_count": 8,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -353,18 +353,18 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 12,
|
|
|
+ "execution_count": 9,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
|
"few_shot_example_1 = \"\"\"\n",
|
|
|
- "User Input: \"How much did Nvidia stock gain today?\"\n",
|
|
|
+ "User Input: \"What are the best tacos in San Francisco?\"\n",
|
|
|
"\n",
|
|
|
"Agent Step Sequence:\n",
|
|
|
"Step 1: \n",
|
|
|
"{\n",
|
|
|
" \"current_state\": \"On a blank page.\",\n",
|
|
|
- " \"reasoning\": \"The task is to find Nvidia's stock gain today, so the first step is to navigate to Google to perform a search.\",\n",
|
|
|
+ " \"reasoning\": \"The task is to find the best tacos in San Francisco, so the first step is to navigate to Google to perform a search.\",\n",
|
|
|
" \"action\": \"navigation\",\n",
|
|
|
" \"url\": \"https://www.google.com\",\n",
|
|
|
"}\n",
|
|
@@ -372,10 +372,10 @@
|
|
|
"Step 2: \n",
|
|
|
"{\n",
|
|
|
" \"current_state\": \"On the Google homepage.\",\n",
|
|
|
- " \"reasoning\": \"To search for Nvidia's stock price gain, I need to fill the Google search bar with the query.\",\n",
|
|
|
+ " \"reasoning\": \"To search for the best tacos in San Francisco, I need to fill the Google search bar with the query.\",\n",
|
|
|
" \"action\": \"fill\",\n",
|
|
|
" \"selector\": \"combobox=Search\",\n",
|
|
|
- " \"value\": \"Nvidia stock price today\"\n",
|
|
|
+ " \"value\": \"Best tacos in San Francisco\"\n",
|
|
|
"}\n",
|
|
|
"\n",
|
|
|
"Step 3:\n",
|
|
@@ -389,17 +389,17 @@
|
|
|
"Step 4: \n",
|
|
|
"{\n",
|
|
|
" \"current_state\": \"On the search results page with multiple links.\",\n",
|
|
|
- " \"reasoning\": \"From the search results, I need to click on a reliable financial website link.\",\n",
|
|
|
+ "    \"reasoning\": \"From the search results, I need to click on a reliable food-review or blog website link.\",\n",
|
|
|
" \"action\": \"click\",\n",
|
|
|
- " \"selector\": \"text=Yahoo Finance\"\n",
|
|
|
+ " \"selector\": \"text=Yelp\"\n",
|
|
|
"}\n",
|
|
|
"\n",
|
|
|
"Step 5:\n",
|
|
|
"{\n",
|
|
|
- " \"current_state\": \"On Yahoo Finance's Nvidia stock page.\",\n",
|
|
|
- " \"reasoning\": \"The task is complete as I have found the stock gain for Nvidia today.\",\n",
|
|
|
+ " \"current_state\": \"On Yelp's best taqueria near San Francisco page.\",\n",
|
|
|
+ " \"reasoning\": \"The task is complete as I have found the top taquerias in San Francisco.\",\n",
|
|
|
" \"action\": \"finished\",\n",
|
|
|
- " \"summary\": \"I have successfully found the stock gain for Nvidia today.\"\n",
|
|
|
+ " \"summary\": \"I have successfully found the best tacos in San Francisco.\"\n",
|
|
|
"}\n",
|
|
|
"\"\"\"\n",
|
|
|
"\n",
|
|
@@ -429,7 +429,7 @@
|
|
|
" \"reasoning\": \"Enter Dmitry's email address in the recipient field.\",\n",
|
|
|
" \"action\": \"fill\",\n",
|
|
|
" \"selector\": \"placeholder=Recipients\",\n",
|
|
|
- " \"value\": \"dmitriy@gmail.com\"\n",
|
|
|
+ " \"value\": \"dmitry@gmail.com\"\n",
|
|
|
"}\n",
|
|
|
"\n",
|
|
|
"Step 4: \n",
|
|
@@ -481,7 +481,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 18,
|
|
|
+ "execution_count": 10,
|
|
|
"metadata": {},
|
|
|
"outputs": [],
|
|
|
"source": [
|
|
@@ -508,7 +508,7 @@
|
|
|
"source": [
|
|
|
"print(\"Generating plan...\")\n",
|
|
|
"planning_response = client.chat.completions.create(\n",
|
|
|
- " model=\"meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo\",\n",
|
|
|
+ " model=\"meta-llama/Llama-3.2-90B-Vision-Instruct-Turbo\",\n",
|
|
|
" temperature=0.0,\n",
|
|
|
" messages=[\n",
|
|
|
" {\"role\": \"system\", \"content\": planning_prompt},\n",
|
|
@@ -572,7 +572,7 @@
|
|
|
" [\n",
|
|
|
" {\n",
|
|
|
" \"type\": \"text\",\n",
|
|
|
- " \"text\": f'What should be the next action to accomplish the task: {task} based on the previous actions and the current state? Provide the next action in JSON format strictly as specified above.',\n",
|
|
|
+ " \"text\": f'What should be the next action to accomplish the task: {task} based on the previous actions and the current state? Remember to review the plan. Provide the next action in JSON format strictly as specified above.',\n",
|
|
|
" },\n",
|
|
|
" {\n",
|
|
|
" \"type\": \"image_url\",\n",
|