|
@@ -0,0 +1,686 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "#!pip install google-search-results"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import json\n",
|
|
|
+ "import os\n",
|
|
|
+ "import time\n",
|
|
|
+ "from serpapi import GoogleSearch\n",
|
|
|
+ "import requests\n",
|
|
|
+ "import hashlib\n",
|
|
|
+ "from pathlib import Path\n",
|
|
|
+ "import pandas as pd"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 9,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "base_dir = Path(\"llama_data\")\n",
|
|
|
+ "src_dir = base_dir / \"src\"\n",
|
|
|
+ "results_dir = base_dir / \"results\""
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "base_dir.mkdir(exist_ok=True)\n",
|
|
|
+ "src_dir.mkdir(exist_ok=True)\n",
|
|
|
+ "results_dir.mkdir(exist_ok=True)\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+    "with open('generated_outlines.json', 'r') as file:\n",
+    "    data = json.load(file)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 12,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Loaded 5 report outlines\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "print(f\"Loaded {len(data)} report outlines\")\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\n",
|
|
|
+ "Sample report title: Llama 3.3: A Revolutionary Leap in AI\n",
|
|
|
+ "Sample queries:\n",
|
|
|
+ "- Llama 3.3 new features and enhancements: To gather information on the new features and enhancements in Llama 3.3\n",
|
|
|
+ "- Llama 3.3 vs Llama 3.1 performance comparison: To gather information on the performance comparison between Llama 3.3 and Llama 3.1\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "print(\"\\nSample report title:\", data[0].get('original_goal', {}).get('Report Title', 'No title'))\n",
|
|
|
+ "print(\"Sample queries:\")\n",
|
|
|
+ "for query in data[0].get('Web Queries', [])[:2]:\n",
|
|
|
+ " print(f\"- {query.get('query')}: {query.get('purpose')}\")"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "all_queries = []"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 15,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "for report_index, report_data in enumerate(data):\n",
|
|
|
+ " report_title = report_data.get('original_goal', {}).get('Report Title', f\"Report {report_index}\")\n",
|
|
|
+ " \n",
|
|
|
+ " for query_index, query_data in enumerate(report_data.get('Web Queries', [])):\n",
|
|
|
+ " query = query_data.get('query', '')\n",
|
|
|
+ " purpose = query_data.get('purpose', '')\n",
|
|
|
+ " \n",
|
|
|
+ " all_queries.append({\n",
|
|
|
+ " 'report_index': report_index,\n",
|
|
|
+ " 'report_title': report_title,\n",
|
|
|
+ " 'query_index': query_index,\n",
|
|
|
+ " 'query': query,\n",
|
|
|
+ " 'purpose': purpose\n",
|
|
|
+ " })"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Total queries extracted: 15\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>report_index</th>\n",
|
|
|
+ " <th>report_title</th>\n",
|
|
|
+ " <th>query_index</th>\n",
|
|
|
+ " <th>query</th>\n",
|
|
|
+ " <th>purpose</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>0</th>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>Llama 3.3: A Revolutionary Leap in AI</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>Llama 3.3 new features and enhancements</td>\n",
|
|
|
+ " <td>To gather information on the new features and ...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>1</th>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>Llama 3.3: A Revolutionary Leap in AI</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>Llama 3.3 vs Llama 3.1 performance comparison</td>\n",
|
|
|
+ " <td>To gather information on the performance compa...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2</th>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>Llama 3.3: A Revolutionary Leap in AI</td>\n",
|
|
|
+ " <td>2</td>\n",
|
|
|
+ " <td>Cost of running Llama 3.3 on cloud vs local in...</td>\n",
|
|
|
+ " <td>To gather information on the cost-effectivenes...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>3</th>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>Llama 3.3 vs Llama 3.1: A Comparative Analysis</td>\n",
|
|
|
+ " <td>0</td>\n",
|
|
|
+ " <td>Llama 3.3 new features and improvements</td>\n",
|
|
|
+ " <td>To gather information on new features and impr...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>4</th>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>Llama 3.3 vs Llama 3.1: A Comparative Analysis</td>\n",
|
|
|
+ " <td>1</td>\n",
|
|
|
+ " <td>Llama 3.1 vs Llama 3.3 performance comparison</td>\n",
|
|
|
+ " <td>To gather information on performance differenc...</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " report_index report_title query_index \\\n",
|
|
|
+ "0 0 Llama 3.3: A Revolutionary Leap in AI 0 \n",
|
|
|
+ "1 0 Llama 3.3: A Revolutionary Leap in AI 1 \n",
|
|
|
+ "2 0 Llama 3.3: A Revolutionary Leap in AI 2 \n",
|
|
|
+ "3 1 Llama 3.3 vs Llama 3.1: A Comparative Analysis 0 \n",
|
|
|
+ "4 1 Llama 3.3 vs Llama 3.1: A Comparative Analysis 1 \n",
|
|
|
+ "\n",
|
|
|
+ " query \\\n",
|
|
|
+ "0 Llama 3.3 new features and enhancements \n",
|
|
|
+ "1 Llama 3.3 vs Llama 3.1 performance comparison \n",
|
|
|
+ "2 Cost of running Llama 3.3 on cloud vs local in... \n",
|
|
|
+ "3 Llama 3.3 new features and improvements \n",
|
|
|
+ "4 Llama 3.1 vs Llama 3.3 performance comparison \n",
|
|
|
+ "\n",
|
|
|
+ " purpose \n",
|
|
|
+ "0 To gather information on the new features and ... \n",
|
|
|
+ "1 To gather information on the performance compa... \n",
|
|
|
+ "2 To gather information on the cost-effectivenes... \n",
|
|
|
+ "3 To gather information on new features and impr... \n",
|
|
|
+ "4 To gather information on performance differenc... "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "queries_df = pd.DataFrame(all_queries)\n",
|
|
|
+ "print(f\"Total queries extracted: {len(queries_df)}\")\n",
|
|
|
+ "queries_df.head()\n"
|
|
|
+ ]
|
|
|
+ },
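+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Several outlines reuse the same query string (visible in the preview above), so it can be worth checking how many distinct queries there actually are before spending SerpAPI quota. This is an optional sanity check on `queries_df`, not part of the pipeline."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: count distinct query strings across all reports\n",
+    "print(f\"Unique query strings: {queries_df['query'].nunique()} of {len(queries_df)} total\")\n",
+    "queries_df['query'].value_counts().head()"
+   ]
+  },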
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 28,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "''"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 28,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+    "SERPAPI_KEY = \"\"  # paste your SerpAPI key here before running the search cells\n",
+    "SERPAPI_KEY"
|
|
|
+ ]
|
|
|
+ },
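+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Optionally, the key can be read from an environment variable instead of being pasted into the notebook (`os` is already imported above). This assumes a `SERPAPI_KEY` environment variable has been set; otherwise the value from the previous cell is kept."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: read the key from the environment, falling back to the value above\n",
+    "SERPAPI_KEY = os.environ.get(\"SERPAPI_KEY\", SERPAPI_KEY)"
+   ]
+  },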
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def search_with_serpapi(query, num_results=5):\n",
|
|
|
+ " print(f\"Searching for: {query}\")\n",
|
|
|
+ " \n",
|
|
|
+ " params = {\n",
|
|
|
+ " \"engine\": \"google\",\n",
|
|
|
+ " \"q\": query,\n",
|
|
|
+ " \"api_key\": SERPAPI_KEY,\n",
|
|
|
+ " \"num\": num_results,\n",
|
|
|
+ " }\n",
|
|
|
+ " \n",
|
|
|
+ " search = GoogleSearch(params)\n",
|
|
|
+ " results = search.get_dict()\n",
|
|
|
+ " \n",
|
|
|
+ " # Check if we have organic results\n",
|
|
|
+ " if \"organic_results\" not in results:\n",
|
|
|
+ " print(f\"Warning: No organic results found for query: {query}\")\n",
|
|
|
+ " return []\n",
|
|
|
+ " \n",
|
|
|
+ " return results[\"organic_results\"]"
|
|
|
+ ]
|
|
|
+ },
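+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "A quick smoke test of `search_with_serpapi` on a single query can be useful before running the full loop. It is left commented out because it spends a SerpAPI request and needs a valid `SERPAPI_KEY`; the query string is just an example."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Smoke test (uncomment once SERPAPI_KEY is set):\n",
+    "# sample_results = search_with_serpapi(\"Llama 3.3 new features and enhancements\", num_results=3)\n",
+    "# for r in sample_results:\n",
+    "#     print(r.get(\"title\"), \"->\", r.get(\"link\"))"
+   ]
+  },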
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def fetch_html(url):\n",
|
|
|
+ " try:\n",
|
|
|
+ " headers = {\n",
|
|
|
+ " \"User-Agent\": \"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36\"\n",
|
|
|
+ " }\n",
|
|
|
+ " response = requests.get(url, headers=headers, timeout=10)\n",
|
|
|
+ " response.raise_for_status()\n",
|
|
|
+ " return response.text\n",
|
|
|
+ " except Exception as e:\n",
|
|
|
+ " print(f\"Error fetching HTML from {url}: {str(e)}\")\n",
|
|
|
+ " return None"
|
|
|
+ ]
|
|
|
+ },
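+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Some fetches fail intermittently. The helper below is a minimal, optional sketch (`fetch_html_with_retry` is not used by the pipeline) that retries `fetch_html` a couple of times with a short pause; it will not help with hard 403/400 responses, only transient failures. The retry count and delay are arbitrary."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def fetch_html_with_retry(url, retries=2, delay=2):\n",
+    "    # Retry transient failures; fetch_html already returns None on error\n",
+    "    for attempt in range(retries + 1):\n",
+    "        html = fetch_html(url)\n",
+    "        if html is not None:\n",
+    "            return html\n",
+    "        if attempt < retries:\n",
+    "            time.sleep(delay)\n",
+    "    return None"
+   ]
+  },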
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def save_html(html_content, report_index, report_title, query_index, query, result_index, title, url):\n",
|
|
|
+ " if html_content is None:\n",
|
|
|
+ " return None\n",
|
|
|
+ " \n",
|
|
|
+ " sanitized_report = report_title.replace(\" \", \"_\").replace(\":\", \"\").replace(\"/\", \"\")[:30]\n",
|
|
|
+ " sanitized_query = query.replace(\" \", \"_\").replace(\":\", \"\").replace(\"/\", \"\")[:30]\n",
|
|
|
+ " \n",
|
|
|
+ " url_hash = hashlib.md5(url.encode()).hexdigest()[:8]\n",
|
|
|
+ "\n",
|
|
|
+ " report_dir = results_dir / f\"report_{report_index}_{sanitized_report}\"\n",
|
|
|
+ " report_dir.mkdir(exist_ok=True)\n",
|
|
|
+ " \n",
|
|
|
+ " query_dir = report_dir / f\"query_{query_index}_{sanitized_query}\"\n",
|
|
|
+ " query_dir.mkdir(exist_ok=True)\n",
|
|
|
+ " \n",
|
|
|
+ " sanitized_title = ''.join(c if c.isalnum() or c in ['_', '-'] else '_' for c in title)[:30]\n",
|
|
|
+ " filename = f\"result_{result_index}_{url_hash}_{sanitized_title}.html\"\n",
|
|
|
+ " filepath = query_dir / filename\n",
|
|
|
+ "\n",
|
|
|
+ " with open(filepath, \"w\", encoding=\"utf-8\") as f:\n",
|
|
|
+ " f.write(html_content)\n",
|
|
|
+ " \n",
|
|
|
+ " metadata = {\n",
|
|
|
+ " \"report_index\": report_index,\n",
|
|
|
+ " \"report_title\": report_title,\n",
|
|
|
+ " \"query_index\": query_index,\n",
|
|
|
+ " \"query\": query,\n",
|
|
|
+ " \"result_index\": result_index,\n",
|
|
|
+ " \"title\": title,\n",
|
|
|
+ " \"url\": url,\n",
|
|
|
+ " \"timestamp\": time.strftime(\"%Y-%m-%d %H:%M:%S\")\n",
|
|
|
+ " }\n",
|
|
|
+ " \n",
|
|
|
+ " metadata_path = query_dir / f\"result_{result_index}_{url_hash}_metadata.json\"\n",
|
|
|
+ " with open(metadata_path, \"w\") as f:\n",
|
|
|
+ " json.dump(metadata, f, indent=2)\n",
|
|
|
+ " \n",
|
|
|
+ " return str(filepath)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def process_all_queries(queries_df):\n",
|
|
|
+ " results = []\n",
|
|
|
+ " \n",
|
|
|
+ " for index, row in queries_df.iterrows():\n",
|
|
|
+ " print(f\"\\nProcessing query {index + 1}/{len(queries_df)}\")\n",
|
|
|
+ " print(f\"Report: {row['report_title']}\")\n",
|
|
|
+ " print(f\"Query: {row['query']}\")\n",
|
|
|
+ " \n",
|
|
|
+ " search_results = search_with_serpapi(row['query'])\n",
|
|
|
+ " \n",
|
|
|
+ " query_results = []\n",
|
|
|
+ " for result_index, result in enumerate(search_results):\n",
|
|
|
+ " title = result.get('title', 'No Title')\n",
|
|
|
+ " url = result.get('link', '')\n",
|
|
|
+ " snippet = result.get('snippet', '')\n",
|
|
|
+ " \n",
|
|
|
+ " print(f\" Result {result_index + 1}: {title[:50]}...\")\n",
|
|
|
+ " \n",
|
|
|
+ " html_content = fetch_html(url)\n",
|
|
|
+ " filepath = save_html(\n",
|
|
|
+ " html_content, \n",
|
|
|
+ " row['report_index'], \n",
|
|
|
+ " row['report_title'],\n",
|
|
|
+ " row['query_index'], \n",
|
|
|
+ " row['query'], \n",
|
|
|
+ " result_index, \n",
|
|
|
+ " title, \n",
|
|
|
+ " url\n",
|
|
|
+ " )\n",
|
|
|
+ " \n",
|
|
|
+ " result_info = {\n",
|
|
|
+ " \"result_index\": result_index,\n",
|
|
|
+ " \"title\": title,\n",
|
|
|
+ " \"url\": url,\n",
|
|
|
+ " \"snippet\": snippet,\n",
|
|
|
+ " \"filepath\": filepath\n",
|
|
|
+ " }\n",
|
|
|
+ " \n",
|
|
|
+ " query_results.append(result_info)\n",
|
|
|
+ " \n",
|
|
|
+    "            # Brief pause between fetches to avoid hammering the target sites\n",
|
|
|
+ " time.sleep(1)\n",
|
|
|
+ " \n",
|
|
|
+ " query_result = {\n",
|
|
|
+ " \"report_index\": row['report_index'],\n",
|
|
|
+ " \"report_title\": row['report_title'],\n",
|
|
|
+ " \"query_index\": row['query_index'],\n",
|
|
|
+ " \"query\": row['query'],\n",
|
|
|
+ " \"purpose\": row['purpose'],\n",
|
|
|
+ " \"results\": query_results\n",
|
|
|
+ " }\n",
|
|
|
+ " \n",
|
|
|
+ " results.append(query_result)\n",
|
|
|
+ " \n",
|
|
|
+ " with open(base_dir / \"results_so_far.json\", \"w\") as f:\n",
|
|
|
+ " json.dump(results, f, indent=2)\n",
|
|
|
+ " \n",
|
|
|
+ " return results\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 24,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "\n",
|
|
|
+ "Processing query 1/15\n",
|
|
|
+ "Report: Llama 3.3: A Revolutionary Leap in AI\n",
|
|
|
+ "Query: Llama 3.3 new features and enhancements\n",
|
|
|
+ "Searching for: Llama 3.3 new features and enhancements\n",
|
|
|
+ " Result 1: Introducing the new Llama 3.3: Features and Overvi...\n",
|
|
|
+ " Result 2: What is Meta Llama 3.3 70B? Features, Use Cases & ...\n",
|
|
|
+ " Result 3: Key Features and Improvements in LLaMA 3.3...\n",
|
|
|
+ " Result 4: Everything You Need to Know About Llama 3.3 | by A...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 2/15\n",
|
|
|
+ "Report: Llama 3.3: A Revolutionary Leap in AI\n",
|
|
|
+ "Query: Llama 3.3 vs Llama 3.1 performance comparison\n",
|
|
|
+ "Searching for: Llama 3.3 vs Llama 3.1 performance comparison\n",
|
|
|
+ " Result 1: Llama 3 vs 3.1 vs 3.2 : r/LocalLLaMA...\n",
|
|
|
+ " Result 2: Llama 3.3 70B Instruct vs Llama 3.1 405B Instruct...\n",
|
|
|
+ " Result 3: Choosing the Best Llama Model: Llama 3 vs 3.1 vs 3...\n",
|
|
|
+ " Result 4: Llama 3.3 just dropped — is it better than GPT-4 o...\n",
|
|
|
+ " Result 5: Llama 3 vs Llama 3.1 : Which is Better for Your AI...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 3/15\n",
|
|
|
+ "Report: Llama 3.3: A Revolutionary Leap in AI\n",
|
|
|
+ "Query: Cost of running Llama 3.3 on cloud vs local infrastructure\n",
|
|
|
+ "Searching for: Cost of running Llama 3.3 on cloud vs local infrastructure\n",
|
|
|
+ " Result 1: What's the cost of running Llama3:8b & 70b in the ...\n",
|
|
|
+ " Result 2: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 3: Llama 3.3 vs. ChatGPT Pro: Key Considerations...\n",
|
|
|
+ " Result 4: Llama 3.3 API Pricing: What You Need to Know...\n",
|
|
|
+ " Result 5: Llama models | Generative AI...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 4/15\n",
|
|
|
+ "Report: Llama 3.3 vs Llama 3.1: A Comparative Analysis\n",
|
|
|
+ "Query: Llama 3.3 new features and improvements\n",
|
|
|
+ "Searching for: Llama 3.3 new features and improvements\n",
|
|
|
+ " Result 1: What is Meta Llama 3.3 70B? Features, Use Cases & ...\n",
|
|
|
+ " Result 2: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 3: Efficient, Accessible Generative AI on CPU with Ne...\n",
|
|
|
+ " Result 4: Meta Releases Llama 3.3: a Model with Enhanced Per...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 5/15\n",
|
|
|
+ "Report: Llama 3.3 vs Llama 3.1: A Comparative Analysis\n",
|
|
|
+ "Query: Llama 3.1 vs Llama 3.3 performance comparison\n",
|
|
|
+ "Searching for: Llama 3.1 vs Llama 3.3 performance comparison\n",
|
|
|
+ " Result 1: Llama 3 vs 3.1 vs 3.2 : r/LocalLLaMA...\n",
|
|
|
+ " Result 2: Llama 3.3 70B Instruct vs Llama 3.1 405B Instruct...\n",
|
|
|
+ " Result 3: Llama 3.3 just dropped — is it better than GPT-4 o...\n",
|
|
|
+ " Result 4: Llama 3 vs Llama 3.1 : Which is Better for Your AI...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 6/15\n",
|
|
|
+ "Report: Llama 3.3 vs Llama 3.1: A Comparative Analysis\n",
|
|
|
+ "Query: Cost of running Llama 3.3 vs Llama 3.1 on cloud and local infrastructure\n",
|
|
|
+ "Searching for: Cost of running Llama 3.3 vs Llama 3.1 on cloud and local infrastructure\n",
|
|
|
+ " Result 1: What's the cost of running Llama3:8b & 70b in the ...\n",
|
|
|
+ " Result 2: The Million-Dollar Trick: LLAMA 3.1 is Free to Own...\n",
|
|
|
+ " Result 3: Decoding Llama 3 vs 3.1: Which One Is Right for Yo...\n",
|
|
|
+ " Result 4: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 5: Llama models | Generative AI...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 7/15\n",
|
|
|
+ "Report: The Cost-Benefit Analysis of Llama 3.3\n",
|
|
|
+ "Query: Llama 3.3 new features and improvements\n",
|
|
|
+ "Searching for: Llama 3.3 new features and improvements\n",
|
|
|
+ " Result 1: What is Meta Llama 3.3 70B? Features, Use Cases & ...\n",
|
|
|
+ " Result 2: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 3: Efficient, Accessible Generative AI on CPU with Ne...\n",
|
|
|
+ " Result 4: Meta Releases Llama 3.3: a Model with Enhanced Per...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 8/15\n",
|
|
|
+ "Report: The Cost-Benefit Analysis of Llama 3.3\n",
|
|
|
+ "Query: Cost of running Llama 3.3 on cloud vs local\n",
|
|
|
+ "Searching for: Cost of running Llama 3.3 on cloud vs local\n",
|
|
|
+ " Result 1: Costs to run Llama 3.3 on cloud? : r/LocalLLaMA...\n",
|
|
|
+ " Result 2: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 3: Llama models | Generative AI...\n",
|
|
|
+ " Result 4: Meta Llama in the Cloud | Llama Everywhere...\n",
|
|
|
+ "Error fetching HTML from https://www.llama.com/docs/llama-everywhere/running-meta-llama-in-the-cloud/: 400 Client Error: Bad Request for url: https://www.llama.com/docs/llama-everywhere/running-meta-llama-in-the-cloud/\n",
|
|
|
+ " Result 5: Llama 3.3 vs. ChatGPT Pro: Key Considerations...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 9/15\n",
|
|
|
+ "Report: The Cost-Benefit Analysis of Llama 3.3\n",
|
|
|
+ "Query: Llama 3.3 vs Llama 3.1 performance comparison\n",
|
|
|
+ "Searching for: Llama 3.3 vs Llama 3.1 performance comparison\n",
|
|
|
+ " Result 1: Llama 3 vs 3.1 vs 3.2 : r/LocalLLaMA...\n",
|
|
|
+ " Result 2: Llama 3.3 70B Instruct vs Llama 3.1 405B Instruct...\n",
|
|
|
+ " Result 3: Choosing the Best Llama Model: Llama 3 vs 3.1 vs 3...\n",
|
|
|
+ " Result 4: Llama 3.3 just dropped — is it better than GPT-4 o...\n",
|
|
|
+ " Result 5: Llama 3 vs Llama 3.1 : Which is Better for Your AI...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 10/15\n",
|
|
|
+ "Report: Llama 3.3: The Future of AI-Driven Innovation\n",
|
|
|
+ "Query: Llama 3.3 new features and enhancements\n",
|
|
|
+ "Searching for: Llama 3.3 new features and enhancements\n",
|
|
|
+ " Result 1: Introducing the new Llama 3.3: Features and Overvi...\n",
|
|
|
+ " Result 2: What is Meta Llama 3.3 70B? Features, Use Cases & ...\n",
|
|
|
+ " Result 3: Key Features and Improvements in LLaMA 3.3...\n",
|
|
|
+ " Result 4: Everything You Need to Know About Llama 3.3 | by A...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 11/15\n",
|
|
|
+ "Report: Llama 3.3: The Future of AI-Driven Innovation\n",
|
|
|
+ "Query: Llama 3.3 vs Llama 3.1 comparison\n",
|
|
|
+ "Searching for: Llama 3.3 vs Llama 3.1 comparison\n",
|
|
|
+ " Result 1: Llama 3 vs 3.1 vs 3.2 : r/LocalLLaMA...\n",
|
|
|
+ " Result 2: Llama 3 vs Llama 3.1 : Which is Better for Your AI...\n",
|
|
|
+ " Result 3: Llama 3.3 70B Instruct vs Llama 3.1 405B Instruct...\n",
|
|
|
+ " Result 4: Llama 3.1 vs Llama 3 Differences - GoPenAI...\n",
|
|
|
+ " Result 5: Decoding Llama 3 vs 3.1: Which One Is Right for Yo...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 12/15\n",
|
|
|
+ "Report: Llama 3.3: The Future of AI-Driven Innovation\n",
|
|
|
+ "Query: Cost of running Llama 3.3 on cloud vs local infrastructure\n",
|
|
|
+ "Searching for: Cost of running Llama 3.3 on cloud vs local infrastructure\n",
|
|
|
+ " Result 1: What's the cost of running Llama3:8b & 70b in the ...\n",
|
|
|
+ " Result 2: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 3: Llama 3.3 vs. ChatGPT Pro: Key Considerations...\n",
|
|
|
+ " Result 4: Llama 3.3 API Pricing: What You Need to Know...\n",
|
|
|
+ " Result 5: Llama models | Generative AI...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 13/15\n",
|
|
|
+ "Report: Llama 3.3: A Technical Deep Dive\n",
|
|
|
+ "Query: Llama 3.3 architecture and technical specifications\n",
|
|
|
+ "Searching for: Llama 3.3 architecture and technical specifications\n",
|
|
|
+ " Result 1: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 2: Introducing Meta Llama 3: The most capable openly ...\n",
|
|
|
+ "Error fetching HTML from https://ai.meta.com/blog/meta-llama-3/: 400 Client Error: Bad Request for url: https://ai.meta.com/blog/meta-llama-3/\n",
|
|
|
+ " Result 3: meta-llama/Llama-3.3-70B-Instruct...\n",
|
|
|
+ " Result 4: llama-3.3-70b-instruct Model by Meta...\n",
|
|
|
+ " Result 5: Llama-3.3-70B - Documentation & FAQ...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 14/15\n",
|
|
|
+ "Report: Llama 3.3: A Technical Deep Dive\n",
|
|
|
+ "Query: Llama 3.3 vs Llama 3.1 comparison\n",
|
|
|
+ "Searching for: Llama 3.3 vs Llama 3.1 comparison\n",
|
|
|
+ " Result 1: Llama 3 vs 3.1 vs 3.2 : r/LocalLLaMA...\n",
|
|
|
+ " Result 2: Llama 3 vs Llama 3.1 : Which is Better for Your AI...\n",
|
|
|
+ " Result 3: Llama 3.3 70B Instruct vs Llama 3.1 405B Instruct...\n",
|
|
|
+ " Result 4: Llama 3.1 vs Llama 3 Differences - GoPenAI...\n",
|
|
|
+ " Result 5: Decoding Llama 3 vs 3.1: Which One Is Right for Yo...\n",
|
|
|
+ "\n",
|
|
|
+ "Processing query 15/15\n",
|
|
|
+ "Report: Llama 3.3: A Technical Deep Dive\n",
|
|
|
+ "Query: Cost of running Llama 3.3 on cloud vs local infrastructure\n",
|
|
|
+ "Searching for: Cost of running Llama 3.3 on cloud vs local infrastructure\n",
|
|
|
+ " Result 1: What's the cost of running Llama3:8b & 70b in the ...\n",
|
|
|
+ " Result 2: What Is Meta's Llama 3.3 70B? How It Works, Use Ca...\n",
|
|
|
+ "Error fetching HTML from https://www.datacamp.com/blog/llama-3-3-70b: 403 Client Error: Forbidden for url: https://www.datacamp.com/blog/llama-3-3-70b\n",
|
|
|
+ " Result 3: Llama 3.3 vs. ChatGPT Pro: Key Considerations...\n",
|
|
|
+ " Result 4: Llama 3.3 API Pricing: What You Need to Know...\n",
|
|
|
+ " Result 5: Llama models | Generative AI...\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "results = process_all_queries(queries_df)"
|
|
|
+ ]
|
|
|
+ },
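+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "Each saved HTML file has a metadata JSON written next to it by `save_html`, so a quick way to confirm the run landed where expected is to list a few of those metadata files. This is an optional spot check."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional spot check: print a few of the metadata files written during the run\n",
+    "for path in sorted(results_dir.rglob(\"result_*_metadata.json\"))[:3]:\n",
+    "    with open(path) as f:\n",
+    "        meta = json.load(f)\n",
+    "    print(meta[\"query\"], \"->\", meta[\"url\"])"
+   ]
+  },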
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "def analyze_results():\n",
|
|
|
+ "\n",
|
|
|
+ " try:\n",
|
|
|
+ " with open(base_dir / \"results_so_far.json\", \"r\") as f:\n",
|
|
|
+ " results = json.load(f)\n",
|
|
|
+ " \n",
|
|
|
+ " total_results = sum(len(query[\"results\"]) for query in results)\n",
|
|
|
+ " print(f\"Total queries processed: {len(results)}\")\n",
|
|
|
+ " print(f\"Total search results fetched: {total_results}\")\n",
|
|
|
+ " \n",
|
|
|
+ " summary_data = []\n",
|
|
|
+ " for query in results:\n",
|
|
|
+ " report_title = query[\"report_title\"]\n",
|
|
|
+ " query_text = query[\"query\"]\n",
|
|
|
+ " results_count = len(query[\"results\"])\n",
|
|
|
+ " \n",
|
|
|
+ " summary_data.append({\n",
|
|
|
+ " \"Report\": report_title,\n",
|
|
|
+ " \"Query\": query_text,\n",
|
|
|
+ " \"Results Count\": results_count\n",
|
|
|
+ " })\n",
|
|
|
+ " \n",
|
|
|
+ " summary_df = pd.DataFrame(summary_data)\n",
|
|
|
+ " return summary_df\n",
|
|
|
+ " except FileNotFoundError:\n",
|
|
|
+ " print(\"No results file found. Run the processing first.\")\n",
|
|
|
+ " return None\n"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 26,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "Total queries processed: 15\n",
|
|
|
+ "Total search results fetched: 70\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "summary_df = analyze_results()\n",
|
|
|
+ "# if summary_df is not None:\n",
|
|
|
+ "# summary_df"
|
|
|
+ ]
|
|
|
+ },
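+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "The per-query summary can also be rolled up per report to see how many pages were collected for each outline; an optional aggregation over `summary_df`."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# Optional: total results collected per report\n",
+    "if summary_df is not None:\n",
+    "    print(summary_df.groupby(\"Report\")[\"Results Count\"].sum())"
+   ]
+  },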
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "base",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.12.2"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|