{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [],
   "source": [
    "# One-time setup: uncomment to install the SerpApi client, which is published\n",
    "# on PyPI as `google-search-results` and imported below as `serpapi`.\n",
    "# %pip (not !pip) installs into the environment of the running kernel.\n",
    "# %pip install google-search-results"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "import json\n",
    "import os\n",
    "import time\n",
    "from serpapi import GoogleSearch\n",
    "import requests\n",
    "import hashlib\n",
    "from pathlib import Path\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Working directories, all relative to the notebook's current directory.\n",
    "base_dir = Path(\"llama_data\")\n",
    "src_dir = base_dir / \"src\"\n",
    "results_dir = base_dir / \"results\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create the directory tree; exist_ok=True keeps this cell safe to re-run.\n",
    "# base_dir is created first because the other two live inside it.\n",
    "base_dir.mkdir(exist_ok=True)\n",
    "src_dir.mkdir(exist_ok=True)\n",
    "results_dir.mkdir(exist_ok=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Decode the outlines as UTF-8 explicitly: the platform default encoding\n",
    "# (for example cp1252 on Windows) can fail on, or mis-decode, non-ASCII text.\n",
    "with open('generated_outlines.json', 'r', encoding='utf-8') as file:\n",
    "    content = file.read()\n",
    "    data = json.loads(content)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Loaded 5 report outlines\n"
     ]
    }
   ],
   "source": [
    "print(f\"Loaded {len(data)} report outlines\")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Sample report title: Llama 3.3: A Revolutionary Leap in AI\n",
      "Sample queries:\n",
      "- Llama 3.3 new features and enhancements: To gather information on the new features and enhancements in Llama 3.3\n",
      "- Llama 3.3 vs Llama 3.1 performance comparison: To gather information on the performance comparison between Llama 3.3 and Llama 3.1\n"
     ]
    }
   ],
   "source": [
    "# Preview the first outline. Fall back to an empty dict so that an empty\n",
    "# outline file prints placeholders instead of raising IndexError on data[0].\n",
    "sample_report = data[0] if data else {}\n",
    "print(\"\\nSample report title:\", sample_report.get('original_goal', {}).get('Report Title', 'No title'))\n",
    "print(\"Sample queries:\")\n",
    "# Only the first two queries are shown to keep the preview short.\n",
    "for query in sample_report.get('Web Queries', [])[:2]:\n",
    "    print(f\"- {query.get('query')}: {query.get('purpose')}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "all_queries = []"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Flatten every report's 'Web Queries' into one record per query.\n",
    "# The list is rebound here so this cell is idempotent: re-running it on its\n",
    "# own rebuilds the records instead of appending duplicates to the old list.\n",
    "all_queries = []\n",
    "for report_index, report_data in enumerate(data):\n",
    "    # Fall back to a positional title when the outline carries none.\n",
    "    report_title = report_data.get('original_goal', {}).get('Report Title', f\"Report {report_index}\")\n",
    "\n",
    "    for query_index, query_data in enumerate(report_data.get('Web Queries', [])):\n",
    "        # Missing fields become empty strings rather than None.\n",
    "        query = query_data.get('query', '')\n",
    "        purpose = query_data.get('purpose', '')\n",
    "\n",
    "        all_queries.append({\n",
    "            'report_index': report_index,\n",
    "            'report_title': report_title,\n",
    "            'query_index': query_index,\n",
    "            'query': query,\n",
    "            'purpose': purpose\n",
    "        })"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Total queries extracted: 15\n"
     ]
    },
    {
     "data": {
      "text/html": [ "
| \n", " | report_index | \n", "report_title | \n", "query_index | \n", "query | \n", "purpose | \n", "
|---|---|---|---|---|---|
| 0 | \n", "0 | \n", "Llama 3.3: A Revolutionary Leap in AI | \n", "0 | \n", "Llama 3.3 new features and enhancements | \n", "To gather information on the new features and ... | \n", "
| 1 | \n", "0 | \n", "Llama 3.3: A Revolutionary Leap in AI | \n", "1 | \n", "Llama 3.3 vs Llama 3.1 performance comparison | \n", "To gather information on the performance compa... | \n", "
| 2 | \n", "0 | \n", "Llama 3.3: A Revolutionary Leap in AI | \n", "2 | \n", "Cost of running Llama 3.3 on cloud vs local in... | \n", "To gather information on the cost-effectivenes... | \n", "
| 3 | \n", "1 | \n", "Llama 3.3 vs Llama 3.1: A Comparative Analysis | \n", "0 | \n", "Llama 3.3 new features and improvements | \n", "To gather information on new features and impr... | \n", "
| 4 | \n", "1 | \n", "Llama 3.3 vs Llama 3.1: A Comparative Analysis | \n", "1 | \n", "Llama 3.1 vs Llama 3.3 performance comparison | \n", "To gather information on performance differenc... | \n", "