|
@@ -0,0 +1,985 @@
|
|
|
|
|
+{
|
|
|
|
|
+ "cells": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "0e4aad87-ddd4-4b5e-a83f-63a75bd89f38",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# PowerPoint to Knowledge-Grounded & Narrative-Aware Voiceover Transcript Generator\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+    "This cookbook demonstrates the complete workflow for converting PowerPoint presentations into AI-generated voiceover transcripts with retrieval augmentation and narrative continuity features, powered by Llama 4 Maverick's vision capabilities through the Groq API.\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Overview\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "This workflow performs the following operations:\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "1. **Content Extraction**: Pulls speaker notes and visual elements from PowerPoint slides\n",
|
|
|
|
|
+    "2. **Knowledge Base Integration**: Leverages external knowledge sources to enhance transcript quality (in this cookbook, the markdown files in the `knowledge_base` folder)\n",
|
|
|
|
|
+ "3. **Image Conversion**: Transforms slides into high-quality images for analysis by Llama 4 Maverick.\n",
|
|
|
|
|
+ "4. **Context-Aware Generation**: Creates natural-sounding voiceover content with narrative continuity and knowledge-based insights\n",
|
|
|
|
|
+ " - **Speech Optimization**: Converts numbers, technical terms, and abbreviations to spoken form\n",
|
|
|
|
|
+    "5. **Results Export**: Saves transcripts, context information, and knowledge usage statistics in multiple formats\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Key Features\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "- **Knowledge Base Integration**: Automatically retrieves relevant information from markdown knowledge files\n",
|
|
|
|
|
+ "- **Unified Processor**: Single class handles both standard and narrative-aware processing with knowledge enhancement\n",
|
|
|
|
|
+ "- **Configurable Context**: Adjustable context window for narrative continuity and knowledge retrieval\n",
|
|
|
|
|
+ "- **Mode Selection**: Toggle between standard and narrative processing with optional knowledge integration\n",
|
|
|
|
|
+ "- **Performance Optimization**: Caching and lazy loading for efficient knowledge retrieval\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Prerequisites\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Before running this notebook, ensure you have:\n",
|
|
|
|
|
+    "- Created a `.env` file with your `GROQ_API_KEY`\n",
|
|
|
|
|
+ "- Updated `config.yaml` with your presentation file path\n",
|
|
|
|
|
+ "- Set up your knowledge base directory with relevant markdown files (This cookbook only supports markdown format at the moment)\n",
|
|
|
|
|
+ "- Enabled knowledge base features in `config.yaml` (set `knowledge.enabled: true`)\n",
|
|
|
|
|
+ "\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "b3367845-76ad-4493-a312-f80f00fad029",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Setup and Configuration\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Import required libraries and load environment configuration."
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 50,
|
|
|
|
|
+ "id": "37249034-75bf-41bd-b640-eb6345435f47",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Import required libraries\n",
|
|
|
|
|
+ "import pandas as pd\n",
|
|
|
|
|
+ "import os\n",
|
|
|
|
|
+ "from pathlib import Path\n",
|
|
|
|
|
+ "from dotenv import load_dotenv\n",
|
|
|
|
|
+ "import matplotlib.pyplot as plt\n",
|
|
|
|
|
+ "from IPython.display import display"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 51,
|
|
|
|
|
+ "id": "0aedb2c5-5762-43ae-826b-fdb45ff642f5",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stdout",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "SUCCESS: Environment loaded successfully!\n",
|
|
|
|
|
+ "SUCCESS: GROQ API key found\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Load environment variables from .env file\n",
|
|
|
|
|
+ "load_dotenv()\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Verify setup\n",
|
|
|
|
|
+ "if os.getenv('GROQ_API_KEY'):\n",
|
|
|
|
|
+ " print(\"SUCCESS: Environment loaded successfully!\")\n",
|
|
|
|
|
+ " print(\"SUCCESS: GROQ API key found\")\n",
|
|
|
|
|
+ "else:\n",
|
|
|
|
|
+ " print(\"WARNING: GROQ_API_KEY not found in .env file\")\n",
|
|
|
|
|
+ " print(\"Please check your .env file and add your API key\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 52,
|
|
|
|
|
+ "id": "0563bb13-9dbd-4a29-9b3b-f565befd2001",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stdout",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "SUCCESS: All modules imported successfully!\n",
|
|
|
|
|
+ "- PPTX processor ready\n",
|
|
|
|
|
+ "- Unified transcript generator ready\n",
|
|
|
|
|
+ "- Configuration manager ready\n",
|
|
|
|
|
+ "- Visualization generator ready\n",
|
|
|
|
|
+ "- FAISS knowledge base components ready\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Import custom modules\n",
|
|
|
|
|
+ "try:\n",
|
|
|
|
|
+ " from src.core.pptx_processor import extract_pptx_notes, pptx_to_images_and_notes\n",
|
|
|
|
|
+ " from src.processors.unified_transcript_generator import (\n",
|
|
|
|
|
+ " UnifiedTranscriptProcessor,\n",
|
|
|
|
|
+ " process_slides,\n",
|
|
|
|
|
+ " process_slides_with_narrative\n",
|
|
|
|
|
+ " )\n",
|
|
|
|
|
+ " from src.config.settings import load_config, get_config, is_knowledge_enabled\n",
|
|
|
|
|
+ " from src.utils.visualization import display_slide_grid, display_slide_preview\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " print(\"SUCCESS: All modules imported successfully!\")\n",
|
|
|
|
|
+ " print(\"- PPTX processor ready\")\n",
|
|
|
|
|
+ " print(\"- Unified transcript generator ready\")\n",
|
|
|
|
|
+ " print(\"- Configuration manager ready\")\n",
|
|
|
|
|
+ " print(\"- Visualization generator ready\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " # Try to import knowledge base modules\n",
|
|
|
|
|
+ " knowledge_available = False\n",
|
|
|
|
|
+ " try:\n",
|
|
|
|
|
+ " from src.knowledge.faiss_knowledge import FAISSKnowledgeManager\n",
|
|
|
|
|
+ " from src.knowledge.context_manager import ContextManager\n",
|
|
|
|
|
+ " knowledge_available = True\n",
|
|
|
|
|
+ " print(\"- FAISS knowledge base components ready\")\n",
|
|
|
|
|
+ " except ImportError as e:\n",
|
|
|
|
|
+ " print(f\"- WARNING: Knowledge base components not available: {e}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "except ImportError as e:\n",
|
|
|
|
|
+ " print(f\"ERROR: Import error: {e}\")\n",
|
|
|
|
|
+ " print(\"Make sure you're running from the project root directory\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 53,
|
|
|
|
|
+ "id": "cafe366c-3ec6-47c7-8e70-ed69e89ae137",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stdout",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "SUCCESS: Configuration loaded successfully!\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Current Settings:\n",
|
|
|
|
|
+ "- Llama Model: meta-llama/llama-4-maverick-17b-128e-instruct\n",
|
|
|
|
|
+ "- Image DPI: 200\n",
|
|
|
|
|
+ "- Image Format: png\n",
|
|
|
|
|
+ "- Context Window: 5 previous slides (default)\n",
|
|
|
|
|
+ "- Knowledge Base: ENABLED\n",
|
|
|
|
|
+ " - Knowledge Directory: knowledge_base\n",
|
|
|
|
|
+ " - Context Strategy: combined\n",
|
|
|
|
|
+ " - Knowledge Weight: 0.3\n",
|
|
|
|
|
+ " - Embedding Model: all-MiniLM-L6-v2\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Load configuration\n",
|
|
|
|
|
+ "config = load_config()\n",
|
|
|
|
|
+ "print(\"\\nSUCCESS: Configuration loaded successfully!\")\n",
|
|
|
|
|
+ "print(\"\\nCurrent Settings:\")\n",
|
|
|
|
|
+ "print(f\"- Llama Model: {config['api']['groq_model']}\")\n",
|
|
|
|
|
+ "print(f\"- Image DPI: {config['processing']['default_dpi']}\")\n",
|
|
|
|
|
+ "print(f\"- Image Format: {config['processing']['default_format']}\")\n",
|
|
|
|
|
+ "print(f\"- Context Window: 5 previous slides (default)\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Display knowledge base configuration\n",
|
|
|
|
|
+ "knowledge_enabled = is_knowledge_enabled()\n",
|
|
|
|
|
+ "print(f\"- Knowledge Base: {'ENABLED' if knowledge_enabled else 'DISABLED'}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "if knowledge_enabled:\n",
|
|
|
|
|
+ " knowledge_config = config.get('knowledge', {})\n",
|
|
|
|
|
+ " print(f\" - Knowledge Directory: {knowledge_config.get('knowledge_base_dir', 'knowledge_base')}\")\n",
|
|
|
|
|
+ " print(f\" - Context Strategy: {knowledge_config.get('context', {}).get('strategy', 'combined')}\")\n",
|
|
|
|
|
+ " print(f\" - Knowledge Weight: {knowledge_config.get('context', {}).get('knowledge_weight', 0.3)}\")\n",
|
|
|
|
|
+ " print(f\" - Embedding Model: {knowledge_config.get('embedding', {}).get('model_name', 'all-MiniLM-L6-v2')}\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "dd800f7d-3ae5-4291-89d4-32d5cfca6cc7",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
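+    "#### Don't forget to update `current_project.pptx_file` in `config.yaml` with your presentation path (without the file extension, which is set separately in `current_project.extension`)!\n"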
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 54,
|
|
|
|
|
+ "id": "58642e4d-cb6f-4e6f-8543-c1290a0e258d",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stdout",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "File Configuration:\n",
|
|
|
|
|
+ "- Input File: input/All About Llamas.pptx\n",
|
|
|
|
|
+ "- Output Directory: output/\n",
|
|
|
|
|
+ "- SUCCESS: Input file found (10.8 MB)\n",
|
|
|
|
|
+ "- SUCCESS: Output directory ready\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Configure file paths from config.yaml\n",
|
|
|
|
|
+ "pptx_file = config['current_project']['pptx_file'] + config['current_project']['extension']\n",
|
|
|
|
|
+ "output_dir = config['current_project']['output_dir']\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(\"File Configuration:\")\n",
|
|
|
|
|
+ "print(f\"- Input File: {pptx_file}\")\n",
|
|
|
|
|
+ "print(f\"- Output Directory: {output_dir}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Verify input file exists\n",
|
|
|
|
|
+ "if Path(pptx_file).exists():\n",
|
|
|
|
|
+ " file_size = Path(pptx_file).stat().st_size / 1024 / 1024\n",
|
|
|
|
|
+ " print(f\"- SUCCESS: Input file found ({file_size:.1f} MB)\")\n",
|
|
|
|
|
+ "else:\n",
|
|
|
|
|
+ " print(f\"- ERROR: Input file not found: {pptx_file}\")\n",
|
|
|
|
|
+ " print(\" Please update the 'pptx_file' path in config.yaml\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Create output directory if needed\n",
|
|
|
|
|
+ "Path(output_dir).mkdir(parents=True, exist_ok=True)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: Output directory ready\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "09cf9962-a9f0-4362-a72b-7c11f50772bb",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "## Knowledge Base Setup and Validation\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Set up and validate the knowledge base if enabled in configuration.\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 55,
|
|
|
|
|
+ "id": "e7666fa8-a4a4-4e7d-bf5d-e34ca992f9b0",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "def setup_knowledge_base(config):\n",
|
|
|
|
|
+ " \"\"\"Setup and validate knowledge base if enabled.\"\"\"\n",
|
|
|
|
|
+ " knowledge_enabled = is_knowledge_enabled()\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " if not knowledge_enabled:\n",
|
|
|
|
|
+ " print(\"Knowledge base is disabled in configuration\")\n",
|
|
|
|
|
+ " return None, None\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " if not knowledge_available:\n",
|
|
|
|
|
+ " print(\"WARNING: Knowledge base is enabled but components are not available\")\n",
|
|
|
|
|
+ " return None, None\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " print(\"Setting up knowledge base...\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " knowledge_config = config.get('knowledge', {})\n",
|
|
|
|
|
+ " knowledge_base_dir = Path(knowledge_config.get('knowledge_base_dir', 'knowledge_base'))\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " # Check if knowledge base directory exists and has content\n",
|
|
|
|
|
+ " if not knowledge_base_dir.exists():\n",
|
|
|
|
|
+ " print(f\"Creating knowledge base directory: {knowledge_base_dir}\")\n",
|
|
|
|
|
+ " knowledge_base_dir.mkdir(parents=True, exist_ok=True)\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " # Create sample knowledge base files for demonstration\n",
|
|
|
|
|
+ " create_sample_knowledge_base(knowledge_base_dir)\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " # List existing knowledge files\n",
|
|
|
|
|
+ " md_files = list(knowledge_base_dir.rglob(\"*.md\"))\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " print(f\"Knowledge Base Status:\")\n",
|
|
|
|
|
+ " print(f\"- Directory: {knowledge_base_dir}\")\n",
|
|
|
|
|
+ " print(f\"- Markdown files found: {len(md_files)}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " if md_files:\n",
|
|
|
|
|
+ " print(\"- Available knowledge files:\")\n",
|
|
|
|
|
+ " for md_file in md_files:\n",
|
|
|
|
|
+ " file_size = md_file.stat().st_size\n",
|
|
|
|
|
+ " print(f\" - {md_file.name} ({file_size} bytes)\")\n",
|
|
|
|
|
+ " else:\n",
|
|
|
|
|
+ " print(\"- No knowledge files found\")\n",
|
|
|
|
|
+ " print(\"- Creating sample knowledge base for demonstration...\")\n",
|
|
|
|
|
+ " create_sample_knowledge_base(knowledge_base_dir)\n",
|
|
|
|
|
+ " md_files = list(knowledge_base_dir.rglob(\"*.md\"))\n",
|
|
|
|
|
+ " print(f\"- Created {len(md_files)} sample knowledge files\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " # Initialize knowledge manager\n",
|
|
|
|
|
+ " try:\n",
|
|
|
|
|
+ " # Get FAISS configuration from config\n",
|
|
|
|
|
+ " vector_config = knowledge_config.get('vector_store', {})\n",
|
|
|
|
|
+ " embedding_config = knowledge_config.get('embedding', {})\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " # Initialize FAISS knowledge manager with configuration\n",
|
|
|
|
|
+ " knowledge_manager = FAISSKnowledgeManager(\n",
|
|
|
|
|
+ " knowledge_base_dir=str(knowledge_base_dir),\n",
|
|
|
|
|
+ " index_type=vector_config.get('index_type', 'flat'),\n",
|
|
|
|
|
+ " embedding_model=embedding_config.get('model_name', 'all-MiniLM-L6-v2'),\n",
|
|
|
|
|
+ " use_gpu=vector_config.get('use_gpu', False)\n",
|
|
|
|
|
+ " )\n",
|
|
|
|
|
+ " knowledge_manager.initialize()\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " context_manager = ContextManager()\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " # Display knowledge base statistics\n",
|
|
|
|
|
+ " stats = knowledge_manager.get_stats()\n",
|
|
|
|
|
+ " print(f\"- Knowledge chunks loaded: {stats['total_chunks']}\")\n",
|
|
|
|
|
+ " print(f\"- Index type: {stats['index_type']}\")\n",
|
|
|
|
|
+ " print(f\"- Embedding model: {stats['embedding_model']}\")\n",
|
|
|
|
|
+ " print(f\"- Model loaded: {stats['model_loaded']}\")\n",
|
|
|
|
|
+ " print(f\"- Index loaded: {stats['index_loaded']}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " return knowledge_manager, context_manager\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " except Exception as e:\n",
|
|
|
|
|
+ " print(f\"ERROR: Failed to initialize knowledge base: {e}\")\n",
|
|
|
|
|
+ " import traceback\n",
|
|
|
|
|
+ " traceback.print_exc()\n",
|
|
|
|
|
+ " return None, None\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 56,
|
|
|
|
|
+ "id": "91f8fd6d-c142-4eb8-a72d-6640a7423af8",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stdout",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "Setting up knowledge base...\n",
|
|
|
|
|
+ "Knowledge Base Status:\n",
|
|
|
|
|
+ "- Directory: knowledge_base\n",
|
|
|
|
|
+ "- Markdown files found: 2\n",
|
|
|
|
|
+ "- Available knowledge files:\n",
|
|
|
|
|
+ " - llama diet.md (5762 bytes)\n",
|
|
|
|
|
+ " - llamas.md (7567 bytes)\n",
|
|
|
|
|
+ "- Knowledge chunks loaded: 19\n",
|
|
|
|
|
+ "- Index type: flat\n",
|
|
|
|
|
+ "- Embedding model: all-MiniLM-L6-v2\n",
|
|
|
|
|
+ "- Model loaded: True\n",
|
|
|
|
|
+ "- Index loaded: True\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Setup knowledge base\n",
|
|
|
|
|
+ "knowledge_manager, context_manager = setup_knowledge_base(config)"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "85c830ee-c91f-452b-987e-1652efeb326a",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "## Processing Mode Configuration\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Choose your processing mode and configure the processor with knowledge integration.\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": 57,
|
|
|
|
|
+ "id": "290d9c7e-19db-44e0-b9c3-8973674b1010",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stdout",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "Processing Mode Configuration:\n",
|
|
|
|
|
+ "- Mode: NARRATIVE CONTINUITY\n",
|
|
|
|
|
+ "- Context Window: 5 previous slides\n",
|
|
|
|
|
+ "- Knowledge Integration: ENABLED\n",
|
|
|
|
|
+ " - Knowledge chunks available: 19\n",
|
|
|
|
|
+ " - Search strategy: combined\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Configure processing mode with knowledge integration\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "USE_NARRATIVE = True # Set to False for standard processing, True for narrative continuity\n",
|
|
|
|
|
+ "CONTEXT_WINDOW_SIZE = 5 # Number of previous slides to use as context (only used when USE_NARRATIVE=True)\n",
|
|
|
|
|
+ "ENABLE_KNOWLEDGE = True # Set to False to disable knowledge base integration\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(\"Processing Mode Configuration:\")\n",
|
|
|
|
|
+ "if USE_NARRATIVE:\n",
|
|
|
|
|
+ " print(f\"- Mode: NARRATIVE CONTINUITY\")\n",
|
|
|
|
|
+ " print(f\"- Context Window: {CONTEXT_WINDOW_SIZE} previous slides\")\n",
|
|
|
|
|
+ "else:\n",
|
|
|
|
|
+ " print(f\"- Mode: STANDARD PROCESSING\")\n",
|
|
|
|
|
+ " print(f\"- Features: Independent slide processing, faster execution\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(f\"- Knowledge Integration: {'ENABLED' if ENABLE_KNOWLEDGE else 'DISABLED'}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "if ENABLE_KNOWLEDGE and knowledge_manager:\n",
|
|
|
|
|
+ " print(f\" - Knowledge chunks available: {knowledge_manager.get_stats()['total_chunks']}\")\n",
|
|
|
|
|
+ " print(f\" - Search strategy: {config.get('knowledge', {}).get('context', {}).get('strategy', 'combined')}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Initialize the unified processor with knowledge integration\n",
|
|
|
|
|
+ "processor = UnifiedTranscriptProcessor(\n",
|
|
|
|
|
+ " use_narrative=USE_NARRATIVE,\n",
|
|
|
|
|
+ " context_window_size=CONTEXT_WINDOW_SIZE,\n",
|
|
|
|
|
+ " enable_knowledge=ENABLE_KNOWLEDGE\n",
|
|
|
|
|
+ ")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "2cd7bd6d-364a-4350-9f38-b988323fcdae",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "## Processing Pipeline\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Execute the main processing pipeline in three key steps.\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "1ce1e223-faf0-4ab3-996d-a451bed30fc9",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "### Step 1: Extract Content and Convert to Images\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Extract speaker notes and slide text, then convert the presentation to high-quality images for AI analysis.\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "db3ad12e-03d8-45cb-9999-b167d2ab93c5",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stdout",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "PROCESSING: Converting PPTX to images and extracting notes...\n",
|
|
|
|
|
+ "Processing: All About Llamas.pptx\n",
|
|
|
|
|
+ "Extracting speaker notes...\n",
|
|
|
|
|
+ "Found notes on 10 of 10 slides\n",
|
|
|
|
|
+      "Notes df saved to: output/All About Llamas_notes.csv\n",
|
|
|
|
|
+ "Converting to PDF...\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "name": "stderr",
|
|
|
|
|
+ "output_type": "stream",
|
|
|
|
|
+ "text": [
|
|
|
|
|
+ "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n",
|
|
|
|
|
+ "To disable this warning, you can either:\n",
|
|
|
|
|
+ "\t- Avoid using `tokenizers` before the fork if possible\n",
|
|
|
|
|
+ "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "print(\"PROCESSING: Converting PPTX to images and extracting notes...\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "result = pptx_to_images_and_notes(\n",
|
|
|
|
|
+ " pptx_path=pptx_file,\n",
|
|
|
|
|
+ " output_dir=output_dir,\n",
|
|
|
|
|
+ " extract_notes=True\n",
|
|
|
|
|
+ ")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "notes_df = result['notes_df']\n",
|
|
|
|
|
+ "image_files = result['image_files']\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(f\"\\nSUCCESS: Processing completed successfully!\")\n",
|
|
|
|
|
+ "print(f\"- Processed {len(image_files)} slides\")\n",
|
|
|
|
|
+ "print(f\"- Images saved to: {result['output_dir']}\")\n",
|
|
|
|
|
+ "print(f\"- Found notes on {notes_df['has_notes'].sum()} slides\")\n",
|
|
|
|
|
+ "print(f\"- DataFrame shape: {notes_df.shape}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Show sample data\n",
|
|
|
|
|
+ "print(\"\\nSample Data (First 5 slides):\")\n",
|
|
|
|
|
+ "display(notes_df[['slide_number', 'slide_title', 'has_notes', 'notes_word_count', 'slide_text_word_count']].head())\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Preview only the first 6 slide images\n",
|
|
|
|
|
+ "display_slide_preview(image_files, num_slides=6, max_cols=3)"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "bf5e8a23-c046-45f5-a7cd-14baa70854c2",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "### Step 2: Generate Knowledge-Enhanced Narrative-Aware AI Transcripts\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Use the Llama vision model to analyze each slide image and generate natural-sounding voiceover transcripts with both narrative continuity and knowledge base enhancement.\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "This enhanced process:\n",
|
|
|
|
|
+ "- Analyzes slide visual content using AI vision\n",
|
|
|
|
|
+ "- Retrieves relevant information from the knowledge base\n",
|
|
|
|
|
+ "- Uses transcripts from previous slides as context\n",
|
|
|
|
|
+ "- Combines slide content, speaker notes, and knowledge insights\n",
|
|
|
|
|
+ "- Generates speech-optimized transcripts with smooth transitions and enhanced accuracy\n",
|
|
|
|
|
+ "- Maintains consistent terminology throughout the presentation\n",
|
|
|
|
|
+ "- Converts numbers and technical terms to spoken form\n",
|
|
|
|
|
+ "\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "2c56a543-4ad7-4276-99d2-0be5c198782c",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "print(\"PROCESSING: Starting AI transcript generation with knowledge-enhanced unified processor...\")\n",
|
|
|
|
|
+ "print(f\"- Processing {len(notes_df)} slides\")\n",
|
|
|
|
|
+ "print(f\"- Using model: {config['api']['groq_model']}\")\n",
|
|
|
|
|
+ "print(f\"- Mode: {'Narrative Continuity' if USE_NARRATIVE else 'Standard Processing'}\")\n",
|
|
|
|
|
+ "print(f\"- Knowledge Integration: {'ENABLED' if ENABLE_KNOWLEDGE else 'DISABLED'}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "if USE_NARRATIVE:\n",
|
|
|
|
|
+ " print(f\"- Context window: {CONTEXT_WINDOW_SIZE} previous slides\")\n",
|
|
|
|
|
+ " print(f\"- Using previous transcripts as context for narrative continuity\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "if ENABLE_KNOWLEDGE and knowledge_manager:\n",
|
|
|
|
|
+ " print(f\"- Knowledge base: {knowledge_manager.get_stats()['total_chunks']} chunks available\")\n",
|
|
|
|
|
+ " print(f\"- Search strategy: {config.get('knowledge', {}).get('context', {}).get('strategy', 'combined')}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(\"- This may take several minutes...\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Generate transcripts using the knowledge-enhanced unified processor\n",
|
|
|
|
|
+ "processed_df = processor.process_slides_dataframe(\n",
|
|
|
|
|
+ " df=notes_df,\n",
|
|
|
|
|
+ " output_dir=output_dir,\n",
|
|
|
|
|
+ " save_context=True # Only saves context if USE_NARRATIVE=True\n",
|
|
|
|
|
+ ")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(f\"\\nSUCCESS: Transcript generation completed!\")\n",
|
|
|
|
|
+ "print(f\"- Generated {len(processed_df)} transcripts\")\n",
|
|
|
|
|
+ "print(f\"- Average length: {processed_df['ai_transcript'].str.len().mean():.0f} characters\")\n",
|
|
|
|
|
+ "print(f\"- Total words: {processed_df['ai_transcript'].str.split().str.len().sum():,}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "if USE_NARRATIVE:\n",
|
|
|
|
|
+ " print(f\"- Context information saved to: {output_dir}narrative_context/\")\n",
|
|
|
|
|
+ " print(f\"- Average context slides used: {processed_df['context_slides_used'].mean():.1f}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "if ENABLE_KNOWLEDGE and knowledge_manager:\n",
|
|
|
|
|
+ " print(f\"- Knowledge base integration: Active during processing\")\n",
|
|
|
|
|
+ " print(f\"- Enhanced transcripts with domain-specific information\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "2cd0590b-66af-4653-a3e1-5d4eb9a845af",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Show first 5 transcripts with detailed knowledge information\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "from src.utils.transcript_display import show_transcripts_with_knowledge\n",
|
|
|
|
|
+ "show_transcripts_with_knowledge(processed_df, knowledge_manager, num_slides=5)\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "e2d617a6-33c3-4747-86d3-5a4161aa857c",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "b4dd6593-6539-4d4f-baa7-678fed43165d",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "207e169d-7668-4d75-b2d7-265504175ec7",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "25258057-dad3-4ced-adfd-8e399eb2bae6",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "b6aec6d2-f001-46a7-bf5d-2e29318d5f82",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "### Step 3: Save Results\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Save results in multiple formats with knowledge integration metadata.\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "ff2f8de2-121b-4e98-a426-80c37cb19da1",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "print(\"PROCESSING: Saving knowledge-enhanced results in multiple formats...\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Create output directory\n",
|
|
|
|
|
+ "os.makedirs(output_dir, exist_ok=True)\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Determine file prefix based on processing mode and knowledge integration\n",
|
|
|
|
|
+ "mode_prefix = \"narrative\" if USE_NARRATIVE else \"standard\"\n",
|
|
|
|
|
+ "knowledge_prefix = \"knowledge_enhanced\" if ENABLE_KNOWLEDGE else \"standard\"\n",
|
|
|
|
|
+ "file_prefix = f\"{knowledge_prefix}_{mode_prefix}\"\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Save complete results with all metadata\n",
|
|
|
|
|
+ "output_file = f\"{output_dir}{file_prefix}_transcripts.csv\"\n",
|
|
|
|
|
+ "processed_df.to_csv(output_file, index=False)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: Complete results saved to {output_file}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Save transcript-only version for voiceover work\n",
|
|
|
|
|
+ "if USE_NARRATIVE:\n",
|
|
|
|
|
+ " transcript_only = processed_df[['slide_number', 'slide_title', 'ai_transcript', 'context_slides_used']]\n",
|
|
|
|
|
+ "else:\n",
|
|
|
|
|
+ " transcript_only = processed_df[['slide_number', 'slide_title', 'ai_transcript']]\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "transcript_file = f\"{output_dir}{file_prefix}_transcripts_clean.csv\"\n",
|
|
|
|
|
+ "transcript_only.to_csv(transcript_file, index=False)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: Clean transcripts saved to {transcript_file}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Save as JSON for API integration\n",
|
|
|
|
|
+ "json_file = f\"{output_dir}{file_prefix}_transcripts.json\"\n",
|
|
|
|
|
+ "processed_df.to_json(json_file, orient='records', indent=2)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: JSON format saved to {json_file}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Save knowledge base statistics if available\n",
|
|
|
|
|
+ "if ENABLE_KNOWLEDGE and knowledge_manager:\n",
|
|
|
|
|
+ " knowledge_stats_file = f\"{output_dir}knowledge_base_stats.json\"\n",
|
|
|
|
|
+ " stats = knowledge_manager.get_stats()\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " import json\n",
|
|
|
|
|
+ " with open(knowledge_stats_file, 'w') as f:\n",
|
|
|
|
|
+ " json.dump(stats, f, indent=2)\n",
|
|
|
|
|
+ " print(f\"- SUCCESS: Knowledge base statistics saved to {knowledge_stats_file}\")\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "d49d7cb9-e598-4511-875b-1629a4373a67",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ " "
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "4b2e1671-9495-45bb-9ac1-a02a83037eb5",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "---\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Completion Summary\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Successfully Generated:\n",
|
|
|
|
|
+ "- **Knowledge-Enhanced Processing**: Integrated external knowledge base with transcript generation\n",
|
|
|
|
|
+ "- **Unified Processing**: Single processor handles standard, narrative, and knowledge-enhanced modes\n",
|
|
|
|
|
+ "- **Flexible Configuration**: Easy switching between processing modes and knowledge integration\n",
|
|
|
|
|
+ "- **Speech-Optimized Transcripts**: Natural-sounding voiceover content enhanced with domain knowledge\n",
|
|
|
|
|
+ "- **Multiple Formats**: CSV, JSON exports for different use cases\n",
|
|
|
|
|
+ "- **Context Analysis**: Detailed information about narrative flow and knowledge usage\n",
|
|
|
|
|
+ "- **Performance Optimization**: Efficient knowledge retrieval with caching and lazy loading\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Output Files:\n",
|
|
|
|
|
+ "- `[knowledge_mode]_[narrative_mode]_transcripts.csv` - Complete dataset with metadata\n",
|
|
|
|
|
+ "- `[knowledge_mode]_[narrative_mode]_transcripts_clean.csv` - Clean transcripts for voiceover work\n",
|
|
|
|
|
+ "- `[knowledge_mode]_[narrative_mode]_transcripts.json` - JSON format for API integration\n",
|
|
|
|
|
+ "- `knowledge_base_stats.json` - Knowledge base usage statistics (knowledge mode only)\n",
|
|
|
|
|
+ "- `narrative_context/` - Context analysis files (narrative mode only)\n",
|
|
|
|
|
+ "- Individual slide images in PNG/JPEG format\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Processing Modes:\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Standard Mode (`USE_NARRATIVE = False`, `ENABLE_KNOWLEDGE = False`)\n",
|
|
|
|
|
+ "- **Best for**: Simple presentations, quick processing, independent slides\n",
|
|
|
|
|
+ "- **Features**: Fast execution, no context dependencies\n",
|
|
|
|
|
+ "- **Use cases**: Training materials, product demos, standalone slides\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Knowledge-Enhanced Standard Mode (`USE_NARRATIVE = False`, `ENABLE_KNOWLEDGE = True`)\n",
|
|
|
|
|
+ "- **Best for**: Technical presentations requiring domain expertise\n",
|
|
|
|
|
+ "- **Features**: Domain knowledge integration, improved accuracy\n",
|
|
|
|
|
+ "- **Use cases**: Technical documentation, educational materials, expert presentations\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Narrative Mode (`USE_NARRATIVE = True`, `ENABLE_KNOWLEDGE = False`)\n",
|
|
|
|
|
+ "- **Best for**: Story-driven presentations, complex topics, educational content\n",
|
|
|
|
|
+ "- **Features**: Context awareness, smooth transitions, terminology consistency\n",
|
|
|
|
|
+ "- **Use cases**: Conference talks, educational courses, marketing presentations\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Knowledge-Enhanced Narrative Mode (`USE_NARRATIVE = True`, `ENABLE_KNOWLEDGE = True`)\n",
|
|
|
|
|
+ "- **Best for**: Complex educational content requiring both continuity and expertise\n",
|
|
|
|
|
+ "- **Features**: Full context awareness, domain knowledge, smooth transitions, enhanced accuracy\n",
|
|
|
|
|
+ "- **Use cases**: Advanced training, academic presentations, expert-level educational content\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Knowledge Base Features:\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Automatic Knowledge Retrieval\n",
|
|
|
|
|
+ "- **Semantic Search**: Uses embedding models to find relevant knowledge chunks\n",
|
|
|
|
|
+ "- **Context Integration**: Seamlessly blends knowledge with slide content and speaker notes\n",
|
|
|
|
|
+ "- **Fallback Mechanisms**: Graceful degradation if knowledge components fail\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Performance Optimization\n",
|
|
|
|
|
+ "- **Caching**: Stores embeddings and search results for faster processing\n",
|
|
|
|
|
+ "- **Lazy Loading**: Loads knowledge components only when needed\n",
|
|
|
|
|
+ "- **Memory Management**: Efficient memory usage with configurable limits\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Configuration Options\n",
|
|
|
|
|
+ "- **Search Strategy**: Choose between knowledge-only, narrative-priority, or combined approaches\n",
|
|
|
|
|
+ "- **Knowledge Weight**: Adjust the influence of knowledge base content\n",
|
|
|
|
|
+ "- **Similarity Threshold**: Control the relevance threshold for knowledge retrieval\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Next Steps:\n",
|
|
|
|
|
+ "1. **Review** generated transcripts for accuracy, flow, and knowledge integration quality\n",
|
|
|
|
|
+ "2. **Customize** knowledge base with domain-specific content for your presentations\n",
|
|
|
|
|
+ "3. **Tune** knowledge integration parameters for optimal results\n",
|
|
|
|
|
+ "4. **Edit** any content that needs refinement\n",
|
|
|
|
|
+ "5. **Create** voiceover recordings or use TTS systems\n",
|
|
|
|
|
+ "6. **Integrate** JSON data into your video production workflow\n",
|
|
|
|
|
+ "7. **Experiment** with different processing modes and knowledge settings\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Tips for Better Results:\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Knowledge Base Optimization\n",
|
|
|
|
|
+ "- **Rich Content**: Include comprehensive, well-structured markdown files in your knowledge base\n",
|
|
|
|
|
+ "- **Relevant Topics**: Ensure knowledge base content aligns with your presentation topics\n",
|
|
|
|
|
+ "- **Clear Structure**: Use proper markdown headers and sections for better chunk extraction\n",
|
|
|
|
|
+ "- **Regular Updates**: Keep knowledge base content current and accurate\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Processing Mode Selection\n",
|
|
|
|
|
+ "- **Simple Presentations**: Use standard mode for quick, independent slide processing\n",
|
|
|
|
|
+ "- **Technical Content**: Enable knowledge integration for domain-specific accuracy\n",
|
|
|
|
|
+ "- **Story-Driven Content**: Use narrative mode for presentations with logical flow\n",
|
|
|
|
|
+ "- **Complex Educational Material**: Combine both narrative and knowledge features\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Configuration Tuning\n",
|
|
|
|
|
+ "- **Context Window**: Adjust context window size (3-7 slides) based on presentation complexity\n",
|
|
|
|
|
+ "- **Knowledge Weight**: Fine-tune knowledge influence (0.1-0.5) based on content needs\n",
|
|
|
|
|
+ "- **Search Parameters**: Adjust similarity threshold and top-k values for optimal knowledge retrieval\n",
|
|
|
|
|
+ "- **Consistent Style**: Maintain consistent formatting across your presentation\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Performance Considerations\n",
|
|
|
|
|
+ "- **Memory Usage**: Monitor knowledge base memory consumption for large knowledge bases\n",
|
|
|
|
|
+ "- **Processing Time**: Knowledge integration adds processing time but improves quality\n",
|
|
|
|
|
+ "- **Caching**: Enable caching for repeated processing of the same presentations\n",
|
|
|
|
|
+ "- **Batch Processing**: Process multiple presentations efficiently with shared knowledge base\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "---\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Advanced Features\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Custom Knowledge Base Creation\n",
|
|
|
|
|
+ "Create domain-specific knowledge bases by:\n",
|
|
|
|
|
+ "1. **Organizing Content**: Structure markdown files by topic, domain, or presentation type\n",
|
|
|
|
|
+ "2. **Using Headers**: Employ clear markdown headers for better chunk extraction\n",
|
|
|
|
|
+ "3. **Including Examples**: Add concrete examples and case studies\n",
|
|
|
|
|
+ "4. **Maintaining Quality**: Ensure accuracy and relevance of knowledge content\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Integration with Existing Workflows\n",
|
|
|
|
|
+ "- **API Integration**: Use JSON output for seamless integration with video production tools\n",
|
|
|
|
|
+ "- **Batch Processing**: Process multiple presentations with shared knowledge bases\n",
|
|
|
|
|
+ "- **Custom Prompts**: Modify system prompts for specific use cases or audiences\n",
|
|
|
|
|
+ "- **Quality Assurance**: Implement review workflows for generated transcripts\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "### Troubleshooting Common Issues\n",
|
|
|
|
|
+ "- **Knowledge Base Not Loading**: Check file paths and permissions\n",
|
|
|
|
|
+ "- **Poor Knowledge Retrieval**: Adjust similarity thresholds and search parameters\n",
|
|
|
|
|
+ "- **Memory Issues**: Reduce knowledge base size or enable lazy loading\n",
|
|
|
|
|
+ "- **Processing Errors**: Enable graceful degradation for robust processing\n"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "365e0d35-3a76-4dbf-b83f-5f43b7613e3b",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "fb21e0eb-6534-4a74-a8be-11585c5816ea",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "2f267a58-d43e-4a4c-96e9-7875754f1b80",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "b3a09ab5-3bff-49e0-8ad3-75dc8153a9f0",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "a964a651-8a63-4d20-bc9c-49a043a8cf64",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "ff14ceca-df4a-4ea5-8da1-468be8367e07",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "5778eee0-ee4f-421b-81a3-a74f13b43d36",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "787e632c-9866-4283-82b0-c26d101a1020",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "52aaaa68-1553-4bd6-b2bd-25604f4515c2",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "82861c16-c84e-401f-94a4-343512b29bf6",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "c464ba5b-8c55-45d3-be5c-2f2258ea327f",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "0af5695f-f436-4652-82bd-233b014d3a15",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "f29420c4-40c9-48e5-9331-e5b487fb084e",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "ed3bae75-4ed0-4742-81af-2b0f155de947",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "a24c87dd-97b7-434a-b669-471f5dc3af29",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "54a23f65-207d-476c-9aac-ea61f5af6804",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "e7493249-d0e4-4ef9-bdf4-fffea7f9de70",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "2596be10-0ff9-4440-9385-dd135fc3a633",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "01fd46be-46b9-4fcf-b4ed-2cc31f5fed07",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "a203ca3d-92ba-40f6-be14-fa1943531a3d",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "metadata": {
|
|
|
|
|
+ "kernelspec": {
|
|
|
|
|
+ "display_name": "pptxTTS",
|
|
|
|
|
+ "language": "python",
|
|
|
|
|
+ "name": "pptxtts"
|
|
|
|
|
+ },
|
|
|
|
|
+ "language_info": {
|
|
|
|
|
+ "codemirror_mode": {
|
|
|
|
|
+ "name": "ipython",
|
|
|
|
|
+ "version": 3
|
|
|
|
|
+ },
|
|
|
|
|
+ "file_extension": ".py",
|
|
|
|
|
+ "mimetype": "text/x-python",
|
|
|
|
|
+ "name": "python",
|
|
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
|
|
+ "version": "3.13.2"
|
|
|
|
|
+ }
|
|
|
|
|
+ },
|
|
|
|
|
+ "nbformat": 4,
|
|
|
|
|
+ "nbformat_minor": 5
|
|
|
|
|
+}
|