|
@@ -0,0 +1,355 @@
|
|
|
|
|
+{
|
|
|
|
|
+ "cells": [
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "6c33ba3a",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# PowerPoint to Narrative-Aware Voiceover Transcript Generator\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "This notebook demonstrates the complete workflow for converting PowerPoint presentations into AI-generated voiceover transcripts with narrative continuity using Llama 4 Maverick through the Llama API.\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Overview\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "This enhanced workflow performs the following operations:\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "1. **Content Extraction**: Pulls speaker notes and visual elements from PowerPoint slides\n",
|
|
|
|
|
+ "2. **Image Conversion**: Transforms slides into high-quality images for AI analysis\n",
|
|
|
|
|
+ "3. **Narrative-Aware Processing**: Uses previous slide transcripts as context for continuity\n",
|
|
|
|
|
+ "4. **Transcript Generation**: Creates natural-sounding voiceover content with smooth transitions\n",
|
|
|
|
|
+ "5. **Speech Optimization**: Converts numbers, technical terms, and abbreviations to spoken form\n",
|
|
|
|
|
+ "6. **Results Export**: Saves transcripts and context information in multiple formats\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Prerequisites\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Before running this notebook, ensure you have:\n",
|
|
|
|
|
+ "- Created a `.env` file with your `LLAMA_API_KEY`\n",
|
|
|
|
|
+ "- Updated `config.yaml` with your presentation settings under `current_project` (`pptx_file`, `extension`, and `output_dir`)\n",
|
|
|
|
|
+ "---"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "d8965447",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "## Setup and Configuration\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Import required libraries and load environment configuration."
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "21a962b2",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Import required libraries\n",
|
|
|
|
|
+ "import pandas as pd\n",
|
|
|
|
|
+ "import os\n",
|
|
|
|
|
+ "from pathlib import Path\n",
|
|
|
|
|
+ "from dotenv import load_dotenv\n",
|
|
|
|
|
+ "import matplotlib.pyplot as plt\n",
|
|
|
|
|
+ "from IPython.display import display\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Load environment variables from .env file\n",
|
|
|
|
|
+ "load_dotenv()\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Verify setup\n",
|
|
|
|
|
+ "if os.getenv('LLAMA_API_KEY'):\n",
|
|
|
|
|
+ " print(\"SUCCESS: Environment loaded successfully!\")\n",
|
|
|
|
|
+ " print(\"SUCCESS: Llama API key found\")\n",
|
|
|
|
|
+ "else:\n",
|
|
|
|
|
+ " print(\"WARNING: LLAMA_API_KEY not found in .env file\")\n",
|
|
|
|
|
+ " print(\"Please check your .env file and add your API key\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "71c1c8bd",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Import custom modules\n",
|
|
|
|
|
+ "try:\n",
|
|
|
|
|
+ " from src.core.pptx_processor import extract_pptx_notes, pptx_to_images_and_notes\n",
|
|
|
|
|
+ " from src.processors.narrative_transcript_generator import (\n",
|
|
|
|
|
+ " NarrativeTranscriptProcessor,\n",
|
|
|
|
|
+ " process_slides_with_narrative\n",
|
|
|
|
|
+ " )\n",
|
|
|
|
|
+ " from src.config.settings import load_config, get_config\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ " print(\"SUCCESS: All modules imported successfully!\")\n",
|
|
|
|
|
+ " print(\"- PPTX processor ready\")\n",
|
|
|
|
|
+ " print(\"- Narrative transcript generator ready\")\n",
|
|
|
|
|
+ " print(\"- Configuration manager ready\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "except ImportError as e:\n",
|
|
|
|
|
+ "    print(f\"ERROR: Import error: {e}\")\n",
|
|
|
|
|
+ "    print(\"Make sure you're running from the project root directory\")\n",
|
|
|
|
|
+ "    # Re-raise so execution stops here; otherwise later cells fail with a confusing NameError\n",
|
|
|
|
|
+ "    raise"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "53781172",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Load and display configuration\n",
|
|
|
|
|
+ "config = load_config()\n",
|
|
|
|
|
+ "print(\"SUCCESS: Configuration loaded successfully!\")\n",
|
|
|
|
|
+ "print(\"\\nCurrent Settings:\")\n",
|
|
|
|
|
+ "print(f\"- Llama Model: {config['api']['llama_model']}\")\n",
|
|
|
|
|
+ "print(f\"- Image DPI: {config['processing']['default_dpi']}\")\n",
|
|
|
|
|
+ "print(f\"- Image Format: {config['processing']['default_format']}\")\n",
|
|
|
|
|
+ "print(f\"- Context Window: 5 previous slides (default)\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "9386e035",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "# Configure file paths from config.yaml\n",
|
|
|
|
|
+ "pptx_file = config['current_project']['pptx_file'] + config['current_project']['extension']\n",
|
|
|
|
|
+ "output_dir = config['current_project']['output_dir']\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(\"File Configuration:\")\n",
|
|
|
|
|
+ "print(f\"- Input File: {pptx_file}\")\n",
|
|
|
|
|
+ "print(f\"- Output Directory: {output_dir}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Verify input file exists\n",
|
|
|
|
|
+ "if Path(pptx_file).exists():\n",
|
|
|
|
|
+ " file_size = Path(pptx_file).stat().st_size / 1024 / 1024\n",
|
|
|
|
|
+ " print(f\"- SUCCESS: Input file found ({file_size:.1f} MB)\")\n",
|
|
|
|
|
+ "else:\n",
|
|
|
|
|
+ " print(f\"- ERROR: Input file not found: {pptx_file}\")\n",
|
|
|
|
|
+ " print(\" Please update the 'pptx_file' path in config.yaml\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Create output directory if needed\n",
|
|
|
|
|
+ "Path(output_dir).mkdir(parents=True, exist_ok=True)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: Output directory ready\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "ea4851e6",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "---\n",
|
|
|
|
|
+ "## Processing Pipeline\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Execute the main processing pipeline in three key steps."
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "0f098fdf",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "### Step 1: Extract Content and Convert to Images\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Extract speaker notes and slide text, then convert the presentation to high-quality images for AI analysis."
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "644ee94c",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "print(\"PROCESSING: Converting PPTX to images and extracting notes...\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "result = pptx_to_images_and_notes(\n",
|
|
|
|
|
+ " pptx_path=pptx_file,\n",
|
|
|
|
|
+ " output_dir=output_dir,\n",
|
|
|
|
|
+ " extract_notes=True\n",
|
|
|
|
|
+ ")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "notes_df = result['notes_df']\n",
|
|
|
|
|
+ "image_files = result['image_files']\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(f\"\\nSUCCESS: Processing completed successfully!\")\n",
|
|
|
|
|
+ "print(f\"- Processed {len(image_files)} slides\")\n",
|
|
|
|
|
+ "print(f\"- Images saved to: {result['output_dir']}\")\n",
|
|
|
|
|
+ "print(f\"- Found notes on {notes_df['has_notes'].sum()} slides\")\n",
|
|
|
|
|
+ "print(f\"- DataFrame shape: {notes_df.shape}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Show sample data\n",
|
|
|
|
|
+ "print(\"\\nSample Data (First 5 slides):\")\n",
|
|
|
|
|
+ "display(notes_df[['slide_number', 'slide_title', 'has_notes', 'notes_word_count', 'slide_text_word_count']].head())"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "1f95749d",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "### Step 2: Generate Narrative-Aware AI Transcripts\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Use the Llama vision model to analyze each slide image and generate natural-sounding voiceover transcripts with narrative continuity.\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "This process:\n",
|
|
|
|
|
+ "- Analyzes slide visual content using AI vision\n",
|
|
|
|
|
+ "- Uses transcripts from previous slides as context\n",
|
|
|
|
|
+ "- Combines slide content with speaker notes\n",
|
|
|
|
|
+ "- Generates speech-optimized transcripts with smooth transitions\n",
|
|
|
|
|
+ "- Maintains consistent terminology throughout the presentation\n",
|
|
|
|
|
+ "- Converts numbers and technical terms to spoken form"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "fe564b99",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "print(\"PROCESSING: Starting narrative-aware AI transcript generation...\")\n",
|
|
|
|
|
+ "print(f\"- Processing {len(notes_df)} slides\")\n",
|
|
|
|
|
+ "print(f\"- Using model: {config['api']['llama_model']}\")\n",
|
|
|
|
|
+ "# Number of previous slide transcripts passed to the model as narrative context\n",
|
|
|
|
|
+ "CONTEXT_WINDOW_SIZE = 5\n",
|
|
|
|
|
+ "print(f\"- Context window: {CONTEXT_WINDOW_SIZE} previous slides\")\n",
|
|
|
|
|
+ "print(f\"- Using previous transcripts as context for narrative continuity\")\n",
|
|
|
|
|
+ "print(\"- This may take several minutes...\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Initialize processor and generate transcripts with narrative continuity\n",
|
|
|
|
|
+ "processor = NarrativeTranscriptProcessor(context_window_size=CONTEXT_WINDOW_SIZE)\n",
|
|
|
|
|
+ "processed_df = processor.process_slides_dataframe_with_narrative(\n",
|
|
|
|
|
+ " df=notes_df,\n",
|
|
|
|
|
+ " output_dir=output_dir,\n",
|
|
|
|
|
+ " save_context=True\n",
|
|
|
|
|
+ ")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(f\"\\nSUCCESS: Narrative-aware transcript generation completed!\")\n",
|
|
|
|
|
+ "print(f\"- Generated {len(processed_df)} transcripts\")\n",
|
|
|
|
|
+ "print(f\"- Average length: {processed_df['ai_transcript'].str.len().mean():.0f} characters\")\n",
|
|
|
|
|
+ "print(f\"- Total words: {processed_df['ai_transcript'].str.split().str.len().sum():,}\")\n",
|
|
|
|
|
+ "print(f\"- Context information saved to: {Path(output_dir) / 'narrative_context'}/\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "5cff4b70",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "### Step 3: Save Results\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "Save results in multiple formats for different use cases."
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "8463ac3a",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "print(\"PROCESSING: Saving results in multiple formats...\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Create output directory\n",
|
|
|
|
|
+ "os.makedirs(output_dir, exist_ok=True)\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Save complete results with all metadata\n",
|
|
|
|
|
+ "output_file = Path(output_dir) / \"narrative_transcripts.csv\"\n",
|
|
|
|
|
+ "processed_df.to_csv(output_file, index=False)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: Complete results saved to {output_file}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Save transcript-only version for voiceover work\n",
|
|
|
|
|
+ "transcript_only = processed_df[['slide_number', 'slide_title', 'ai_transcript', 'context_slides_used']]\n",
|
|
|
|
|
+ "transcript_file = Path(output_dir) / \"narrative_transcripts_clean.csv\"\n",
|
|
|
|
|
+ "transcript_only.to_csv(transcript_file, index=False)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: Clean transcripts saved to {transcript_file}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Save as JSON for API integration\n",
|
|
|
|
|
+ "json_file = Path(output_dir) / \"narrative_transcripts.json\"\n",
|
|
|
|
|
+ "processed_df.to_json(json_file, orient='records', indent=2)\n",
|
|
|
|
|
+ "print(f\"- SUCCESS: JSON format saved to {json_file}\")\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "# Summary statistics\n",
|
|
|
|
|
+ "total_words = processed_df['ai_transcript'].str.split().str.len().sum()\n",
|
|
|
|
|
+ "reading_time = total_words / 150 # Assuming 150 words per minute\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "print(f\"\\nExport Summary:\")\n",
|
|
|
|
|
+ "print(f\"- Total slides processed: {len(processed_df)}\")\n",
|
|
|
|
|
+ "print(f\"- Slides with speaker notes: {processed_df['has_notes'].sum()}\")\n",
|
|
|
|
|
+ "print(f\"- Total transcript words: {total_words:,}\")\n",
|
|
|
|
|
+ "print(f\"- Average transcript length: {processed_df['ai_transcript'].str.len().mean():.0f} characters\")\n",
|
|
|
|
|
+ "print(f\"- Estimated reading time: {reading_time:.1f} minutes\")\n",
|
|
|
|
|
+ "print(f\"- Average context slides per slide: {processed_df['context_slides_used'].mean():.1f}\")"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "markdown",
|
|
|
|
|
+ "id": "8728d2ac",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "source": [
|
|
|
|
|
+ "---\n",
|
|
|
|
|
+ "# Completion Summary\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Successfully Generated:\n",
|
|
|
|
|
+ "- **Narrative-Aware Transcripts**: Context-aware voiceover content with smooth transitions\n",
|
|
|
|
|
+ "- **Consistent Terminology**: Maintained terminology consistency throughout presentation\n",
|
|
|
|
|
+ "- **Multiple Formats**: CSV and JSON exports for different use cases\n",
|
|
|
|
|
+ "- **Context Analysis**: Detailed information about narrative flow and relationships\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Output Files:\n",
|
|
|
|
|
+ "- `narrative_transcripts.csv` - Complete dataset with context information\n",
|
|
|
|
|
+ "- `narrative_transcripts_clean.csv` - Clean transcripts for voiceover work\n",
|
|
|
|
|
+ "- `narrative_transcripts.json` - JSON format for API integration\n",
|
|
|
|
|
+ "- `narrative_context/slide_contexts.json` - Individual slide context data\n",
|
|
|
|
|
+ "- `narrative_context/narrative_summary.json` - Overall narrative analysis\n",
|
|
|
|
|
+ "- Individual slide images in PNG/JPEG format\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Next Steps:\n",
|
|
|
|
|
+ "1. **Review** generated transcripts for narrative flow and accuracy\n",
|
|
|
|
|
+ "2. **Edit** any content that needs refinement\n",
|
|
|
|
|
+ "3. **Create** voiceover recordings or use TTS systems\n",
|
|
|
|
|
+ "4. **Integrate** JSON data into your video production workflow\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "## Tips for Better Results:\n",
|
|
|
|
|
+ "- **Rich Speaker Notes**: Slides with detailed notes generate better contextual transcripts\n",
|
|
|
|
|
+ "- **Clear Visuals**: High-contrast slides with readable text work best\n",
|
|
|
|
|
+ "- **Consistent Style**: Maintain consistent formatting across your presentation\n",
|
|
|
|
|
+ "- **Context Window**: Adjust the `context_window_size` argument of `NarrativeTranscriptProcessor` (typically 3-7 slides) based on presentation complexity\n",
|
|
|
|
|
+ "- **Review Context**: Check the narrative_context files to understand how continuity was maintained\n",
|
|
|
|
|
+ "\n",
|
|
|
|
|
+ "---"
|
|
|
|
|
+ ]
|
|
|
|
|
+ },
|
|
|
|
|
+ {
|
|
|
|
|
+ "cell_type": "code",
|
|
|
|
|
+ "execution_count": null,
|
|
|
|
|
+ "id": "7122cdf6-667e-4ae4-8ce7-67cfc32577c8",
|
|
|
|
|
+ "metadata": {},
|
|
|
|
|
+ "outputs": [],
|
|
|
|
|
+ "source": []
|
|
|
|
|
+ }
|
|
|
|
|
+ ],
|
|
|
|
|
+ "metadata": {
|
|
|
|
|
+ "kernelspec": {
|
|
|
|
|
+ "display_name": "promptTesting",
|
|
|
|
|
+ "language": "python",
|
|
|
|
|
+ "name": "prompttesting"
|
|
|
|
|
+ },
|
|
|
|
|
+ "language_info": {
|
|
|
|
|
+ "codemirror_mode": {
|
|
|
|
|
+ "name": "ipython",
|
|
|
|
|
+ "version": 3
|
|
|
|
|
+ },
|
|
|
|
|
+ "file_extension": ".py",
|
|
|
|
|
+ "mimetype": "text/x-python",
|
|
|
|
|
+ "name": "python",
|
|
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
|
|
+ "version": "3.13.2"
|
|
|
|
|
+ }
|
|
|
|
|
+ },
|
|
|
|
|
+ "nbformat": 4,
|
|
|
|
|
+ "nbformat_minor": 5
|
|
|
|
|
+}
|