- """Transcript generation processor for PPTX slides."""
- from pathlib import Path
- from typing import Optional, Union
- import pandas as pd
- from tqdm import tqdm
- from ..config.settings import get_processing_config
- from ..core.llama_client import LlamaClient
- class TranscriptProcessor:
- """Processor for generating transcripts from slide images and notes."""

    def __init__(self, api_key: Optional[str] = None):
        """
        Initialize transcript processor.

        Args:
            api_key: Llama API key. If None, will be loaded from config/environment.
        """
        self.client = LlamaClient(api_key=api_key)
        self.processing_config = get_processing_config()

    def process_single_slide(
        self,
        image_path: Union[str, Path],
        speaker_notes: str = "",
        system_prompt: Optional[str] = None,
    ) -> str:
        """
        Process a single slide to generate a transcript.

        Args:
            image_path: Path to the slide image
            speaker_notes: Speaker notes for the slide
            system_prompt: Custom system prompt. If None, uses default from config.

        Returns:
            Generated transcript text
        """
        return self.client.generate_transcript(
            image_path=str(image_path),
            speaker_notes=speaker_notes,
            system_prompt=system_prompt,
            stream=False,
        )

    def process_slides_dataframe(
        self,
        df: pd.DataFrame,
        output_dir: Union[str, Path],
        system_prompt: Optional[str] = None,
    ) -> pd.DataFrame:
        """
        Process slides from a DataFrame containing slide information.

        Args:
            df: DataFrame with slide information (from extract_pptx_notes)
            output_dir: Directory containing slide images
            system_prompt: Custom system prompt. If None, uses default from config.

        Returns:
            DataFrame with added 'ai_transcript' column
        """
        output_dir = Path(output_dir)
        df_copy = df.copy()

        for i in tqdm(range(len(df_copy)), desc="Processing slides"):
            # Get data for the current slide
            slide_filename = df_copy.iloc[i]["image_filename"]
            speaker_notes = (
                df_copy.iloc[i]["speaker_notes"]
                if pd.notna(df_copy.iloc[i]["speaker_notes"])
                else ""
            )
            image_path = output_dir / slide_filename

            # Generate transcript
            transcript = self.process_single_slide(
                image_path=image_path,
                speaker_notes=speaker_notes,
                system_prompt=system_prompt,
            )

            # Write back by index label so the positional reads (.iloc) and the
            # label-based write (.loc) stay aligned even when df does not use a
            # default RangeIndex
            df_copy.loc[df_copy.index[i], "ai_transcript"] = transcript

        return df_copy


def process_slides(
    df: pd.DataFrame,
    output_dir: Union[str, Path] = "slide_images",
    api_key: Optional[str] = None,
    system_prompt: Optional[str] = None,
) -> pd.DataFrame:
    """
    Legacy function for backward compatibility with notebook code.

    Process slides from a DataFrame to generate transcripts.

    Args:
        df: DataFrame with slide information (from extract_pptx_notes)
        output_dir: Directory containing slide images
        api_key: Llama API key. If None, will be loaded from config/environment.
        system_prompt: Custom system prompt. If None, uses default from config.

    Returns:
        DataFrame with added 'ai_transcript' column
    """
    processor = TranscriptProcessor(api_key=api_key)
    return processor.process_slides_dataframe(df, output_dir, system_prompt)
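

# ---------------------------------------------------------------------------
# Usage sketch (only runs when the module is executed with `python -m ...`,
# since the relative imports above require a package context). This is an
# illustrative example, not part of the public API: the PPTX path, the output
# CSV name, and the import location of `extract_pptx_notes` are assumptions —
# adjust them to match where that helper actually lives in this package. The
# column names it is expected to produce ('image_filename', 'speaker_notes')
# are the ones consumed by process_slides_dataframe above.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    # Hypothetical import path for the notes/image extractor; replace as needed.
    from ..core.pptx_extractor import extract_pptx_notes

    # Extract slide images and speaker notes into a DataFrame (assumed signature).
    slides_df = extract_pptx_notes("talk.pptx", output_dir="slide_images")

    # API key is read from config/environment when not passed explicitly.
    processor = TranscriptProcessor()
    result_df = processor.process_slides_dataframe(slides_df, output_dir="slide_images")

    # Persist the generated transcripts alongside the original slide metadata.
    result_df.to_csv("slide_transcripts.csv", index=False)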