In [None]:
import gc
import json
import os
import re
from pathlib import Path

import torch
from transformers import pipeline

In [None]:
# Use Llama 3.3 for high-quality rewriting
DEFAULT_MODEL = "meta-llama/Llama-3.3-70B-Instruct"

# Set up directories
base_dir = Path("llama_data")
reports_dir = base_dir / "final_reports"        # input: report_*.txt files are read from here
rewritten_dir = base_dir / "rewritten_reports"  # output: rewritten reports are saved here

# parents=True so that a fresh working directory (no llama_data/ yet) does not
# raise FileNotFoundError; exist_ok=True keeps the cell safe to re-run.
rewritten_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Instruction that is prepended to every report before it is sent to the model.
# Replace the placeholder below with your actual request: the final cell of this
# notebook refuses to start while the value is still the placeholder text.
ORIGINAL_USER_INPUT = "[Your input here - describe what you want Llama to do with these reports]"

In [None]:
# NOTE(review): the right-hand side of this assignment is missing in this copy of
# the notebook, so the cell is a SyntaxError as written. rewrite_report() sends
# SYS_PROMPT as the "system" message of every conversation; supply the prompt
# string here before running the notebook.
SYS_PROMPT =

In [None]:
# Build the text-generation pipeline once; rewrite_report() reuses this single
# object for every report. The weights are requested in bfloat16 and device
# placement is left to the library via device_map="auto".
text_pipeline = pipeline(
    task="text-generation",
    model=DEFAULT_MODEL,
    device_map="auto",
    model_kwargs={"torch_dtype": torch.bfloat16},
)

In [None]:
def get_report_files(directory=None):
    """Collect the report files to rewrite, ordered by report number.

    Args:
        directory: Folder to scan for ``report_*.txt`` files. When omitted,
            the module-level ``reports_dir`` is used.

    Returns:
        A list of ``Path`` objects sorted by the integer that follows
        ``report_`` in the file name, so ``report_2`` comes before
        ``report_10``. Matching files without a number are placed last,
        ordered by name. An empty list is returned (after printing an error)
        when the directory does not exist.
    """
    source_dir = reports_dir if directory is None else Path(directory)

    if not source_dir.exists():
        print(f"Error: Reports directory '{source_dir}' not found.")
        return []

    def _numeric_order(path):
        # A plain sort of paths is lexicographic and would yield
        # report_1, report_10, report_2; compare the parsed integer instead.
        number = re.match(r"report_(\d+)", path.name)
        if number:
            return (0, int(number.group(1)), path.name)
        return (1, 0, path.name)

    report_files = sorted(source_dir.glob("report_*.txt"), key=_numeric_order)

    print(f"Found {len(report_files)} report files.")
    return report_files

In [None]:
def _extract_report_id(report_filename):
    """Return the digits following ``report_`` in the file name, or "unknown"."""
    id_match = re.search(r'report_(\d+)_', report_filename)
    return id_match.group(1) if id_match else "unknown"


def _extract_report_title(report_content):
    """Return the text of a leading ``# `` heading, or "Unknown Report"."""
    # Only a heading on the very first line counts. A final line without a
    # trailing newline and CRLF line endings are both tolerated.
    title_match = re.match(r'# ([^\r\n]+)', report_content)
    title = title_match.group(1).strip() if title_match else ""
    return title or "Unknown Report"


def _build_rewritten_filename(report_id, report_title):
    """Build a filesystem-safe output file name from the report id and title."""
    # Titles are free text; characters such as "/" or ":" would otherwise
    # produce an invalid path or write outside the output directory.
    safe_title = re.sub(r'[^\w.-]', '_', report_title)[:30]
    return f"rewritten_{report_id}_{safe_title}.txt"


def rewrite_report(report_path, original_user_input, max_new_tokens=4000, temperature=0.7):
    """Rewrite a single report using Llama 3.3 and save the result.

    The report text is appended to ``original_user_input`` and sent, together
    with the module-level ``SYS_PROMPT``, through ``text_pipeline``. The reply
    is written to ``rewritten_dir`` as ``rewritten_<id>_<title>.txt``.

    Args:
        report_path: ``Path`` of the report file to rewrite.
        original_user_input: Instruction text placed before the report.
        max_new_tokens: Upper bound on generated tokens (default 4000).
        temperature: Sampling temperature (default 0.7).

    Returns:
        The ``Path`` of the saved rewritten report, or ``None`` when the
        report could not be read, is empty, or the result could not be saved.
    """
    report_filename = report_path.name
    report_id = _extract_report_id(report_filename)

    print(f"Processing report {report_id}: {report_filename}")

    # Read the report content
    try:
        with open(report_path, "r", encoding="utf-8") as f:
            report_content = f.read()
    except (OSError, UnicodeError) as e:
        print(f"Error reading report file: {e}")
        return None

    # Nothing to rewrite; skip before spending a generation on an empty prompt.
    if not report_content.strip():
        print(f"  Skipping empty report: {report_filename}")
        return None

    report_title = _extract_report_title(report_content)

    # Create the prompt for rewriting
    user_prompt = f"""
{original_user_input}

Here's the report to rewrite:

{report_content}
"""

    # A fresh conversation is built for every report, so nothing from a
    # previously processed report is carried into this request.
    conversation = [
        {"role": "system", "content": SYS_PROMPT},
        {"role": "user", "content": user_prompt},
    ]

    # Generate the rewritten report
    output = text_pipeline(
        conversation,
        max_new_tokens=max_new_tokens,
        temperature=temperature,
        do_sample=True,
    )

    # The last message of the returned conversation is the assistant's reply.
    assistant_response = output[0]["generated_text"][-1]
    rewritten_content = assistant_response["content"]

    # Save the rewritten report
    rewritten_path = rewritten_dir / _build_rewritten_filename(report_id, report_title)
    try:
        rewritten_dir.mkdir(parents=True, exist_ok=True)
        with open(rewritten_path, "w", encoding="utf-8") as f:
            f.write(rewritten_content)
    except OSError as e:
        print(f"Error saving rewritten report: {e}")
        return None

    print(f"  Saved rewritten report to: {rewritten_path}")
    return rewritten_path

In [None]:
def rewrite_all_reports():
    """Rewrite every report found by ``get_report_files()``.

    Each report is passed to ``rewrite_report()`` together with the
    module-level ``ORIGINAL_USER_INPUT``.

    Returns:
        A list with the output path of every report that was rewritten
        successfully; reports for which ``rewrite_report()`` returned ``None``
        are left out. An empty list is returned when no reports are found.
    """
    report_files = get_report_files()

    if not report_files:
        print("No reports to rewrite.")
        return []

    rewritten_paths = []

    for report_path in report_files:
        rewritten_path = rewrite_report(report_path, ORIGINAL_USER_INPUT)

        if rewritten_path:
            rewritten_paths.append(rewritten_path)

        # Reports are already independent: rewrite_report() builds a new
        # conversation on every call. This collection pass only releases
        # Python objects left over from the previous generation.
        gc.collect()

        print("\n--------\n")

    return rewritten_paths

In [None]:
print("Starting report rewriting process...")

# Validate user input. Surrounding whitespace is ignored so that a blank value,
# or the placeholder wrapped in newlines, does not slip through and trigger a
# full (and expensive) model run.
_placeholder_input = "[Your input here - describe what you want Llama to do with these reports]"
_user_request = ORIGINAL_USER_INPUT.strip()

if not _user_request or _user_request == _placeholder_input:
    print("Please edit the ORIGINAL_USER_INPUT cell to specify your request before running this cell.")
else:
    # Process all reports
    rewritten_paths = rewrite_all_reports()

    print("\nReport rewriting complete!")
    print(f"Rewritten {len(rewritten_paths)} reports.")

    # List all rewritten reports
    if rewritten_paths:
        print("\nRewritten reports:")
        for path in rewritten_paths:
            print(f"- {path}")