# Ensure the required libraries are installed i.e.
!pip install sentence-transformers qdrant-client requests IPython


# Step 1: Import necessary modules

In [2]:
import json
import os
import re
import uuid
from functools import lru_cache
from pathlib import Path

import requests
from IPython.display import Markdown, display
from qdrant_client import QdrantClient, models
from qdrant_client.models import SearchRequest
from sentence_transformers import SentenceTransformer, CrossEncoder

print("Libraries installed and modules imported successfully.")

Libraries installed and modules imported successfully.


# Step 2: Define Configuration and Global Variables
This cell contains all static configuration: the API key (read from the environment), the API URL and request headers, the model name, and the Qdrant collection name.

In [None]:
# --- Configuration ---
# The API key is read from the process environment so it never lives in the notebook.
# Keep any .env file out of version control and never hardcode keys in production code.

LLAMA_API_KEY = os.environ.get("LLAMA_API_KEY")
if LLAMA_API_KEY is None or LLAMA_API_KEY == "":
    raise ValueError("LLAMA_API_KEY not found. Please set it as an environment variable or in a .env file.")

# Model to request, the chat-completions endpoint, and the headers sent with each call.
LLAMA_MODEL = "Llama-4-Maverick-17B-128E-Instruct-FP8"
API_URL = "https://api.llama.com/v1/chat/completions"
HEADERS = {
    "Content-Type": "application/json",
    "Authorization": "Bearer " + LLAMA_API_KEY,
}

# Qdrant runs in-memory in this notebook, so no QDRANT_URL or QDRANT_API_KEY is needed.
# Name of the collection that is created in-memory and then queried.
MAIN_COLLECTION_NAME = "readme_blogs_latest"

print("Configuration variables and collection name set.")

Configuration variables and collection name set.


# Step 3: Define Helper Functions
This cell contains the functions that handle the core logic of the application: `get_qdrant_client`, `get_embedding_model`, `create_qdrant_collection`, `ingest_data_into_qdrant`, and `query_qdrant`.

In [None]:
def get_qdrant_client():
    """Build a Qdrant client that uses in-memory storage.

    The ":memory:" location is passed instead of a server URL or API key.
    Every call constructs a new client object.
    """
    in_memory_client = QdrantClient(":memory:")
    return in_memory_client

@lru_cache(maxsize=1)
def get_embedding_model():
    """Return the shared SentenceTransformer embedding model.

    The model is constructed on the first call and cached, so later calls
    (for example one per query) reuse the same instance instead of
    reloading the weights each time.

    Returns:
        SentenceTransformer: the 'all-MiniLM-L6-v2' model instance.
    """
    return SentenceTransformer('all-MiniLM-L6-v2')

def create_qdrant_collection(client, collection_name, vector_size):
    """Create a Qdrant collection with cosine distance if it does not exist yet.

    Existence is checked explicitly with ``collection_exists`` rather than by
    catching every exception from ``get_collection``: a transient failure must
    not be mistaken for "collection missing". The collection is created with
    ``create_collection``, which never drops existing data (unlike the
    deprecated ``recreate_collection``).

    Args:
        client: Qdrant client used to check for and create the collection.
        collection_name: Name of the collection.
        vector_size: Dimensionality of the vectors that will be stored.
    """
    if client.collection_exists(collection_name=collection_name):
        print(f"Collection '{collection_name}' already exists.")
        return

    print(f"Creating collection '{collection_name}'...")
    client.create_collection(
        collection_name=collection_name,
        vectors_config=models.VectorParams(size=vector_size, distance=models.Distance.COSINE),
    )
    print(f"Collection '{collection_name}' created.")

def ingest_data_into_qdrant(client, collection_name, embedding_model, data_chunks):
    """Embed text chunks and upsert them into a Qdrant collection.

    All chunks are encoded in a single batched call. The vector size used to
    create the collection is taken from the resulting embeddings, so no extra
    probe encoding is needed. Point ids are the chunk positions (0, 1, ...),
    so ingesting the same list again overwrites the same points.

    Args:
        client: Qdrant client to write to.
        collection_name: Target collection; created if it does not exist.
        embedding_model: Object whose ``encode`` turns a list of texts into
            one vector per text (each vector exposing ``tolist()``).
        data_chunks: Iterable of text chunks to store.
    """
    print(f"Ingesting data into collection '{collection_name}'...")
    chunks = list(data_chunks) if data_chunks else []
    if not chunks:
        print("No data chunks provided for ingestion.")
        return

    # One batched encode call instead of one model invocation per chunk.
    embeddings = embedding_model.encode(chunks)
    vectors = [vector.tolist() for vector in embeddings]

    points = [
        models.PointStruct(
            id=index,  # Unique ID for each point
            vector=vector,
            payload={"text": chunk_text},
        )
        for index, (chunk_text, vector) in enumerate(zip(chunks, vectors))
    ]

    # The collection's vector size must match the embedding model output.
    create_qdrant_collection(client, collection_name, len(vectors[0]))

    operation_info = client.upsert(
        collection_name=collection_name,
        wait=True,
        points=points,
    )
    print(f"Data ingestion complete. Status: {operation_info.status}")


@lru_cache(maxsize=1)
def _get_cross_encoder():
    """Load the reranking cross-encoder once and reuse it for later queries."""
    return CrossEncoder('cross-encoder/ms-marco-MiniLM-L6-v2')


def query_qdrant(query, client, collection_name, top_k=5, embedding_model=None, cross_encoder=None):
    """Retrieve the chunks most relevant to a query: vector search, then reranking.

    The query is embedded and used for a vector search that fetches
    ``top_k * 2`` candidates; a cross-encoder then scores each
    (query, chunk text) pair and the ``top_k`` highest-scoring hits are returned.

    Args:
        query: Query text.
        client: Qdrant client to search with.
        collection_name: Collection to search.
        top_k: Number of hits to return after reranking.
        embedding_model: Optional model with ``encode``; defaults to
            ``get_embedding_model()``.
        cross_encoder: Optional reranker with ``predict``; defaults to a
            cross-encoder that is loaded once and cached.

    Returns:
        list: Up to ``top_k`` hits ordered by descending cross-encoder score,
        or an empty list if the search fails or finds nothing.
    """
    if embedding_model is None:
        embedding_model = get_embedding_model()
    query_embedding = embedding_model.encode(query).tolist()

    # Over-fetch so the reranker has more candidates than it finally returns.
    candidate_limit = top_k * 2
    try:
        if hasattr(client, "query_points"):
            # query_points supersedes the deprecated search() method.
            response = client.query_points(
                collection_name=collection_name,
                query=query_embedding,
                limit=candidate_limit,
            )
            results = response.points
        else:
            # Older qdrant-client releases only provide search().
            results = client.search(
                collection_name=collection_name,
                query_vector=query_embedding,
                limit=candidate_limit,
            )
    except Exception as e:
        # Best effort: report the problem and let the caller handle "no results".
        print(f"Error during Qdrant search on collection '{collection_name}': {e}")
        return []

    if not results:
        print("No results found in Qdrant for the given query.")
        return []

    # Rerank using cross-encoder
    if cross_encoder is None:
        cross_encoder = _get_cross_encoder()
    pairs = [(query, hit.payload["text"]) for hit in results]
    scores = cross_encoder.predict(pairs)

    # Sort on the score only; the key avoids ever comparing hit objects.
    ranked = sorted(zip(scores, results), key=lambda pair: pair[0], reverse=True)
    return [hit for _, hit in ranked[:top_k]]

print("Helper functions for querying Qdrant defined.")

Helper functions for querying Qdrant defined.


# Step 4: Define the Main Blog Generation Function
This function orchestrates the RAG process by calling the helper functions, building the prompt, and making the API call.

In [None]:
def _topic_to_filename(topic):
    """Turn a topic into a filename: spaces and filesystem-unsafe characters become '_'."""
    return re.sub(r"[^\w.\-]", "_", topic.replace(" ", "_")) + "_blog.md"


def _extract_blog_text(response_json):
    """Return the generated text from 'completion_message.content', or '' if absent.

    'content' is accepted either as an object carrying a 'text' field or as a plain string.
    """
    completion = response_json.get("completion_message")
    if not isinstance(completion, dict):
        return ""
    content = completion.get("content")
    if isinstance(content, dict):
        return content.get("text") or ""
    if isinstance(content, str):
        return content
    return ""


def generate_blog(topic, timeout=120):
    """Generate a technical blog post about ``topic`` using RAG.

    Steps: ingest the example chunks into a fresh in-memory Qdrant collection,
    retrieve the chunks most relevant to the topic, send them as context to
    the Llama chat-completions API, then save the result as
    ``<topic>_blog.md`` and render it in the notebook.

    Args:
        topic: Subject of the blog post; also used for the title and filename.
        timeout: Seconds to wait for the API before giving up, so a stalled
            connection cannot hang the notebook indefinitely.

    Returns:
        str: The markdown blog post (starting with ``# <topic>``) on success,
        otherwise a human-readable error message. Failures are reported
        through the return value rather than raised.
    """
    print("Getting Qdrant client and querying pre-existing collection...")
    client = get_qdrant_client()
    embedding_model = get_embedding_model()

    # IMPORTANT: For in-memory Qdrant, you MUST ingest your data every time
    # the script runs or the client is initialized, as it's not persistent.
    # Replace this with your actual data loading and chunking.
    # Example placeholder data:
    example_data_chunks = [
        "Llama 3 is a powerful large language model developed by Meta. It excels at various NLP tasks.",
        "To build a chatbot with Llama 3, you'll typically use an API to send prompts and receive responses.",
        "Messenger Platform allows developers to create interactive experiences for Facebook Messenger users.",
        "Integrating Llama 3 with Messenger involves setting up webhooks and handling message events.",
        "Key steps include setting up a Facebook App, configuring webhooks, and deploying your bot's backend.",
        "Best practices for chatbots include clear error handling, concise responses, and user guidance.",
        "Security is crucial; always protect your API keys and ensure your webhook endpoints are secure."
    ]
    ingest_data_into_qdrant(client, MAIN_COLLECTION_NAME, embedding_model, example_data_chunks)
    # End of IMPORTANT section for data ingestion

    # Query relevant sections from the main collection
    relevant_chunks = query_qdrant(topic, client, MAIN_COLLECTION_NAME)

    if not relevant_chunks:
        error_message = "No relevant content found in the knowledge base. Cannot generate blog post."
        print(error_message)
        return error_message

    context = "\n".join([chunk.payload["text"] for chunk in relevant_chunks])

    system_prompt = f"""
    You are a technical writer specializing in creating comprehensive documentation-based blog posts. 
    Use the following context from technical documentation to write an in-depth blog post about {topic}.

    Requirements:
    1. Structure the blog with clear sections and subsections
    2. Include code examples and configuration details where relevant
    3. Explain architectural components using diagrams (describe in markdown)
    4. Add setup instructions and best practices
    5. Use technical terminology appropriate for developers

    Context:
    {context}
    """

    payload = {
        "model": LLAMA_MODEL,
        "messages": [
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": f"Write a detailed technical blog post about {topic}"}
        ],
        "temperature": 0.5,
        "max_tokens": 4096
    }

    print("Sending request to Llama API for blog generation...")
    try:
        # Without a timeout, requests waits forever on a stalled connection.
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=timeout)

        if response.status_code != 200:
            error_message = f"Error: {response.status_code} - {response.text}"
            print(error_message)
            return error_message

        response_json = response.json()
        blog_content = _extract_blog_text(response_json)

        if not blog_content:
            print("Warning: 'completion_message.content.text' was empty or not found in API response.")
            print(f"Full API response: {response_json}")
            return "Error: Could not extract blog content from API response."

        # Format as markdown
        markdown_content = f"# {topic}\n\n{blog_content}"

        # Save to file; the name is sanitized so topics containing '/' or ':'
        # cannot point outside the working directory or fail to open.
        output_path = Path(_topic_to_filename(topic))
        with open(output_path, "w", encoding="utf-8") as f:
            f.write(markdown_content)

        print(f"Blog post generated and saved to {output_path}.")

        # Display markdown content directly in the notebook
        display(Markdown(markdown_content))
        return markdown_content

    except Exception as e:
        # Network, JSON-decoding and file errors are all reported to the caller as a message.
        error_message = f"An unexpected error occurred: {str(e)}"
        print(error_message)
        return error_message

print("Blog generation function defined.")

Blog generation function defined.


# Step 5: Specify the topic for the blog post and execute the Blog Generation Process


In [None]:
# Specify the topic for the blog post
topic = "Building a Messenger Chatbot with Llama 3"

# Generate the blog post; on success generate_blog also saves and displays it.
blog_content = generate_blog(topic)

# generate_blog returns an error message instead of a post when it fails.
# A successful post always starts with the "# <topic>" heading, so check for
# that rather than searching for the word "Error", which a real post may
# legitimately contain.
if not (isinstance(blog_content, str) and blog_content.startswith(f"# {topic}")):
    print(f"Blog generation failed: {blog_content}")

Getting Qdrant client and querying pre-existing collection...


 results = client.search(


