@@ -0,0 +1,350 @@
+#!/usr/bin/env -S uv --quiet run --script
+# /// script
+# requires-python = ">=3.12"
+# dependencies = [
+#     "bs4",
+#     "httpx",
+#     "pydantic",
+#     "python-dateutil",
+#     "python-frontmatter",
+#     "python-slugify",
+#     "pytz",
+#     "rich",
+#     "typer",
+#     "markdown-it-py",
+# ]
+# ///
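+# Run this script directly; the shebang hands it to uv, which resolves the
+# dependencies listed above. Example invocation (assuming the file is saved
+# as, e.g., projects.py):
+#   ./projects.py parse
+#   ./projects.py update-metrics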
+import os
+import re
+from pathlib import Path
+from typing import Any
+from urllib.parse import urlparse
+
+import frontmatter
+import httpx
+import typer
+from bs4 import BeautifulSoup
+from bs4 import Tag
+from markdown_it import MarkdownIt
+from pydantic import BaseModel
+from pydantic import ConfigDict
+from pydantic import Field
+from rich import print
+from rich.progress import track
+from slugify import slugify
+
+
+app = typer.Typer(
+    add_help_option=False,
+    no_args_is_help=True,
+    rich_markup_mode="rich",
+)
+
+
+class Project(BaseModel):
+    """Model representing a Django project from the awesome list."""
+
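+    # extra="allow" keeps any additional frontmatter keys (e.g. ones added by
+    # hand to a generated file) intact when a project is re-loaded and re-saved.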
+    model_config = ConfigDict(extra="allow")
+
+    name: str
+    description: str
+    url: str
+    category: str
+    slug: str = Field(default="")
+    tags: list[str] = Field(default_factory=list)
+    github_stars: int | None = None
+    github_forks: int | None = None
+    github_last_update: str | None = None
+    previous_urls: list[str] = Field(default_factory=list)
+
+    def __init__(self, **data):
+        super().__init__(**data)
+        if not self.slug:
+            self.slug = slugify(self.name)
+
+
+def parse_project_line(line: Tag, category: str) -> Project | None:
+    """Parse a project line from the markdown and return a Project object."""
+    try:
+        # Find the project link
+        link = line.find("a")
+        if not link:
+            return None
+
+        name = link.text.strip()
+        url = link.get("href", "").strip()
+
+        # Get description (text after the link)
+        description = line.text.replace(name, "").strip()
+        description = re.sub(r"^\s*-\s*", "", description)  # Remove leading dash
+        description = re.sub(r"^\s*", "", description)  # Remove leading whitespace
+
+        if not all([name, url, description]):
+            return None
+
+        return Project(name=name, description=description, url=url, category=category)
+    except Exception as e:
+        print(f"[red]Error parsing project line: {e}[/red]")
+        return None
+
+
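+# Rendering the README to HTML first lets the parser below walk headings and
+# list items with BeautifulSoup instead of pattern-matching raw markdown.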
+def read_readme(file_path: Path) -> str:
+    """Read README content from a local file and convert it to HTML."""
+    markdown_content = file_path.read_text()
+    md = MarkdownIt()
+    html_content = md.render(markdown_content)
+    return html_content
+
+
+def parse_readme(content: str) -> list[Project]:
+    """Parse README content and extract projects."""
+    soup = BeautifulSoup(content, "html.parser")
+    projects = []
+    current_category = ""
+
+    for element in soup.find_all(["h2", "h3", "li"]):
+        if element.name in ["h2", "h3"]:
+            current_category = element.text.strip()
+        elif element.name == "li" and current_category:
+            if current_category == "Contents":
+                continue
+
+            project = parse_project_line(element, current_category)
+            if project:
+                projects.append(project)
+
+    return projects
+
+
+def merge_project_data(existing: dict[str, Any], new: dict[str, Any]) -> dict[str, Any]:
+    """
+    Merge existing project data with new data, preserving existing values
+    while updating with new information where appropriate.
+    """
+    # Start with the existing data
+    merged = existing.copy()
+
+    # Always update core fields from the README
+    core_fields = {"name", "url", "category"}
+    for field in core_fields:
+        if field in new:
+            # If URL is changing, store the old URL in previous_urls
+            if field == "url" and new["url"] != existing.get("url"):
+                previous_urls = merged.get("previous_urls", [])
+                old_url = existing.get("url")
+                if old_url and old_url not in previous_urls:
+                    previous_urls.append(old_url)
+                    merged["previous_urls"] = previous_urls
+            merged[field] = new[field]
+
+    # Update the description whenever it differs from the stored one
+    if "description" in new and new["description"] != existing.get("description", ""):
+        merged["description"] = new["description"]
+
+    # Update GitHub metrics if they exist in new data
+    github_fields = {"github_stars", "github_forks", "github_last_update"}
+    for field in github_fields:
+        if field in new and new[field] is not None:
+            merged[field] = new[field]
+
+    return merged
+
+
+def save_project(project: Project, output_dir: Path):
+    """Save project as a markdown file with frontmatter, preserving and merging existing content."""
+    output_file = output_dir / f"{project.slug}.md"
+    project_data = project.model_dump(exclude_none=True)
+
+    if output_file.exists():
+        try:
+            # Load existing file
+            existing_post = frontmatter.load(output_file)
+            existing_data = dict(existing_post.metadata)
+
+            # Merge data, favoring preservation of existing content
+            merged_data = merge_project_data(existing_data, project_data)
+
+            # Create new post with merged data but keep existing content
+            post = frontmatter.Post(existing_post.content, **merged_data)
+        except Exception as e:
+            print(
+                f"[yellow]Warning: Could not load existing file {output_file}, creating new: {e}[/yellow]"
+            )
+            post = frontmatter.Post(project.description, **project_data)
+    else:
+        # Create new file
+        post = frontmatter.Post(project.description, **project_data)
+
+    output_file.write_text(frontmatter.dumps(post))
+
+
+def extract_github_info(url: str) -> dict[str, str] | None:
+    """Extract owner and repo from a GitHub URL."""
+    parsed = urlparse(url)
+    if parsed.netloc != "github.com":
+        return None
+
+    parts = parsed.path.strip("/").split("/")
+    if len(parts) >= 2:
+        return {"owner": parts[0], "repo": parts[1]}
+    return None
+
+
+def get_github_metrics(
+    owner: str, repo: str, client: httpx.Client
+) -> tuple[dict, str | None]:
+    """
+    Fetch GitHub metrics for a repository.
+    Returns a tuple of (metrics_dict, new_url) where new_url is set if the repo has moved.
+    """
+    headers = {}
+    if github_token := os.environ.get("GITHUB_TOKEN"):
+        headers["Authorization"] = f"token {github_token}"
+
+    api_url = f"https://api.github.com/repos/{owner}/{repo}"
+    try:
+        response = client.get(
+            api_url,
+            headers=headers,
+            timeout=10.0,
+            follow_redirects=True,  # Enable following redirects
+        )
+
+        # Check if we followed a redirect
+        new_url = None
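+        # GitHub answers renamed or transferred repositories with a 301; once the
+        # redirect is followed, the JSON body points at the repository's new home.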
+        if len(response.history) > 0:
+            for r in response.history:
+                if r.status_code == 301:
+                    # Get the new location from the API response
+                    data = response.json()
+                    new_url = data.get("html_url")
+                    if new_url:
+                        print(
+                            f"[yellow]Repository moved: {owner}/{repo} -> {new_url}[/yellow]"
+                        )
+                    break
+
+        response.raise_for_status()
+        data = response.json()
+
+        return {
+            "github_stars": data["stargazers_count"],
+            "github_forks": data["forks_count"],
+            "github_last_update": data["updated_at"],
+        }, new_url
+
+    except httpx.HTTPError as e:
+        print(f"[red]Error fetching GitHub metrics for {owner}/{repo}: {str(e)}[/red]")
+        return {}, None
+
+
+def load_project(file_path: Path) -> Project | None:
+    """Load a project from a markdown file."""
+    try:
+        post = frontmatter.load(file_path)
+        return Project(**post.metadata)
+    except Exception as e:
+        print(f"[red]Error loading project from {file_path}: {str(e)}[/red]")
+        return None
+
+
+@app.command()
+def parse(readme_path: Path = Path("README.md"), output_dir: str = "_projects"):
+    """
+    Parse the local Awesome Django README and create individual project files with frontmatter.
+    Preserves existing file content and metadata while updating with new information from the README.
+    """
+    if not readme_path.exists():
+        print(f"[red]Error: README file not found at {readme_path}[/red]")
+        raise typer.Exit(1)
+
+    print(f"[bold blue]Reading README from {readme_path}...[/bold blue]")
+
+    # Create output directory
+    output_path = Path(output_dir)
+    output_path.mkdir(exist_ok=True)
+
+    # Read and parse README
+    content = read_readme(readme_path)
+    projects = parse_readme(content)
+
+    print(f"[green]Found {len(projects)} projects[/green]")
+
+    # Save individual project files
+    for project in projects:
+        save_project(project, output_path)
+        print(f"[green]Updated {project.name} in {project.slug}.md[/green]")
+
+
+@app.command()
+def update_metrics(projects_dir: Path = Path("_projects"), batch_size: int = 50):
+    """
+    Update GitHub metrics (stars, forks, last update) for all projects.
+    """
+    if not projects_dir.exists():
+        print(f"[red]Error: Projects directory not found at {projects_dir}[/red]")
+        raise typer.Exit(1)
+
+    print(
+        f"[bold blue]Updating GitHub metrics for projects in {projects_dir}...[/bold blue]"
+    )
+
+    # Load all projects
+    project_files = list(projects_dir.glob("*.md"))
+    projects = []
+    for file in project_files:
+        if project := load_project(file):
+            projects.append((file, project))
+
+    print(f"[green]Found {len(projects)} projects to update[/green]")
+
+    # Update metrics in batches to avoid rate limiting
+    with httpx.Client() as client:
+        for i in track(
+            range(0, len(projects), batch_size), description="Updating projects"
+        ):
+            batch = projects[i : i + batch_size]
+            for file_path, project in batch:
+                if github_info := extract_github_info(project.url):
+                    metrics, new_url = get_github_metrics(
+                        github_info["owner"], github_info["repo"], client
+                    )
+
+                    if metrics:
+                        # Update project with new metrics
+                        for key, value in metrics.items():
+                            setattr(project, key, value)
+
+                        # Update URL if repository has moved
+                        if new_url and new_url != project.url:
+                            # Store the old URL in previous_urls
+                            if not hasattr(project, "previous_urls"):
+                                project.previous_urls = []
+                            project.previous_urls.append(project.url)
+                            # Update to new URL
+                            project.url = new_url
+                            print(
+                                f"[yellow]Updated URL for {project.name}: {project.url}[/yellow]"
+                            )
+
+                        save_project(project, projects_dir)
+                        print(f"[green]Updated metrics for {project.name}[/green]")
+
+    print("[bold blue]Finished updating GitHub metrics![/bold blue]")
+
+
+if __name__ == "__main__":
+    app()