api.py 3.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100
  1. """
  2. This script parses the README.md and generates a machine-readable
  3. data structure from it, for publication as an API.
  4. https://springload.github.io/awesome-wagtail/api/v1/readme.json
  5. It is automatically run as part of Travis builds, also validating the README
  6. formatting, and the API endpoint is deployed on successful builds on master.
  7. See also:
  8. - https://djangopackages.org/api/v3/grids/wagtail-cms/
  9. - https://github.com/awesomerank/rank
  10. """
  11. import json
  12. from datetime import datetime, timezone
  13. from pathlib import Path
# URL path of the generated JSON file; the script entry point appends it to
# "./dist" to build the output file location.
API_PATH = '/api/v1/readme.json'
  15. def parse_line(line: str, category: str) -> dict[str, str]:
  16. """Parse a single line from the README into a structured dictionary."""
  17. print(line)
  18. name = line.split('](')[0][3:]
  19. url = line.split('](')[1].split(')')[0]
  20. description = '' if line[-1] == ')' else line.split(') ')[1][2:]
  21. return {
  22. 'name': name,
  23. 'description': description,
  24. 'url': url,
  25. 'category': category,
  26. }
  27. def parse_section(section: str, category: str = '') -> list[dict[str, str]]:
  28. """Parse a section of lines into a list of structured items."""
  29. return [parse_line(line, category) for line in section.split('\n')]
  30. def parse_subsections(section: str) -> list[dict[str, str]]:
  31. """Parse a section containing multiple subsections."""
  32. subsections = section.split('### ')[1:]
  33. items = []
  34. for subsection in subsections:
  35. split_section = subsection.split('\n\n')
  36. section_title = split_section[0]
  37. items += parse_section(split_section[1], section_title)
  38. return items
  39. def cut_section(readme: str, start: str) -> str:
  40. """Extract a specific section from the README."""
  41. return readme.split(f'## {start}\n\n')[1].split('\n\n## ')[0]
  42. def parse_readme(readme: str) -> dict:
  43. """Parse the entire README into a structured dictionary."""
  44. return {
  45. 'apps': parse_subsections(cut_section(readme, 'Apps')),
  46. 'tools': parse_subsections(cut_section(readme, 'Tools')),
  47. 'resources': parse_subsections(cut_section(readme, 'Resources')),
  48. 'sites': parse_section(cut_section(readme, 'Open-source sites')),
  49. 'metadata': {
  50. 'updated': datetime.now(timezone.utc).isoformat(),
  51. },
  52. }
  53. if __name__ == '__main__':
  54. readme_path = Path('README.md')
  55. try:
  56. readme = readme_path.read_text(encoding='utf-8')
  57. parsed_readme = parse_readme(readme)
  58. json_path = Path(f'./dist{API_PATH}')
  59. json_path.parent.mkdir(parents=True, exist_ok=True)
  60. readme_payload = json.dumps(parsed_readme, indent=2, ensure_ascii=False)
  61. print(readme_payload)
  62. json_path.write_text(readme_payload, encoding='utf-8')
  63. except FileNotFoundError as e:
  64. print(f'Error: Could not find file - {e}')
  65. raise
  66. except (KeyError, IndexError) as e:
  67. print(f'Error: README formatting issue - {e}')
  68. print('Is the README well formatted?')
  69. raise
  70. except Exception as e:
  71. print(f'Unexpected error: {e}')
  72. raise