# __init__.py
  1. """
  2. Structured Parser Package
  3. This package provides tools for extracting structured data from documents,
  4. particularly PDFs, using LLMs. It includes functionality for:
  5. 1. Extracting text, tables, and images from PDFs
  6. 2. Converting extracted data to SQL database entries
  7. 3. Creating vector embeddings for semantic search
  8. """
  9. from .json_to_sql import DatabaseManager, flatten_json_to_sql, VectorIndexManager
  10. from .structured_extraction import (
  11. ArtifactExtractor,
  12. main as extract_artifacts,
  13. RequestBuilder,
  14. )
  15. from .utils import config, ImageUtils, InferenceUtils, JSONUtils, load_config, PDFUtils
  16. __all__ = [
  17. # Main extraction functionality
  18. "ArtifactExtractor",
  19. "RequestBuilder",
  20. "extract_artifacts",
  21. # Database functionality
  22. "DatabaseManager",
  23. "VectorIndexManager",
  24. "flatten_json_to_sql",
  25. "sql_query",
  26. # Utility classes
  27. "ImageUtils",
  28. "JSONUtils",
  29. "PDFUtils",
  30. "InferenceUtils",
  31. # Configuration
  32. "config",
  33. "load_config",
  34. ]