// config.ts

  import { Page } from "playwright";
  /**
   * Shape of the crawler configuration object defined in this file.
   *
   * All fields except `onVisitPage` are required.
   */
  type Config = {
    /** URL to start the crawl from. */
    url: string;
    /**
     * Pattern that links found on a page must match in order to be crawled
     * next. (The value used in this file looks like a glob — confirm the
     * exact matching syntax against the crawler that consumes it.)
     */
    match: string;
    /** Selector of the element whose inner text is grabbed. */
    selector: string;
    /** Upper bound on the number of pages to crawl. */
    maxPagesToCrawl: number;
    /** File name the finished data is written to. */
    outputFileName: string;
    /**
     * Optional hook run for each page found.
     *
     * Receives the Playwright `page` and a `pushData` callback; both the hook
     * and `pushData` are asynchronous.
     *
     * NOTE(review): `data` is typed `any`. Narrowing it would be a
     * caller-visible change, so it is intentionally left as-is here.
     */
    onVisitPage?: (options: {
      page: Page;
      pushData: (data: any) => Promise<void>;
    }) => Promise<void>;
  };
  /**
   * Crawl configuration exported by this module.
   *
   * Starts at the builder.io developer docs page, follows links matching the
   * `/c/docs/**` pattern, reads text from `.docs-builder-container`, stops
   * after 50 pages, and names the output file `output.json`. No `onVisitPage`
   * hook is set.
   */
  export const config: Config = {
    url: "https://www.builder.io/c/docs/developers",
    match: "https://www.builder.io/c/docs/**",
    // Plain string literal: no interpolation is needed, and it matches the
    // quoting used by the other fields.
    selector: ".docs-builder-container",
    maxPagesToCrawl: 50,
    outputFileName: "output.json",
  };