// config.ts

  import { Page } from "playwright";
  /**
   * Shape of the crawl configuration object exported by this module.
   *
   * This type only declares the fields; how each one is consumed is decided by
   * the crawler code that imports the config.
   */
  type Config = {
    /** URL to start the crawl from. */
    url: string;
    /**
     * Pattern that links found on a page are matched against to decide which
     * pages are crawled next.
     * NOTE(review): the exact pattern syntax (glob vs. other) is not defined
     * in this file — confirm against the crawler.
     */
    match: string;
    /** Selector of the element whose inner text is grabbed. */
    selector: string;
    /** Upper bound: do not crawl more than this many pages. */
    maxPagesToCrawl: number;
    /** File name for the finished data. */
    outputFileName: string;
    /** Optional cookie to be set, e.g. to satisfy a cookie-consent prompt. */
    cookie?: { name: string; value: string };
    /**
     * Optional function run for each page found.
     *
     * It receives the Playwright `page` and a `pushData` callback, and must
     * return a promise.
     */
    onVisitPage?: (options: {
      page: Page;
      // `any` is kept deliberately: narrowing the parameter type could make an
      // existing `pushData` implementation unassignable to this signature.
      pushData: (data: any) => Promise<void>;
    }) => Promise<void>;
    /**
     * Optional timeout for waiting for a selector to appear.
     * NOTE(review): the unit is not stated here — presumably milliseconds;
     * confirm against the code that reads it.
     */
    waitForSelectorTimeout?: number;
  };
  /**
   * The crawl configuration exported by this module.
   *
   * Starts at the builder.io developer docs page, matches links against the
   * builder.io docs pattern, grabs text from `.docs-builder-container`, stops
   * after 50 pages, and names the finished data file `output.json`.
   * The optional fields (`cookie`, `onVisitPage`, `waitForSelectorTimeout`)
   * are left unset.
   */
  export const config: Config = {
    url: "https://www.builder.io/c/docs/developers",
    match: "https://www.builder.io/c/docs/**",
    selector: ".docs-builder-container",
    maxPagesToCrawl: 50,
    outputFileName: "output.json",
  };