// config.ts (787 B)

  1. import { Page } from "playwright";
  /**
   * Settings for a single crawl: where it starts, which links it follows,
   * what it extracts from each page, and the file the result is written to.
   */
  export type Config = {
    /** URL to start the crawl. */
    url: string;
    /**
     * Pattern to match against for links on a page to subsequently crawl.
     * Either a single pattern or a list of patterns.
     */
    match: string | string[];
    /** Selector to grab the inner text from. */
    selector: string;
    /** Upper bound on the crawl: don't crawl more than this many pages. */
    maxPagesToCrawl: number;
    /** File name for the finished data. */
    outputFileName: string;
    /** Optional cookie to be set, given as a name/value pair (e.g. for cookie consent). */
    cookie?: { name: string; value: string };
    /**
     * Optional function to run for each page found.
     *
     * It receives the Playwright `Page` together with a `pushData` callback and
     * returns a promise.
     * NOTE(review): presumably `pushData` adds `data` to the crawl output —
     * confirm against the crawler that supplies it.
     */
    onVisitPage?: (options: {
      page: Page;
      // `any` is kept so existing callers and implementers stay assignable.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any -- payload shape is left open
      pushData: (data: any) => Promise<void>;
    }) => Promise<void>;
    /**
     * Optional timeout for waiting for a selector to appear.
     * NOTE(review): unit is not stated here (presumably milliseconds) — confirm.
     */
    waitForSelectorTimeout?: number;
  };