// config.ts
  1. import { z } from 'zod';
  2. import type { Page } from "playwright";
  /**
   * Zod schema standing in for Playwright's `Page`.
   *
   * It performs no runtime validation (any value is accepted); it exists so
   * that schemas built from it infer the Playwright `Page` type statically.
   * The value deliberately shares its name with the imported type.
   */
  const Page: z.ZodType<Page> = z.custom<Page>();
  /** Name/value pair describing a cookie to be set (e.g. for cookie consent). */
  const cookieSchema = z.object({
    name: z.string(),
    value: z.string(),
  });

  /**
   * Shape of the `pushData` callback handed to `onVisitPage`:
   * takes one argument of any type and returns a promise resolving to void.
   */
  const pushDataSchema = z
    .function()
    .args(z.any())
    .returns(z.promise(z.void()));

  /**
   * Shape of the `onVisitPage` hook: receives a single object carrying the
   * current `page` and the `pushData` callback, and returns a promise
   * resolving to void.
   */
  const onVisitPageSchema = z
    .function()
    .args(
      z.object({
        page: Page,
        pushData: pushDataSchema,
      }),
    )
    .returns(z.promise(z.void()));

  /**
   * Zod schema describing the crawler configuration.
   *
   * `url`, `match`, `maxPagesToCrawl` and `outputFileName` are required;
   * every other key may be omitted. The schema itself applies no default
   * values.
   */
  export const configSchema = z.object({
    /**
     * URL to start the crawl from.
     * @example "https://www.builder.io/c/docs/developers"
     */
    url: z.string(),

    /**
     * Pattern (or list of patterns) that links on a page must match in order
     * to be crawled subsequently.
     * @example "https://www.builder.io/c/docs/**"
     */
    match: z.union([z.string(), z.array(z.string())]),

    /**
     * Selector whose inner text is grabbed. Optional.
     * @example ".docs-builder-container"
     */
    selector: z.string().optional(),

    /**
     * Upper bound on the number of pages to crawl; must be a positive integer.
     * @example 50
     */
    maxPagesToCrawl: z.number().int().positive(),

    /**
     * File name for the finished data.
     * @example "output.json"
     */
    outputFileName: z.string(),

    /** Optional cookie to be set, e.g. for cookie consent. */
    cookie: cookieSchema.optional(),

    /** Optional function to run for each page found. */
    onVisitPage: onVisitPageSchema.optional(),

    /**
     * Optional timeout for waiting for a selector to appear;
     * must be a non-negative integer when given.
     */
    waitForSelectorTimeout: z.number().int().nonnegative().optional(),
  });
  /**
   * Static type of a configuration object that has passed `configSchema`
   * validation (the schema's parsed output type).
   */
  export type Config = z.output<typeof configSchema>;