
Merge pull request #38 from marcelovicentegc/main

Make GPT Crawler a CLI
Steve Sewell 1 year ago
parent
commit 4771ddbef8
10 changed files with 1206 additions and 1181 deletions
  1. .gitignore (+3 -0)
  2. Dockerfile (+1 -1)
  3. README.md (+48 -23)
  4. config.ts (+27 -8)
  5. package-lock.json (+912 -1051)
  6. package.json (+9 -1)
  7. src/cli.ts (+100 -0)
  8. src/core.ts (+100 -0)
  9. src/main.ts (+4 -96)
  10. tsconfig.json (+2 -1)

+ 3 - 0
.gitignore

@@ -6,3 +6,6 @@ node_modules
 apify_storage
 crawlee_storage
 storage
+
+# any output from the crawler
+.json

+ 1 - 1
Dockerfile

@@ -48,4 +48,4 @@ COPY --chown=myuser . ./
 
 # Run the image. If you know you won't need headful browsers,
 # you can remove the XVFB start script for a micro perf gain.
-CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent
+CMD ./start_xvfb_and_run_cmd.sh && npm run start:prod --silent

+ 48 - 23
README.md

@@ -1,51 +1,63 @@
-# GPT Crawler
+<!-- Markdown written with https://marketplace.visualstudio.com/items?itemName=yzhang.markdown-all-in-one -->
+
+# GPT Crawler <!-- omit from toc -->
 
 Crawl a site to generate knowledge files to create your own custom GPT from one or multiple URLs
 
 ![Gif showing the crawl run](https://github.com/BuilderIO/gpt-crawler/assets/844291/feb8763a-152b-4708-9c92-013b5c70d2f2)
 
+- [Example](#example)
+- [Get started](#get-started)
+  - [Running locally](#running-locally)
+    - [Clone the repository](#clone-the-repository)
+    - [Install dependencies](#install-dependencies)
+    - [Configure the crawler](#configure-the-crawler)
+    - [Run your crawler](#run-your-crawler)
+  - [Alternative methods](#alternative-methods)
+    - [Running in a container with Docker](#running-in-a-container-with-docker)
+    - [Running as a CLI](#running-as-a-cli)
+      - [Development](#development)
+  - [Upload your data to OpenAI](#upload-your-data-to-openai)
+    - [Create a custom GPT](#create-a-custom-gpt)
+    - [Create a custom assistant](#create-a-custom-assistant)
+- [Contributing](#contributing)
 
 ## Example
 
-[Here is a custom GPT](https://chat.openai.com/g/g-kywiqipmR-builder-io-assistant) that I quickly made to help answer questions about how to use and integrate [Builder.io](https://www.builder.io) by simply providing the URL to the Builder docs. 
+[Here is a custom GPT](https://chat.openai.com/g/g-kywiqipmR-builder-io-assistant) that I quickly made to help answer questions about how to use and integrate [Builder.io](https://www.builder.io) by simply providing the URL to the Builder docs.
 
 This project crawled the docs and generated the file that I uploaded as the basis for the custom GPT.
 
-[Try it out yourself](https://chat.openai.com/g/g-kywiqipmR-builder-io-assistant) by asking questions about how to integrate Builder.io into a site. 
+[Try it out yourself](https://chat.openai.com/g/g-kywiqipmR-builder-io-assistant) by asking questions about how to integrate Builder.io into a site.
 
 > Note that you may need a paid ChatGPT plan to access this feature
 
 ## Get started
 
-### Prerequisites
+### Running locally
 
-Be sure you have Node.js >= 16 installed
+#### Clone the repository
 
-### Clone the repo
+Be sure you have Node.js >= 16 installed.
 
 ```sh
 git clone https://github.com/builderio/gpt-crawler
 ```
 
-### Install Dependencies
+#### Install dependencies
 
 ```sh
 npm i
 ```
 
-If you do not have Playwright installed:
-```sh
-npx playwright install
-```
-
-### Configure the crawler
+#### Configure the crawler
 
 Open [config.ts](config.ts) and edit the `url` and `selectors` properties to match your needs.
 
 E.g. to crawl the Builder.io docs to make our custom GPT you can use:
 
 ```ts
-export const config: Config = {
+export const defaultConfig: Config = {
   url: "https://www.builder.io/c/docs/developers",
   match: "https://www.builder.io/c/docs/**",
   selector: `.docs-builder-container`,
@@ -69,23 +81,41 @@ type Config = {
   /** File name for the finished data */
   outputFileName: string;
   /** Optional cookie to be set. E.g. for Cookie Consent */
-  cookie?: {name: string; value: string}
+  cookie?: { name: string; value: string };
   /** Optional function to run for each page found */
   onVisitPage?: (options: {
     page: Page;
     pushData: (data: any) => Promise<void>;
   }) => Promise<void>;
-    /** Optional timeout for waiting for a selector to appear */
-    waitForSelectorTimeout?: number;
+  /** Optional timeout for waiting for a selector to appear */
+  waitForSelectorTimeout?: number;
 };
 ```
 
-### Run your crawler
+#### Run your crawler
 
 ```sh
 npm start
 ```
 
+### Alternative methods
+
+#### [Running in a container with Docker](./containerapp/README.md)
+
+To obtain the `output.json` from a containerized execution, go into the `containerapp` directory and modify `config.ts` as described above; the `output.json` file should be generated in the data folder. Note: the `outputFileName` property in the `config.ts` file in the `containerapp` folder is configured to work with the container.
+
+#### Running as a CLI
+
+<!-- TODO: Needs to be actually published -->
+
+##### Development
+
+To run the CLI locally while developing it:
+  
+```sh
+npm run start:cli --url https://www.builder.io/c/docs/developers --match https://www.builder.io/c/docs/** --selector .docs-builder-container --maxPagesToCrawl 50 --outputFileName output.json
+```
+
 ### Upload your data to OpenAI
 
 The crawl will generate a file called `output.json` at the root of this project. Upload that [to OpenAI](https://platform.openai.com/docs/assistants/overview) to create your custom assistant or custom GPT.
@@ -105,7 +135,6 @@ Use this option for UI access to your generated knowledge that you can easily sh
 
 ![Gif of how to upload a custom GPT](https://github.com/BuilderIO/gpt-crawler/assets/844291/22f27fb5-6ca5-4748-9edd-6bcf00b408cf)
 
-
 #### Create a custom assistant
 
 Use this option for API access to your generated knowledge that you can integrate into your product.
@@ -116,10 +145,6 @@ Use this option for API access to your generated knowledge that you can integrat
 
 ![Gif of how to upload to an assistant](https://github.com/BuilderIO/gpt-crawler/assets/844291/06e6ad36-e2ba-4c6e-8d5a-bf329140de49)
 
-## (Alternate method) Running in a container with Docker
-To obtain the `output.json` with a containerized execution. Go into the `containerapp` directory. Modify the `config.ts` same as above, the `output.json`file should be generated in the data folder. Note : the `outputFileName` property in the `config.ts` file in containerapp folder is configured to work with the container. 
-
-
 ## Contributing
 
 Know how to make this project better? Send a PR!

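The "Create a custom assistant" step in the README can also be done through the API instead of the dashboard. A rough sketch only, not part of this PR: the SDK calls below reflect the late-2023 OpenAI Assistants beta (openai v4 for Node), and the assistant name and model are placeholders.

```ts
import fs from "node:fs";
import OpenAI from "openai";

const openai = new OpenAI(); // reads OPENAI_API_KEY from the environment

// Upload the crawler output as a knowledge file for assistants
const file = await openai.files.create({
  file: fs.createReadStream("output.json"),
  purpose: "assistants",
});

// Create an assistant that can retrieve answers from the uploaded file
const assistant = await openai.beta.assistants.create({
  name: "Builder.io Assistant", // placeholder name
  model: "gpt-4-1106-preview", // placeholder model
  tools: [{ type: "retrieval" }],
  file_ids: [file.id],
});

console.log(`Created assistant ${assistant.id}`);
```
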
+ 27 - 8
config.ts

@@ -1,14 +1,33 @@
 import { Page } from "playwright";
-type Config = {
-  /** URL to start the crawl */
+
+export type Config = {
+  /**
+   * URL to start the crawl
+   * @example "https://www.builder.io/c/docs/developers"
+   * @default ""
+   */
   url: string;
-  /** Pattern to match against for links on a page to subsequently crawl */
+  /**
+   * Pattern to match against for links on a page to subsequently crawl
+   * @example "https://www.builder.io/c/docs/**"
+   * @default ""
+   */
   match: string | string[];
-  /** Selector to grab the inner text from */
+  /**
+   * Selector to grab the inner text from
+   * @example ".docs-builder-container"
+   * @default ""
+   */
   selector: string;
-  /** Don't crawl more than this many pages */
+  /**
+   * Don't crawl more than this many pages
+   * @default 50
+   */
   maxPagesToCrawl: number;
-  /** File name for the finished data */
+  /**
+   * File name for the finished data
+   * @default "output.json"
+   */
   outputFileName: string;
   /** Optional cookie to be set. E.g. for Cookie Consent */
   cookie?: { name: string; value: string };
@@ -21,10 +40,10 @@ type Config = {
   waitForSelectorTimeout?: number;
 };
 
-export const config: Config = {
+export const defaultConfig: Config = {
   url: "https://www.builder.io/c/docs/developers",
   match: "https://www.builder.io/c/docs/**",
   selector: `.docs-builder-container`,
   maxPagesToCrawl: 50,
-  outputFileName: "output.json",
+  outputFileName: "../output.json",
 };

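Besides the required fields shown above, the Config type also documents optional cookie, onVisitPage, and waitForSelectorTimeout fields (see the README portion of this diff). A minimal sketch of a config that exercises two of them; the cookie name/value and the timeout are placeholders, not part of this PR:

```ts
import { Config } from "./config.js";

// Hypothetical config: same required fields as defaultConfig,
// plus the optional cookie and selector-timeout settings.
const docsConfig: Config = {
  url: "https://www.builder.io/c/docs/developers",
  match: "https://www.builder.io/c/docs/**",
  selector: ".docs-builder-container",
  maxPagesToCrawl: 50,
  outputFileName: "output.json",
  // Optional: set a consent cookie before pages are visited (placeholder values)
  cookie: { name: "CookieConsent", value: "accepted" },
  // Optional: wait up to 3 seconds for the selector to appear
  waitForSelectorTimeout: 3000,
};
```
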
+ 912 - 1051
package-lock.json

File diff suppressed because it is too large

+ 9 - 1
package.json

@@ -2,22 +2,30 @@
   "name": "@builder.io/gpt-crawler",
   "version": "0.0.1",
   "type": "module",
+  "bin": {
+    "gpt-crawler": "./dist/src/cli.js"
+  },
   "description": "Crawl a site to generate knowledge files to create your own custom GPT",
   "dependencies": {
+    "commander": "^11.1.0",
     "crawlee": "^3.0.0",
     "glob": "^10.3.10",
+    "inquirer": "^9.2.12",
     "playwright": "*"
   },
   "devDependencies": {
     "@apify/tsconfig": "^0.1.0",
+    "@types/inquirer": "^9.0.7",
     "@types/node": "^20.0.0",
     "ts-node": "^10.8.0",
     "typescript": "^5.0.0"
   },
   "scripts": {
+    "preinstall": "npx playwright install",
     "start": "npm run start:dev",
+    "start:cli": "NODE_ENV=development npm run build && node dist/src/cli.js",
+    "start:dev": "NODE_ENV=development npm run build && node dist/src/main.js",
     "start:prod": "node dist/main.js",
-    "start:dev": "node --no-warnings=ExperimentalWarning --loader ts-node/esm/transpile-only src/main.ts",
     "build": "tsc"
   },
   "author": "It's not you it's me",

+ 100 - 0
src/cli.ts

@@ -0,0 +1,100 @@
+#!/usr/bin/env node
+
+import { program } from "commander";
+import { Config } from "../config.js";
+import { crawl, write } from "./core.js";
+import { createRequire } from "node:module";
+import inquirer from "inquirer";
+
+const require = createRequire(import.meta.url);
+const { version, description } = require("../../package.json");
+
+const messages = {
+  url: "What is the first URL of the website you want to crawl?",
+  match: "What is the URL pattern you want to match?",
+  selector: "What is the CSS selector you want to match?",
+  maxPagesToCrawl: "How many pages do you want to crawl?",
+  outputFileName: "What is the name of the output file?",
+};
+
+async function handler(options: Config) {
+  try {
+    const {
+      url,
+      match,
+      selector,
+      maxPagesToCrawl: maxPagesToCrawlStr,
+      outputFileName,
+    } = options;
+
+    // @ts-ignore
+    const maxPagesToCrawl = parseInt(maxPagesToCrawlStr, 10);
+
+    let config: Config = {
+      url,
+      match,
+      selector,
+      maxPagesToCrawl,
+      outputFileName,
+    };
+
+    if (!config.url || !config.match || !config.selector) {
+      const questions = [];
+
+      if (!config.url) {
+        questions.push({
+          type: "input",
+          name: "url",
+          message: messages.url,
+        });
+      }
+
+      if (!config.match) {
+        questions.push({
+          type: "input",
+          name: "match",
+          message: messages.match,
+        });
+      }
+
+      if (!config.selector) {
+        questions.push({
+          type: "input",
+          name: "selector",
+          message: messages.selector,
+        });
+      }
+
+      const answers = await inquirer
+        .prompt(questions);
+
+      config = {
+        ...config,
+        ...answers,
+      };
+    }
+
+    await crawl(config);
+    await write(config);
+  } catch (error) {
+    console.log(error);
+  }
+}
+
+program
+  .version(version)
+  .description(description);
+
+program
+  .option("-u, --url <string>", messages.url, "")
+  .option("-m, --match <string>", messages.match, "")
+  .option("-s, --selector <string>", messages.selector, "")
+  .option("-m, --maxPagesToCrawl <number>", messages.maxPagesToCrawl, "50")
+  .option(
+    "-o, --outputFileName <string>",
+    messages.outputFileName,
+    "output.json",
+  )
+  .action(handler);
+
+program.parse();

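Commander hands every option value to the handler as a string, which is why handler() parses maxPagesToCrawl with parseInt behind a @ts-ignore. As an aside (not used in this PR), commander's .option also accepts a per-option argument processor that coerces the value before it reaches the handler; a minimal sketch:

```ts
import { program } from "commander";

// Sketch only: the third argument to .option is a coercion function
// applied to the raw string value; 50 is the (already numeric) default.
program.option(
  "--maxPagesToCrawl <number>",
  "How many pages do you want to crawl?",
  (value) => parseInt(value, 10),
  50,
);

program.parse();
console.log(typeof program.opts().maxPagesToCrawl); // "number"
```
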
+ 100 - 0
src/core.ts

@@ -0,0 +1,100 @@
+// For more information, see https://crawlee.dev/
+import { PlaywrightCrawler } from "crawlee";
+import { readFile, writeFile } from "fs/promises";
+import { glob } from "glob";
+import { Config } from "../config.js";
+import { Page } from "playwright";
+
+let pageCounter = 0; 
+
+export function getPageHtml(page: Page, selector: string) {
+  return page.evaluate((selector) => {
+    // Check if the selector is an XPath
+    if (selector.startsWith('/')) {
+      const elements = document.evaluate(selector, document, null, XPathResult.ANY_TYPE, null);
+      let result = elements.iterateNext();
+      return result ? result.textContent || "" : "";
+    } else {
+      // Handle as a CSS selector
+      const el = document.querySelector(selector) as HTMLElement | null;
+      return el?.innerText || "";
+    }
+  }, selector);
+}
+
+export async function waitForXPath(page: Page, xpath: string, timeout: number) {
+  await page.waitForFunction(xpath => {
+    const elements = document.evaluate(xpath, document, null, XPathResult.ANY_TYPE, null);
+    return elements.iterateNext() !== null;
+  }, xpath, { timeout });
+}
+
+export async function crawl(config: Config) {
+  if (process.env.NO_CRAWL !== "true") {
+    // PlaywrightCrawler crawls the web using a headless
+    // browser controlled by the Playwright library.
+    const crawler = new PlaywrightCrawler({
+      // Use the requestHandler to process each of the crawled pages.
+      async requestHandler({ request, page, enqueueLinks, log, pushData }) {
+        if (config.cookie) {
+          // Set the cookie for the specific URL
+          const cookie = {
+            name: config.cookie.name,
+            value: config.cookie.value,
+            url: request.loadedUrl, 
+          };
+          await page.context().addCookies([cookie]);
+        }
+  
+        const title = await page.title();
+        pageCounter++;
+        log.info(`Crawling: Page ${pageCounter} / ${config.maxPagesToCrawl} - URL: ${request.loadedUrl}...`);
+        
+        // Use custom handling for XPath selector
+        if (config.selector.startsWith('/')) {
+          await waitForXPath(page, config.selector, config.waitForSelectorTimeout ?? 1000);
+        } else {
+          await page.waitForSelector(config.selector, {
+            timeout: config.waitForSelectorTimeout ?? 1000,
+          });
+        }
+  
+        const html = await getPageHtml(page, config.selector);
+  
+        // Save results as JSON to ./storage/datasets/default
+        await pushData({ title, url: request.loadedUrl, html });
+  
+        if (config.onVisitPage) {
+          await config.onVisitPage({ page, pushData });
+        }
+  
+        // Extract links from the current page
+        // and add them to the crawling queue.
+        await enqueueLinks({
+          globs: typeof config.match === "string" ? [config.match] : config.match,
+        });
+      },
+      // Comment this option to scrape the full website.
+      maxRequestsPerCrawl: config.maxPagesToCrawl,
+      // Uncomment this option to see the browser window.
+      // headless: false,
+    });
+  
+    // Add first URL to the queue and start the crawl.
+    await crawler.run([config.url]);
+  }  
+}
+
+export async function write(config: Config) {
+  const jsonFiles = await glob("storage/datasets/default/*.json", {
+    absolute: true,
+  });
+  
+  const results = [];
+  for (const file of jsonFiles) {
+    const data = JSON.parse(await readFile(file, "utf-8"));
+    results.push(data);
+  }
+  
+  await writeFile(config.outputFileName, JSON.stringify(results, null, 2));
+}

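write() above simply concatenates every JSON record that pushData stored under storage/datasets/default into the configured output file. Based on the pushData({ title, url, html }) call, each entry in that file is shaped roughly as follows; this is a descriptive sketch, not a type exported by the PR:

```ts
// Approximate shape of one entry in the generated output file,
// inferred from pushData({ title, url, html }) in src/core.ts.
interface CrawledPage {
  title: string; // page <title> at crawl time
  url: string | undefined; // request.loadedUrl from crawlee
  html: string; // text captured via the configured selector (innerText/textContent)
}

// The output file itself is an array of these entries.
type CrawlOutput = CrawledPage[];
```
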
+ 4 - 96
src/main.ts

@@ -1,97 +1,5 @@
-// For more information, see https://crawlee.dev/
-import { PlaywrightCrawler } from "crawlee";
-import { readFile, writeFile } from "fs/promises";
-import { glob } from "glob";
-import { config } from "../config.js";
-import { Page } from "playwright";
+import { defaultConfig } from "../config.js";
+import { crawl, write } from "./core.js";
 
-let pageCounter = 0; 
-
-export function getPageHtml(page: Page, selector: string) {
-  return page.evaluate((selector) => {
-    // Check if the selector is an XPath
-    if (selector.startsWith('/')) {
-      const elements = document.evaluate(selector, document, null, XPathResult.ANY_TYPE, null);
-      let result = elements.iterateNext();
-      return result ? result.textContent || "" : "";
-    } else {
-      // Handle as a CSS selector
-      const el = document.querySelector(selector) as HTMLElement | null;
-      return el?.innerText || "";
-    }
-  }, selector);
-}
-
-export async function waitForXPath(page: Page, xpath: string, timeout: number) {
-  await page.waitForFunction(xpath => {
-    const elements = document.evaluate(xpath, document, null, XPathResult.ANY_TYPE, null);
-    return elements.iterateNext() !== null;
-  }, xpath, { timeout });
-}
-
-if (process.env.NO_CRAWL !== "true") {
-  // PlaywrightCrawler crawls the web using a headless
-  // browser controlled by the Playwright library.
-  const crawler = new PlaywrightCrawler({
-    // Use the requestHandler to process each of the crawled pages.
-    async requestHandler({ request, page, enqueueLinks, log, pushData }) {
-
-      if (config.cookie) {
-        // Set the cookie for the specific URL
-        const cookie = {
-          name: config.cookie.name,
-          value: config.cookie.value,
-          url: request.loadedUrl, 
-        };
-        await page.context().addCookies([cookie]);
-      }
-
-      const title = await page.title();
-      pageCounter++;
-      log.info(`Crawling: Page ${pageCounter} / ${config.maxPagesToCrawl} - URL: ${request.loadedUrl}...`);
-      
-      // Use custom handling for XPath selector
-      if (config.selector.startsWith('/')) {
-        await waitForXPath(page, config.selector, config.waitForSelectorTimeout ?? 1000);
-      } else {
-        await page.waitForSelector(config.selector, {
-          timeout: config.waitForSelectorTimeout ?? 1000,
-        });
-      }
-
-      const html = await getPageHtml(page, config.selector);
-
-      // Save results as JSON to ./storage/datasets/default
-      await pushData({ title, url: request.loadedUrl, html });
-
-      if (config.onVisitPage) {
-        await config.onVisitPage({ page, pushData });
-      }
-
-      // Extract links from the current page
-      // and add them to the crawling queue.
-      await enqueueLinks({
-        globs: typeof config.match === "string" ? [config.match] : config.match,
-      });
-    },
-    // Comment this option to scrape the full website.
-    maxRequestsPerCrawl: config.maxPagesToCrawl,
-    // Uncomment this option to see the browser window.
-    // headless: false,
-  });
-
-  // Add first URL to the queue and start the crawl.
-  await crawler.run([config.url]);
-}
-
-const jsonFiles = await glob("storage/datasets/default/*.json", {
-  absolute: true,
-});
-
-const results = [];
-for (const file of jsonFiles) {
-  const data = JSON.parse(await readFile(file, "utf-8"));
-  results.push(data);
-}
-
-await writeFile(config.outputFileName, JSON.stringify(results, null, 2));
+await crawl(defaultConfig);
+await write(defaultConfig);

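With crawl and write exported from core.ts, main.ts reduces to the two calls above, and the same entry point works with ad-hoc settings. A small illustrative sketch; the override values are placeholders, not part of this PR:

```ts
import { defaultConfig } from "../config.js";
import { crawl, write } from "./core.js";

// Illustrative only: reuse defaultConfig but cap the crawl at 10 pages
// and write the results to a differently named file.
const config = {
  ...defaultConfig,
  maxPagesToCrawl: 10,
  outputFileName: "builder-docs.json",
};

await crawl(config);
await write(config);
```
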
+ 2 - 1
tsconfig.json

@@ -6,7 +6,8 @@
     "outDir": "dist",
     "resolveJsonModule": true,
     "noUnusedLocals": false,
+    "skipLibCheck": true,
     "lib": ["DOM"]
   },
-  "include": ["./src/**/*", "./config.ts"]
+  "include": ["./src/**/*", "config.ts"]
 }