|
@@ -1,47 +1,77 @@
|
|
#!/usr/bin/env node
|
|
#!/usr/bin/env node
|
|
|
|
|
|
import { program } from "commander";
|
|
import { program } from "commander";
|
|
-import { Config } from "../config.js";
|
|
|
|
|
|
+import { Config } from "./config.js";
|
|
import { crawl, write } from "./core.js";
|
|
import { crawl, write } from "./core.js";
|
|
import { createRequire } from "node:module";
|
|
import { createRequire } from "node:module";
|
|
import inquirer from "inquirer";
|
|
import inquirer from "inquirer";
|
|
|
|
|
|
const require = createRequire(import.meta.url);
|
|
const require = createRequire(import.meta.url);
|
|
-const { version, description } = require("../../package.json");
|
|
|
|
|
|
+const { version, description } = require("../package.json");
|
|
|
|
|
|
-async function handler(options: any) {
|
|
|
|
|
|
/**
 * User-facing text for each crawl setting.
 *
 * Every entry is passed to commander as the description of the matching CLI
 * option; the `url`, `match` and `selector` entries are also reused as the
 * interactive prompt message when that value was not supplied.
 *
 * Declared `as const` so the shared strings are readonly and cannot be
 * reassigned by accident from either call site.
 */
const messages = {
  url: "What is the first URL of the website you want to crawl?",
  match: "What is the URL pattern you want to match?",
  selector: "What is the CSS selector you want to match?",
  maxPagesToCrawl: "How many pages do you want to crawl?",
  outputFileName: "What is the name of the output file?",
} as const;
|
|
|
|
+
|
|
|
|
+async function handler(options: Config) {
|
|
try {
|
|
try {
|
|
|
|
+ const {
|
|
|
|
+ url,
|
|
|
|
+ match,
|
|
|
|
+ selector,
|
|
|
|
+ maxPagesToCrawl: maxPagesToCrawlStr,
|
|
|
|
+ outputFileName,
|
|
|
|
+ } = options;
|
|
|
|
+
|
|
|
|
+ // @ts-ignore
|
|
|
|
+ const maxPagesToCrawl = parseInt(maxPagesToCrawlStr, 10);
|
|
|
|
+
|
|
let config: Config = {
|
|
let config: Config = {
|
|
- url: options.url,
|
|
|
|
- match: options.match,
|
|
|
|
- selector: options.selector,
|
|
|
|
- maxPagesToCrawl: 50,
|
|
|
|
- outputFileName: options.outputFileName ?? "output.json",
|
|
|
|
|
|
+ url,
|
|
|
|
+ match,
|
|
|
|
+ selector,
|
|
|
|
+ maxPagesToCrawl,
|
|
|
|
+ outputFileName,
|
|
};
|
|
};
|
|
|
|
|
|
if (!config.url || !config.match || !config.selector) {
|
|
if (!config.url || !config.match || !config.selector) {
|
|
- const { url, match, selector } = await inquirer
|
|
|
|
- .prompt([
|
|
|
|
- {
|
|
|
|
- type: "input",
|
|
|
|
- name: "url",
|
|
|
|
- message: "What is the URL of the website you want to crawl?",
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- type: "input",
|
|
|
|
- name: "match",
|
|
|
|
- message: "What is the URL pattern you want to match?",
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- type: "input",
|
|
|
|
- name: "selector",
|
|
|
|
- message: "What is the CSS selector you want to match?",
|
|
|
|
- },
|
|
|
|
- ]);
|
|
|
|
-
|
|
|
|
- config.url = url;
|
|
|
|
- config.match = match;
|
|
|
|
- config.selector = selector;
|
|
|
|
|
|
+ const questions = [];
|
|
|
|
+
|
|
|
|
+ if (!config.url) {
|
|
|
|
+ questions.push({
|
|
|
|
+ type: "input",
|
|
|
|
+ name: "url",
|
|
|
|
+ message: messages.url,
|
|
|
|
+ });
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (!config.match) {
|
|
|
|
+ questions.push({
|
|
|
|
+ type: "input",
|
|
|
|
+ name: "match",
|
|
|
|
+ message: messages.match,
|
|
|
|
+ });
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ if (!config.selector) {
|
|
|
|
+ questions.push({
|
|
|
|
+ type: "input",
|
|
|
|
+ name: "selector",
|
|
|
|
+ message: messages.selector,
|
|
|
|
+ });
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
+ const answers = await inquirer
|
|
|
|
+ .prompt(questions);
|
|
|
|
+
|
|
|
|
+ config = {
|
|
|
|
+ ...config,
|
|
|
|
+ ...answers,
|
|
|
|
+ };
|
|
}
|
|
}
|
|
|
|
|
|
await crawl(config);
|
|
await crawl(config);
|
|
@@ -56,11 +86,15 @@ program
|
|
.description(description);
|
|
.description(description);
|
|
|
|
|
|
// Register the crawl options and route the parsed values to `handler`.
// Each option takes a value (`<...>`), uses the matching `messages` entry as
// its help description, and declares a default as the third argument.
program
  .option("-u, --url <string>", messages.url, "")
  .option("-m, --match <string>", messages.match, "")
  .option("-s, --selector <string>", messages.selector, "")
  // Long-only on purpose: "-m" already belongs to --match above. Declaring it
  // a second time makes the short flag ambiguous, and newer Commander
  // releases refuse to register a conflicting flag at all.
  // The default is the string "50"; `handler` converts it with parseInt.
  .option("--maxPagesToCrawl <number>", messages.maxPagesToCrawl, "50")
  .option(
    "-o, --outputFileName <string>",
    messages.outputFileName,
    "output.json",
  )
  .action(handler);

// Parse the command line (commander reads process.argv when called with no arguments).
program.parse();
|