cli.ts 2.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. #!/usr/bin/env node
  2. import { program } from "commander";
  3. import { Config } from "../config.js";
  4. import { crawl, write } from "./core.js";
  5. import { createRequire } from "node:module";
  6. import inquirer from "inquirer";
  // This file is an ES module (it uses import.meta), so a require function is
  // created with createRequire and used to load the package manifest.
  const require = createRequire(import.meta.url);
  const packageManifest = require("../../package.json");
  const { version, description } = packageManifest;
  /**
   * User-facing text for each crawl setting. The same strings are used as the
   * interactive prompt questions and as the option descriptions registered
   * with commander.
   */
  const messages = {
    url: "What is the first URL of the website you want to crawl?",
    match: "What is the URL pattern you want to match?",
    selector: "What is the CSS selector you want to match?",
    maxPagesToCrawl: "How many pages do you want to crawl?",
    outputFileName: "What is the name of the output file?",
  };
  /**
   * Commander action for the CLI. Builds a crawl configuration from the parsed
   * options, interactively asks for any of `url`, `match` or `selector` that is
   * still empty, then runs `crawl` followed by `write` with that configuration.
   *
   * Errors are not rethrown: they are printed to stderr and the process exit
   * code is set to 1 so that callers (shells, CI) can detect the failure.
   *
   * @param options - Option values parsed from the command line. The CLI
   *   supplies `maxPagesToCrawl` as a string, so it is converted to an integer
   *   here before the configuration is built.
   */
  async function handler(options: Config): Promise<void> {
    try {
      const { url, match, selector, outputFileName } = options;

      // String() accepts both the CLI string and an already-numeric value,
      // which removes the need to suppress the type checker on parseInt.
      const maxPagesToCrawl = Number.parseInt(
        String(options.maxPagesToCrawl),
        10,
      );
      if (Number.isNaN(maxPagesToCrawl)) {
        throw new Error(
          `maxPagesToCrawl must be a number, received: ${String(options.maxPagesToCrawl)}`,
        );
      }

      let config: Config = {
        url,
        match,
        selector,
        maxPagesToCrawl,
        outputFileName,
      };

      // Only the settings that are still empty are asked for interactively.
      const promptableFields = ["url", "match", "selector"] as const;
      const questions = promptableFields
        .filter((field) => !config[field])
        .map((field) => ({
          type: "input" as const,
          name: field,
          message: messages[field],
        }));

      if (questions.length > 0) {
        const answers = await inquirer.prompt(questions);
        config = { ...config, ...answers };
      }

      await crawl(config);
      await write(config);
    } catch (error) {
      console.error(error);
      // Signal failure without cutting off pending output.
      process.exitCode = 1;
    }
  }
  // CLI definition: metadata from package.json, one option per crawl setting,
  // and `handler` as the action that runs once the arguments are parsed.
  program.version(version).description(description);

  program
    .option("-u, --url <string>", messages.url, "")
    .option("-m, --match <string>", messages.match, "")
    .option("-s, --selector <string>", messages.selector, "")
    // Short flag is `-p`: `-m` is already taken by --match above, and two
    // options cannot share one short flag. The long flag is unchanged.
    .option("-p, --maxPagesToCrawl <number>", messages.maxPagesToCrawl, "50")
    .option(
      "-o, --outputFileName <string>",
      messages.outputFileName,
      "output.json",
    )
    .action(handler);

  program.parse();