소스 검색

Fix formatting in config.ts and core.ts

guillermoscript 1년 전
부모
커밋
ed47ed48f4
3개의 파일이 변경됨: 31개의 추가, 22개의 삭제
  1. 2 2
      README.md
  2. 3 3
      src/config.ts
  3. 26 17
      src/core.ts

+ 2 - 2
README.md

@@ -85,9 +85,9 @@ type Config = {
    */
   resourceExclusions?: string[];
   /** Optional maximum file size in megabytes to include in the output file */
-  maxFileSize?: number,
+  maxFileSize?: number;
   /** Optional maximum number tokens to include in the output file */
-  maxTokens?: number,
+  maxTokens?: number;
 };
 ```
 

+ 3 - 3
src/config.ts

@@ -64,11 +64,11 @@ export const configSchema = z.object({
 
   /** Optional maximum file size in megabytes to include in the output file
    * @example 1
-  */
+   */
   maxFileSize: z.number().int().positive().optional(),
-  /** Optional maximum number tokens to include in the output file 
+  /** Optional maximum number tokens to include in the output file
    * @example 5000
-  */
+   */
   maxTokens: z.number().int().positive().optional(),
 });
 

+ 26 - 17
src/core.ts

@@ -4,9 +4,7 @@ import { readFile, writeFile } from "fs/promises";
 import { glob } from "glob";
 import { Config, configSchema } from "./config.js";
 import { Page } from "playwright";
-import {
-  isWithinTokenLimit,
-} from 'gpt-tokenizer'
+import { isWithinTokenLimit } from "gpt-tokenizer";
 
 let pageCounter = 0;
 
@@ -144,20 +142,26 @@ export async function crawl(config: Config) {
   }
 }
 
-export async function write(config: Config)  {
-  const jsonFiles = await glob("storage/datasets/default/*.json", { absolute: true });
+export async function write(config: Config) {
+  const jsonFiles = await glob("storage/datasets/default/*.json", {
+    absolute: true,
+  });
 
   console.log(`Found ${jsonFiles.length} files to combine...`);
 
   let currentResults: Record<string, any>[] = [];
   let currentSize: number = 0;
   let fileCounter: number = 1;
-  const maxBytes: number = config.maxFileSize ? config.maxFileSize * 1024 * 1024 : Infinity;
-  
-  const getStringByteSize = (str: string): number => Buffer.byteLength(str, 'utf-8');
-  
-  const nextFileName = (): string => `${config.outputFileName.replace(/\.json$/, '')}-${fileCounter}.json`;
-  
+  const maxBytes: number = config.maxFileSize
+    ? config.maxFileSize * 1024 * 1024
+    : Infinity;
+
+  const getStringByteSize = (str: string): number =>
+    Buffer.byteLength(str, "utf-8");
+
+  const nextFileName = (): string =>
+    `${config.outputFileName.replace(/\.json$/, "")}-${fileCounter}.json`;
+
   const writeBatchToFile = async (): Promise<void> => {
     await writeFile(nextFileName(), JSON.stringify(currentResults, null, 2));
     console.log(`Wrote ${currentResults.length} items to ${nextFileName()}`);
@@ -165,14 +169,19 @@ export async function write(config: Config)  {
     currentSize = 0;
     fileCounter++;
   };
-  
+
   let estimatedTokens: number = 0;
 
-  const addContentOrSplit = async (data: Record<string, any>): Promise<void> => {
+  const addContentOrSplit = async (
+    data: Record<string, any>,
+  ): Promise<void> => {
     const contentString: string = JSON.stringify(data);
-    const tokenCount: number | false = isWithinTokenLimit(contentString, config.maxTokens || Infinity);
+    const tokenCount: number | false = isWithinTokenLimit(
+      contentString,
+      config.maxTokens || Infinity,
+    );
 
-    if (typeof tokenCount === 'number') {
+    if (typeof tokenCount === "number") {
       if (estimatedTokens + tokenCount > config.maxTokens!) {
         // Only write the batch if it's not empty (something to write)
         if (currentResults.length > 0) {
@@ -195,7 +204,7 @@ export async function write(config: Config)  {
 
   // Iterate over each JSON file and process its contents.
   for (const file of jsonFiles) {
-    const fileContent = await readFile(file, 'utf-8');
+    const fileContent = await readFile(file, "utf-8");
     const data: Record<string, any> = JSON.parse(fileContent);
     await addContentOrSplit(data);
   }
@@ -204,4 +213,4 @@ export async function write(config: Config)  {
   if (currentResults.length > 0) {
     await writeBatchToFile();
   }
-};
+}