|
@@ -518,7 +518,7 @@
|
|
|
},
|
|
|
{
|
|
|
"cell_type": "code",
|
|
|
- "execution_count": 67,
|
|
|
+ "execution_count": null,
|
|
|
"id": "7917dfdd-b3af-44fc-a8c0-2760ace9363e",
|
|
|
"metadata": {},
|
|
|
"outputs": [
|
|
@@ -2616,6 +2616,9 @@
|
|
|
}
|
|
|
],
|
|
|
"source": [
|
|
|
+ "# Holds the complete processed text (for optional printing below)\n",
|
|
|
+ "processed_text = \"\" \n",
|
|
|
+ "\n",
|
|
|
"with open(output_file, 'w', encoding='utf-8') as out_file:\n",
|
|
|
" for chunk_num, chunk in enumerate(tqdm(chunks, desc=\"Processing chunks\")):\n",
|
|
|
" # Process chunk and append to complete text\n",
|
|
@@ -2624,7 +2627,10 @@
|
|
|
" \n",
|
|
|
" # Write chunk immediately to file\n",
|
|
|
" out_file.write(processed_chunk + \"\\n\")\n",
|
|
|
- " out_file.flush()"
|
|
|
+ " out_file.flush()\n",
|
|
|
+ " \n",
|
|
|
+ "# Uncomment the next line to print the entire processed_text at once\n",
|
|
|
+ "#print(processed_text)"
|
|
|
]
|
|
|
},
|
|
|
{
|