|
@@ -2,7 +2,7 @@
|
|
"cells": [
|
|
"cells": [
|
|
{
|
|
{
|
|
"cell_type": "markdown",
|
|
"cell_type": "markdown",
|
|
- "id": "atlantic-kingston",
|
|
|
|
|
|
+ "id": "vital-advertising",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"source": [
|
|
"source": [
|
|
"# \n",
|
|
"# \n",
|
|
@@ -24,7 +24,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "markdown",
|
|
"cell_type": "markdown",
|
|
- "id": "aerial-mills",
|
|
|
|
|
|
+ "id": "permanent-reception",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"source": [
|
|
"source": [
|
|
"--------------------------------------------------------------------------------------------------------------------\n",
|
|
"--------------------------------------------------------------------------------------------------------------------\n",
|
|
@@ -34,7 +34,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": 2,
|
|
"execution_count": 2,
|
|
- "id": "organic-owner",
|
|
|
|
|
|
+ "id": "steady-henry",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [
|
|
"outputs": [
|
|
{
|
|
{
|
|
@@ -66,7 +66,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": 13,
|
|
"execution_count": 13,
|
|
- "id": "apparent-newark",
|
|
|
|
|
|
+ "id": "exposed-mouth",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [
|
|
"outputs": [
|
|
{
|
|
{
|
|
@@ -84,7 +84,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": 15,
|
|
"execution_count": 15,
|
|
- "id": "brave-plasma",
|
|
|
|
|
|
+ "id": "turned-navigator",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
@@ -94,7 +94,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": 17,
|
|
"execution_count": 17,
|
|
- "id": "forward-telling",
|
|
|
|
|
|
+ "id": "level-discipline",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [
|
|
"outputs": [
|
|
{
|
|
{
|
|
@@ -112,7 +112,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": 18,
|
|
"execution_count": 18,
|
|
- "id": "approximate-surname",
|
|
|
|
|
|
+ "id": "separated-payday",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [
|
|
"outputs": [
|
|
{
|
|
{
|
|
@@ -140,7 +140,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": 20,
|
|
"execution_count": 20,
|
|
- "id": "occupational-globe",
|
|
|
|
|
|
+ "id": "guilty-comparative",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [
|
|
"outputs": [
|
|
{
|
|
{
|
|
@@ -172,7 +172,7 @@
|
|
" print(\"finish processing \",fname)\n",
|
|
" print(\"finish processing \",fname)\n",
|
|
" f.close()\n",
|
|
" f.close()\n",
|
|
" \n",
|
|
" \n",
|
|
- "out_path='../../../../dataset/SV/'\n",
|
|
|
|
|
|
+ "out_path='./dataset/SV/'\n",
|
|
"xml_f=out_path+'webbnyheter2013.xml'\n",
|
|
"xml_f=out_path+'webbnyheter2013.xml'\n",
|
|
"if xml_f.endswith('.xml') : \n",
|
|
"if xml_f.endswith('.xml') : \n",
|
|
" corpus = SBCorpusReader(xml_f)\n",
|
|
" corpus = SBCorpusReader(xml_f)\n",
|
|
@@ -190,7 +190,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": 21,
|
|
"execution_count": 21,
|
|
- "id": "banned-series",
|
|
|
|
|
|
+ "id": "rubber-finnish",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [
|
|
"outputs": [
|
|
{
|
|
{
|
|
@@ -207,7 +207,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "markdown",
|
|
"cell_type": "markdown",
|
|
- "id": "senior-solomon",
|
|
|
|
|
|
+ "id": "round-somewhere",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"source": [
|
|
"source": [
|
|
"---\n",
|
|
"---\n",
|
|
@@ -221,7 +221,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "markdown",
|
|
"cell_type": "markdown",
|
|
- "id": "nutritional-hammer",
|
|
|
|
|
|
+ "id": "celtic-appreciation",
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"source": [
|
|
"source": [
|
|
"-----\n",
|
|
"-----\n",
|