|
@@ -18,9 +18,7 @@
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
"[1](01-LinearRegression-Hyperparam.ipynb)\n",
|
|
"[1](01-LinearRegression-Hyperparam.ipynb)\n",
|
|
- "[2](02-SGD.ipynb)\n",
|
|
|
|
- "[3](03_CuML_Exercise.ipynb)\n",
|
|
|
|
- "[4]"
|
|
|
|
|
|
+ "[2](03_CuML_Exercise.ipynb)"
|
|
]
|
|
]
|
|
},
|
|
},
|
|
{
|
|
{
|
|
@@ -51,18 +49,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 1,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "NumPy Version: 1.19.2\n",
|
|
|
|
- "Scikit-Learn Version: 0.23.1\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import matplotlib.pyplot as plt\n",
|
|
"import numpy as np; print('NumPy Version:', np.__version__)\n",
|
|
"import numpy as np; print('NumPy Version:', np.__version__)\n",
|
|
@@ -130,71 +119,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 2,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 43 ms, sys: 10.1 ms, total: 53.1 ms\n",
|
|
|
|
- "Wall time: 52.5 ms\n",
|
|
|
|
- " Unnamed: 0 Source TMC Severity Start_Lat Start_Lng End_Lat \\\n",
|
|
|
|
- "0 0 1 201.0 3 39.865147 -84.058723 37.557578 \n",
|
|
|
|
- "1 1 1 201.0 2 39.928059 -82.831184 37.557578 \n",
|
|
|
|
- "2 2 1 201.0 2 39.063148 -84.032608 37.557578 \n",
|
|
|
|
- "3 3 1 201.0 3 39.747753 -84.205582 37.557578 \n",
|
|
|
|
- "4 4 1 201.0 2 39.627781 -84.188354 37.557578 \n",
|
|
|
|
- "... ... ... ... ... ... ... ... \n",
|
|
|
|
- "17317 17317 1 201.0 3 37.396164 -121.907578 37.557578 \n",
|
|
|
|
- "17318 17318 1 201.0 3 37.825649 -122.304092 37.557578 \n",
|
|
|
|
- "17319 17319 1 201.0 2 36.979454 -121.909035 37.557578 \n",
|
|
|
|
- "17320 17320 1 201.0 2 37.314030 -121.827065 37.557578 \n",
|
|
|
|
- "17321 17321 1 201.0 3 37.758404 -122.212173 37.557578 \n",
|
|
|
|
- "\n",
|
|
|
|
- " End_Lng Distance(mi) County ... Station Stop \\\n",
|
|
|
|
- "0 -100.455981 0.01 Montgomery ... 0.0 0.0 \n",
|
|
|
|
- "1 -100.455981 0.01 Franklin ... 0.0 0.0 \n",
|
|
|
|
- "2 -100.455981 0.01 Clermont ... 0.0 0.0 \n",
|
|
|
|
- "3 -100.455981 0.01 Montgomery ... 0.0 0.0 \n",
|
|
|
|
- "4 -100.455981 0.01 Montgomery ... 0.0 0.0 \n",
|
|
|
|
- "... ... ... ... ... ... ... \n",
|
|
|
|
- "17317 -100.455981 0.01 Santa Clara ... 0.0 0.0 \n",
|
|
|
|
- "17318 -100.455981 0.01 Alameda ... 0.0 0.0 \n",
|
|
|
|
- "17319 -100.455981 0.00 Santa Cruz ... 0.0 0.0 \n",
|
|
|
|
- "17320 -100.455981 0.01 Santa Clara ... 0.0 0.0 \n",
|
|
|
|
- "17321 -100.455981 0.01 Alameda ... NaN NaN \n",
|
|
|
|
- "\n",
|
|
|
|
- " Traffic_Calming Traffic_Signal Turning_Loop Sunrise_Sunset \\\n",
|
|
|
|
- "0 0.0 0.0 0.0 0.0 \n",
|
|
|
|
- "1 0.0 0.0 0.0 0.0 \n",
|
|
|
|
- "2 0.0 0.0 0.0 0.0 \n",
|
|
|
|
- "3 0.0 0.0 0.0 0.0 \n",
|
|
|
|
- "4 0.0 0.0 0.0 1.0 \n",
|
|
|
|
- "... ... ... ... ... \n",
|
|
|
|
- "17317 0.0 0.0 0.0 1.0 \n",
|
|
|
|
- "17318 0.0 0.0 0.0 1.0 \n",
|
|
|
|
- "17319 0.0 0.0 0.0 1.0 \n",
|
|
|
|
- "17320 0.0 0.0 0.0 1.0 \n",
|
|
|
|
- "17321 NaN NaN NaN NaN \n",
|
|
|
|
- "\n",
|
|
|
|
- " Civil_Twilight Nautical_Twilight Astronomical_Twilight cov_distance \n",
|
|
|
|
- "0 0.0 0.0 0.0 1443.524390 \n",
|
|
|
|
- "1 0.0 0.0 1.0 1548.467903 \n",
|
|
|
|
- "2 0.0 1.0 1.0 1440.697621 \n",
|
|
|
|
- "3 1.0 1.0 1.0 1429.927497 \n",
|
|
|
|
- "4 1.0 1.0 1.0 1430.383177 \n",
|
|
|
|
- "... ... ... ... ... \n",
|
|
|
|
- "17317 1.0 1.0 1.0 1888.935551 \n",
|
|
|
|
- "17318 1.0 1.0 1.0 1918.251042 \n",
|
|
|
|
- "17319 1.0 1.0 1.0 1895.341155 \n",
|
|
|
|
- "17320 1.0 1.0 1.0 1883.025767 \n",
|
|
|
|
- "17321 NaN NaN NaN NaN \n",
|
|
|
|
- "\n",
|
|
|
|
- "[17322 rows x 34 columns]\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%time df = pd.read_csv('../../data/data_proc.csv')\n",
|
|
"%time df = pd.read_csv('../../data/data_proc.csv')\n",
|
|
"print(df)"
|
|
"print(df)"
|
|
@@ -209,7 +136,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 3,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
@@ -225,216 +152,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 4,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/html": [
|
|
|
|
- "<div>\n",
|
|
|
|
- "<style scoped>\n",
|
|
|
|
- " .dataframe tbody tr th:only-of-type {\n",
|
|
|
|
- " vertical-align: middle;\n",
|
|
|
|
- " }\n",
|
|
|
|
- "\n",
|
|
|
|
- " .dataframe tbody tr th {\n",
|
|
|
|
- " vertical-align: top;\n",
|
|
|
|
- " }\n",
|
|
|
|
- "\n",
|
|
|
|
- " .dataframe thead th {\n",
|
|
|
|
- " text-align: right;\n",
|
|
|
|
- " }\n",
|
|
|
|
- "</style>\n",
|
|
|
|
- "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
|
- " <thead>\n",
|
|
|
|
- " <tr style=\"text-align: right;\">\n",
|
|
|
|
- " <th></th>\n",
|
|
|
|
- " <th>Source</th>\n",
|
|
|
|
- " <th>TMC</th>\n",
|
|
|
|
- " <th>Severity</th>\n",
|
|
|
|
- " <th>Start_Lat</th>\n",
|
|
|
|
- " <th>Start_Lng</th>\n",
|
|
|
|
- " <th>End_Lat</th>\n",
|
|
|
|
- " <th>End_Lng</th>\n",
|
|
|
|
- " <th>Distance(mi)</th>\n",
|
|
|
|
- " <th>County</th>\n",
|
|
|
|
- " <th>State</th>\n",
|
|
|
|
- " <th>...</th>\n",
|
|
|
|
- " <th>Station</th>\n",
|
|
|
|
- " <th>Stop</th>\n",
|
|
|
|
- " <th>Traffic_Calming</th>\n",
|
|
|
|
- " <th>Traffic_Signal</th>\n",
|
|
|
|
- " <th>Turning_Loop</th>\n",
|
|
|
|
- " <th>Sunrise_Sunset</th>\n",
|
|
|
|
- " <th>Civil_Twilight</th>\n",
|
|
|
|
- " <th>Nautical_Twilight</th>\n",
|
|
|
|
- " <th>Astronomical_Twilight</th>\n",
|
|
|
|
- " <th>cov_distance</th>\n",
|
|
|
|
- " </tr>\n",
|
|
|
|
- " </thead>\n",
|
|
|
|
- " <tbody>\n",
|
|
|
|
- " <tr>\n",
|
|
|
|
- " <th>0</th>\n",
|
|
|
|
- " <td>1</td>\n",
|
|
|
|
- " <td>201.0</td>\n",
|
|
|
|
- " <td>3</td>\n",
|
|
|
|
- " <td>39.865147</td>\n",
|
|
|
|
- " <td>-84.058723</td>\n",
|
|
|
|
- " <td>37.557578</td>\n",
|
|
|
|
- " <td>-100.455981</td>\n",
|
|
|
|
- " <td>0.01</td>\n",
|
|
|
|
- " <td>Montgomery</td>\n",
|
|
|
|
- " <td>OH</td>\n",
|
|
|
|
- " <td>...</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>1443.524390</td>\n",
|
|
|
|
- " </tr>\n",
|
|
|
|
- " <tr>\n",
|
|
|
|
- " <th>1</th>\n",
|
|
|
|
- " <td>1</td>\n",
|
|
|
|
- " <td>201.0</td>\n",
|
|
|
|
- " <td>2</td>\n",
|
|
|
|
- " <td>39.928059</td>\n",
|
|
|
|
- " <td>-82.831184</td>\n",
|
|
|
|
- " <td>37.557578</td>\n",
|
|
|
|
- " <td>-100.455981</td>\n",
|
|
|
|
- " <td>0.01</td>\n",
|
|
|
|
- " <td>Franklin</td>\n",
|
|
|
|
- " <td>OH</td>\n",
|
|
|
|
- " <td>...</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1548.467903</td>\n",
|
|
|
|
- " </tr>\n",
|
|
|
|
- " <tr>\n",
|
|
|
|
- " <th>2</th>\n",
|
|
|
|
- " <td>1</td>\n",
|
|
|
|
- " <td>201.0</td>\n",
|
|
|
|
- " <td>2</td>\n",
|
|
|
|
- " <td>39.063148</td>\n",
|
|
|
|
- " <td>-84.032608</td>\n",
|
|
|
|
- " <td>37.557578</td>\n",
|
|
|
|
- " <td>-100.455981</td>\n",
|
|
|
|
- " <td>0.01</td>\n",
|
|
|
|
- " <td>Clermont</td>\n",
|
|
|
|
- " <td>OH</td>\n",
|
|
|
|
- " <td>...</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1440.697621</td>\n",
|
|
|
|
- " </tr>\n",
|
|
|
|
- " <tr>\n",
|
|
|
|
- " <th>3</th>\n",
|
|
|
|
- " <td>1</td>\n",
|
|
|
|
- " <td>201.0</td>\n",
|
|
|
|
- " <td>3</td>\n",
|
|
|
|
- " <td>39.747753</td>\n",
|
|
|
|
- " <td>-84.205582</td>\n",
|
|
|
|
- " <td>37.557578</td>\n",
|
|
|
|
- " <td>-100.455981</td>\n",
|
|
|
|
- " <td>0.01</td>\n",
|
|
|
|
- " <td>Montgomery</td>\n",
|
|
|
|
- " <td>OH</td>\n",
|
|
|
|
- " <td>...</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1429.927497</td>\n",
|
|
|
|
- " </tr>\n",
|
|
|
|
- " <tr>\n",
|
|
|
|
- " <th>4</th>\n",
|
|
|
|
- " <td>1</td>\n",
|
|
|
|
- " <td>201.0</td>\n",
|
|
|
|
- " <td>2</td>\n",
|
|
|
|
- " <td>39.627781</td>\n",
|
|
|
|
- " <td>-84.188354</td>\n",
|
|
|
|
- " <td>37.557578</td>\n",
|
|
|
|
- " <td>-100.455981</td>\n",
|
|
|
|
- " <td>0.01</td>\n",
|
|
|
|
- " <td>Montgomery</td>\n",
|
|
|
|
- " <td>OH</td>\n",
|
|
|
|
- " <td>...</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>0.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1.0</td>\n",
|
|
|
|
- " <td>1430.383177</td>\n",
|
|
|
|
- " </tr>\n",
|
|
|
|
- " </tbody>\n",
|
|
|
|
- "</table>\n",
|
|
|
|
- "<p>5 rows × 33 columns</p>\n",
|
|
|
|
- "</div>"
|
|
|
|
- ],
|
|
|
|
- "text/plain": [
|
|
|
|
- " Source TMC Severity Start_Lat Start_Lng End_Lat End_Lng \\\n",
|
|
|
|
- "0 1 201.0 3 39.865147 -84.058723 37.557578 -100.455981 \n",
|
|
|
|
- "1 1 201.0 2 39.928059 -82.831184 37.557578 -100.455981 \n",
|
|
|
|
- "2 1 201.0 2 39.063148 -84.032608 37.557578 -100.455981 \n",
|
|
|
|
- "3 1 201.0 3 39.747753 -84.205582 37.557578 -100.455981 \n",
|
|
|
|
- "4 1 201.0 2 39.627781 -84.188354 37.557578 -100.455981 \n",
|
|
|
|
- "\n",
|
|
|
|
- " Distance(mi) County State ... Station Stop Traffic_Calming \\\n",
|
|
|
|
- "0 0.01 Montgomery OH ... 0.0 0.0 0.0 \n",
|
|
|
|
- "1 0.01 Franklin OH ... 0.0 0.0 0.0 \n",
|
|
|
|
- "2 0.01 Clermont OH ... 0.0 0.0 0.0 \n",
|
|
|
|
- "3 0.01 Montgomery OH ... 0.0 0.0 0.0 \n",
|
|
|
|
- "4 0.01 Montgomery OH ... 0.0 0.0 0.0 \n",
|
|
|
|
- "\n",
|
|
|
|
- " Traffic_Signal Turning_Loop Sunrise_Sunset Civil_Twilight \\\n",
|
|
|
|
- "0 0.0 0.0 0.0 0.0 \n",
|
|
|
|
- "1 0.0 0.0 0.0 0.0 \n",
|
|
|
|
- "2 0.0 0.0 0.0 0.0 \n",
|
|
|
|
- "3 0.0 0.0 0.0 1.0 \n",
|
|
|
|
- "4 0.0 0.0 1.0 1.0 \n",
|
|
|
|
- "\n",
|
|
|
|
- " Nautical_Twilight Astronomical_Twilight cov_distance \n",
|
|
|
|
- "0 0.0 0.0 1443.524390 \n",
|
|
|
|
- "1 0.0 1.0 1548.467903 \n",
|
|
|
|
- "2 1.0 1.0 1440.697621 \n",
|
|
|
|
- "3 1.0 1.0 1429.927497 \n",
|
|
|
|
- "4 1.0 1.0 1430.383177 \n",
|
|
|
|
- "\n",
|
|
|
|
- "[5 rows x 33 columns]"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 4,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"df.head()"
|
|
"df.head()"
|
|
]
|
|
]
|
|
@@ -448,7 +168,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 5,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
@@ -464,21 +184,12 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 6,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 14.6 ms, sys: 515 µs, total: 15.1 ms\n",
|
|
|
|
- "Wall time: 14.9 ms\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
- "link to label encoder\n",
|
|
|
|
|
|
+ "#link to label encoder\n",
|
|
"label_encoder = preprocessing.LabelEncoder() \n",
|
|
"label_encoder = preprocessing.LabelEncoder() \n",
|
|
"df['County']= label_encoder.fit_transform(df['County']) \n",
|
|
"df['County']= label_encoder.fit_transform(df['County']) \n",
|
|
"df['State']= label_encoder.fit_transform(df['State'])\n",
|
|
"df['State']= label_encoder.fit_transform(df['State'])\n",
|
|
@@ -536,33 +247,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 7,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 30.1 ms, sys: 4.45 ms, total: 34.6 ms\n",
|
|
|
|
- "Wall time: 34.1 ms\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/plain": [
|
|
|
|
- "Severity\n",
|
|
|
|
- "1 10584\n",
|
|
|
|
- "2 10584\n",
|
|
|
|
- "3 10584\n",
|
|
|
|
- "4 10584\n",
|
|
|
|
- "Name: Severity, dtype: int64"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 7,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"# Class Balancing | Using Up Sampling\n",
|
|
"# Class Balancing | Using Up Sampling\n",
|
|
@@ -597,18 +284,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 8,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 7.11 ms, sys: 5.16 ms, total: 12.3 ms\n",
|
|
|
|
- "Wall time: 11.5 ms\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"# Set the target for the prediction\n",
|
|
"# Set the target for the prediction\n",
|
|
@@ -640,7 +318,7 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 9,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
@@ -656,22 +334,13 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 10,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 855 ms, sys: 445 ms, total: 1.3 s\n",
|
|
|
|
- "Wall time: 1.31 s\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
|
|
+ "%%time\n",
|
|
"#Convert the data to CuDF dataframes here\n",
|
|
"#Convert the data to CuDF dataframes here\n",
|
|
"\n",
|
|
"\n",
|
|
- "%%time\n",
|
|
|
|
"X_cudf_train = cudf.DataFrame.from_pandas(X_train)\n",
|
|
"X_cudf_train = cudf.DataFrame.from_pandas(X_train)\n",
|
|
"X_cudf_test = cudf.DataFrame.from_pandas(X_test)\n",
|
|
"X_cudf_test = cudf.DataFrame.from_pandas(X_test)\n",
|
|
"\n",
|
|
"\n",
|
|
@@ -704,42 +373,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 19,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 20 s, sys: 50.9 s, total: 1min 10s\n",
|
|
|
|
- "Wall time: 1.9 s\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "name": "stderr",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "/opt/conda/envs/rapids/lib/python3.7/site-packages/sklearn/linear_model/_logistic.py:764: ConvergenceWarning: lbfgs failed to converge (status=1):\n",
|
|
|
|
- "STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
|
|
|
|
- "\n",
|
|
|
|
- "Increase the number of iterations (max_iter) or scale the data as shown in:\n",
|
|
|
|
- " https://scikit-learn.org/stable/modules/preprocessing.html\n",
|
|
|
|
- "Please also refer to the documentation for alternative solver options:\n",
|
|
|
|
- " https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
|
|
|
|
- " extra_warning_msg=_LOGISTIC_SOLVER_CONVERGENCE_MSG)\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/plain": [
|
|
|
|
- "LogisticRegression()"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 19,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"clf = skLogistic()\n",
|
|
"clf = skLogistic()\n",
|
|
@@ -755,19 +391,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 20,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "0.501299110306275\n",
|
|
|
|
- "CPU times: user 81.7 ms, sys: 193 ms, total: 275 ms\n",
|
|
|
|
- "Wall time: 7.28 ms\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"print(clf.score(X_test, y_test))"
|
|
"print(clf.score(X_test, y_test))"
|
|
@@ -786,33 +412,13 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 21,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "[E] [23:59:24.749199] L-BFGS line search failed\n",
|
|
|
|
- "CPU times: user 686 ms, sys: 2.11 s, total: 2.8 s\n",
|
|
|
|
- "Wall time: 74 ms\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/plain": [
|
|
|
|
- "LogisticRegression(penalty='l2', tol=0.0001, C=1.0, fit_intercept=True, max_iter=1000, linesearch_max_iter=50, verbose=4, l1_ratio=None, solver='qn', handle=<cuml.raft.common.handle.Handle object at 0x7fd97c0ee1f0>, output_type='cudf')"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 21,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
|
|
+ "%%time\n",
|
|
"#Modify the code in this cell\n",
|
|
"#Modify the code in this cell\n",
|
|
"\n",
|
|
"\n",
|
|
- "%%time\n",
|
|
|
|
"reg = LogisticRegression()\n",
|
|
"reg = LogisticRegression()\n",
|
|
"reg.fit() # Pass the train cudf dataframes as arguments here"
|
|
"reg.fit() # Pass the train cudf dataframes as arguments here"
|
|
]
|
|
]
|
|
@@ -826,23 +432,14 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 22,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "0.24864183366298676\n",
|
|
|
|
- "CPU times: user 171 ms, sys: 523 ms, total: 695 ms\n",
|
|
|
|
- "Wall time: 18.4 ms\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
|
|
+ "%%time\n",
|
|
"#Modify the code in this cell\n",
|
|
"#Modify the code in this cell\n",
|
|
"\n",
|
|
"\n",
|
|
- "%%time\n",
|
|
|
|
|
|
+ "\n",
|
|
"print(reg.score()) # Pass the test cudf dataframes as arguments here"
|
|
"print(reg.score()) # Pass the test cudf dataframes as arguments here"
|
|
]
|
|
]
|
|
},
|
|
},
|
|
@@ -861,28 +458,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 31,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 522 ms, sys: 4.43 ms, total: 527 ms\n",
|
|
|
|
- "Wall time: 526 ms\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/plain": [
|
|
|
|
- "KNeighborsClassifier(n_neighbors=3)"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 31,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"neigh = KNeighborsClassifier(n_neighbors=3)\n",
|
|
"neigh = KNeighborsClassifier(n_neighbors=3)\n",
|
|
@@ -898,19 +476,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 32,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "0.8876466419966932\n",
|
|
|
|
- "CPU times: user 1.15 s, sys: 5.22 ms, total: 1.15 s\n",
|
|
|
|
- "Wall time: 1.15 s\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"print(neigh.score(X_test, y_test))"
|
|
"print(neigh.score(X_test, y_test))"
|
|
@@ -929,32 +497,14 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 33,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 14.5 ms, sys: 2.39 ms, total: 16.8 ms\n",
|
|
|
|
- "Wall time: 16 ms\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/plain": [
|
|
|
|
- "KNeighborsClassifier(weights='uniform')"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 33,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
|
|
+ "%%time\n",
|
|
"#Modify the code in this cell\n",
|
|
"#Modify the code in this cell\n",
|
|
"\n",
|
|
"\n",
|
|
- "%%time\n",
|
|
|
|
|
|
+ "\n",
|
|
"knn = KNeighborsC(n_neighbors=10)\n",
|
|
"knn = KNeighborsC(n_neighbors=10)\n",
|
|
"knn.fit() # Pass the train cudf dataframes as arguments here"
|
|
"knn.fit() # Pass the train cudf dataframes as arguments here"
|
|
]
|
|
]
|
|
@@ -968,23 +518,14 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 34,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "0.8689079880714417\n",
|
|
|
|
- "CPU times: user 22.1 ms, sys: 126 ms, total: 148 ms\n",
|
|
|
|
- "Wall time: 148 ms\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
|
|
+ "%%time\n",
|
|
"#Modify the code in this cell\n",
|
|
"#Modify the code in this cell\n",
|
|
"\n",
|
|
"\n",
|
|
- "%%time\n",
|
|
|
|
|
|
+ "\n",
|
|
"print(knn.score()) # Pass the test cudf dataframes as arguments here"
|
|
"print(knn.score()) # Pass the test cudf dataframes as arguments here"
|
|
]
|
|
]
|
|
},
|
|
},
|
|
@@ -1004,28 +545,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 11,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 163 ms, sys: 62.1 ms, total: 225 ms\n",
|
|
|
|
- "Wall time: 226 ms\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/plain": [
|
|
|
|
- "ElasticNet(alpha=1.0, l1_ratio=0.5, fit_intercept=True, normalize=False, max_iter=1000, tol=0.001, selection='cyclic', handle=<cuml.raft.common.handle.Handle object at 0x7fd97c163210>, output_type='numpy', verbose=4)"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 11,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"regr = ElasticNet()\n",
|
|
"regr = ElasticNet()\n",
|
|
@@ -1041,19 +563,9 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 12,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "0.22519596677633613\n",
|
|
|
|
- "CPU times: user 5.97 ms, sys: 2.98 ms, total: 8.96 ms\n",
|
|
|
|
- "Wall time: 8.11 ms\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"%%time\n",
|
|
"%%time\n",
|
|
"X_test = X_test.astype(np.float64)\n",
|
|
"X_test = X_test.astype(np.float64)\n",
|
|
@@ -1074,32 +586,14 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 13,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "CPU times: user 126 ms, sys: 3.94 ms, total: 130 ms\n",
|
|
|
|
- "Wall time: 129 ms\n"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- {
|
|
|
|
- "data": {
|
|
|
|
- "text/plain": [
|
|
|
|
- "ElasticNet(alpha=1.0, l1_ratio=0.5, fit_intercept=True, normalize=False, max_iter=1000, tol=0.001, selection='cyclic', handle=<cuml.raft.common.handle.Handle object at 0x7fd97c152b70>, output_type='cudf', verbose=4)"
|
|
|
|
- ]
|
|
|
|
- },
|
|
|
|
- "execution_count": 13,
|
|
|
|
- "metadata": {},
|
|
|
|
- "output_type": "execute_result"
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
|
|
+ "%%time\n",
|
|
"#Modify the code in this cell\n",
|
|
"#Modify the code in this cell\n",
|
|
"\n",
|
|
"\n",
|
|
- "%%time\n",
|
|
|
|
|
|
+ "\n",
|
|
"enet = ElasticNet()\n",
|
|
"enet = ElasticNet()\n",
|
|
"\n",
|
|
"\n",
|
|
"enet.fit() # Pass the train cudf dataframes as arguments here"
|
|
"enet.fit() # Pass the train cudf dataframes as arguments here"
|
|
@@ -1114,23 +608,14 @@
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
- "execution_count": 14,
|
|
|
|
|
|
+ "execution_count": null,
|
|
"metadata": {},
|
|
"metadata": {},
|
|
- "outputs": [
|
|
|
|
- {
|
|
|
|
- "name": "stdout",
|
|
|
|
- "output_type": "stream",
|
|
|
|
- "text": [
|
|
|
|
- "0.22519596677633613\n",
|
|
|
|
- "CPU times: user 6.12 ms, sys: 2.09 ms, total: 8.21 ms\n",
|
|
|
|
- "Wall time: 7.49 ms\n"
|
|
|
|
- ]
|
|
|
|
- }
|
|
|
|
- ],
|
|
|
|
|
|
+ "outputs": [],
|
|
"source": [
|
|
"source": [
|
|
|
|
+ "%%time\n",
|
|
"#Modify the code in this cell\n",
|
|
"#Modify the code in this cell\n",
|
|
"\n",
|
|
"\n",
|
|
- "%%time\n",
|
|
|
|
|
|
+ "\n",
|
|
"X_cudf_test = X_cudf_test.astype(np.float64)\n",
|
|
"X_cudf_test = X_cudf_test.astype(np.float64)\n",
|
|
"y_cudf_test = y_cudf_test.astype(np.float64)\n",
|
|
"y_cudf_test = y_cudf_test.astype(np.float64)\n",
|
|
"print(enet.score()) # Pass the test cudf dataframes as arguments here"
|
|
"print(enet.score()) # Pass the test cudf dataframes as arguments here"
|
|
@@ -1213,9 +698,8 @@
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
"[1](01-LinearRegression-Hyperparam.ipynb)\n",
|
|
"[1](01-LinearRegression-Hyperparam.ipynb)\n",
|
|
- "[2](02-SGD.ipynb)\n",
|
|
|
|
- "[3](03_CuML_Exercise.ipynb)\n",
|
|
|
|
- "[4]\n",
|
|
|
|
|
|
+ "[2](03_CuML_Exercise.ipynb)\n",
|
|
|
|
+ "\n",
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
"     \n",
|
|
@@ -1248,7 +732,7 @@
|
|
"name": "python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"pygments_lexer": "ipython3",
|
|
- "version": "3.7.8"
|
|
|
|
|
|
+ "version": "3.6.2"
|
|
}
|
|
}
|
|
},
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat": 4,
|