WillKoehrsen 6 gadi atpakaļ
vecāks
revīzija
4080e88675

+ 288 - 33
poisson/asteroid-impacts-poisson.ipynb

@@ -117,11 +117,11 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 33,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-02-03T13:38:07.398423Z",
-     "start_time": "2019-02-03T13:38:07.167804Z"
+     "end_time": "2019-02-03T13:45:11.971911Z",
+     "start_time": "2019-02-03T13:45:11.579117Z"
     }
    },
    "outputs": [
@@ -257,7 +257,7 @@
        "4                  0.994           115000.0                       0.000191  "
       ]
      },
-     "execution_count": 4,
+     "execution_count": 33,
      "metadata": {},
      "output_type": "execute_result"
     }
@@ -840,11 +840,250 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 26,
+   "execution_count": 36,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-02-03T13:46:21.264111Z",
+     "start_time": "2019-02-03T13:46:21.260115Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "Index(['range_diameter', 'diameter', 'range_absolute_magnitude',\n",
+       "       'absolute_magnitude', 'impact_energy', 'cumulative_number_greater',\n",
+       "       'number', 'impact_frequency', 'undiscovered_fraction',\n",
+       "       'undiscover_number', 'undiscovered_impact_frequency', 'min_diameter',\n",
+       "       'max_diameter'],\n",
+       "      dtype='object')"
+      ]
+     },
+     "execution_count": 36,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df.columns"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 38,
    "metadata": {
     "ExecuteTime": {
-     "end_time": "2019-02-03T13:43:15.675688Z",
-     "start_time": "2019-02-03T13:43:15.645513Z"
+     "end_time": "2019-02-03T13:47:41.711527Z",
+     "start_time": "2019-02-03T13:47:41.691614Z"
+    }
+   },
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>range_diameter</th>\n",
+       "      <th>diameter</th>\n",
+       "      <th>absolute_magnitude</th>\n",
+       "      <th>impact_energy</th>\n",
+       "      <th>number</th>\n",
+       "      <th>impact_frequency</th>\n",
+       "      <th>min_diameter</th>\n",
+       "      <th>max_diameter</th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>0</th>\n",
+       "      <td>.0200–.0251</td>\n",
+       "      <td>0.0224</td>\n",
+       "      <td>26.0</td>\n",
+       "      <td>4.523-01</td>\n",
+       "      <td>2850000.00</td>\n",
+       "      <td>4.730000e-03</td>\n",
+       "      <td>.0200</td>\n",
+       "      <td>.0251</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>1</th>\n",
+       "      <td>.0251–.0316</td>\n",
+       "      <td>0.0282</td>\n",
+       "      <td>25.5</td>\n",
+       "      <td>9.02e-01</td>\n",
+       "      <td>1350000.00</td>\n",
+       "      <td>2.240000e-03</td>\n",
+       "      <td>.0251</td>\n",
+       "      <td>.0316</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2</th>\n",
+       "      <td>.0316–.0398</td>\n",
+       "      <td>0.0355</td>\n",
+       "      <td>25.0</td>\n",
+       "      <td>1.80e+00</td>\n",
+       "      <td>526000.00</td>\n",
+       "      <td>8.730000e-04</td>\n",
+       "      <td>.0316</td>\n",
+       "      <td>.0398</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3</th>\n",
+       "      <td>.0398–.0501</td>\n",
+       "      <td>0.0447</td>\n",
+       "      <td>24.5</td>\n",
+       "      <td>3.59e+00</td>\n",
+       "      <td>263000.00</td>\n",
+       "      <td>4.370000e-04</td>\n",
+       "      <td>.0398</td>\n",
+       "      <td>.0501</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>4</th>\n",
+       "      <td>.0501–.0631</td>\n",
+       "      <td>0.0562</td>\n",
+       "      <td>24.0</td>\n",
+       "      <td>7.16e+00</td>\n",
+       "      <td>116000.00</td>\n",
+       "      <td>1.930000e-04</td>\n",
+       "      <td>.0501</td>\n",
+       "      <td>.0631</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>...</th>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "      <td>...</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>22</th>\n",
+       "      <td>3.16–3.98</td>\n",
+       "      <td>3.5500</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>1.80e+06</td>\n",
+       "      <td>36.80</td>\n",
+       "      <td>6.110000e-08</td>\n",
+       "      <td>3.16</td>\n",
+       "      <td>3.98</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>23</th>\n",
+       "      <td>3.98–5.01</td>\n",
+       "      <td>4.4700</td>\n",
+       "      <td>14.5</td>\n",
+       "      <td>3.59e+06</td>\n",
+       "      <td>20.40</td>\n",
+       "      <td>3.390000e-08</td>\n",
+       "      <td>3.98</td>\n",
+       "      <td>5.01</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>24</th>\n",
+       "      <td>5.01–6.31</td>\n",
+       "      <td>5.6200</td>\n",
+       "      <td>14.0</td>\n",
+       "      <td>7.16e+06</td>\n",
+       "      <td>12.20</td>\n",
+       "      <td>2.030000e-08</td>\n",
+       "      <td>5.01</td>\n",
+       "      <td>6.31</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>25</th>\n",
+       "      <td>6.31–7.94</td>\n",
+       "      <td>7.0800</td>\n",
+       "      <td>13.5</td>\n",
+       "      <td>1.43e+07</td>\n",
+       "      <td>2.03</td>\n",
+       "      <td>3.370000e-09</td>\n",
+       "      <td>6.31</td>\n",
+       "      <td>7.94</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>26</th>\n",
+       "      <td>7.94–10.0</td>\n",
+       "      <td>8.9100</td>\n",
+       "      <td>13.0</td>\n",
+       "      <td>2.85e+07</td>\n",
+       "      <td>2.02</td>\n",
+       "      <td>3.350000e-09</td>\n",
+       "      <td>7.94</td>\n",
+       "      <td>10.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "<p>27 rows × 8 columns</p>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "   range_diameter  diameter  absolute_magnitude impact_energy      number  \\\n",
+       "0     .0200–.0251    0.0224                26.0      4.523-01  2850000.00   \n",
+       "1     .0251–.0316    0.0282                25.5      9.02e-01  1350000.00   \n",
+       "2     .0316–.0398    0.0355                25.0      1.80e+00   526000.00   \n",
+       "3     .0398–.0501    0.0447                24.5      3.59e+00   263000.00   \n",
+       "4     .0501–.0631    0.0562                24.0      7.16e+00   116000.00   \n",
+       "..            ...       ...                 ...           ...         ...   \n",
+       "22      3.16–3.98    3.5500                15.0      1.80e+06       36.80   \n",
+       "23      3.98–5.01    4.4700                14.5      3.59e+06       20.40   \n",
+       "24      5.01–6.31    5.6200                14.0      7.16e+06       12.20   \n",
+       "25      6.31–7.94    7.0800                13.5      1.43e+07        2.03   \n",
+       "26      7.94–10.0    8.9100                13.0      2.85e+07        2.02   \n",
+       "\n",
+       "    impact_frequency min_diameter max_diameter  \n",
+       "0       4.730000e-03        .0200        .0251  \n",
+       "1       2.240000e-03        .0251        .0316  \n",
+       "2       8.730000e-04        .0316        .0398  \n",
+       "3       4.370000e-04        .0398        .0501  \n",
+       "4       1.930000e-04        .0501        .0631  \n",
+       "..               ...          ...          ...  \n",
+       "22      6.110000e-08         3.16         3.98  \n",
+       "23      3.390000e-08         3.98         5.01  \n",
+       "24      2.030000e-08         5.01         6.31  \n",
+       "25      3.370000e-09         6.31         7.94  \n",
+       "26      3.350000e-09         7.94         10.0  \n",
+       "\n",
+       "[27 rows x 8 columns]"
+      ]
+     },
+     "execution_count": 38,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "df = df.drop(columns=[c for c in df if 'undiscover' in c] + \n",
+    "             ['cumulative_number_greater', 'range_absolute_magnitude'])\n",
+    "df"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 34,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-02-03T13:45:17.208082Z",
+     "start_time": "2019-02-03T13:45:17.179574Z"
     }
    },
    "outputs": [
@@ -880,8 +1119,8 @@
        "      <th>undiscovered_fraction</th>\n",
        "      <th>undiscover_number</th>\n",
        "      <th>undiscovered_impact_frequency</th>\n",
-       "      <th>(range_min, range_max)</th>\n",
-       "      <th>(min_diameter, max_diameter)</th>\n",
+       "      <th>min_diameter</th>\n",
+       "      <th>max_diameter</th>\n",
        "    </tr>\n",
        "  </thead>\n",
        "  <tbody>\n",
@@ -899,7 +1138,7 @@
        "      <td>2850000.000</td>\n",
        "      <td>4.730000e-03</td>\n",
        "      <td>.0200</td>\n",
-       "      <td>[.0200, .0251]</td>\n",
+       "      <td>.0251</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>1</th>\n",
@@ -915,7 +1154,7 @@
        "      <td>1350000.000</td>\n",
        "      <td>2.240000e-03</td>\n",
        "      <td>.0251</td>\n",
-       "      <td>[.0251, .0316]</td>\n",
+       "      <td>.0316</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>2</th>\n",
@@ -931,7 +1170,7 @@
        "      <td>525000.000</td>\n",
        "      <td>8.720000e-04</td>\n",
        "      <td>.0316</td>\n",
-       "      <td>[.0316, .0398]</td>\n",
+       "      <td>.0398</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>3</th>\n",
@@ -947,7 +1186,7 @@
        "      <td>262000.000</td>\n",
        "      <td>4.350000e-04</td>\n",
        "      <td>.0398</td>\n",
-       "      <td>[.0398, .0501]</td>\n",
+       "      <td>.0501</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>4</th>\n",
@@ -963,7 +1202,7 @@
        "      <td>115000.000</td>\n",
        "      <td>1.910000e-04</td>\n",
        "      <td>.0501</td>\n",
-       "      <td>[.0501, .0631]</td>\n",
+       "      <td>.0631</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>...</th>\n",
@@ -995,7 +1234,7 @@
        "      <td>0.081</td>\n",
        "      <td>1.350000e-09</td>\n",
        "      <td>3.16</td>\n",
-       "      <td>[3.16, 3.98]</td>\n",
+       "      <td>3.98</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>23</th>\n",
@@ -1011,7 +1250,7 @@
        "      <td>0.410</td>\n",
        "      <td>6.800000e-10</td>\n",
        "      <td>3.98</td>\n",
-       "      <td>[3.98, 5.01]</td>\n",
+       "      <td>5.01</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>24</th>\n",
@@ -1027,7 +1266,7 @@
        "      <td>0.021</td>\n",
        "      <td>3.400000e-10</td>\n",
        "      <td>5.01</td>\n",
-       "      <td>[5.01, 6.31]</td>\n",
+       "      <td>6.31</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>25</th>\n",
@@ -1043,7 +1282,7 @@
        "      <td>0.030</td>\n",
        "      <td>4.700000e-11</td>\n",
        "      <td>6.31</td>\n",
-       "      <td>[6.31, 7.94]</td>\n",
+       "      <td>7.94</td>\n",
        "    </tr>\n",
        "    <tr>\n",
        "      <th>26</th>\n",
@@ -1059,7 +1298,7 @@
        "      <td>0.020</td>\n",
        "      <td>3.400000e-11</td>\n",
        "      <td>7.94</td>\n",
-       "      <td>[7.94, 10.0]</td>\n",
+       "      <td>10.0</td>\n",
        "    </tr>\n",
        "  </tbody>\n",
        "</table>\n",
@@ -1106,34 +1345,50 @@
        "25                 0.0140              0.030                   4.700000e-11   \n",
        "26                 0.0100              0.020                   3.400000e-11   \n",
        "\n",
-       "   (range_min, range_max) (min_diameter, max_diameter)  \n",
-       "0                   .0200               [.0200, .0251]  \n",
-       "1                   .0251               [.0251, .0316]  \n",
-       "2                   .0316               [.0316, .0398]  \n",
-       "3                   .0398               [.0398, .0501]  \n",
-       "4                   .0501               [.0501, .0631]  \n",
-       "..                    ...                          ...  \n",
-       "22                   3.16                 [3.16, 3.98]  \n",
-       "23                   3.98                 [3.98, 5.01]  \n",
-       "24                   5.01                 [5.01, 6.31]  \n",
-       "25                   6.31                 [6.31, 7.94]  \n",
-       "26                   7.94                 [7.94, 10.0]  \n",
+       "   min_diameter max_diameter  \n",
+       "0         .0200        .0251  \n",
+       "1         .0251        .0316  \n",
+       "2         .0316        .0398  \n",
+       "3         .0398        .0501  \n",
+       "4         .0501        .0631  \n",
+       "..          ...          ...  \n",
+       "22         3.16         3.98  \n",
+       "23         3.98         5.01  \n",
+       "24         5.01         6.31  \n",
+       "25         6.31         7.94  \n",
+       "26         7.94         10.0  \n",
        "\n",
        "[27 rows x 13 columns]"
       ]
      },
-     "execution_count": 26,
+     "execution_count": 34,
      "metadata": {},
      "output_type": "execute_result"
     }
    ],
    "source": [
-    "df['min_diameter', 'max_diameter'] = df['range_diameter'].str.split('–', n=2)\n",
+    "diameters = df['range_diameter'].str.split('–', n=2, expand=True)\n",
+    "df['min_diameter'] = diameters[0]\n",
+    "df['max_diameter'] = diameters[1]\n",
     "df"
    ]
   },
   {
    "cell_type": "code",
+   "execution_count": 35,
+   "metadata": {
+    "ExecuteTime": {
+     "end_time": "2019-02-03T13:45:33.328240Z",
+     "start_time": "2019-02-03T13:45:33.307897Z"
+    }
+   },
+   "outputs": [],
+   "source": [
+    "df.to_parquet('data/asteroid-impact-data')"
+   ]
+  },
+  {
+   "cell_type": "code",
    "execution_count": 21,
    "metadata": {
     "ExecuteTime": {

BIN
poisson/data/asteroid-impact-data-cleaned