6 years ago · 2972e63c6e
--- a/bayesian_log_reg/understanding-logistic-regression.ipynb
+++ b/bayesian_log_reg/understanding-logistic-regression.ipynb
--- a/data-science-tips/validate-merges.ipynb
+++ b/data-science-tips/validate-merges.ipynb
--- a/economics/^GSPC.csv
+++ b/economics/^GSPC.csv
@@ -0,0 +1,21 @@
 
				+Date,Open,High,Low,Close,Adj Close,Volume
			
 
				+2019-01-28,2644.969971,2644.969971,2624.060059,2643.850098,2643.850098,3612810000
			
 
				+2019-01-29,2644.889893,2650.929932,2631.050049,2640.000000,2640.000000,3504200000
			
 
				+2019-01-30,2653.620117,2690.439941,2648.340088,2681.050049,2681.050049,3857810000
			
 
				+2019-01-31,2685.489990,2708.949951,2678.649902,2704.100098,2704.100098,4917650000
			
 
				+2019-02-01,2702.320068,2716.659912,2696.879883,2706.530029,2706.530029,3759270000
			
 
				+2019-02-04,2706.489990,2724.989990,2698.750000,2724.870117,2724.870117,3359840000
			
 
				+2019-02-05,2728.340088,2738.979980,2724.030029,2737.699951,2737.699951,3560430000
			
 
				+2019-02-06,2735.050049,2738.080078,2724.149902,2731.610107,2731.610107,3472690000
			
 
				+2019-02-07,2717.530029,2719.320068,2687.260010,2706.050049,2706.050049,4099490000
			
 
				+2019-02-08,2692.360107,2708.070068,2681.830078,2707.879883,2707.879883,3622330000
			
 
				+2019-02-11,2712.399902,2718.050049,2703.790039,2709.800049,2709.800049,3361970000
			
 
				+2019-02-12,2722.610107,2748.189941,2722.610107,2744.729980,2744.729980,3827770000
			
 
				+2019-02-13,2750.300049,2761.850098,2748.629883,2753.030029,2753.030029,3670770000
			
 
				+2019-02-14,2743.500000,2757.899902,2731.229980,2745.729980,2745.729980,3836700000
			
 
				+2019-02-15,2760.239990,2775.659912,2760.239990,2775.600098,2775.600098,3641370000
			
 
				+2019-02-19,2769.280029,2787.330078,2767.290039,2779.760010,2779.760010,3533710000
			
 
				+2019-02-20,2779.050049,2789.879883,2774.060059,2784.699951,2784.699951,3835450000
			
 
				+2019-02-21,2780.239990,2781.580078,2764.550049,2774.879883,2774.879883,3559710000
			
 
				+2019-02-22,2780.669922,2794.199951,2779.110107,2792.669922,2792.669922,3427810000
			
 
				+2019-02-25,2804.350098,2813.489990,2794.989990,2796.110107,2796.110107,3804380000
			
--- a/economics/economics.ipynb
+++ b/economics/economics.ipynb
--- a/sp500tickers.pickle
+++ b/sp500tickers.pickle
--- a/testing-exercises.ipynb
+++ b/testing-exercises.ipynb
@@ -0,0 +1,670 @@
 
				+{
			
 
				+ "cells": [
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 1,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:38:38.465675Z",
			
 
				+     "start_time": "2019-02-23T16:38:38.422180Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "name": "stdout",
			
 
				+     "output_type": "stream",
			
 
				+     "text": [
			
 
				+      "The autoreload extension is already loaded. To reload it, use:\n",
			
 
				+      "  %reload_ext autoreload\n"
			
 
				+     ]
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/html": [
			
 
				+       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
			
 
				+      ],
			
 
				+      "text/vnd.plotly.v1+html": [
			
 
				+       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    },
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/html": [
			
 
				+       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
			
 
				+      ],
			
 
				+      "text/vnd.plotly.v1+html": [
			
 
				+       "<script type=\"text/javascript\">window.PlotlyConfig = {MathJaxConfig: 'local'};</script><script type=\"text/javascript\">if (window.MathJax) {MathJax.Hub.Config({SVG: {font: \"STIX-Web\"}});}</script><script>requirejs.config({paths: { 'plotly': ['https://cdn.plot.ly/plotly-latest.min']},});if(!window._Plotly) {require(['plotly'],function(plotly) {window._Plotly=plotly;});}</script>"
			
 
				+      ]
			
 
				+     },
			
 
				+     "metadata": {},
			
 
				+     "output_type": "display_data"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "import pandas as pd\n",
			
 
				+    "import numpy as np\n",
			
 
				+    "\n",
			
 
				+    "%load_ext autoreload\n",
			
 
				+    "%autoreload 2\n",
			
 
				+    "\n",
			
 
				+    "import sys\n",
			
 
				+    "sys.path.append('../..')\n",
			
 
				+    "\n",
			
 
				+    "# Options for pandas\n",
			
 
				+    "pd.options.display.max_columns = 20\n",
			
 
				+    "pd.options.display.max_rows = 10\n",
			
 
				+    "\n",
			
 
				+    "# Display all cell outputs\n",
			
 
				+    "from IPython.core.interactiveshell import InteractiveShell\n",
			
 
				+    "InteractiveShell.ast_node_interactivity = 'all'\n",
			
 
				+    "\n",
			
 
				+    "import plotly.plotly as py\n",
			
 
				+    "import plotly.graph_objs as go\n",
			
 
				+    "from plotly.offline import iplot, init_notebook_mode\n",
			
 
				+    "init_notebook_mode(connected=True)\n",
			
 
				+    "\n",
			
 
				+    "import cufflinks\n",
			
 
				+    "cf.go_offline(connected=True)\n",
			
 
				+    "cf.set_config_file(theme='pearl')\n"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 2,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:39:00.607978Z",
			
 
				+     "start_time": "2019-02-23T16:39:00.567876Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "(1000, 100)"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 2,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "df = pd.DataFrame(np.random.randn(1000, 100))\n",
			
 
				+    "df.shape"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 3,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:39:16.845392Z",
			
 
				+     "start_time": "2019-02-23T16:39:16.774748Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "(100, 100)"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 3,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "corrs = df.corr()\n",
			
 
				+    "corrs.shape"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 9,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:42:26.032480Z",
			
 
				+     "start_time": "2019-02-23T16:42:25.998079Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "(array([ 3,  8, 44, 45, 54, 96]), array([54, 96, 45, 44,  3,  8]))"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 9,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "threshold = -0.1\n",
			
 
				+    "direction = 'less'\n",
			
 
				+    "\n",
			
 
				+    "if direction == 'greater':\n",
			
 
				+    "    values_index = np.where(corrs > threshold)\n",
			
 
				+    "elif direction == 'less':\n",
			
 
				+    "    values_index = np.where(corrs < threshold)\n",
			
 
				+    "    \n",
			
 
				+    "values_index"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 35,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:47:56.940313Z",
			
 
				+     "start_time": "2019-02-23T16:47:56.909882Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "rows_index = values_index[0]\n",
			
 
				+    "columns_index = values_index[1]\n",
			
 
				+    "\n",
			
 
				+    "pairs = list(map(tuple, set([frozenset((x, y)) for x, y in zip(rows_index, columns_index)])))\n",
			
 
				+    "\n",
			
 
				+    "from collections import Counter\n",
			
 
				+    "\n",
			
 
				+    "# Counter(pairs)"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 36,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:47:57.429941Z",
			
 
				+     "start_time": "2019-02-23T16:47:57.397928Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "[(3, 54), (8, 96), (44, 45)]"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 36,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "pairs"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 49,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T17:04:56.074717Z",
			
 
				+     "start_time": "2019-02-23T17:04:56.041811Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "subset_df = pd.DataFrame(dict(value=corrs.values[values_index], var1=corrs.index[values_index[0]],\n",
			
 
				+    "                         var2=corrs.columns[values_index[1]]))"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 58,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-24T20:36:52.221603Z",
			
 
				+     "start_time": "2019-02-24T20:36:52.182531Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/html": [
			
 
				+       "<div>\n",
			
 
				+       "<style scoped>\n",
			
 
				+       "    .dataframe tbody tr th:only-of-type {\n",
			
 
				+       "        vertical-align: middle;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .dataframe tbody tr th {\n",
			
 
				+       "        vertical-align: top;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .dataframe thead th {\n",
			
 
				+       "        text-align: right;\n",
			
 
				+       "    }\n",
			
 
				+       "</style>\n",
			
 
				+       "<table border=\"1\" class=\"dataframe\">\n",
			
 
				+       "  <thead>\n",
			
 
				+       "    <tr style=\"text-align: right;\">\n",
			
 
				+       "      <th></th>\n",
			
 
				+       "      <th>value</th>\n",
			
 
				+       "      <th>var1</th>\n",
			
 
				+       "      <th>var2</th>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "  </thead>\n",
			
 
				+       "  <tbody>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>0</th>\n",
			
 
				+       "      <td>-0.111172</td>\n",
			
 
				+       "      <td>3</td>\n",
			
 
				+       "      <td>54</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>1</th>\n",
			
 
				+       "      <td>-0.117402</td>\n",
			
 
				+       "      <td>8</td>\n",
			
 
				+       "      <td>96</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>2</th>\n",
			
 
				+       "      <td>-0.104640</td>\n",
			
 
				+       "      <td>44</td>\n",
			
 
				+       "      <td>45</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "  </tbody>\n",
			
 
				+       "</table>\n",
			
 
				+       "</div>"
			
 
				+      ],
			
 
				+      "text/plain": [
			
 
				+       "      value  var1  var2\n",
			
 
				+       "0 -0.111172     3    54\n",
			
 
				+       "1 -0.117402     8    96\n",
			
 
				+       "2 -0.104640    44    45"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 58,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "subset_df.iloc[:int(len(subset_df)/2)]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 53,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T17:05:53.369263Z",
			
 
				+     "start_time": "2019-02-23T17:05:53.337720Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "var1  var2\n",
			
 
				+       "3     54      1\n",
			
 
				+       "8     96      1\n",
			
 
				+       "44    45      1\n",
			
 
				+       "45    44      1\n",
			
 
				+       "54    3       1\n",
			
 
				+       "96    8       1\n",
			
 
				+       "dtype: int64"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 53,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "subset_df.groupby(['var1', 'var2']).size()"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 55,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-24T14:57:30.059725Z",
			
 
				+     "start_time": "2019-02-24T14:57:30.027029Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "Index(['value', 'variable1', 'var2'], dtype='object')"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 55,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "subset_df.columns.str.replace('var1', 'variable1')"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 40,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:50:24.548345Z",
			
 
				+     "start_time": "2019-02-23T16:50:24.513301Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [],
			
 
				+   "source": [
			
 
				+    "values = []; indices = []; columns = []\n",
			
 
				+    "\n",
			
 
				+    "for pair in pairs:\n",
			
 
				+    "    indices.append(corrs.index[pair[0]])\n",
			
 
				+    "    columns.append(corrs.columns[pair[1]])\n",
			
 
				+    "    values.append(corrs.values[pair])\n",
			
 
				+    "    \n",
			
 
				+    "subset_df = pd.DataFrame(dict(value=values, var1=indices, var2=columns))"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 41,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:50:25.388032Z",
			
 
				+     "start_time": "2019-02-23T16:50:25.352969Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/html": [
			
 
				+       "<div>\n",
			
 
				+       "<style scoped>\n",
			
 
				+       "    .dataframe tbody tr th:only-of-type {\n",
			
 
				+       "        vertical-align: middle;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .dataframe tbody tr th {\n",
			
 
				+       "        vertical-align: top;\n",
			
 
				+       "    }\n",
			
 
				+       "\n",
			
 
				+       "    .dataframe thead th {\n",
			
 
				+       "        text-align: right;\n",
			
 
				+       "    }\n",
			
 
				+       "</style>\n",
			
 
				+       "<table border=\"1\" class=\"dataframe\">\n",
			
 
				+       "  <thead>\n",
			
 
				+       "    <tr style=\"text-align: right;\">\n",
			
 
				+       "      <th></th>\n",
			
 
				+       "      <th>value</th>\n",
			
 
				+       "      <th>var1</th>\n",
			
 
				+       "      <th>var2</th>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "  </thead>\n",
			
 
				+       "  <tbody>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>0</th>\n",
			
 
				+       "      <td>-0.111172</td>\n",
			
 
				+       "      <td>3</td>\n",
			
 
				+       "      <td>54</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>1</th>\n",
			
 
				+       "      <td>-0.117402</td>\n",
			
 
				+       "      <td>8</td>\n",
			
 
				+       "      <td>96</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "    <tr>\n",
			
 
				+       "      <th>2</th>\n",
			
 
				+       "      <td>-0.104640</td>\n",
			
 
				+       "      <td>44</td>\n",
			
 
				+       "      <td>45</td>\n",
			
 
				+       "    </tr>\n",
			
 
				+       "  </tbody>\n",
			
 
				+       "</table>\n",
			
 
				+       "</div>"
			
 
				+      ],
			
 
				+      "text/plain": [
			
 
				+       "      value  var1  var2\n",
			
 
				+       "0 -0.111172     3    54\n",
			
 
				+       "1 -0.117402     8    96\n",
			
 
				+       "2 -0.104640    44    45"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 41,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "subset_df"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 42,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:50:35.252569Z",
			
 
				+     "start_time": "2019-02-23T16:50:35.217497Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "-0.11117190190235929"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 42,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "corrs.loc[3, 54]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 43,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:50:55.128251Z",
			
 
				+     "start_time": "2019-02-23T16:50:55.096675Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "-0.11117190190235929"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 43,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "corrs.loc[54, 3]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 44,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:51:03.603309Z",
			
 
				+     "start_time": "2019-02-23T16:51:03.569575Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "-0.11740191658722447"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 44,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "corrs.loc[96, 8]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 45,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:51:12.504120Z",
			
 
				+     "start_time": "2019-02-23T16:51:12.472562Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "-0.10463995106844964"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 45,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "corrs.loc[44, 45]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 32,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:47:20.066448Z",
			
 
				+     "start_time": "2019-02-23T16:47:20.032246Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "-0.11117190190235929"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 32,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "corrs.values[(3, 54)]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": 14,
			
 
				+   "metadata": {
			
 
				+    "ExecuteTime": {
			
 
				+     "end_time": "2019-02-23T16:43:58.225875Z",
			
 
				+     "start_time": "2019-02-23T16:43:58.190963Z"
			
 
				+    }
			
 
				+   },
			
 
				+   "outputs": [
			
 
				+    {
			
 
				+     "data": {
			
 
				+      "text/plain": [
			
 
				+       "[(3, 54), (8, 96), (44, 45)]"
			
 
				+      ]
			
 
				+     },
			
 
				+     "execution_count": 14,
			
 
				+     "metadata": {},
			
 
				+     "output_type": "execute_result"
			
 
				+    }
			
 
				+   ],
			
 
				+   "source": [
			
 
				+    "[tuple(x) for x in set(map(frozenset, pairs))]"
			
 
				+   ]
			
 
				+  },
			
 
				+  {
			
 
				+   "cell_type": "code",
			
 
				+   "execution_count": null,
			
 
				+   "metadata": {},
			
 
				+   "outputs": [],
			
 
				+   "source": []
			
 
				+  }
			
 
				+ ],
			
 
				+ "metadata": {
			
 
				+  "hide_input": false,
			
 
				+  "kernelspec": {
			
 
				+   "display_name": "Python 3",
			
 
				+   "language": "python",
			
 
				+   "name": "python3"
			
 
				+  },
			
 
				+  "language_info": {
			
 
				+   "codemirror_mode": {
			
 
				+    "name": "ipython",
			
 
				+    "version": 3
			
 
				+   },
			
 
				+   "file_extension": ".py",
			
 
				+   "mimetype": "text/x-python",
			
 
				+   "name": "python",
			
 
				+   "nbconvert_exporter": "python",
			
 
				+   "pygments_lexer": "ipython3",
			
 
				+   "version": "3.6.5"
			
 
				+  },
			
 
				+  "toc": {
			
 
				+   "base_numbering": 1,
			
 
				+   "nav_menu": {},
			
 
				+   "number_sections": true,
			
 
				+   "sideBar": true,
			
 
				+   "skip_h1_title": false,
			
 
				+   "title_cell": "Table of Contents",
			
 
				+   "title_sidebar": "Contents",
			
 
				+   "toc_cell": false,
			
 
				+   "toc_position": {},
			
 
				+   "toc_section_display": true,
			
 
				+   "toc_window_display": false
			
 
				+  },
			
 
				+  "varInspector": {
			
 
				+   "cols": {
			
 
				+    "lenName": 16,
			
 
				+    "lenType": 16,
			
 
				+    "lenVar": 40
			
 
				+   },
			
 
				+   "kernels_config": {
			
 
				+    "python": {
			
 
				+     "delete_cmd_postfix": "",
			
 
				+     "delete_cmd_prefix": "del ",
			
 
				+     "library": "var_list.py",
			
 
				+     "varRefreshCmd": "print(var_dic_list())"
			
 
				+    },
			
 
				+    "r": {
			
 
				+     "delete_cmd_postfix": ") ",
			
 
				+     "delete_cmd_prefix": "rm(",
			
 
				+     "library": "var_list.r",
			
 
				+     "varRefreshCmd": "cat(var_dic_list()) "
			
 
				+    }
			
 
				+   },
			
 
				+   "types_to_exclude": [
			
 
				+    "module",
			
 
				+    "function",
			
 
				+    "builtin_function_or_method",
			
 
				+    "instance",
			
 
				+    "_Feature"
			
 
				+   ],
			
 
				+   "window_display": false
			
 
				+  }
			
 
				+ },
			
 
				+ "nbformat": 4,
			
 
				+ "nbformat_minor": 2
			
 
				+}
			
--- a/time_work/Datetime
+++ b/time_work/Datetime
--- a/time_work/__pycache__/get_datetime_info.cpython-36.pyc
+++ b/time_work/__pycache__/get_datetime_info.cpython-36.pyc
--- a/time_work/building_one.csv
+++ b/time_work/building_one.csv
@@ -1,3 +0,0 @@
 
				-version https://git-lfs.github.com/spec/v1
			
 
				-oid sha256:018718445791268012ecf6d65f38321fcf0cb897ff2a958bb98a534b3670c454
			
 
				-size 3918175
			
--- a/time_work/building_one_with_tz.csv
+++ b/time_work/building_one_with_tz.csv
@@ -1,3 +0,0 @@
 
				-version https://git-lfs.github.com/spec/v1
			
 
				-oid sha256:328919221962eed3cbf1e555e37203f63e6dc02a649b0d8faf053da458633ce3
			
 
				-size 4121697
			
--- a/time_work/get_datetime_info.py
+++ b/time_work/get_datetime_info.py
@@ -1,72 +0,0 @@
 
				-import pandas as pd
			
 
				-
			
 
				-def get_datetime_info(df, date_col, timezone=None, drop=False):
			
 
				-    """
			
 
				-    Extract date and time information from a column in dataframe
			
 
				-    and add as new columns. Time zones are converted to local time if specified.
			
 
				-
			
 
				-    :param df: pandas dataframe
			
 
				-    :param date_col: string representing the column containing datetimes. Can also be 'index' to use the index
			
 
				-    :param timezone: string for the time zone. If passed, times are converted to local
			
 
				-    :param drop: boolean indicating whether the original column should be dropped from the df
			
 
				-
			
 
				-    :return df: dataframe with added date and time columns
			
 
				-    """
			
 
				-    df = df.copy()
			
 
				-
			
 
				-    # Extract the field
			
 
				-    if date_col == 'index':
			
 
				-        fld = df.index.to_series()
			
 
				-        prefix = df.index.name if df.index.name is not None else 'datetime'
			
 
				-    else:
			
 
				-        fld = df[date_col]
			
 
				-        prefix = date_col
			
 
				-
			
 
				-    # Make sure the field type is a datetime
			
 
				-    if timezone is not None:
			
 
				-        fld = pd.to_datetime(fld, utc=True)
			
 
				-    else:
			
 
				-        fld = pd.to_datetime(fld)
			
 
				-
			
 
				-    # Convert to local time and then remove time zone information
			
 
				-    if timezone:
			
 
				-        df['utc'] = fld.dt.tz_convert('UTC').dt.tz_localize(None)
			
 
				-        fld = fld.dt.tz_convert(timezone).dt.tz_localize(None)
			
 
				-        df['local'] = fld
			
 
				-
			
 
				-    # Used for naming the columns
			
 
				-    prefix += '_'
			
 
				-
			
 
				-    # Basic attributes
			
 
				-    attr = ['Year', 'Month', 'Week', 'Day', 'Dayofweek', 'Dayofyear']
			
 
				-
			
 
				-    # Additional attributes to extract
			
 
				-    attr = attr + [
			
 
				-        'Is_month_end', 'Is_month_start', 'Is_quarter_end', 'Is_quarter_start',
			
 
				-        'Is_year_end', 'Is_year_start'
			
 
				-    ]
			
 
				-
			
 
				-    # Time attributes
			
 
				-    attr = attr + ['Hour', 'Minute', 'Second']
			
 
				-
			
 
				-    # Iterate through each attribute and add it to the dataframe
			
 
				-    for n in attr:
			
 
				-        df[prefix + n] = getattr(fld.dt, n.lower())
			
 
				-
			
 
				-    # Add fractional time of day
			
 
				-    df[prefix + 'FracDay'] = (df[prefix + 'Hour'] / 24) + (
			
 
				-        df[prefix + 'Minute'] / 60 / 24) + (
			
 
				-            df[prefix + 'Second'] / 60 / 60 / 24)
			
 
				-
			
 
				-    # Add fractional time of week
			
 
				-    df[prefix + 'FracWeek'] = ((df[prefix + 'Dayofweek'] * 24) +
			
 
				-                               (df[prefix + 'FracDay'] * 24)) / (7 * 24)
			
 
				-
			
 
				-    # Drop the column if specified
			
 
				-    if drop:
			
 
				-        if date_col == 'index':
			
 
				-            df = df.reset_index().iloc[:, 1:].copy()
			
 
				-        else:
			
 
				-            df = df.drop(date_col, axis=1)
			
 
				-
			
 
				-    return df