浏览代码

Starting work

WillKoehrsen 6 年之前
父节点
当前提交
201d557a9a
共有 41 个文件被更改，包括 368 次插入和 0 次删除
  1. 368 0
      cyclical-features/Testing Cyclical Encoding.ipynb
  2. 0 0
      cyclical-features/data/building_10_energy_data.csv
  3. 0 0
      cyclical-features/data/building_11_energy_data.csv
  4. 0 0
      cyclical-features/data/building_12_energy_data.csv
  5. 0 0
      cyclical-features/data/building_13_energy_data.csv
  6. 0 0
      cyclical-features/data/building_14_energy_data.csv
  7. 0 0
      cyclical-features/data/building_15_energy_data.csv
  8. 0 0
      cyclical-features/data/building_16_energy_data.csv
  9. 0 0
      cyclical-features/data/building_17_energy_data.csv
  10. 0 0
      cyclical-features/data/building_18_energy_data.csv
  11. 0 0
      cyclical-features/data/building_19_energy_data.csv
  12. 0 0
      cyclical-features/data/building_1_energy_data.csv
  13. 0 0
      cyclical-features/data/building_20_energy_data.csv
  14. 0 0
      cyclical-features/data/building_21_energy_data.csv
  15. 0 0
      cyclical-features/data/building_22_energy_data.csv
  16. 0 0
      cyclical-features/data/building_23_energy_data.csv
  17. 0 0
      cyclical-features/data/building_24_energy_data.csv
  18. 0 0
      cyclical-features/data/building_25_energy_data.csv
  19. 0 0
      cyclical-features/data/building_26_energy_data.csv
  20. 0 0
      cyclical-features/data/building_27_energy_data.csv
  21. 0 0
      cyclical-features/data/building_28_energy_data.csv
  22. 0 0
      cyclical-features/data/building_29_energy_data.csv
  23. 0 0
      cyclical-features/data/building_2_energy_data.csv
  24. 0 0
      cyclical-features/data/building_30_energy_data.csv
  25. 0 0
      cyclical-features/data/building_31_energy_data.csv
  26. 0 0
      cyclical-features/data/building_32_energy_data.csv
  27. 0 0
      cyclical-features/data/building_33_energy_data.csv
  28. 0 0
      cyclical-features/data/building_34_energy_data.csv
  29. 0 0
      cyclical-features/data/building_35_energy_data.csv
  30. 0 0
      cyclical-features/data/building_36_energy_data.csv
  31. 0 0
      cyclical-features/data/building_37_energy_data.csv
  32. 0 0
      cyclical-features/data/building_38_energy_data.csv
  33. 0 0
      cyclical-features/data/building_39_energy_data.csv
  34. 0 0
      cyclical-features/data/building_3_energy_data.csv
  35. 0 0
      cyclical-features/data/building_40_energy_data.csv
  36. 0 0
      cyclical-features/data/building_4_energy_data.csv
  37. 0 0
      cyclical-features/data/building_5_energy_data.csv
  38. 0 0
      cyclical-features/data/building_6_energy_data.csv
  39. 0 0
      cyclical-features/data/building_7_energy_data.csv
  40. 0 0
      cyclical-features/data/building_8_energy_data.csv
  41. 0 0
      cyclical-features/data/building_9_energy_data.csv

+ 368 - 0
cyclical-features/Testing Cyclical Encoding.ipynb

@@ -0,0 +1,368 @@
+{
+ "cells": [
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "# Introduction: Testing Cyclical Encoding of Features for Machine Learning"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "\n",
+    "import glob"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 8,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "40"
+      ]
+     },
+     "execution_count": 8,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "building_data_files = glob.glob('data/building*')\n",
+    "len(building_data_files)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 10,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>temperature</th>\n",
+       "      <th>energy</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>timestamp</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:00:00</th>\n",
+       "      <td>56.240300</td>\n",
+       "      <td>1.682686</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:15:00</th>\n",
+       "      <td>56.087501</td>\n",
+       "      <td>2.086212</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:30:00</th>\n",
+       "      <td>56.213232</td>\n",
+       "      <td>1.687880</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:45:00</th>\n",
+       "      <td>56.400049</td>\n",
+       "      <td>1.926518</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 05:00:00</th>\n",
+       "      <td>56.592497</td>\n",
+       "      <td>1.922459</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     temperature    energy\n",
+       "timestamp                                 \n",
+       "2016-09-18 04:00:00    56.240300  1.682686\n",
+       "2016-09-18 04:15:00    56.087501  2.086212\n",
+       "2016-09-18 04:30:00    56.213232  1.687880\n",
+       "2016-09-18 04:45:00    56.400049  1.926518\n",
+       "2016-09-18 05:00:00    56.592497  1.922459"
+      ]
+     },
+     "execution_count": 10,
+     "metadata": {},
+     "output_type": "execute_result"
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "<class 'pandas.core.frame.DataFrame'>\n",
+      "DatetimeIndex: 36960 entries, 2016-09-18 04:00:00 to 2017-10-08 03:45:00\n",
+      "Data columns (total 2 columns):\n",
+      "temperature    36960 non-null float64\n",
+      "energy         36960 non-null float64\n",
+      "dtypes: float64(2)\n",
+      "memory usage: 866.2 KB\n"
+     ]
+    }
+   ],
+   "source": [
+    "data = pd.read_csv(building_data_files[10], parse_dates=['timestamp'], index_col=0).set_index('timestamp')\n",
+    "data.head()\n",
+    "data.info()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 15,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "from sklearn.base import BaseEstimator, TransformerMixin\n",
+    "\n",
+    "\n",
+    "class DateTimeFeatures(BaseEstimator, TransformerMixin):\n",
+    "    def __init__(self):\n",
+    "        pass\n",
+    "\n",
+    "    def fit(self, X, y=None):\n",
+    "        return self\n",
+    "\n",
+    "    def transform(self, X, y=None):\n",
+    "        field = X.index\n",
+    "        X[\"time_of_day\"] = field.hour + field.minute / 60\n",
+    "        X[\"day_of_year\"] = field.dayofyear\n",
+    "        return X\n",
+    "\n",
+    "\n",
+    "class CyclicalDateTimeFeatures(BaseEstimator, TransformerMixin):\n",
+    "    def __init__(self):\n",
+    "        pass\n",
+    "\n",
+    "    def fit(self, X, y=None):\n",
+    "        return self\n",
+    "\n",
+    "    def transform(self, X, y=None):\n",
+    "        X[\"sin_time_of_day\"], X[\"cos_time_of_day\"] = _cyclical_encoding(\n",
+    "            X[\"time_of_day\"], period=24\n",
+    "        )\n",
+    "        X[\"sin_day_of_year\"], X[\"cos_day_of_year\"] = _cyclical_encoding(\n",
+    "            X[\"day_of_year\"], period=366\n",
+    "        )\n",
+    "        return X\n",
+    "\n",
+    "\n",
+    "def _cyclical_encoding(series, period):\n",
+    "    base = 2 * np.pi * series / period\n",
+    "    return np.sin(base), np.cos(base)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 16,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>temperature</th>\n",
+       "      <th>energy</th>\n",
+       "      <th>time_of_day</th>\n",
+       "      <th>day_of_year</th>\n",
+       "      <th>sin_time_of_day</th>\n",
+       "      <th>cos_time_of_day</th>\n",
+       "      <th>sin_day_of_year</th>\n",
+       "      <th>cos_day_of_year</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>timestamp</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:00:00</th>\n",
+       "      <td>56.240300</td>\n",
+       "      <td>1.682686</td>\n",
+       "      <td>4.00</td>\n",
+       "      <td>262</td>\n",
+       "      <td>0.866025</td>\n",
+       "      <td>0.500000</td>\n",
+       "      <td>-0.977064</td>\n",
+       "      <td>-0.212947</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:15:00</th>\n",
+       "      <td>56.087501</td>\n",
+       "      <td>2.086212</td>\n",
+       "      <td>4.25</td>\n",
+       "      <td>262</td>\n",
+       "      <td>0.896873</td>\n",
+       "      <td>0.442289</td>\n",
+       "      <td>-0.977064</td>\n",
+       "      <td>-0.212947</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:30:00</th>\n",
+       "      <td>56.213232</td>\n",
+       "      <td>1.687880</td>\n",
+       "      <td>4.50</td>\n",
+       "      <td>262</td>\n",
+       "      <td>0.923880</td>\n",
+       "      <td>0.382683</td>\n",
+       "      <td>-0.977064</td>\n",
+       "      <td>-0.212947</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 04:45:00</th>\n",
+       "      <td>56.400049</td>\n",
+       "      <td>1.926518</td>\n",
+       "      <td>4.75</td>\n",
+       "      <td>262</td>\n",
+       "      <td>0.946930</td>\n",
+       "      <td>0.321439</td>\n",
+       "      <td>-0.977064</td>\n",
+       "      <td>-0.212947</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>2016-09-18 05:00:00</th>\n",
+       "      <td>56.592497</td>\n",
+       "      <td>1.922459</td>\n",
+       "      <td>5.00</td>\n",
+       "      <td>262</td>\n",
+       "      <td>0.965926</td>\n",
+       "      <td>0.258819</td>\n",
+       "      <td>-0.977064</td>\n",
+       "      <td>-0.212947</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "                     temperature    energy  time_of_day  day_of_year  \\\n",
+       "timestamp                                                              \n",
+       "2016-09-18 04:00:00    56.240300  1.682686         4.00          262   \n",
+       "2016-09-18 04:15:00    56.087501  2.086212         4.25          262   \n",
+       "2016-09-18 04:30:00    56.213232  1.687880         4.50          262   \n",
+       "2016-09-18 04:45:00    56.400049  1.926518         4.75          262   \n",
+       "2016-09-18 05:00:00    56.592497  1.922459         5.00          262   \n",
+       "\n",
+       "                     sin_time_of_day  cos_time_of_day  sin_day_of_year  \\\n",
+       "timestamp                                                                \n",
+       "2016-09-18 04:00:00         0.866025         0.500000        -0.977064   \n",
+       "2016-09-18 04:15:00         0.896873         0.442289        -0.977064   \n",
+       "2016-09-18 04:30:00         0.923880         0.382683        -0.977064   \n",
+       "2016-09-18 04:45:00         0.946930         0.321439        -0.977064   \n",
+       "2016-09-18 05:00:00         0.965926         0.258819        -0.977064   \n",
+       "\n",
+       "                     cos_day_of_year  \n",
+       "timestamp                             \n",
+       "2016-09-18 04:00:00        -0.212947  \n",
+       "2016-09-18 04:15:00        -0.212947  \n",
+       "2016-09-18 04:30:00        -0.212947  \n",
+       "2016-09-18 04:45:00        -0.212947  \n",
+       "2016-09-18 05:00:00        -0.212947  "
+      ]
+     },
+     "execution_count": 16,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from sklearn.pipeline import Pipeline\n",
+    "\n",
+    "transforms = Pipeline(\n",
+    "    steps=[\n",
+    "        (\"date_time_features\", DateTimeFeatures()),\n",
+    "        (\"cyclical_date_time_features\", CyclicalDateTimeFeatures()),\n",
+    "    ]\n",
+    ")\n",
+    "\n",
+    "transformed_data = transforms.transform(data)\n",
+    "transformed_data.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}

cyclical-features/data/building_10.csv → cyclical-features/data/building_10_energy_data.csv


cyclical-features/data/building_11.csv → cyclical-features/data/building_11_energy_data.csv


cyclical-features/data/building_12.csv → cyclical-features/data/building_12_energy_data.csv


cyclical-features/data/building_13.csv → cyclical-features/data/building_13_energy_data.csv


cyclical-features/data/building_14.csv → cyclical-features/data/building_14_energy_data.csv


cyclical-features/data/building_15.csv → cyclical-features/data/building_15_energy_data.csv


cyclical-features/data/building_16.csv → cyclical-features/data/building_16_energy_data.csv


cyclical-features/data/building_17.csv → cyclical-features/data/building_17_energy_data.csv


cyclical-features/data/building_18.csv → cyclical-features/data/building_18_energy_data.csv


cyclical-features/data/building_19.csv → cyclical-features/data/building_19_energy_data.csv


cyclical-features/data/building_1.csv → cyclical-features/data/building_1_energy_data.csv


cyclical-features/data/building_20.csv → cyclical-features/data/building_20_energy_data.csv


cyclical-features/data/building_21.csv → cyclical-features/data/building_21_energy_data.csv


cyclical-features/data/building_22.csv → cyclical-features/data/building_22_energy_data.csv


cyclical-features/data/building_23.csv → cyclical-features/data/building_23_energy_data.csv


cyclical-features/data/building_24.csv → cyclical-features/data/building_24_energy_data.csv


cyclical-features/data/building_25.csv → cyclical-features/data/building_25_energy_data.csv


cyclical-features/data/building_26.csv → cyclical-features/data/building_26_energy_data.csv


cyclical-features/data/building_27.csv → cyclical-features/data/building_27_energy_data.csv


cyclical-features/data/building_28.csv → cyclical-features/data/building_28_energy_data.csv


cyclical-features/data/building_29.csv → cyclical-features/data/building_29_energy_data.csv


cyclical-features/data/building_2.csv → cyclical-features/data/building_2_energy_data.csv


cyclical-features/data/building_30.csv → cyclical-features/data/building_30_energy_data.csv


cyclical-features/data/building_31.csv → cyclical-features/data/building_31_energy_data.csv


cyclical-features/data/building_32.csv → cyclical-features/data/building_32_energy_data.csv


cyclical-features/data/building_33.csv → cyclical-features/data/building_33_energy_data.csv


cyclical-features/data/building_34.csv → cyclical-features/data/building_34_energy_data.csv


cyclical-features/data/building_35.csv → cyclical-features/data/building_35_energy_data.csv


cyclical-features/data/building_36.csv → cyclical-features/data/building_36_energy_data.csv


cyclical-features/data/building_37.csv → cyclical-features/data/building_37_energy_data.csv


cyclical-features/data/building_38.csv → cyclical-features/data/building_38_energy_data.csv


cyclical-features/data/building_39.csv → cyclical-features/data/building_39_energy_data.csv


cyclical-features/data/building_3.csv → cyclical-features/data/building_3_energy_data.csv


cyclical-features/data/building_40.csv → cyclical-features/data/building_40_energy_data.csv


cyclical-features/data/building_4.csv → cyclical-features/data/building_4_energy_data.csv


cyclical-features/data/building_5.csv → cyclical-features/data/building_5_energy_data.csv


cyclical-features/data/building_6.csv → cyclical-features/data/building_6_energy_data.csv


cyclical-features/data/building_7.csv → cyclical-features/data/building_7_energy_data.csv


cyclical-features/data/building_8.csv → cyclical-features/data/building_8_energy_data.csv


cyclical-features/data/building_9.csv → cyclical-features/data/building_9_energy_data.csv