|
@@ -0,0 +1,368 @@
|
|
|
+{
|
|
|
+ "cells": [
|
|
|
+ {
|
|
|
+ "cell_type": "markdown",
|
|
|
+ "metadata": {},
|
|
|
+ "source": [
|
|
|
+ "# Introduction: Testing Cyclical Encoding of Features for Machine Learning"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 7,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "import pandas as pd\n",
|
|
|
+ "import numpy as np\n",
|
|
|
+ "\n",
|
|
|
+ "import glob"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/plain": [
|
|
|
+ "40"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 8,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# glob returns paths in arbitrary order; sort them so that positional\n",
+ "# indexing into this list picks the same file on every machine and run.\n",
+ "building_data_files = sorted(glob.glob('data/building*'))\n",
+ "len(building_data_files)"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>temperature</th>\n",
|
|
|
+ " <th>energy</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>timestamp</th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:00:00</th>\n",
|
|
|
+ " <td>56.240300</td>\n",
|
|
|
+ " <td>1.682686</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:15:00</th>\n",
|
|
|
+ " <td>56.087501</td>\n",
|
|
|
+ " <td>2.086212</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:30:00</th>\n",
|
|
|
+ " <td>56.213232</td>\n",
|
|
|
+ " <td>1.687880</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:45:00</th>\n",
|
|
|
+ " <td>56.400049</td>\n",
|
|
|
+ " <td>1.926518</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 05:00:00</th>\n",
|
|
|
+ " <td>56.592497</td>\n",
|
|
|
+ " <td>1.922459</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " temperature energy\n",
|
|
|
+ "timestamp \n",
|
|
|
+ "2016-09-18 04:00:00 56.240300 1.682686\n",
|
|
|
+ "2016-09-18 04:15:00 56.087501 2.086212\n",
|
|
|
+ "2016-09-18 04:30:00 56.213232 1.687880\n",
|
|
|
+ "2016-09-18 04:45:00 56.400049 1.926518\n",
|
|
|
+ "2016-09-18 05:00:00 56.592497 1.922459"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 10,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "name": "stdout",
|
|
|
+ "output_type": "stream",
|
|
|
+ "text": [
|
|
|
+ "<class 'pandas.core.frame.DataFrame'>\n",
|
|
|
+ "DatetimeIndex: 36960 entries, 2016-09-18 04:00:00 to 2017-10-08 03:45:00\n",
|
|
|
+ "Data columns (total 2 columns):\n",
|
|
|
+ "temperature 36960 non-null float64\n",
|
|
|
+ "energy 36960 non-null float64\n",
|
|
|
+ "dtypes: float64(2)\n",
|
|
|
+ "memory usage: 866.2 KB\n"
|
|
|
+ ]
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "# index_col=0 uses the file's first column as the initial index; set_index then\n",
+ "# replaces it with the parsed 'timestamp' column.\n",
+ "data = pd.read_csv(building_data_files[10], parse_dates=['timestamp'], index_col=0).set_index('timestamp')\n",
+ "# Only a cell's last expression is rendered by default, so show the preview explicitly.\n",
+ "display(data.head())\n",
+ "data.info()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 15,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": [
|
|
|
+ "from sklearn.base import BaseEstimator, TransformerMixin\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "class DateTimeFeatures(BaseEstimator, TransformerMixin):\n",
+ "    \"\"\"Add plain numeric date/time columns derived from the frame's index.\n",
+ "\n",
+ "    Stateless transformer: ``fit`` learns nothing. ``transform`` reads\n",
+ "    ``hour``, ``minute`` and ``dayofyear`` from ``X.index``, so the index\n",
+ "    must expose those attributes (e.g. a pandas ``DatetimeIndex``).\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self):\n",
+ "        pass\n",
+ "\n",
+ "    def fit(self, X, y=None):\n",
+ "        \"\"\"No-op fit; returns ``self`` so the step can be used in a Pipeline.\"\"\"\n",
+ "        return self\n",
+ "\n",
+ "    def transform(self, X, y=None):\n",
+ "        \"\"\"Return a copy of ``X`` with ``time_of_day`` and ``day_of_year`` added.\n",
+ "\n",
+ "        ``time_of_day`` is fractional hours (``hour + minute / 60``) and\n",
+ "        ``day_of_year`` is ``X.index.dayofyear``. ``y`` is ignored.\n",
+ "        \"\"\"\n",
+ "        field = X.index\n",
+ "        # Work on a copy so the caller's frame is not modified and re-running\n",
+ "        # the cell always starts from the same input.\n",
+ "        X = X.copy()\n",
+ "        X[\"time_of_day\"] = field.hour + field.minute / 60\n",
+ "        X[\"day_of_year\"] = field.dayofyear\n",
+ "        return X\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "class CyclicalDateTimeFeatures(BaseEstimator, TransformerMixin):\n",
+ "    \"\"\"Add sine/cosine encodings of ``time_of_day`` and ``day_of_year``.\n",
+ "\n",
+ "    Stateless transformer: ``fit`` learns nothing. ``transform`` expects the\n",
+ "    input to already contain ``time_of_day`` and ``day_of_year`` columns.\n",
+ "    \"\"\"\n",
+ "\n",
+ "    def __init__(self):\n",
+ "        pass\n",
+ "\n",
+ "    def fit(self, X, y=None):\n",
+ "        \"\"\"No-op fit; returns ``self`` so the step can be used in a Pipeline.\"\"\"\n",
+ "        return self\n",
+ "\n",
+ "    def transform(self, X, y=None):\n",
+ "        \"\"\"Return a copy of ``X`` with four sin/cos columns added.\n",
+ "\n",
+ "        ``time_of_day`` is encoded with period 24 and ``day_of_year`` with\n",
+ "        period 366. ``y`` is ignored.\n",
+ "        \"\"\"\n",
+ "        # Copy first so the caller's frame is left untouched.\n",
+ "        X = X.copy()\n",
+ "        X[\"sin_time_of_day\"], X[\"cos_time_of_day\"] = _cyclical_encoding(\n",
+ "            X[\"time_of_day\"], period=24\n",
+ "        )\n",
+ "        X[\"sin_day_of_year\"], X[\"cos_day_of_year\"] = _cyclical_encoding(\n",
+ "            X[\"day_of_year\"], period=366\n",
+ "        )\n",
+ "        return X\n",
|
|
|
+ "\n",
|
|
|
+ "\n",
|
|
|
+ "def _cyclical_encoding(series, period):\n",
+ "    \"\"\"Project ``series`` onto a circle whose circumference is ``period``.\n",
+ "\n",
+ "    Returns the pair ``(sin, cos)`` of the angle ``2 * pi * series / period``.\n",
+ "    \"\"\"\n",
+ "    angle = 2 * np.pi * series / period\n",
+ "    sine, cosine = np.sin(angle), np.cos(angle)\n",
+ "    return sine, cosine"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [
|
|
|
+ {
|
|
|
+ "data": {
|
|
|
+ "text/html": [
|
|
|
+ "<div>\n",
|
|
|
+ "<style scoped>\n",
|
|
|
+ " .dataframe tbody tr th:only-of-type {\n",
|
|
|
+ " vertical-align: middle;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe tbody tr th {\n",
|
|
|
+ " vertical-align: top;\n",
|
|
|
+ " }\n",
|
|
|
+ "\n",
|
|
|
+ " .dataframe thead th {\n",
|
|
|
+ " text-align: right;\n",
|
|
|
+ " }\n",
|
|
|
+ "</style>\n",
|
|
|
+ "<table border=\"1\" class=\"dataframe\">\n",
|
|
|
+ " <thead>\n",
|
|
|
+ " <tr style=\"text-align: right;\">\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th>temperature</th>\n",
|
|
|
+ " <th>energy</th>\n",
|
|
|
+ " <th>time_of_day</th>\n",
|
|
|
+ " <th>day_of_year</th>\n",
|
|
|
+ " <th>sin_time_of_day</th>\n",
|
|
|
+ " <th>cos_time_of_day</th>\n",
|
|
|
+ " <th>sin_day_of_year</th>\n",
|
|
|
+ " <th>cos_day_of_year</th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>timestamp</th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " <th></th>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </thead>\n",
|
|
|
+ " <tbody>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:00:00</th>\n",
|
|
|
+ " <td>56.240300</td>\n",
|
|
|
+ " <td>1.682686</td>\n",
|
|
|
+ " <td>4.00</td>\n",
|
|
|
+ " <td>262</td>\n",
|
|
|
+ " <td>0.866025</td>\n",
|
|
|
+ " <td>0.500000</td>\n",
|
|
|
+ " <td>-0.977064</td>\n",
|
|
|
+ " <td>-0.212947</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:15:00</th>\n",
|
|
|
+ " <td>56.087501</td>\n",
|
|
|
+ " <td>2.086212</td>\n",
|
|
|
+ " <td>4.25</td>\n",
|
|
|
+ " <td>262</td>\n",
|
|
|
+ " <td>0.896873</td>\n",
|
|
|
+ " <td>0.442289</td>\n",
|
|
|
+ " <td>-0.977064</td>\n",
|
|
|
+ " <td>-0.212947</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:30:00</th>\n",
|
|
|
+ " <td>56.213232</td>\n",
|
|
|
+ " <td>1.687880</td>\n",
|
|
|
+ " <td>4.50</td>\n",
|
|
|
+ " <td>262</td>\n",
|
|
|
+ " <td>0.923880</td>\n",
|
|
|
+ " <td>0.382683</td>\n",
|
|
|
+ " <td>-0.977064</td>\n",
|
|
|
+ " <td>-0.212947</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 04:45:00</th>\n",
|
|
|
+ " <td>56.400049</td>\n",
|
|
|
+ " <td>1.926518</td>\n",
|
|
|
+ " <td>4.75</td>\n",
|
|
|
+ " <td>262</td>\n",
|
|
|
+ " <td>0.946930</td>\n",
|
|
|
+ " <td>0.321439</td>\n",
|
|
|
+ " <td>-0.977064</td>\n",
|
|
|
+ " <td>-0.212947</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " <tr>\n",
|
|
|
+ " <th>2016-09-18 05:00:00</th>\n",
|
|
|
+ " <td>56.592497</td>\n",
|
|
|
+ " <td>1.922459</td>\n",
|
|
|
+ " <td>5.00</td>\n",
|
|
|
+ " <td>262</td>\n",
|
|
|
+ " <td>0.965926</td>\n",
|
|
|
+ " <td>0.258819</td>\n",
|
|
|
+ " <td>-0.977064</td>\n",
|
|
|
+ " <td>-0.212947</td>\n",
|
|
|
+ " </tr>\n",
|
|
|
+ " </tbody>\n",
|
|
|
+ "</table>\n",
|
|
|
+ "</div>"
|
|
|
+ ],
|
|
|
+ "text/plain": [
|
|
|
+ " temperature energy time_of_day day_of_year \\\n",
|
|
|
+ "timestamp \n",
|
|
|
+ "2016-09-18 04:00:00 56.240300 1.682686 4.00 262 \n",
|
|
|
+ "2016-09-18 04:15:00 56.087501 2.086212 4.25 262 \n",
|
|
|
+ "2016-09-18 04:30:00 56.213232 1.687880 4.50 262 \n",
|
|
|
+ "2016-09-18 04:45:00 56.400049 1.926518 4.75 262 \n",
|
|
|
+ "2016-09-18 05:00:00 56.592497 1.922459 5.00 262 \n",
|
|
|
+ "\n",
|
|
|
+ " sin_time_of_day cos_time_of_day sin_day_of_year \\\n",
|
|
|
+ "timestamp \n",
|
|
|
+ "2016-09-18 04:00:00 0.866025 0.500000 -0.977064 \n",
|
|
|
+ "2016-09-18 04:15:00 0.896873 0.442289 -0.977064 \n",
|
|
|
+ "2016-09-18 04:30:00 0.923880 0.382683 -0.977064 \n",
|
|
|
+ "2016-09-18 04:45:00 0.946930 0.321439 -0.977064 \n",
|
|
|
+ "2016-09-18 05:00:00 0.965926 0.258819 -0.977064 \n",
|
|
|
+ "\n",
|
|
|
+ " cos_day_of_year \n",
|
|
|
+ "timestamp \n",
|
|
|
+ "2016-09-18 04:00:00 -0.212947 \n",
|
|
|
+ "2016-09-18 04:15:00 -0.212947 \n",
|
|
|
+ "2016-09-18 04:30:00 -0.212947 \n",
|
|
|
+ "2016-09-18 04:45:00 -0.212947 \n",
|
|
|
+ "2016-09-18 05:00:00 -0.212947 "
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ "execution_count": 16,
|
|
|
+ "metadata": {},
|
|
|
+ "output_type": "execute_result"
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "source": [
|
|
|
+ "from sklearn.pipeline import Pipeline\n",
|
|
|
+ "\n",
|
|
|
+ "# Neither step learns anything in fit, but fit_transform is used so the\n",
+ "# pipeline is fitted before it transforms, as scikit-learn expects.\n",
+ "transforms = Pipeline(\n",
+ "    steps=[\n",
+ "        (\"date_time_features\", DateTimeFeatures()),\n",
+ "        (\"cyclical_date_time_features\", CyclicalDateTimeFeatures()),\n",
+ "    ]\n",
+ ")\n",
+ "\n",
+ "transformed_data = transforms.fit_transform(data)\n",
+ "transformed_data.head()"
|
|
|
+ ]
|
|
|
+ },
|
|
|
+ {
|
|
|
+ "cell_type": "code",
|
|
|
+ "execution_count": null,
|
|
|
+ "metadata": {},
|
|
|
+ "outputs": [],
|
|
|
+ "source": []
|
|
|
+ }
|
|
|
+ ],
|
|
|
+ "metadata": {
|
|
|
+ "kernelspec": {
|
|
|
+ "display_name": "Python 3",
|
|
|
+ "language": "python",
|
|
|
+ "name": "python3"
|
|
|
+ },
|
|
|
+ "language_info": {
|
|
|
+ "codemirror_mode": {
|
|
|
+ "name": "ipython",
|
|
|
+ "version": 3
|
|
|
+ },
|
|
|
+ "file_extension": ".py",
|
|
|
+ "mimetype": "text/x-python",
|
|
|
+ "name": "python",
|
|
|
+ "nbconvert_exporter": "python",
|
|
|
+ "pygments_lexer": "ipython3",
|
|
|
+ "version": "3.7.3"
|
|
|
+ }
|
|
|
+ },
|
|
|
+ "nbformat": 4,
|
|
|
+ "nbformat_minor": 2
|
|
|
+}
|