{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2019-02-21T21:34:45.977558Z",
"start_time": "2019-02-21T21:34:45.936495Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"The autoreload extension is already loaded. To reload it, use:\n",
" %reload_ext autoreload\n"
]
},
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
""
],
"text/vnd.plotly.v1+html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import pandas as pd\n",
"import numpy as np\n",
"\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"import sys\n",
"sys.path.append('../..')\n",
"\n",
"# Options for pandas\n",
"pd.options.display.max_columns = 20\n",
"pd.options.display.max_rows = 10\n",
"\n",
"# Display all cell outputs\n",
"from IPython.core.interactiveshell import InteractiveShell\n",
"InteractiveShell.ast_node_interactivity = 'all'\n",
"\n",
"import plotly.plotly as py\n",
"import plotly.graph_objs as go\n",
"from plotly.offline import iplot, init_notebook_mode\n",
"init_notebook_mode(connected=True)\n",
"\n",
"import cufflinks\n",
"cf.go_offline(connected=True)\n",
"cf.set_config_file(theme='pearl')\n"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2019-02-21T21:38:38.419483Z",
"start_time": "2019-02-21T21:38:38.378325Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 1 | \n",
" 5 | \n",
"
\n",
" \n",
" 1 | \n",
" 2 | \n",
" 6 | \n",
"
\n",
" \n",
" 2 | \n",
" 3 | \n",
" 7 | \n",
"
\n",
" \n",
" 3 | \n",
" 4 | \n",
" 7 | \n",
"
\n",
" \n",
" 4 | \n",
" 5 | \n",
" 8 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b\n",
"0 1 5\n",
"1 2 6\n",
"2 3 7\n",
"3 4 7\n",
"4 5 8"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 8 | \n",
"
\n",
" \n",
" 1 | \n",
" 6 | \n",
" 8 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" 9 | \n",
"
\n",
" \n",
" 3 | \n",
" 8 | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a c\n",
"0 5 8\n",
"1 6 8\n",
"2 7 9\n",
"3 8 1\n",
"4 8 2"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_left = pd.DataFrame(dict(a=[1, 2, 3, 4, 5], b = [5, 6, 7, 7, 8]))\n",
"df_right = pd.DataFrame(dict(a=[5, 6, 7, 8, 8], c = [8, 8, 9, 1, 2]))\n",
"\n",
"df_left\n",
"df_right"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2019-02-21T21:41:24.757480Z",
"start_time": "2019-02-21T21:41:24.717637Z"
}
},
"outputs": [
{
"ename": "MergeError",
"evalue": "Merge keys are not unique in right dataset; not a one-to-one merge",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mMergeError\u001b[0m Traceback (most recent call last)",
"\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 2\u001b[0m \u001b[0;31m# Validate a one to one merge\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0mvalidate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m'1:1'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 4\u001b[0;31m how='right')\n\u001b[0m",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/frame.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(self, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 5368\u001b[0m \u001b[0mright_on\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_on\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mleft_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mleft_index\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5369\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msuffixes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 5370\u001b[0;31m copy=copy, indicator=indicator, validate=validate)\n\u001b[0m\u001b[1;32m 5371\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 5372\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mround\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mdecimals\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m0\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36mmerge\u001b[0;34m(left, right, how, on, left_on, right_on, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 55\u001b[0m \u001b[0mright_index\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mright_index\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msort\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msort\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0msuffixes\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0msuffixes\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 56\u001b[0m \u001b[0mcopy\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mcopy\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mindicator\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mindicator\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 57\u001b[0;31m validate=validate)\n\u001b[0m\u001b[1;32m 58\u001b[0m \u001b[0;32mreturn\u001b[0m \u001b[0mop\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 59\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m__init__\u001b[0;34m(self, left, right, how, on, left_on, right_on, axis, left_index, right_index, sort, suffixes, copy, indicator, validate)\u001b[0m\n\u001b[1;32m 573\u001b[0m \u001b[0;31m# are in fact unique.\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 574\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mvalidate\u001b[0m \u001b[0;32mis\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 575\u001b[0;31m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_validate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mvalidate\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 576\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 577\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_result\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;32m/usr/local/lib/python3.6/site-packages/pandas/core/reshape/merge.py\u001b[0m in \u001b[0;36m_validate\u001b[0;34m(self, validate)\u001b[0m\n\u001b[1;32m 1002\u001b[0m \" not a one-to-one merge\")\n\u001b[1;32m 1003\u001b[0m \u001b[0;32melif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0mright_unique\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m-> 1004\u001b[0;31m raise MergeError(\"Merge keys are not unique in right dataset;\"\n\u001b[0m\u001b[1;32m 1005\u001b[0m \" not a one-to-one merge\")\n\u001b[1;32m 1006\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mMergeError\u001b[0m: Merge keys are not unique in right dataset; not a one-to-one merge"
]
}
],
"source": [
"df_left.merge(df_right, on ='a',\n",
" # Validate a one to one merge\n",
" validate = '1:1',\n",
" how='right')"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2019-02-21T21:41:15.282234Z",
"start_time": "2019-02-21T21:41:15.242064Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" a | \n",
" b | \n",
" c | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 5 | \n",
" 8.0 | \n",
" 8 | \n",
"
\n",
" \n",
" 1 | \n",
" 6 | \n",
" NaN | \n",
" 8 | \n",
"
\n",
" \n",
" 2 | \n",
" 7 | \n",
" NaN | \n",
" 9 | \n",
"
\n",
" \n",
" 3 | \n",
" 8 | \n",
" NaN | \n",
" 1 | \n",
"
\n",
" \n",
" 4 | \n",
" 8 | \n",
" NaN | \n",
" 2 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" a b c\n",
"0 5 8.0 8\n",
"1 6 NaN 8\n",
"2 7 NaN 9\n",
"3 8 NaN 1\n",
"4 8 NaN 2"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df_left.merge(df_right, on='a', \n",
" # Validate a one to many merge\n",
" validate='1:m',\n",
" how='right')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 2
}