|
@@ -6,7 +6,7 @@
|
|
"metadata": {},
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
- "# Lab 04: Linear and logistic regressions"
|
|
|
|
|
|
+ "# Lab 03: Linear and logistic regressions"
|
|
]
|
|
]
|
|
},
|
|
},
|
|
{
|
|
{
|
|
@@ -21,9 +21,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"import pandas as pd\n",
|
|
"import pandas as pd\n",
|
|
@@ -46,9 +44,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"# load the regression task data\n",
|
|
"# load the regression task data\n",
|
|
@@ -59,9 +55,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"# Load the data into X and y data arrays\n",
|
|
"# Load the data into X and y data arrays\n",
|
|
@@ -87,22 +81,18 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"# set up folds for cross_validation\n",
|
|
"# set up folds for cross_validation\n",
|
|
"from sklearn import model_selection\n",
|
|
"from sklearn import model_selection\n",
|
|
- "folds_regr = model_selection.KFold(y_regr.size, n_folds=10, shuffle=True)"
|
|
|
|
|
|
+ "folds_regr = model_selection.KFold(n_splits=10, shuffle=True)"
|
|
]
|
|
]
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"def cross_validate_regr(design_matrix, labels, regressor, cv_folds):\n",
|
|
"def cross_validate_regr(design_matrix, labels, regressor, cv_folds):\n",
|
|
@@ -144,9 +134,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"from sklearn import linear_model\n",
|
|
"from sklearn import linear_model\n",
|
|
@@ -207,9 +195,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"# TODO"
|
|
"# TODO"
|
|
@@ -227,22 +213,18 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"# Set up folds for cross_validation\n",
|
|
"# Set up folds for cross_validation\n",
|
|
"from sklearn import model_selection\n",
|
|
"from sklearn import model_selection\n",
|
|
- "folds_clf = model_selection.StratifiedKFold(y_clf, n_folds=10, shuffle=True)"
|
|
|
|
|
|
+ "folds_clf = model_selection.StratifiedKFold(n_splits=10, shuffle=True)"
|
|
]
|
|
]
|
|
},
|
|
},
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"def cross_validate_clf(design_matrix, labels, classifier, cv_folds):\n",
|
|
"def cross_validate_clf(design_matrix, labels, classifier, cv_folds):\n",
|
|
@@ -285,9 +267,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"from sklearn import linear_model\n",
|
|
"from sklearn import linear_model\n",
|
|
@@ -312,9 +292,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"fpr_logreg, tpr_logreg, thresholds = # TODO\n",
|
|
"fpr_logreg, tpr_logreg, thresholds = # TODO\n",
|
|
@@ -347,9 +325,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"from sklearn import preprocessing\n",
|
|
"from sklearn import preprocessing\n",
|
|
@@ -380,9 +356,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"fpr_logreg_scaled, tpr_logreg_scaled, thresholds = # TODO\n",
|
|
"fpr_logreg_scaled, tpr_logreg_scaled, thresholds = # TODO\n",
|
|
@@ -414,9 +388,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"def cross_validate_clf_with_scaling(design_matrix, labels, classifier, cv_folds):\n",
|
|
"def cross_validate_clf_with_scaling(design_matrix, labels, classifier, cv_folds):\n",
|
|
@@ -456,9 +428,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"clf = linear_model.LogisticRegression(C=1e6) \n",
|
|
"clf = linear_model.LogisticRegression(C=1e6) \n",
|
|
@@ -476,9 +446,7 @@
|
|
{
|
|
{
|
|
"cell_type": "code",
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"execution_count": null,
|
|
- "metadata": {
|
|
|
|
- "collapsed": true
|
|
|
|
- },
|
|
|
|
|
|
+ "metadata": {},
|
|
"outputs": [],
|
|
"outputs": [],
|
|
"source": [
|
|
"source": [
|
|
"fpr_logreg_scaled_, tpr_logreg_scaled_, thresholds = # TODO\n",
|
|
"fpr_logreg_scaled_, tpr_logreg_scaled_, thresholds = # TODO\n",
|