Browse Source

Changed evaluate method to use validation set

Will Koehrsen 7 years ago
parent
commit
a3afbca554

File diff suppressed because it is too large
+ 192 - 177
stocker/Stocker Prediction Usage.ipynb


BIN
stocker/__pycache__/stocker.cpython-36.pyc


BIN
stocker/images/amazn_short_forecast.png


BIN
stocker/images/amzn_changepoints_graphdefault.png


BIN
stocker/images/amzn_default_evaluation.png


BIN
stocker/images/amzn_long_forecast.png


BIN
stocker/images/amzn_playing_negative.png


BIN
stocker/images/amzn_playingresults.png


BIN
stocker/images/amzn_refined_evaluation.png


BIN
stocker/images/amzn_training_testingcurves.png


BIN
stocker/images/amzn_training_testingrefined.png


BIN
stocker/images/amzn_uncertainty.png


+ 38 - 7
stocker/stocker.py

@@ -810,18 +810,47 @@ class Stocker():
         plt.xlabel('Date'); plt.title('Predictions for %s' % self.symbol);
         plt.show()
         
-    def changepoint_prior_validation(self, changepoint_priors = [0.001, 0.05, 0.1, 0.2]):
+    def changepoint_prior_validation(self, start_date=None, end_date=None,changepoint_priors = [0.001, 0.05, 0.1, 0.2]):
+
+
+        # Default start date is two years before end of data
+        # Default end date is one year before end of data
+        if start_date is None:
+            start_date = self.max_date - pd.DateOffset(years=2)
+        if end_date is None:
+            end_date = self.max_date - pd.DateOffset(years=1)
+            
+        # Convert to pandas datetime for indexing dataframe
+        start_date = pd.to_datetime(start_date)
+        end_date = pd.to_datetime(end_date)
+        
+        if end_date.date() < start_date.date():
+            print('End Date must be later than start date.')
+            return
+        
+        # Check to make sure dates are in the data
+        if (start_date not in list(self.stock['Date'])):
+            print('Start Date not in data (either out of range or not a trading day.)')
+            return
+        elif (end_date not in list(self.stock['Date'])):
+            print('End Date not in data (either out of range or not a trading day.)')
+            return
                                
         # Select self.training_years number of years
-        train = self.stock[(self.stock['Date'] < (max(self.stock['Date']) - pd.DateOffset(years=1)).date()) & 
-                           (self.stock['Date'] > (max(self.stock['Date']) - pd.DateOffset(years=self.training_years + 1)).date())]
+        train = self.stock[(self.stock['Date'] > (start_date - pd.DateOffset(years=self.training_years)).date()) & 
+        (self.stock['Date'] < start_date.date())]
         
-        # Testing data from past year used for answers
-        test = self.stock[(self.stock['Date'] >= (max(self.stock['Date']) - pd.DateOffset(years=1)).date())]
+        # Testing data is specified by range
+        test = self.stock[(self.stock['Date'] >= start_date.date()) & (self.stock['Date'] <= end_date.date())]
+
         eval_days = (max(test['Date']).date() - min(test['Date']).date()).days
         
         results = pd.DataFrame(0, index = list(range(len(changepoint_priors))), 
-                               columns = ['cps', 'train_err', 'train_range', 'test_err', 'test_range'])
+            columns = ['cps', 'train_err', 'train_range', 'test_err', 'test_range'])
+
+        print('\nValidation Range {} to {}.\n'.format(min(test['Date']).date(),
+            max(test['Date']).date()))
+            
         
         # Iterate through all the changepoints and make models
         for i, prior in enumerate(changepoint_priors):
@@ -852,8 +881,10 @@ class Stocker():
             
             results.ix[i, 'test_err'] = avg_test_error
             results.ix[i, 'test_range'] = avg_test_uncertainty
-            
+
         print(results)
+
+
         
         # Plot of training and testing average errors
         self.reset_plot()