| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394 | 
import os
import urllib
import urllib.request
from zipfile import ZipFile

import cudf
import pandas as pd
import tqdm
 
# Module-level handle for the currently-active download progress bar.
# Reset to None between downloads so each file gets its own bar.
pbar = None


def show_progress(block_num, block_size, total_size):
    """urllib ``reporthook`` that renders download progress with tqdm.

    Called by ``urllib.request.urlretrieve`` after each block is fetched.

    Parameters
    ----------
    block_num : int
        Number of blocks transferred so far.
    block_size : int
        Size of each block in bytes.
    total_size : int
        Total size of the download in bytes.
    """
    global pbar
    if pbar is None:
        # Lazily create the bar on the first callback, once total_size is known.
        pbar = tqdm.tqdm(total=total_size / 1024, unit='kB')
    downloaded = block_num * block_size
    if downloaded < total_size:
        pbar.update(block_size / 1024)
    else:
        # Fix: credit whatever remains so the bar reaches 100% before closing;
        # the original closed without counting the final (possibly partial) block.
        remaining = total_size / 1024 - pbar.n
        if remaining > 0:
            pbar.update(remaining)
        pbar.close()
        pbar = None
 
-         
 
def fetch_bike_dataset(years, data_dir="data"):
    """Download Capital Bikeshare trip data for the given years and extract it.

    Archives already present in ``data_dir`` are not downloaded again.

    Parameters
    ----------
    years : iterable of int
        Years to fetch, e.g. ``[2011, 2012]``.
    data_dir : str, optional
        Directory where the zip archives are stored and extracted.

    Returns
    -------
    list of str
        Paths of the files extracted from the downloaded archives.
    """
    base_url = "https://s3.amazonaws.com/capitalbikeshare-data/"
    # Fix: create the target directory if needed (the original crashed when it
    # was missing; fetch_weather_dataset already handles this case).
    os.makedirs(data_dir, exist_ok=True)
    files = []
    for year in years:
        filename = f"{year}-capitalbikeshare-tripdata.zip"
        filepath = os.path.join(data_dir, filename)
        if not os.path.isfile(filepath):
            # reporthook drives the module-level tqdm bar in show_progress.
            urllib.request.urlretrieve(base_url + filename, filepath,
                                       reporthook=show_progress)
        with ZipFile(filepath) as myzip:
            files += [os.path.join(data_dir, name) for name in myzip.namelist()]
            myzip.extractall(data_dir)

    print("Files extracted: " + str(files))
    return files
 
def fetch_weather_dataset(data_dir='data'):
    """Download the UCI bike-sharing dataset and derive a raw-looking weather CSV.

    The UCI data ships pre-normalized; this reverses the scaling to recover
    physically meaningful values, providing a more interesting exercise.

    Parameters
    ----------
    data_dir : str, optional
        Directory used for the download, extraction and output file.

    Returns
    -------
    str
        Path of the written ``weather2011-2012.csv`` file.
    """
    base_url = 'https://archive.ics.uci.edu/ml/machine-learning-databases/00275/'
    fn = 'Bike-Sharing-Dataset.zip'

    if not os.path.isdir(data_dir):
        os.makedirs(data_dir)
    filepath = os.path.join(data_dir, fn)

    if not os.path.isfile(filepath):
        print(f'Downloading {base_url+fn} to {filepath}')
        urllib.request.urlretrieve(base_url + fn, filepath)

    with ZipFile(filepath) as myzip:
        files = [os.path.join(data_dir, name) for name in myzip.namelist()]
        myzip.extractall(data_dir)

    # Fix: select hour.csv by name instead of relying on its position in the
    # archive listing (the original used the fragile files[2]).
    hourly_file = next(f for f in files if f.endswith('hour.csv'))

    weather = cudf.read_csv(hourly_file, parse_dates=[1])
    out = cudf.DataFrame()
    # Combine the date column with the hour-of-day offset into one timestamp.
    out['Hour'] = weather['dteday'] + cudf.Series(
        pd.to_timedelta(weather['hr'].to_pandas(), unit='h'))
    # Undo the dataset's min-max normalization to recover physical units.
    out['Temperature'] = weather['temp'] * 47.0 - 8
    out['Relative Temperature'] = weather['atemp'] * 66.0 - 16
    out['Rel. humidity'] = (weather['hum'] * 100).astype('int')
    out['Wind'] = weather['windspeed'] * 67

    # Spell out weather categories:
    # 1: Clear, Few clouds, Partly cloudy
    # 2: Mist + Cloudy, Mist + Broken clouds, Mist + Few clouds, Mist
    # 3: Light Snow, Light Rain + Thunderstorm + Scattered clouds
    # 4: Heavy Rain + Ice Pellets + Thunderstorm + Mist, Snow + Fog
    # Fix: use .loc mask assignment — the original's chained indexing
    # (out['Weather'][mask] = ...) may silently write to a copy.
    out['Weather'] = 'Clear or Partly cloudy'
    out.loc[weather['weathersit'] == 2, 'Weather'] = 'Mist or Cloudy'
    out.loc[weather['weathersit'] == 3, 'Weather'] = 'Light Rain or Snow, Thunderstorm'
    out.loc[weather['weathersit'] == 4, 'Weather'] = 'Heavy Rain, Snow + Fog, Ice'

    filepath = os.path.join(data_dir, 'weather2011-2012.csv')
    out.to_csv(filepath, index=False)
    print("Weather file saved at ", filepath)
    return filepath
 
- def read_bike_data_pandas(files):
 
-     # Reads a list of files and concatenates them
 
-     tables = []
 
-     for filename in files:
 
-         tmp_df = pd.read_csv(filename, usecols=[1], parse_dates=['Start date'])
 
-         tables.append(tmp_df)
 
-     merged_df = pd.concat(tables, ignore_index=True)
 
 
  |