# Copyright (c) 2012, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS "AS IS" AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import cv2
import numpy as np
import tensorflow as tf


def dummy(name, label, filenames, labels, i):
    """No-op augmentation hook; returns the running counter unchanged."""
    return i


def load_image(name, interpolation=cv2.INTER_AREA):
    """Read an image, convert BGR -> RGB, resize to 256x256, and return a
    random 232x232 crop (a light translation augmentation)."""
    img = cv2.imread(name, 1)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    resized = cv2.resize(img, (256, 256), interpolation=interpolation)
    start_pt = np.random.randint(24, size=2)
    end_pt = start_pt + [232, 232]
    return resized[start_pt[0]:end_pt[0], start_pt[1]:end_pt[1]]
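# A minimal sketch of an augmentation hook with the signature load_dataset()
# expects for augment_fn: it writes one randomly-cropped copy of each frame
# into the Dataset/Aug/ directory that load_dataset() creates, and registers
# the copy under the same label. This is illustrative only (the original
# project may use a different augment_fn); the file-naming scheme is an
# assumption.
def crop_augment(name, label, filenames, labels, i):
    import os
    img = load_image(name)  # random 232x232 crop of the source frame
    out = os.path.join('Dataset/Aug', '%d.jpg' % i)  # hypothetical naming
    cv2.imwrite(out, cv2.cvtColor(img, cv2.COLOR_RGB2BGR))
    filenames.append(out)
    labels.append(label)
    return i + 1  # advance the counter used to name augmented files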
def load_dataset(augment_fn=dummy):
    """Walk Dataset/tcdat/, match each IR frame to its storm's wind-speed
    track in atlantic_storms.csv, and label it with one of 8 intensity
    classes. Returns shuffled, aligned (filenames, labels) lists."""
    import os
    import random
    from datetime import datetime

    import pandas as pd
    from scipy import interpolate

    filenames = []
    labels = []
    i = 0  # running counter handed to augment_fn

    # Best-track data: one row per storm fix (date, name, wind speed, ...).
    df = pd.read_csv('atlantic_storms.csv')

    base_dir = 'Dataset/tcdat/'

    # Directory for augmented copies written by augment_fn.
    os.makedirs('Dataset/Aug', exist_ok=True)

    for j in os.listdir(base_dir):
        for k in os.listdir(base_dir + j):
            for l in os.listdir(base_dir + j + '/' + k):
                print('.', end='')
                # The year directory is named like 'tc12' (-> 2012) and the
                # storm directory carries the name after a 4-char prefix.
                start_year = '20' + j[2:] + '-01-01'
                end_year = '20' + j[2:] + '-12-31'
                cyc_name = l[4:]
                mask = ((df['date'] > start_year) & (df['date'] <= end_year)
                        & (df['name'] == cyc_name))
                cyc_pd = df.loc[mask]
                first = datetime.strptime(cyc_pd['date'].iloc[0], '%Y-%m-%d %H:%M:%S')
                last = datetime.strptime(cyc_pd['date'].iloc[-1], '%Y-%m-%d %H:%M:%S')

                # Fit a spline to wind speed vs. seconds since the first fix
                # so intensity can be estimated at each frame's timestamp.
                text_time = []
                text_vel = []
                for q in range(len(cyc_pd['date'])):
                    text_vel.append(cyc_pd['maximum_sustained_wind_knots'].iloc[q])
                    text_time.append((datetime.strptime(cyc_pd['date'].iloc[q],
                                      '%Y-%m-%d %H:%M:%S') - first).total_seconds())
                tck = interpolate.splrep(text_time, text_vel)

                frames = os.listdir(base_dir + j + '/' + k + '/' + l + '/ir/geo/1km')
                frames.sort()
                for m in frames:
                    try:
                        # Frame names start with a YYYYMMDD.HHMM timestamp.
                        time = datetime.strptime(m[:13], '%Y%m%d.%H%M')
                        name = base_dir + j + '/' + k + '/' + l + '/ir/geo/1km/' + m
                        if first < time < last:
                            val = int(interpolate.splev((time - first).total_seconds(), tck))
                            filenames.append(name)
                            # Bin the interpolated wind speed (knots) into 8
                            # intensity classes.
                            if val <= 20:
                                labels.append(0)
                            elif val <= 33:
                                labels.append(1)
                            elif val <= 63:
                                labels.append(2)
                            elif val <= 82:
                                labels.append(3)
                            elif val <= 95:
                                labels.append(4)
                            elif val <= 112:
                                labels.append(5)
                            elif val <= 136:
                                labels.append(6)
                            else:
                                labels.append(7)
                            i = augment_fn(name, labels[-1], filenames, labels, i)
                    except (ValueError, IndexError):
                        # Skip frames whose names do not parse as timestamps.
                        pass
    print('')
    print(len(filenames))

    # Shuffle filenames and labels together so pairs stay aligned.
    c = list(zip(filenames, labels))
    random.shuffle(c)
    filenames, labels = zip(*c)
    return list(filenames), list(labels)


# Build a validation set holding `val` (default 10%) of the data, with an
# equal share drawn from each of the 8 classes (1.25% of the total apiece).
def make_test_set(filenames, labels, val=0.1):
    import random
    from collections import Counter

    classes = 8
    quota = [int(val * len(filenames) / classes)] * classes
    print(quota)

    val_filenames = []
    val_labels = []
    # Walk backwards so the del calls do not shift indices we have yet to
    # visit (iterating forward while deleting skips every other candidate).
    for i in range(len(filenames) - 1, -1, -1):
        if quota[labels[i]] > 0:
            val_filenames.append(filenames[i])
            val_labels.append(labels[i])
            quota[labels[i]] -= 1
            del filenames[i]
            del labels[i]

    # Shuffle the held-out pairs together.
    c = list(zip(val_filenames, val_labels))
    random.shuffle(c)
    val_filenames, val_labels = zip(*c)

    print(Counter(labels))  # class balance of what remains
    return list(val_filenames), list(val_labels)


def parse_function(filename, label):
    image_string = tf.io.read_file(filename)
    # Don't use tf.image.decode_image, or the output shape will be undefined.
    image = tf.image.decode_jpeg(image_string, channels=3)
    # Convert to float values in [0, 1].
    image = tf.image.convert_image_dtype(image, tf.float32)
    # Resize to the network's input size.
    image = tf.image.resize(image, [232, 232])
    return image, label


def make_dataset(train_in, test_in, val_in):
    """Each argument is a (filenames, labels, batch_size) triple; returns the
    corresponding tf.data pipelines."""
    train = tf.data.Dataset.from_tensor_slices((train_in[0], train_in[1]))
    train = train.shuffle(len(train_in[0]))
    train = train.map(parse_function, num_parallel_calls=8)
    train = train.batch(train_in[2])
    train = train.prefetch(1)

    test = tf.data.Dataset.from_tensor_slices((test_in[0], test_in[1]))
    test = test.shuffle(len(test_in[0]))
    test = test.map(parse_function, num_parallel_calls=8)
    test = test.batch(test_in[2])
    test = test.prefetch(1)

    # The validation set is not shuffled so evaluation order stays stable.
    val = tf.data.Dataset.from_tensor_slices((val_in[0], val_in[1]))
    val = val.map(parse_function, num_parallel_calls=8)
    val = val.batch(val_in[2])
    val = val.prefetch(1)

    return train, test, val
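
# A minimal end-to-end sketch of how these helpers fit together. The batch
# size and the 80/20 train/test split below are assumptions for illustration;
# the original callers may choose differently.
if __name__ == '__main__':
    filenames, labels = load_dataset()
    # Carve out a class-balanced validation set (mutates filenames/labels).
    val_filenames, val_labels = make_test_set(filenames, labels, val=0.1)
    split = int(0.8 * len(filenames))  # assumed 80/20 train/test split
    batch_size = 32  # assumed batch size
    train_ds, test_ds, val_ds = make_dataset(
        (filenames[:split], labels[:split], batch_size),
        (filenames[split:], labels[split:], batch_size),
        (val_filenames, val_labels, batch_size),
    )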