"""Sliding-window tumor prediction over HDF5 patch archives.

Reads image patches from one HDF5 group, optionally stain-color-normalizes
them, scores every sliding-window position with a pre-trained Keras model,
and saves the stacked per-patch score grids as a .npy heatmap file.

Environment variables consumed at import time:
    NSLOTS     -- number of CPU threads for the TensorFlow session.
    PRED_SIZE  -- edge length (pixels) of the square patches fed to the model.
    MCN        -- path of the trained Keras model (HDF5).
Consumed at run time (under ``__main__``):
    COLOR_NORM -- "True" to enable stain color normalization.
    LOG_DIR    -- directory for the color-normalization failure log.
    HOSTNAME, SGE_TASK_ID -- echoed for cluster-job bookkeeping.
"""

import os
import os.path as osp
import sys
import time
from datetime import datetime

import h5py
import numpy as np
from tqdm import tqdm

from keras.models import load_model
# NOTE: the original aliased the backend as `keras`; kept for compatibility
# with the `keras.tf.Session(...)` usage below (old-style Keras backend).
from keras import backend as keras

import staintools
import stainNorm_Macenko
import stainNorm_Reinhard
import stainNorm_Vahadane

# Size the TensorFlow thread pools from the scheduler's slot count.
cores = int(os.environ['NSLOTS'])
keras.set_session(keras.tf.Session(config=keras.tf.ConfigProto(
    intra_op_parallelism_threads=cores,
    inter_op_parallelism_threads=cores)))

pred_size = int(os.environ['PRED_SIZE'])  # edge length of a prediction patch
stride = 16                               # sliding-window step in pixels
mcn = os.environ['MCN']                   # path to the trained model file
model = load_model(mcn, compile=False)


def main():
    '''
    Four command line arguments:

    - $DIR          directory where the HDF5 file is located
    - $HDF5_FILE    HDF5 file name, like test_001.h5
    - $BASENAME     subgroup suffix, like 1, 2, ...
    - $HEATMAP_DIR  heatmap output directory
    '''
    # Echo command line arguments and cluster-job identity for the log.
    for arg in sys.argv[1:]:
        print(arg)
    print(os.environ['HOSTNAME'])
    print(os.environ['SGE_TASK_ID'])
    x = int(os.environ['SGE_TASK_ID'])
    print("x = ", x)

    # Renamed from `dir` to avoid shadowing the builtin.
    hdf5_dir = sys.argv[1]
    hdf5_file = sys.argv[2]
    grp_suffix = sys.argv[3]
    heatmap_dir = sys.argv[4]
    print("dir = " + hdf5_dir)
    print("hdf5_file = " + hdf5_file)
    print("grp_suffix = " + grp_suffix)
    print("heatmap_dir = " + heatmap_dir)

    start_time = time.time()
    get_patches(hdf5_dir, hdf5_file, grp_suffix, heatmap_dir, verbose=True)
    print("--- %s seconds ---" % (time.time() - start_time))
# end of main ()


###########################################################################
#                    HDF5-specific helper functions                       #
###########################################################################

def get_patches(db_location, file_name, grp_suffix, heatmap_dir, verbose=False):
    """
    Load the numpy patches for one group of an HDF5 file, predict each
    patch with the sliding-window method, and save the stacked scores to
    ``<heatmap_dir>/<file_name stem>_t<grp_suffix>.npy``.

    Uses the module-level ``color_norm``, ``fit``, ``log_file``,
    ``current_time``, ``model``, ``pred_size`` and ``stride`` globals
    configured under ``__main__``.

    :param string db_location: directory containing the HDF5 file.
    :param string file_name: HDF5 file name (assumed to end in '.h5').
    :param string grp_suffix: suffix n; the group read is '/t' + n.
    :param string heatmap_dir: output directory for the .npy score array.
    :param bool verbose: print a confirmation line when done.
    """
    grp = 't' + grp_suffix
    coords = []
    # Open read-only inside a context manager: nothing is written back to the
    # archive (the original opened 'r+', needlessly taking a write lock, and
    # could leak the handle on an exception).
    with h5py.File(osp.join(db_location, file_name), 'r') as h5:
        group = h5['/' + grp]
        # Direct lookups raise a clear KeyError when a dataset is missing;
        # the original iterated group.items() and could leave these unbound.
        new_patches = np.array(group['img']).astype('uint8')
        new_coords = np.array(group['coord']).astype('int64')

    print("color_norm is: ", color_norm)
    output_preds_final_grp = []
    for patch in new_patches:
        # Optional stain color normalization (best effort; see color_norm_pred).
        if color_norm:
            patch_normalized = color_norm_pred(patch, fit, log_file, current_time)
        else:
            patch_normalized = patch
        output_preds_final = patch_pred_collect_from_slide_window(
            pred_size, patch_normalized, model, stride)
        output_preds_final_grp.append(output_preds_final)

    output_preds_final_grp = np.array(output_preds_final_grp)
    # file_name[:-3] strips the '.h5' extension.
    np.save(osp.join(heatmap_dir, '%s_%s' % (file_name[:-3], grp)),
            output_preds_final_grp)
    print("Group " + grp)

    for coord in new_coords:
        coords.append(coord)

    if verbose:
        print("[py-wsi] loaded from", file_name, grp)
# end of get_patches ()


def patch_pred_collect_from_slide_window(pred_size, fullimage, model, stride):
    """
    Create an nxn matrix that includes the predictions for all the windows
    extracted from one big patch by sliding-window sampling.

    :param integer pred_size: the size of the windows to be extracted and
        predicted as tumor or normal.
    :param nxn matrix fullimage: the image used for sliding-window
        prediction; larger than the window to be predicted to avoid edge
        effects.
    :param object model: the trained network used to score the windows.
    :return: an nxn matrix of tumor scores for one patch.
    """
    output_preds_final = []
    for x in tqdm(range(0, pred_size, stride)):
        # One batch per row of window positions.
        patchforprediction_batch = [
            fullimage[x:x + pred_size, y:y + pred_size]
            for y in range(0, pred_size, stride)
        ]
        preds = predict_batch_from_model(np.array(patchforprediction_batch), model)
        output_preds_final.append(preds)
    return np.array(output_preds_final)
# end of patch_pred_collect_from_slide_window


def predict_batch_from_model(patches, model):
    """
    There are two values for each prediction: one is the score of a normal
    patch; the other is the score of a tumor patch. This function selects
    the tumor score.

    :param array patches: a batch of image patches to be predicted.
    :param object model: the trained neural network.
    :return: a list of scores, one per image patch; each score is the
        probability that the image is a tumor image.
    """
    predictions = model.predict(patches)
    # Column 1 holds the tumor-class probability.
    predictions = predictions[:, 1]
    return predictions
# end of predict_batch_from_model


def color_normalization(template_image_path, color_norm_method):
    """
    Build and fit a stain normalizer from a template image.

    :param string template_image_path: the path of the image used as a
        template.
    :param string color_norm_method: one of 'Reinhard', 'Macenko' or
        'Vahadane'.
    :return: the fitted normalizer object.
    :raises ValueError: for an unrecognized method name (the original code
        fell through and raised an opaque UnboundLocalError instead).
    """
    template_image = staintools.read_image(template_image_path)
    standardizer = staintools.LuminosityStandardizer.standardize(template_image)
    if color_norm_method == 'Reinhard':
        color_normalizer = stainNorm_Reinhard.Normalizer()
    elif color_norm_method == 'Macenko':
        color_normalizer = stainNorm_Macenko.Normalizer()
    elif color_norm_method == 'Vahadane':
        color_normalizer = staintools.StainNormalizer(method='vahadane')
    else:
        raise ValueError('unknown color normalization method: %s'
                         % color_norm_method)
    color_normalizer.fit(standardizer)
    return color_normalizer
# end of color_normalization


def color_norm_pred(image_patch, fit, log_file, current_time):
    """
    Perform color normalization based on the configured method.

    :param matrix image_patch: the image to be color normalized.
    :param object fit: the fitted normalizer (from color_normalization).
    :param file log_file: open writable file; failures are appended here.
    :param string current_time: timestamp string recorded with each failure.
    :return matrix: the normalized image.
    :note: if color normalization fails, the original image patch is used
        instead, and the event is written to the log file (best effort).
    """
    img = image_patch
    img_norm = img
    try:
        img_standard = staintools.LuminosityStandardizer.standardize(img)
        img_norm = fit.transform(img_standard)
    except Exception as e:
        log_file.write(str(image_patch) + ';' + str(e) + ';' + current_time)
    return img_norm
# end of color_norm_pred


if __name__ == "__main__":
    current_time = datetime.now().strftime("%d-%m-%Y_%I-%M-%S_%p")
    color_norm_methods = ['Vahadane', 'Reinhard', 'Macenko']
    template_image_path = '/home/weizhe.li/tumor_st.png'

    # "True" (exact string) enables stain normalization; anything else disables.
    color_norm = os.environ['COLOR_NORM'] == "True"
    if color_norm:
        print("color_norm is: ", color_norm)
        color_norm_method = color_norm_methods[0]  # Vahadane
        fit = color_normalization(template_image_path, color_norm_method)
    else:
        print("color_norm is: ", color_norm)
        color_norm_method = 'baseline'
        fit = None

    log_path = os.environ['LOG_DIR']
    # Context manager fixes the original's leaked (never-closed) log handle,
    # guaranteeing buffered failure records are flushed.
    with open('%s/%s.txt' % (log_path, color_norm_method), 'w') as log_file:
        main()