"""
data:
    CT : used
    mask : used
    labels(txt): not used
    labelsJson : not used
"""


# U-Net Structure:
# 1->64->64...............................................................->(*/)128->64->64=>2 # 1: input image, 2: output segmentation map
# (-+)64->128->128...........................................->(*/)256->128->128
# (-+)128->256->256.......................->(*/)512->256->256
# (-+)256->512->512..->(*/)1024->512->512 # 1024: 512+512
# (-+)512->1024->1024
# ->  : conv 3x3, ReLU
# ..->: copy & crop
# (-+): max pool 2x2
# (*/): up-conv 2x2
# =>  : conv 1x1

'''
Issues:
1. Alternatively, split 90% of preprocessing_tmp1 out via "data_generation" to use as the training set, and store it as a dataset in an outer directory
    ----> Check how the test images are preprocessed later; if the processing is the same, move the dataset out to the outer directory
'''

from keras._tf_keras import keras  # CPU - keras > 3.*
from keras._tf_keras.keras.layers import *  # CPU - keras > 3.*
from keras._tf_keras.keras.preprocessing.image import (
    ImageDataGenerator,
)  # CPU - keras > 3.*

# from keras.layers import *  # GPU - keras > 2.*
# from keras.callbacks import ModelCheckpoint  # GPU - keras > 2.*
# from keras.preprocessing.image import ImageDataGenerator  # GPU - keras > 2.*

from keras import Model
from keras import backend as K

import os
import numpy as np
# from data_preparation import draw_image
import matplotlib.pyplot as plt
import cv2


# img & mask
# 3. data augmentation: (tensorflow.keras.preprocessing.image)
#    [1] define an image generator  -> ImageDataGenerator()
#    [2] image data augmentation    -> flow_from_directory()
#    [3] image normalization
#    Questions:
#    [1] First convert .nii -> png/json/txt, then apply further Keras data augmentation.
#        Open question: once images/masks are augmented, do the matching json/txt files also need to change? ---> ???
#    [2] Can TensorFlow and PyTorch be used together? ---> yes
#        model             -> YOLO, which uses PyTorch
#        data augmentation -> TensorFlow/Keras
#    [3] A generator with featurewise options enabled must be fit() on sample data first, otherwise Keras warns:
#        F:\AI_Outils\Anaconda\1\envs\opencv_CPU\Lib\site-packages\keras\src\legacy\preprocessing\image.py:1263: UserWarning: This ImageDataGenerator specifies `featurewise_center`, but it hasn't been fit on any training data. Fit it first by calling `.fit(numpy_data)`.
#        (here the warning was actually triggered by passing the config dict positionally, which set `featurewise_center` to a truthy value; fixed below with `**generator_args`)
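
# Minimal sketch of point [3] above (the helper name and sample data are ours,
# not part of this pipeline): statistics-based options such as
# `featurewise_center` need a `.fit()` call before the generator is used,
# which is exactly what the UserWarning asks for.
def fit_featurewise_generator(sample_images):
    """Hypothetical helper: fit featurewise statistics, then return the generator."""
    gen = ImageDataGenerator(featurewise_center=True, featurewise_std_normalization=True)
    gen.fit(sample_images)  # sample_images: float array of shape (N, H, W, C)
    return gen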


# data augmentation for train
def train_generator(dataset_path, subset):
    data_path = os.path.join(dataset_path, subset)
    data_pre_path = os.path.join(dataset_path, f"{subset}_generator")
    img_png_path = os.path.join(data_pre_path, "images")
    mask_png_path = os.path.join(data_pre_path, "masks")

    for path in (data_pre_path, img_png_path, mask_png_path):
        os.makedirs(path, exist_ok=True)

    # 3.1 define an image generator: performs random transformations on the input
    generator_args = dict(
        rotation_range=0.1,  # degrees
        width_shift_range=0.05,
        height_shift_range=0.05,
        shear_range=0.05,
        zoom_range=0.05,
        horizontal_flip=False,
        vertical_flip=False,
    )
    # NOTE: unpack with **; passing the dict positionally would assign it to
    # `featurewise_center` (truthy) and trigger the UserWarning quoted above
    generator_image = ImageDataGenerator(**generator_args)
    generator_mask = ImageDataGenerator(**generator_args)

    # 3.2 further data augmentation for image & mask; the shared `seed` keeps
    # both streams synchronized so every image stays aligned with its mask
    generation_image = generator_image.flow_from_directory(
        directory=data_path,
        classes=["images"],
        class_mode=None,
        color_mode="grayscale",
        target_size=(512, 512),
        batch_size=2,
        save_to_dir=img_png_path,
        # save_prefix='ct_',
        seed=123,
    )
    generation_mask = generator_mask.flow_from_directory(
        directory=data_path,
        classes=["masks"],
        class_mode=None,
        color_mode="grayscale",
        target_size=(512, 512),
        batch_size=2,
        save_to_dir=mask_png_path,
        # save_prefix='mask_',
        seed=123,
    )
    generation = zip(generation_image, generation_mask)

    i = 0
    # 3.3 image normalization (image -> scaled to [0, 1], mask -> binarized to {0, 1})
    for image, mask in generation:
        '''
        i = i + 1

        # output image data to TXT
        arr = np.array(image[0][:, :, 0])
        np.savetxt("array_0.txt", arr)
        print(f"image: min: {np.nanmin(arr)}, max: {np.nanmax(arr)}.")

        # output normalized image data to TXT
        arr = np.array(normalization(image)[0][:, :, 0])
        np.savetxt("array_1.txt", arr)
        print(
            f"normalization_image: min: {np.nanmin(arr)}, max: {np.nanmax(arr)}."
        )

        print(image.shape, mask.shape)  # (2, 512, 512, 1) (2, 512, 512, 1) --> batch_size=2

        # visualization
        data_img_slices = [
            image[0][:, :, 0],
            normalization(image)[0][:, :, 0],
            mask[0][:, :, 0],
            image[1][:, :, 0],
            normalization(image)[1][:, :, 0],
            mask[1][:, :, 0],
        ]
        draw_image(data_img_slices, 1, 6, None)

        if i == 1:
            break
        '''
        # masks are loaded as 0-255 grayscale, so binarize them to {0, 1} to
        # match the sigmoid output and the binary_crossentropy loss
        yield (normalization(image), binarization(mask))


def binarization(data):
    """
    Binarization: converts data to only two values, e.g. 0 & 1
    Purpose: to highlight certain features in the image
    Processing: x'[x/255.0 > 0.5] = 1.0
                x'[x/255.0 <= 0.5] = 0.0
    """
    data_binary = data / 255.0
    data_binary[data_binary > 0.5] = 1.0
    data_binary[data_binary <= 0.5] = 0.0
    return data_binary


def standardization(data):
    """
    Standardization: converts the data into a new distribution with a mean of 0 and a standard deviation of 1
    Purpose: to make different features comparable (if feature value ranges/units differ widely --> standardize)
        --> Standardization does not change the shape of the feature distribution
    Processing: x' = (x - mean) / std
    """
    mean = np.mean(data)
    std = np.std(data)
    data_std = (data - mean) / std
    return data_std
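

# Tiny worked example of the three preprocessing helpers (illustrative values,
# not used by the pipeline; `normalization` is defined just below):
def _preprocessing_example():
    x = np.array([0.0, 127.0, 255.0])
    print(binarization(x))     # [0. 0. 1.]      (127/255 ≈ 0.498 <= 0.5 -> 0)
    print(normalization(x))    # [0. 0.498 1.]   scaled into [0, 1]
    print(standardization(x))  # ≈[-1.22 -0.00 1.23]  (mean 0, std 1)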


def normalization(data):
    """
    Normalization: scales the data to a specific range, e.g. 0-1
    Purpose: to make the influence of each feature on the target variable consistent
        --> Normalization rescales the feature data into a fixed range
    Processing: x' = (x - min) / (max - min)
    Note: in the medical field, normalization is generally performed --> it accelerates the convergence of the network and makes the model more stable
    """
    d_min = np.nanmin(data)
    d_max = np.nanmax(data)
    data_nor = (data - d_min) / (d_max - d_min)
    return data_nor


def u_net(input_size=(512, 512, 1)):
    # layer 1-1
    inputs_L1_1 = Input(input_size)
    conv1_L1_1 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal")(inputs_L1_1)  # filters: 64, kernel_size: 3x3, kernel_initializer: he_normal draws the kernel weights from a normal distribution
    conv2_L1_1 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv1_L1_1)
    pool1_L1_1 = MaxPool2D(pool_size=(2, 2))(conv2_L1_1)

    # layer 2-1
    conv3_L2_1 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal")(pool1_L1_1)
    conv4_L2_1 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv3_L2_1)
    pool2_L2_1 = MaxPool2D(pool_size=(2, 2))(conv4_L2_1)

    # layer 3-1
    conv5_L3_1 = Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal")(pool2_L2_1)
    conv6_L3_1 = Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv5_L3_1)
    pool3_L3_1 = MaxPool2D(pool_size=(2, 2))(conv6_L3_1)

    # layer 4-1
    conv7_L4_1 = Conv2D(512, 3, activation="relu", padding="same", kernel_initializer="he_normal")(pool3_L3_1)
    conv8_L4_1 = Conv2D(512, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv7_L4_1)
    pool4_L4_1 = MaxPool2D(pool_size=(2, 2))(conv8_L4_1)

    # layer 5 (bottleneck)
    conv9_L5 = Conv2D(1024, 3, activation="relu", padding="same", kernel_initializer="he_normal")(pool4_L4_1)
    conv10_L5 = Conv2D(1024, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv9_L5)
    up1_L5 = UpSampling2D(size=(2, 2))(conv10_L5)  # upsampling (the paper's "up-conv 2x2" is a learned transposed convolution; see the note after this function)

    # layer 4-2
    conv11_L4_2 = Conv2D(512, 3, activation="relu", padding="same", kernel_initializer="he_normal")(concatenate([up1_L5, conv8_L4_1], axis=3))  # skip connection: concatenate along the channel axis
    conv12_L4_2 = Conv2D(512, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv11_L4_2)
    up2_L4_2 = UpSampling2D(size=(2, 2))(conv12_L4_2)

    # layer 3-2
    conv13_L3_2 = Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal")(concatenate([up2_L4_2, conv6_L3_1], axis=3))
    conv14_L3_2 = Conv2D(256, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv13_L3_2)
    up3_L3_2 = UpSampling2D(size=(2, 2))(conv14_L3_2)

    # layer 2-2
    conv15_L2_2 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal")(concatenate([up3_L3_2, conv4_L2_1], axis=3))
    conv16_L2_2 = Conv2D(128, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv15_L2_2)
    up4_L2_2 = UpSampling2D(size=(2, 2))(conv16_L2_2)

    # layer 1-2
    conv17_L1_2 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal")(concatenate([up4_L2_2, conv2_L1_1], axis=3))
    conv18_L1_2 = Conv2D(64, 3, activation="relu", padding="same", kernel_initializer="he_normal")(conv17_L1_2)
    outputs_L1_2 = Conv2D(1, 1, activation="sigmoid")(conv18_L1_2)

    # build model
    model = Model(inputs=inputs_L1_1, outputs=outputs_L1_2)

    # compile model
    '''
    Loss     : binary cross-entropy on 0/1 masks (binary_crossentropy)
    Optimizer: adaptive moment estimation (Adam)
    Callback : after each epoch, autosave the current pre-trained model (weights) (keras.callbacks.ModelCheckpoint)
    '''
    model.compile(optimizer="adam", loss="binary_crossentropy", metrics=["accuracy"])

    return model
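

# Note (an alternative, not what this script uses): the original U-Net paper's
# "up-conv 2x2" is a learned transposed convolution rather than the fixed
# nearest-neighbour upsampling of UpSampling2D; a drop-in replacement would be:
def up_conv(filters, x):
    """2x2 transposed convolution with stride 2, doubling the spatial size."""
    return Conv2DTranspose(
        filters, 2, strides=(2, 2), padding="same", kernel_initializer="he_normal"
    )(x)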


class ShowMask(keras.callbacks.Callback):
    """Saves an (image, real mask, predicted mask) comparison after each epoch."""

    def __init__(self):
        super().__init__()

    def on_epoch_end(self, epoch, logs=None):
        print()
        idx = 0
        for img, mask in gene:  # NOTE: consumes batches from the global training generator
            compare_list = [
                img[0, :, :, 0],
                mask[0, :, :, 0],
                # self.model is set by Keras when the callback is attached to fit()
                self.model.predict(img[0].reshape(1, 512, 512, 1))[0, :, :, 0],
            ]
            for i in range(len(compare_list)):
                plt.subplot(1, 3, i + 1)
                plt.imshow(compare_list[i], cmap="gray")
                plt.axis(False)
                # plt.show()
            plt.savefig(f"compare_{idx}.png")  # save once per figure, not per subplot
            idx = idx + 1
            break
        # return super().on_epoch_end(epoch, logs)


# Test

# dataset: data augmentation for train data
UNETDataset_path = "./UNETDataset"
gene = train_generator(UNETDataset_path, "train")  # yields normalized (image, mask) batches that can be fed directly to model.fit()

# model params
steps_per_epoch = 50
epochs = 100
model_name = f"u_net-512-512-1-pneumonia_{epochs}_{steps_per_epoch}.keras"
models_path = "./models/"
model_path = os.path.join(models_path, model_name)
os.makedirs(models_path, exist_ok=True)  # ModelCheckpoint does not create the directory itself
model_ckpt = keras.callbacks.ModelCheckpoint(model_path, save_best_only=False, verbose=1)

# train
K.clear_session()  # keras

model = u_net()  # structure
# print(model.summary())

# model.fit(gene, steps_per_epoch=steps_per_epoch, epochs=epochs, callbacks=[model_ckpt, ShowMask()])  # train
model.fit(
    gene,
    steps_per_epoch=steps_per_epoch,
    epochs=epochs,
    callbacks=[model_ckpt],
)  # train


# Ideas tried to get training running:
# 1. Dataset_mini          -> CPU OK, GPU KO
# 2. cudnn v8              -> GPU KO
# 3. shrink input_size     -> 512 -> 256
# 4. shrink the U-Net structure

# evaluation
data_val_generator_path = os.path.join(UNETDataset_path, "val_generator")
compare_path = os.path.join(data_val_generator_path, "compare")
for path in (data_val_generator_path, compare_path):
    os.makedirs(path, exist_ok=True)

# model_test = keras.models.load_model(model_path)
'''
gene = train_generator(UNETDataset_path, "val")
idx = 0
for img, mask in gene:
    predict_mask = model_test.predict(img)[0]
    # predict_mask_np = (predict_mask * 255).numpy()

    # masks from the generator are binarized to {0, 1}, so rescale before thresholding
    _, real_mask = cv2.threshold((mask[0] * 255).astype("uint8"), 127, 255, 0)
    real_contours, _ = cv2.findContours(real_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    real_overlap_img = cv2.drawContours(img[0].copy(), real_contours, -1, (0, 255, 0), 2)

    _, pred_mask = cv2.threshold((predict_mask * 255).astype("uint8"), 127, 255, 0)
    pred_contours, _ = cv2.findContours(pred_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    pred_overlap_img = cv2.drawContours(img[0].copy(), pred_contours, -1, (255, 0, 0), 2)

    compare_list = [img[0][:, :, 0], pred_mask, real_overlap_img, pred_overlap_img]

    for i in range(0, len(compare_list)):
        plt.subplot(1, 4, i + 1)
        plt.imshow(compare_list[i], cmap="gray")
        plt.axis(False)
        # plt.show()
    save_path = os.path.join(compare_path, f"compare_{idx}.png")
    plt.savefig(save_path)

    idx = idx + 1
'''
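
# A quantitative complement to the visual comparison above (an addition, not
# part of the original script; the function name is ours): Dice overlap
# between a predicted and a ground-truth mask, the standard segmentation metric.
def dice_coefficient(pred_mask, real_mask, threshold=0.5):
    """Dice = 2|A∩B| / (|A| + |B|) on binarized masks; 1.0 = perfect overlap."""
    pred = (np.asarray(pred_mask) > threshold).astype(np.float32)
    real = (np.asarray(real_mask) > threshold).astype(np.float32)
    denom = pred.sum() + real.sum()
    return 1.0 if denom == 0 else 2.0 * (pred * real).sum() / denom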
'''
# test save compare_png

images_path = os.path.join(data_val_generator_path, "images")
masks_path = os.path.join(data_val_generator_path, "masks")

idx = 0
for png_name in os.listdir(images_path):
    # predict_mask = model_test.predict(img)[0]

    img = cv2.imread(os.path.join(images_path, png_name))
    mask = cv2.imread(os.path.join(masks_path, png_name), cv2.IMREAD_GRAYSCALE)

    _, real_mask = cv2.threshold(mask, 127, 255, 0)
    real_contours, _ = cv2.findContours(
        real_mask, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE
    )
    real_overlap_img = cv2.drawContours(
        img.copy(), real_contours, -1, (0, 255, 0), 2
    )

    compare_list = [img, real_overlap_img]

    for i in range(0, len(compare_list)):
        plt.subplot(1, 2, i + 1)
        plt.imshow(compare_list[i], cmap="gray")
        plt.axis(False)
        # plt.show()
    save_path = os.path.join(compare_path, f"compare_{idx}.png")
    plt.savefig(save_path)

    idx = idx + 1
'''
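

# Minimal inference sketch (an addition: assumes a trained model exists at
# `model_path`; the helper name is hypothetical). It mirrors the commented
# evaluation above with a single PNG instead of a generator.
def predict_single(png_path, model_file):
    """Load a saved model and predict a mask for one grayscale PNG."""
    net = keras.models.load_model(model_file)
    img = cv2.imread(png_path, cv2.IMREAD_GRAYSCALE)
    img = cv2.resize(img, (512, 512))
    x = normalization(img.astype(np.float32)).reshape(1, 512, 512, 1)
    return net.predict(x)[0, :, :, 0]  # float mask in [0, 1]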