'''
Created by Wang Qiu Li
7/3/2018

get dicom info according to malignancy.csv and id_scan.txt

For every nodule row in files/malignancy.csv the script looks up the matching
scan directories listed in files/id_scan.txt, loads the DICOM slices, cuts a
square patch around the nodule from a three-channel image and from the edge
map returned by xmlopt.getEdgeMap, and writes both patches as PNG files.
Scans that fail are printed and recorded in errlist.txt.
'''

import glob
import os

import cv2
import numpy as np
import pandas as pd
import pydicom
import scipy.misc
import tqdm

import csvTools
import xmlopt

basedir = '/home/wangqiuli/Data/LIDC/DOI/'
three_dir = 'three_channel/'
imagedir = 'ori_images/'
maskdir = 'ori_masks/'
png_dir = 'image_1/'

# Half the side length, in pixels, of the square patch cut around a nodule.
CROP_HALF_SIZE = 25
# Slice side length, in pixels, assumed by the crop bounds check.
IMAGE_SIZE = 512

noduleinfo = csvTools.readCSV('files/malignancy.csv')
idscaninfo = csvTools.readCSV('files/id_scan.txt')
# NOTE(review): the pattern has no wildcard, so this matches at most the
# directory path itself, and the result is not used anywhere in this script.
# Confirm whether maskdir + '*' was intended.
maskinfo = glob.glob(maskdir)


def get_pixels_hu(ds):
    '''
    Return the pixel data of a DICOM dataset after the linear rescale.

    ds: dataset exposing pixel_array, RescaleIntercept and RescaleSlope.
    Returns a float32 array holding pixel_array * RescaleSlope +
    RescaleIntercept (for CT data this is the Hounsfield-unit image).
    '''
    image = np.array(ds.pixel_array, dtype=np.float32)
    intercept = ds.RescaleIntercept
    slope = ds.RescaleSlope
    image = image * slope
    image += intercept
    return image


def getThreeChannel(pixhu):
    '''
    Build a three-channel image from one 2-D array of rescaled pixel values.

    Each channel is the input truncated to a different value range and then
    normalized: [-1000, 800], [-160, 240] and [-1400, -950].
    Returns an array with the channel axis last.
    '''
    lungwindow = truncate_hu(pixhu, 800, -1000)
    highattenuation = truncate_hu(pixhu, 240, -160)
    lowattenuation = truncate_hu(pixhu, -950, -1400)
    channels = np.array([lungwindow, highattenuation, lowattenuation])
    # Move the channel axis from first to last.
    return channels.transpose(1, 2, 0)


def truncate_hu(image_array, max, min):
    '''
    Clamp a copy of image_array to [min, max] and normalize the result.

    The input array is left untouched. The parameter names shadow the
    builtins but are kept so keyword callers continue to work.
    '''
    image = image_array.copy()
    image[image > max] = max
    image[image < min] = min
    return normalazation(image)


# LUNA2016 data prepare, second step: normalization of the HU values
def normalazation(image_array):
    '''
    Scale image_array to the range [0, 1] and subtract its mean.

    A new array is returned; the input is not modified. A constant image has
    no value range to scale by, so it is returned as all zeros instead of
    the NaNs a 0/0 division would produce.
    '''
    lowest = image_array.min()
    span = image_array.max() - lowest
    if span == 0:
        # Constant image: dividing by 1 keeps every pixel at 0.
        span = 1
    scaled = (image_array - lowest) / span
    return scaled - scaled.mean()


def cutTheImage(x, y, pix, half_size=CROP_HALF_SIZE):
    '''
    Cut the square patch of side 2 * half_size centred on (x, y) out of pix.

    x indexes the first axis of pix and y the second. No bounds check is
    done here; callers must make sure the patch lies inside the image.
    '''
    return pix[x - half_size:x + half_size, y - half_size:y + half_size]


def caseid_to_scanid(caseid):
    '''
    Format an integer case id as 'LIDC-IDRI-NNNN', zero-padded to 4 digits.
    '''
    return 'LIDC-IDRI-' + str(caseid).zfill(4)


def reverse(inputarray):
    '''
    Return a float array that is 1 where inputarray is 0 and 0 elsewhere.
    '''
    return np.where(np.asarray(inputarray) == 0, 1.0, 0.0)


def _scan_paths_for(scanid):
    '''Return the id_scan entries whose first column contains scanid.'''
    return [idscan[0] for idscan in idscaninfo if scanid in idscan[0]]


def _annotation_ids(onenodule):
    '''Return the non-blank values of columns 10-13 of a nodule row.'''
    return [onenodule[i] for i in range(10, 14)
            if str(onenodule[i]).strip() != '']


def _fits_crop(loc):
    '''Tell whether a patch centred on loc stays inside the assumed slice.'''
    return CROP_HALF_SIZE <= loc <= IMAGE_SIZE - CROP_HALF_SIZE


def _process_scan(onenodule, scanid, scanpath, noduleld_list):
    '''
    Cut and save the image and mask patches of one nodule from one scan.

    onenodule: row of malignancy.csv; scanid: formatted scan id, used in
    messages and file names; scanpath: scan directory relative to basedir;
    noduleld_list: ids handed to xmlopt.getEdgeMap.
    Any failure is raised to the caller.
    '''
    scan_list_id = onenodule[2]
    noduleid = onenodule[3]
    scandir = basedir + scanpath

    dcmfiles = []
    xmlfiles = []
    for onefile in os.listdir(scandir):
        if '.dcm' in onefile:
            dcmfiles.append(onefile)
        elif '.xml' in onefile:
            xmlfiles.append(onefile)

    xmlfile = scandir + '/' + xmlfiles[0]
    slices = [pydicom.dcmread(scandir + '/' + s) for s in dcmfiles]
    # Order the slices by descending z position; z_loc indexes this order.
    slices.sort(key=lambda x: float(x.ImagePositionPatient[2]), reverse=True)

    x_loc = int(onenodule[6])
    y_loc = int(onenodule[7])
    z_loc = int(onenodule[8])
    ds = slices[z_loc]

    # A series id of '0' in the row accepts any series.
    if str(ds.SeriesNumber) != scan_list_id and str(scan_list_id) != '0':
        print(scanid)
        print('not equal')
        return

    slice_location = ds.ImagePositionPatient[2]
    mask_image, _ = xmlopt.getEdgeMap(xmlfile, slice_location, noduleld_list)
    pix = getThreeChannel(get_pixels_hu(ds))

    if not (_fits_crop(x_loc) and _fits_crop(y_loc)):
        print('out of size:', scanid, noduleid)
        return

    # The y coordinate indexes the first array axis, x the second.
    cut_img = cutTheImage(y_loc, x_loc, pix)
    cut_mask = cutTheImage(y_loc, x_loc, mask_image)

    stem = png_dir + str(scanid) + '_' + str(noduleid) + '_' + str(scan_list_id)
    # NOTE(review): scipy.misc.imsave was removed in SciPy 1.2, so this
    # script needs an older SciPy. Switching writers would change how pixel
    # values are scaled, so it is left as is.
    scipy.misc.imsave(stem + '2.png', cut_img)
    scipy.misc.imsave(stem + '_mask2.png', cut_mask)


# The error list is closed even if the run is interrupted.
with open('errlist.txt', 'w') as f:
    # NOTE(review): only the first 10 rows are processed; confirm whether
    # this limit is intentional.
    for onenodule in tqdm.tqdm(noduleinfo[:10]):
        scanid = caseid_to_scanid(int(onenodule[1]))
        noduleld_list = _annotation_ids(onenodule)

        for scanpath in _scan_paths_for(scanid):
            try:
                _process_scan(onenodule, scanid, scanpath, noduleld_list)
            except Exception as err:
                # Best effort: report the failing scan and continue.
                print(scanid)
                print('Error')
                f.write('%s\t%s\t%s: %s\n'
                        % (scanid, scanpath, type(err).__name__, err))