"""Find and remove duplicated DICOM series that sit next to a NIfTI mask.

The script walks the completed annotations, flags every study whose DICOM
folder holds more than one series where at least two series have the same
number of slices, plots each candidate series with the mask overlaid so the
matching one can be picked by eye, and offers a helper to delete a series
by its number.
"""

# %%
from pathlib import Path

import matplotlib.pyplot as plt
from mpl_toolkits.axes_grid1 import make_axes_locatable
import numpy as np
import nibabel as nib
import pydicom as dcm
from collections import defaultdict

import os
# NOTE(review): presumably run from the repository root so that the project
# import below resolves when the cell is executed interactively -- confirm.
os.chdir(Path(__file__).resolve().parent.parent)
from adpkd_segmentation.data.data_config import dataroot  # noqa
# %%


def _read_sorted_dicoms(nii_file):
    """Read every ``*.dcm`` file beside *nii_file*, ordered by slice position.

    The sort key is the third component of ``ImagePositionPatient`` taken as
    a float; truncating it to ``int`` would make slices that are less than
    one unit apart compare equal and fall back to directory order.
    """
    datasets = [dcm.dcmread(path) for path in nii_file.parent.glob("*.dcm")]
    datasets.sort(key=lambda ds: float(ds.ImagePositionPatient[2]))
    return datasets


def nii2dcm_sequences(nii_file):
    """Return the series numbers to review for the folder of *nii_file*.

    Args:
        nii_file: ``Path`` of the NIfTI mask; DICOMs are read from its
            parent directory.

    Returns:
        A list with *all* series numbers found in the folder when there is
        more than one series and at least two of them contain the same
        number of slices; otherwise an empty list.
    """
    series = defaultdict(list)
    for ds in _read_sorted_dicoms(nii_file):
        series[ds.SeriesNumber].append(ds)

    if len(series) > 1:
        lengths = [len(slices) for slices in series.values()]
        # A repeated length means slice count alone cannot tell the series
        # apart, so every series is handed back for visual inspection.
        if len(set(lengths)) != len(lengths):
            return list(series)

    return []


def nii2dcm(nii_file, sequence_num=None):
    """Load the DICOM volume and the label volume for *nii_file*.

    Args:
        nii_file: ``Path`` of the NIfTI mask; DICOMs are read from its
            parent directory.
        sequence_num: If given, only slices whose ``SeriesNumber`` equals
            this value are kept.

    Returns:
        Tuple ``(dcm_volume, label_arr)`` where ``dcm_volume`` is the
        stacked ``pixel_array`` of each slice (slice index first) and
        ``label_arr`` is the transposed NIfTI data.

    Raises:
        ValueError: If no DICOM slice is left to stack.
    """
    datasets = _read_sorted_dicoms(nii_file)

    if sequence_num is not None:
        datasets = [d for d in datasets if d.SeriesNumber == sequence_num]

    if not datasets:
        # np.stack would raise ValueError as well, but without saying why.
        raise ValueError(
            f"no DICOM slices found for {nii_file} "
            f"(sequence_num={sequence_num})"
        )

    dcm_arrs = [d.pixel_array for d in datasets]
    label_arr = nib.load(nii_file).get_fdata().T

    return np.stack(dcm_arrs), label_arr

# %%


def plot_image_bar(image, label=None, title=None, standardize=True):
    """Show *image* in grayscale with an optional translucent label overlay.

    Args:
        image: 2-D array passed to ``imshow``.
        label: Optional array drawn on top of the image with ``alpha=0.5``.
        title: Optional axes title.
        standardize: When true, every positive label value is shown as 1.
            The caller's array is left untouched.

    The colorbar belongs to the last image drawn, i.e. the label overlay
    when one is given, otherwise the image itself.
    """
    ax = plt.subplot(111)
    im = ax.imshow(image, cmap="gray", origin="lower", alpha=1)

    if label is not None:
        if standardize:
            # Binarise a copy so the caller's label volume is not modified.
            label = np.array(label)
            label[label > 0] = 1
        im = ax.imshow(label, cmap="viridis", origin="lower", alpha=0.5)

    divider = make_axes_locatable(ax)
    cax = divider.append_axes("right", size="5%", pad=0.05)
    plt.colorbar(im, cax=cax)

    if title is not None:
        ax.set_title(title, size=8)

    plt.show()
# %%


def plot_nii_dcm(nii_file, axis=0, seq_list=None):
    """Plot the DICOM data for *nii_file* with its mask overlaid.

    Args:
        nii_file: ``Path`` of the NIfTI mask.
        axis: Axis along which image and label are summed; only used when
            *seq_list* is ``None``.
        seq_list: Optional iterable of series numbers. When given, one
            figure is drawn per series, showing the slice three quarters
            of the way through that series.
    """
    study_name = nii_file.parent.parent.name

    if seq_list is not None:
        for seq in seq_list:
            dcms, label = nii2dcm(nii_file, sequence_num=seq)
            idx = 3 * len(dcms) // 4
            # NOTE(review): label is indexed with the same slice index as
            # the series; assumes both have matching slice counts -- confirm.
            plot_image_bar(
                dcms[idx],
                label[idx],
                title=study_name + " " + str(seq),
            )
    else:
        dcms, label = nii2dcm(nii_file)
        plot_image_bar(
            np.sum(dcms, axis=axis),
            label=np.sum(label, axis=axis),
            title=study_name,
        )


# %%
completed = dataroot / "annotation_completed"
niis = list(completed.glob("**/*.gz"))

# %%

# get list of duplicates with same series len
checks = []
for n in niis:
    res = nii2dcm_sequences(n)
    if res:
        checks.append((n, res))

# %%

# display series to identify which matches mask

for idx, c in enumerate(checks):
    print(idx, c[0])
    plot_nii_dcm(c[0], seq_list=c[1])


# %%

# manually delete a series by number
def remove_dicom_series(nii_file, SeriesNumber):
    """Delete every DICOM file beside *nii_file* that belongs to a series.

    Args:
        nii_file: ``Path`` of the NIfTI mask; DICOMs are looked up in its
            parent directory.
        SeriesNumber: Series number whose files are removed from disk.
    """
    dcm_files = list(nii_file.parent.glob("*.dcm"))

    print(f"there are {len(dcm_files)}...")

    # Only the header is needed to read SeriesNumber, so skip pixel data.
    dcm_files = [
        d for d in dcm_files
        if dcm.dcmread(d, stop_before_pixels=True).SeriesNumber
        == SeriesNumber
    ]

    print(f"removing {len(dcm_files)}...")

    for d in dcm_files:
        d.unlink()

# %%