|
a |
|
b/data/csv_process.py |
|
|
1 |
import numpy as np |
|
|
2 |
import pandas as pd |
|
|
3 |
import random |
|
|
4 |
from glob import glob |
|
|
5 |
import os, shutil |
|
|
6 |
from tqdm import tqdm |
|
|
7 |
tqdm.pandas() |
|
|
8 |
import time |
|
|
9 |
from PIL import Image |
|
|
10 |
import sys |
|
|
11 |
|
|
|
12 |
def prepare_label_df(label_csv_path):
    """Load the label CSV and derive lookup columns from its ``id`` field.

    Args:
        label_csv_path: Path to a CSV file containing at least the columns
            ``id`` and ``segmentation``.

    Returns:
        The loaded DataFrame with four added columns:
        ``absent`` (1 when ``segmentation`` is missing, otherwise 0) and
        ``case``, ``day``, ``slice_id`` (the 1st, 2nd and 4th
        underscore-separated fields of ``id``).
    """
    df = pd.read_csv(label_csv_path)

    # 1 means the organ is absent (no segmentation), 0 means it is present.
    df["absent"] = df["segmentation"].isna().astype("int64")

    # Split the id once and reuse the pieces for every derived column.
    id_fields = df["id"].str.split('_')
    df["case"] = id_fields.str[0]
    df["day"] = id_fields.str[1]
    df["slice_id"] = id_fields.str[3]
    return df
|
|
19 |
|
|
|
20 |
def prepare_image_df(dataset_folder_path, path_split):
    """Build a per-image metadata table from the PNG files of a dataset folder.

    Files are discovered with the glob pattern
    ``<dataset_folder_path>/*/*/*/*.png``. Each path is split on
    ``path_split``; the case, day and file name are read from the last four
    path components, and the remaining columns from the underscore-separated
    fields of the file name.

    Args:
        dataset_folder_path: Root folder that is searched three directory
            levels deep for ``.png`` files.
        path_split: Separator used to split each image path into components
            (the path separator of the paths returned by ``glob``).

    Returns:
        DataFrame with the columns ``image_path``, ``case``, ``day``,
        ``slice_id``, ``pic_info`` (the file name), ``slice_height``,
        ``slice_width`` (int), ``pixel_height`` and ``pixel_width`` (float).
    """
    images = glob(dataset_folder_path + '/*/*/*/*.png')
    image_df = pd.DataFrame(images, columns=["image_path"])

    # Index the components from the END of the path. The glob pattern fixes
    # the last four components (<case>/<case_day>/<dir>/<file>.png), while the
    # number of leading components depends on how dataset_folder_path was
    # written (nested, absolute, "./"-prefixed, ...).
    path_parts = image_df["image_path"].str.split(path_split)
    file_name = path_parts.str[-1]
    name_fields = file_name.str.split('_')

    image_df["case"] = path_parts.str[-4]
    image_df["day"] = path_parts.str[-3].str.split('_').str[1]
    image_df["slice_id"] = name_fields.str[1]

    image_df["pic_info"] = file_name
    # NOTE(review): fields 2/3 are labelled height/width here; confirm this
    # order against the dataset's file-naming convention.
    image_df["slice_height"] = name_fields.str[2].astype(int)
    image_df["slice_width"] = name_fields.str[3].astype(int)
    image_df["pixel_height"] = name_fields.str[4].astype(float)
    # Drop the extension by slicing: a multi-character pattern such as '.png'
    # passed to str.split would be interpreted as a regular expression.
    image_df["pixel_width"] = name_fields.str[5].str[:-len('.png')].astype(float)
    return image_df
|
|
34 |
|
|
|
35 |
if __name__ == '__main__':
    # usage: python csv_process.py [label_csv_path] [dataset_folder_path] [output_csv_name] [path_splitter]
    # Fail with the usage line instead of an IndexError traceback when
    # arguments are missing; any extra arguments are ignored.
    if len(sys.argv) < 5:
        sys.exit("usage: python csv_process.py [label_csv_path] "
                 "[dataset_folder_path] [output_csv_name] [path_splitter]")
    label_csv_path, dataset_folder_path, output_csv_path, path_split = sys.argv[1:5]

    label_df = prepare_label_df(label_csv_path)
    image_df = prepare_image_df(dataset_folder_path, path_split)

    # Left join keeps every label row, even when no matching image was found.
    combined_df = pd.merge(label_df, image_df, how='left', on=['case', 'day', 'slice_id'])

    # The ".csv" extension is appended here, so pass the output name without it.
    combined_df.to_csv(output_csv_path + ".csv")
    print("Combined CSV generated!")
|
|
46 |
|
|
|
47 |
|
|
|
48 |
|
|
|
49 |
|
|
|
50 |
|