Diff of /data/csv_process.py [000000] .. [139527]

Switch to unified view

a b/data/csv_process.py
1
import numpy as np
2
import pandas as pd
3
import random
4
from glob import glob
5
import os, shutil
6
from tqdm import tqdm
7
tqdm.pandas()
8
import time
9
from PIL import Image
10
import sys
11
12
def prepare_label_df(label_csv_path):
    """Load the label CSV and derive the merge keys from its ``id`` column.

    The CSV must provide an ``id`` column whose values are underscore
    separated (fields 0, 1 and 3 are used) and a ``segmentation`` column.

    Args:
        label_csv_path: Path (or file-like object) accepted by
            ``pandas.read_csv``.

    Returns:
        The loaded DataFrame with four extra columns: ``absent`` (1 when
        ``segmentation`` is missing, otherwise 0), ``case``, ``day`` and
        ``slice_id`` (all taken from ``id`` as strings).
    """
    df = pd.read_csv(label_csv_path)

    # 1 means the organ is absent (no segmentation), 0 means it is present.
    # Vectorized null check instead of a per-row Python lambda.
    df["absent"] = df["segmentation"].isna().astype(int)

    # Split every id once and reuse the pieces for all derived columns.
    id_parts = df["id"].str.split('_')
    df["case"] = id_parts.str[0]
    df["day"] = id_parts.str[1]
    df["slice_id"] = id_parts.str[3]
    return df
19
20
def prepare_image_df(dataset_folder_path, path_split):
    """Build a DataFrame describing every PNG found under the dataset folder.

    Images are discovered with the glob pattern
    ``<dataset_folder_path>/*/*/*/*.png``, i.e. exactly three directory
    levels below the dataset folder. The directory and file names are then
    parsed into columns.

    Args:
        dataset_folder_path: Root folder of the dataset. May be relative,
            absolute or nested; only the trailing path components produced
            by the glob pattern are parsed.
        path_split: Separator used to split each image path into its
            components (the OS path separator).

    Returns:
        DataFrame with the columns ``image_path``, ``case``, ``day``,
        ``slice_id``, ``pic_info``, ``slice_height``, ``slice_width``,
        ``pixel_height`` and ``pixel_width``.
    """
    images = glob(dataset_folder_path + '/*/*/*/*.png')
    image_df = pd.DataFrame(images, columns=["image_path"])

    # Index path components from the END: the glob pattern fixes the last
    # four components (level-1 dir / level-2 dir / level-3 dir / file), while
    # the number of leading components depends on how dataset_folder_path
    # was written. Counting from the front breaks for nested/absolute roots.
    path_parts = image_df["image_path"].str.split(path_split)
    image_df["case"] = path_parts.str[-4]
    image_df["day"] = path_parts.str[-3].str.split('_').str[1]

    # The file name carries the remaining metadata as '_'-separated fields.
    image_df["pic_info"] = path_parts.str[-1]
    info_parts = image_df["pic_info"].str.split("_")
    image_df["slice_id"] = info_parts.str[1]

    # NOTE(review): fields 2/3 are stored as height/width in that order;
    # confirm this matches the dataset's file-naming convention.
    image_df["slice_height"] = info_parts.str[2].astype(int)
    image_df["slice_width"] = info_parts.str[3].astype(int)
    image_df["pixel_height"] = info_parts.str[4].astype(float)
    # The last field still carries the file extension; drop it before parsing.
    image_df["pixel_width"] = info_parts.str[5].str.split('.png').str[0].astype(float)

    # Keep the original column order.
    return image_df[[
        "image_path", "case", "day", "slice_id", "pic_info",
        "slice_height", "slice_width", "pixel_height", "pixel_width",
    ]]
34
35
if __name__ == '__main__':
    # Command-line entry point: joins the label CSV with the image metadata
    # found on disk and writes the result as "<output_csv_name>.csv".
    usage = "usage: python csv_process.py [label_csv_path] [dataset_folder_path] [output_csv_name] [path_splitter]"
    # Fail with a readable usage message instead of a bare IndexError when
    # arguments are missing (extra arguments are still ignored, as before).
    if len(sys.argv) < 5:
        sys.exit(usage)

    label_csv_path = sys.argv[1]
    dataset_folder_path = sys.argv[2]
    output_csv_path = sys.argv[3]
    path_split = sys.argv[4]

    label_df = prepare_label_df(label_csv_path)
    image_df = prepare_image_df(dataset_folder_path, path_split)
    # Left join: every label row is kept even when no image matches it.
    combined_df = pd.merge(label_df, image_df, how='left', on=['case','day','slice_id'])
    combined_df.to_csv(output_csv_path + ".csv")
    print("Combined CSV generated!")
46
47
48
49
    
50