Diff of /download_images.sh [000000] .. [dec218]

Switch to side-by-side view

--- a
+++ b/download_images.sh
@@ -0,0 +1,58 @@
+#!/bin/bash
+
+# This script downloads the EHRXQA dataset images after gathering image paths from csv files.
+
+# Capture the start time
+start_time=$(date +%s)
+
+# Prompt for PhysioNet credentials
+echo "Enter your PhysioNet credentials"
+read -p "Username: " USERNAME
+read -s -p "Password: " PASSWORD
+echo
+
+# Base URL for the MIMIC-CXR dataset
+MIMIC_CXR_JPG_DIR="https://physionet.org/files/mimic-cxr-jpg/2.0.0"
+
+# wget options shared by every download, stored as an array so that each
+# option and the credentials reach wget as separate, intact arguments even
+# when the username or password contains spaces or glob characters.
+#   -r recursive, -N timestamping, -c continue partial files, -np no parent
+# NOTE: --password places the secret on wget's command line, where other
+# local users may be able to see it in the process list.
+WGET_PARAMS=(-r -N -c -np --user "$USERNAME" --password "$PASSWORD")
+
+# Download a single URL with the shared wget options.
+# Globals:   WGET_PARAMS (read)
+# Arguments: $1 - URL of the file to fetch
+# Outputs:   an error message on stderr if wget fails
+# Returns:   does not return on failure; exits the whole script with status 1
+download() {
+    local file_url=$1
+    if ! wget "${WGET_PARAMS[@]}" "$file_url"; then
+        echo "Error: Failed to download $file_url" >&2
+        exit 1
+    fi
+}
+
+# Function to extract image paths from a CSV file.
+# Arguments: $1 - path to a CSV with subject_id, study_id and image_id columns
+# Outputs:   one relative path per row on stdout, shaped like
+#            p<first 2 chars of subject_id>/p<subject_id>/s<study_id>/<image_id>.jpg
+# Returns:   the exit status of the python command
+get_image_paths() {
+    local csv_file=$1
+    # The CSV path is handed to Python as sys.argv[1] instead of being pasted
+    # into the program text, so quotes or backslashes in the path cannot break
+    # or alter the Python code.
+    python -c "import sys; import pandas as pd; df=pd.read_csv(sys.argv[1]); image_path_list = df.apply(lambda x: 'p'+str(x.subject_id)[:2]+'/p'+str(x.subject_id)+'/s'+str(x.study_id)+'/'+str(x.image_id)+'.jpg', axis=1); print('\n'.join(image_path_list))" "$csv_file"
+}
+
+# Gather image paths from JSON dataset files
+image_paths_train=$(get_image_paths 'database/mimic_iv_cxr/train/tb_cxr.csv')
+image_paths_test=$(get_image_paths 'database/mimic_iv_cxr/test/tb_cxr.csv')
+
+# Combine paths from train, valid, and test
+image_paths=$(echo -e "$image_paths_train\n$image_paths_test")
+
+# Remove duplicates and convert to an array
+readarray -t arr <<<"$(echo "$image_paths" | sort -u)"
+
+# Display the total number of unique images
+echo "Total number of unique images: ${#arr[@]}"
+
+# Download the images
+echo "Downloading images..."
+for image_path in "${arr[@]}"; do
+    echo "Downloading $image_path"
+    download "$MIMIC_CXR_JPG_DIR/files/$image_path"
+done
+echo "All images have been successfully downloaded."
+
+# Record the finish time and derive the elapsed seconds since start_time
+end_time="$(date +%s)"
+runtime=$(( end_time - start_time ))
+
+# Report how long the whole script took
+echo "Script runtime: ${runtime} seconds"
\ No newline at end of file