#!/bin/bash
# DrugGEN Setup Script
# Downloads all necessary resources from Google Drive repository
# Function to clean up and exit
# Removes the temporary download directory (when set) and terminates the
# script with the supplied status.
# Globals:   TEMP_DIR (read) - path of the temporary directory to delete
# Arguments: $1 - exit status code
cleanup_and_exit() {
  local exit_code=$1
  echo "Cleaning up temporary files..."
  # Guard: skip deletion when TEMP_DIR is unset/empty so we never run
  # `rm -rf ""`; `--` protects against option-like directory names.
  if [ -n "${TEMP_DIR:-}" ]; then
    rm -rf -- "$TEMP_DIR"
  fi
  exit "$exit_code"
}
# Function to handle errors
# Invoked by the ERR trap; reports the failing line on stderr (so the
# diagnostic survives stdout redirection) and exits through cleanup.
# Arguments: $1 - line number where the error occurred ($LINENO from the trap)
handle_error() {
  echo "Error occurred in script at line $1" >&2
  cleanup_and_exit 1
}
# Function to display error message and exit
# Prints a framed ERROR/SOLUTION message on stderr, then cleans up and
# exits with status 1.
# Arguments:
#   $1 - short error description
#   $2 - suggested remedy (may span multiple lines)
display_error_and_exit() {
  # Group the echoes so a single redirection sends the whole banner to stderr.
  {
    echo "====================================================================="
    echo "ERROR: $1"
    echo ""
    echo "SOLUTION: $2"
    echo "====================================================================="
  } >&2
  cleanup_and_exit 1
}
# Set up error handling
trap 'handle_error $LINENO' ERR

# Main Google Drive folder ID
MAIN_FOLDER_ID="1k-amlOwNQEWGx751MtWZc4SbZCUs8iqK"
TEMP_DIR="temp_download"

# Intro banner (heredoc keeps the text in one place)
cat <<'EOF'
DrugGEN Setup Script
====================
This script will download datasets, encoders/decoders, model weights
and SMILES correction files from Google Drive repository.

EOF

# Install required packages if not already installed
command -v gdown >/dev/null 2>&1 || {
  echo "Installing gdown package for Google Drive downloads..."
  pip install gdown || cleanup_and_exit 1
}
command -v curl >/dev/null 2>&1 || {
  echo "Installing curl as a backup download method..."
  apt-get update && apt-get install -y curl || cleanup_and_exit 1
}

# Start from a fresh temporary directory
rm -rf "$TEMP_DIR" # Clean up any existing temp directory
mkdir -p "$TEMP_DIR" || cleanup_and_exit 1

echo "Downloading resources from Google Drive..."
echo "This may take some time depending on your internet connection."
# Attempt to download with gdown with more robust error handling
echo "Attempting primary download method with gdown..."
# Quote the folder ID and output path so each is passed as a single word
# even if TEMP_DIR ever contains spaces (ShellCheck SC2086).
if ! gdown --folder --continue --id "$MAIN_FOLDER_ID" --output "$TEMP_DIR"; then
  echo "Initial download attempt failed. Trying alternate gdown method..."
  # --remaining-ok tolerates Google Drive's per-folder listing limit.
  if ! gdown --folder --continue --id "$MAIN_FOLDER_ID" --output "$TEMP_DIR" --remaining-ok; then
    display_error_and_exit "Automated download failed. This is often due to Google Drive limitations or permission settings." "Please download the files manually:
1. Visit this link in your browser:
https://drive.google.com/drive/folders/1k-amlOwNQEWGx751MtWZc4SbZCUs8iqK
2. Download the entire folder structure:
- Download 'data/' directory and place it in the project root
- Download 'experiments/' directory and place it in the project root
3. Ensure you maintain the exact directory structure from Google Drive:
- data/encoders/ should contain encoder files
- data/decoders/ should contain decoder files
- experiments/models/ should contain model weights"
  fi
fi
# Verify that critical folders were downloaded before proceeding
echo "Verifying downloaded files..."

# Both top-level directories must be present, otherwise bail out with
# manual-download instructions.
if [[ ! -d "$TEMP_DIR/data" || ! -d "$TEMP_DIR/experiments" ]]; then
  display_error_and_exit "Download appears to be incomplete or incorrect." "The downloaded content doesn't contain expected directories (data and experiments).
Please download the files manually:
1. Visit https://drive.google.com/drive/folders/1k-amlOwNQEWGx751MtWZc4SbZCUs8iqK
2. Download both the 'data/' and 'experiments/' directories
3. Place them in the project root directory, maintaining the exact same structure"
fi

# Now that we've verified the download contains the expected structure,
# we can copy all the files to their proper locations

# Create necessary directories if they don't exist
mkdir -p data/encoders data/decoders experiments/models || cleanup_and_exit 1

echo "Organizing downloaded files..."
# Define a function to copy directories recursively, preserving structure
# Copies every regular file under the source tree into the destination,
# recreating the relative directory layout.
# Arguments:
#   $1 - source directory
#   $2 - destination directory
# Outputs: progress messages on stdout; one warning per file that fails to copy
copy_directory_contents() {
  local src_dir="$1"
  local dst_dir="$2"
  local file rel_path dst_file dst_path
  echo "Copying contents from $src_dir to $dst_dir..."
  # Create destination directory if it doesn't exist
  mkdir -p "$dst_dir"
  # NUL-delimited traversal is safe for names containing spaces, newlines or
  # backslashes; the previous `find | while read file` mangled backslashes
  # (no -r), trimmed whitespace (no IFS=), and broke on newlines in names.
  # Process substitution also keeps the loop in the current shell.
  while IFS= read -r -d '' file; do
    # Relative path below the source dir; the prefix is quoted so it is
    # stripped literally instead of being interpreted as a glob pattern.
    rel_path="${file#"$src_dir"/}"
    dst_file="$dst_dir/$rel_path"
    dst_path=$(dirname "$dst_file")
    # Create destination directory if needed
    mkdir -p "$dst_path"
    # Copy the file; `--` stops option parsing for dash-prefixed names
    echo " - Copying: $rel_path"
    cp -- "$file" "$dst_file" || echo "Warning: Could not copy file $rel_path"
  done < <(find "$src_dir" -type f -print0)
}
# Copy the downloaded data/ and experiments/ trees (when present) into the
# project root, preserving their internal structure.
for top_level in data experiments; do
  if [ -d "$TEMP_DIR/$top_level" ]; then
    copy_directory_contents "$TEMP_DIR/$top_level" "$top_level"
  fi
done
# Verify that critical files were copied to their destinations
echo "Verifying file organization..."
MISSING_FILES=0
# Check for encoder files: the directory must hold at least one entry besides
# the .gitkeep placeholder. `grep -Fxv` excludes exactly that literal name;
# the original unquoted `grep -v .gitkeep` was a regex where `.` matched any
# character.
if [ -z "$(ls -A data/encoders 2>/dev/null | grep -Fxv '.gitkeep')" ]; then
  echo "ERROR: No encoder files found in data/encoders/"
  MISSING_FILES=1
fi
# Check for decoder files (same placeholder-aware emptiness test)
if [ -z "$(ls -A data/decoders 2>/dev/null | grep -Fxv '.gitkeep')" ]; then
  echo "ERROR: No decoder files found in data/decoders/"
  MISSING_FILES=1
fi
# Check for model weights. The parentheses group the -name alternatives so
# `-type f` applies to all three patterns; without them `-o` made `-type f`
# constrain only the first pattern.
if [ -z "$(find experiments/models -type f \( -name "*.ckpt" -o -name "*.pt" -o -name "*.pth" \) 2>/dev/null)" ]; then
  echo "ERROR: No model weight files found in experiments/models/"
  MISSING_FILES=1
fi
if [ "$MISSING_FILES" -eq 1 ]; then
  display_error_and_exit "Failed to organize critical files properly." "Some essential files could not be copied to their appropriate destinations.
Please download the files manually:
1. Visit https://drive.google.com/drive/folders/1k-amlOwNQEWGx751MtWZc4SbZCUs8iqK
2. Download both the 'data/' and 'experiments/' directories
3. Place them in the project root directory, maintaining the following structure:
- data/encoders/ - should contain encoder files
- data/decoders/ - should contain decoder files
- experiments/models/ - should contain model weight files"
fi
# Print the completion banner (heredoc output is byte-identical to the
# previous echo sequence)
cat <<'EOF'

=== Setup Complete ===
All resources have been successfully downloaded and organized.
You can now train or run inference with DrugGEN.

EOF

# Cleanup temporary files
cleanup_and_exit 0