#!/bin/bash -e
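
# setup.sh
# Bootstraps the GenomicsAnalysis environment: creates the landing-zone and
# pipeline CloudFormation stacks, pushes the Pipe and Code directories to the
# source repositories the stacks create, stages test data into the data lake
# bucket, waits for the resulting pipeline execution, and runs the crawlers.
#
# Requires a configured AWS CLI and git. Environment variables:
#   PROJECT_NAME        resource prefix (defaults to GenomicsAnalysis)
#   ARTIFACT_BUCKET     S3 bucket that holds the packaged test-data artifacts
#   ARTIFACT_KEY_PREFIX key prefix under ARTIFACT_BUCKET for those artifacts
#
# Example invocation (bucket and prefix values are illustrative only):
#   ARTIFACT_BUCKET=my-artifacts ARTIFACT_KEY_PREFIX=genomics ./setup.sh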
export AWS_DEFAULT_OUTPUT=text
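
# create_stack STACK_NAME TEMPLATE_FILE RESOURCE_PREFIX
# Creates a CloudFormation stack from TEMPLATE_FILE, passing RESOURCE_PREFIX
# and its lowercase form as template parameters, then waits for creation to
# complete.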
create_stack() {
    local stack_name=${1}
    local template_name=${2}
    local resource_prefix=${3}
    local resource_prefix_lowercase
    resource_prefix_lowercase=$(echo "${resource_prefix}" | tr '[:upper:]' '[:lower:]')
    aws cloudformation create-stack --stack-name "${stack_name}" \
        --template-body "file://${template_name}" \
        --parameters ParameterKey=ResourcePrefix,ParameterValue="${resource_prefix}" \
                     ParameterKey=ResourcePrefixLowercase,ParameterValue="${resource_prefix_lowercase}" \
        --capabilities CAPABILITY_NAMED_IAM --no-enable-termination-protection
    aws cloudformation wait stack-create-complete --stack-name "${stack_name}"
}
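
# clone_and_commit STACK_NAME
# Reads the RepoHttpUrl output of STACK_NAME and pushes the current directory
# to that repository as the first commit on master. Assumes git credentials
# for the repository (e.g. a CodeCommit credential helper) are configured.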
clone_and_commit() {
    local stack_name=${1}
    local repo_http_url
    repo_http_url=$(aws cloudformation describe-stacks --stack-name "${stack_name}" \
        --query 'Stacks[].Outputs[?OutputKey==`RepoHttpUrl`].OutputValue')
    git init .
    git remote add origin "${repo_http_url}"
    git add *
    git commit -m "first commit"
    git push --set-upstream origin master
}
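
# wait_for_pipeline PIPELINE_NAME COMMIT_ID
# Polls every 30s (up to 60 attempts, ~30 minutes) for the pipeline execution
# whose source revision is COMMIT_ID; exits non-zero if the execution fails.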
wait_for_pipeline() {
    local pipeline_name=${1}
    local commit_id=${2}
    local stage_status
    local message="Max attempts reached. Pipeline execution failed for commit: ${commit_id}"
    for i in {1..60}; do
        stage_status=$(aws codepipeline list-pipeline-executions --pipeline-name "${pipeline_name}" \
            --query 'pipelineExecutionSummaries[?sourceRevisions[0].revisionId==`'"${commit_id}"'`].status')
        if [ "${stage_status}" == "InProgress" ] || [ -z "${stage_status}" ]; then
            printf '.'
            sleep 30
        elif [ "${stage_status}" == "Succeeded" ]; then
            message="CodePipeline execution succeeded for commit: ${commit_id}"
            break
        elif [ "${stage_status}" == "Failed" ]; then
            message="CodePipeline execution failed for commit: ${commit_id}"
            break
        fi
    done
    printf '\n%s\n' "${message}"
    if [ "${stage_status}" == "Failed" ]; then exit 1; fi
}
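
# copy_unpack_zip SOURCE_S3_URI DEST_S3_PREFIX
# S3 cannot unpack archives server-side, so download the zip, extract it
# locally, and sync the extracted tree up to the destination prefix.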
copy_unpack_zip() {
    local source_artifact=${1}
    local dest_prefix=${2}
    echo "Unpacking ${source_artifact} to ${dest_prefix}"
    aws s3 cp "${source_artifact}" ./temporary.zip
    mkdir stage
    pushd stage
    unzip ../temporary.zip
    popd
    aws s3 sync stage/ "${dest_prefix}"
    rm -rf stage temporary.zip
}
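
# copy_and_upload SOURCE_S3_URI DEST_S3_URI FILENAME
# Copies a single object between S3 locations via a local temporary file.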
copy_and_upload() {
    local source_artifact=${1}
    local dest_artifact=${2}
    local filename=${3}
    aws s3 cp "${source_artifact}" "${filename}"
    aws s3 cp "${filename}" "${dest_artifact}"
    rm "${filename}"
}
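
# copy_test_data ARTIFACT_BUCKET ARTIFACT_KEY_PREFIX PIPE_STACK_NAME
# Stages the TCGA/TCIA archives, the ClinVar and 1000 Genomes VCFs, and the
# hg38 reference FASTA into the data lake bucket exported by the pipeline stack.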
copy_test_data() {
    local artifact_bucket=${1}
    local artifact_key_prefix=${2}
    local pipe_stackname=${3}
    local data_lake_bucket
    data_lake_bucket=$(aws cloudformation describe-stacks --stack-name "${pipe_stackname}" \
        --query 'Stacks[].Outputs[?OutputKey==`DataLakeBucket`].OutputValue' --output text)
    local src="s3://${artifact_bucket}/${artifact_key_prefix}"
    local dest="s3://${data_lake_bucket}"
    copy_unpack_zip "${src}/tcga/tcga-clinical.zip" "${dest}/"
    copy_unpack_zip "${src}/tcga/tcga-cnv.zip" "${dest}/"
    copy_unpack_zip "${src}/tcga/tcga-expression.zip" "${dest}/"
    copy_unpack_zip "${src}/tcga/tcga-mutation.zip" "${dest}/"
    copy_unpack_zip "${src}/tcga/tcia-metadata.zip" "${dest}/"
    copy_unpack_zip "${src}/tcga/tcga-summary.zip" "${dest}/"
    copy_and_upload "${src}/annotation/clinvar/clinvar.vcf.gz" \
        "${dest}/annotation/vcf/clinvar/clinvar.vcf.gz" clinvar.vcf.gz
    copy_and_upload "${src}/variants/vcf/variants.vcf.gz" \
        "${dest}/variants/vcf/variants.vcf.gz" variants.vcf.gz
    copy_and_upload "${src}/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz" \
        "${dest}/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz" \
        ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz
    copy_and_upload "${src}/references/hg38/Homo_sapiens_assembly38.fasta" \
        "${dest}/references/hg38/Homo_sapiens_assembly38.fasta" Homo_sapiens_assembly38.fasta
}
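
# setup RESOURCE_PREFIX ARTIFACT_BUCKET ARTIFACT_KEY_PREFIX
# End-to-end driver: create both stacks, seed both repositories, stage the
# test data, wait for the pipeline run triggered by the code commit, then run
# the crawlers over the TCGA data.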
setup() {
    local resource_prefix=${1}
    local artifact_bucket=${2}
    local artifact_key_prefix=${3}
    local dir_prefix="GenomicsAnalysis"
    local zone_dir="${dir_prefix}Zone"
    local pipe_dir="${dir_prefix}Pipe"
    local code_dir="${dir_prefix}Code"
    local zone_stackname=${resource_prefix}-LandingZone
    local pipe_stackname=${resource_prefix}-Pipeline
    # Create stacks
    create_stack "${zone_stackname}" "${zone_dir}/zone_cfn.yml" "${resource_prefix}"
    create_stack "${pipe_stackname}" "${pipe_dir}/pipe_cfn.yml" "${resource_prefix}"
    # Clone and commit resources
    cd "${pipe_dir}"
    clone_and_commit "${zone_stackname}"
    cd ..
    cd "${code_dir}"
    clone_and_commit "${pipe_stackname}"
    # Get the last commit id
    commit_id=$(git log -1 --pretty=format:%H)
    cd ..
    # Get pipeline name
    pipeline_name=$(aws cloudformation describe-stack-resource --stack-name "${pipe_stackname}" \
        --logical-resource-id CodePipeline --query 'StackResourceDetail.PhysicalResourceId')
    # Copy test data
    copy_test_data "${artifact_bucket}" "${artifact_key_prefix}" "${pipe_stackname}"
    # Wait for pipeline execution using commit id
    wait_for_pipeline "${pipeline_name}" "${commit_id}"
    # Run crawlers for TCGA data
    "${code_dir}/run_crawlers.sh" "${resource_prefix}"
}
project_name=${PROJECT_NAME:-GenomicsAnalysis}
setup "${project_name}" "${ARTIFACT_BUCKET}" "${ARTIFACT_KEY_PREFIX}"