#!/bin/bash
#
# setup.sh — Provision the Genomics Analysis solution:
#   1. Create the LandingZone and Pipeline CloudFormation stacks.
#   2. Seed each stack's CodeCommit repo with an initial commit.
#   3. Stage TCGA/annotation/variant/reference test data into the data lake bucket.
#   4. Wait for the CodePipeline execution triggered by the initial commit.
#
# Required env vars: ARTIFACT_BUCKET, ARTIFACT_KEY_PREFIX
# Optional env vars: PROJECT_NAME (default: GenomicsAnalysis)

# Options belong in `set`, not the shebang: `#!/bin/bash -e` is silently
# dropped when the script is run as `bash setup.sh`.
set -euo pipefail

export AWS_DEFAULT_OUTPUT=text

# Create a CloudFormation stack and block until creation completes.
# $1 - stack name
# $2 - template file path
# $3 - resource prefix (CamelCase; a lowercase copy is derived for the
#      ResourcePrefixLowercase template parameter)
create_stack() {
  local stack_name=$1
  local template_name=$2
  local resource_prefix=$3
  local resource_prefix_lowercase
  resource_prefix_lowercase=$(echo "${resource_prefix}" | tr '[:upper:]' '[:lower:]')

  aws cloudformation create-stack \
    --stack-name "${stack_name}" \
    --template-body "file://${template_name}" \
    --parameters \
      "ParameterKey=ResourcePrefix,ParameterValue=${resource_prefix}" \
      "ParameterKey=ResourcePrefixLowercase,ParameterValue=${resource_prefix_lowercase}" \
    --capabilities CAPABILITY_NAMED_IAM \
    --no-enable-termination-protection
  aws cloudformation wait stack-create-complete --stack-name "${stack_name}"
}

# Initialize the current directory as a git repo pointed at the stack's
# CodeCommit repo (RepoHttpUrl stack output) and push everything as the
# first commit on master. That push is what triggers CodePipeline.
# $1 - stack name whose RepoHttpUrl output identifies the remote
clone_and_commit() {
  local stack_name=$1
  local repo_http_url
  repo_http_url=$(aws cloudformation describe-stacks \
    --stack-name "${stack_name}" \
    --query 'Stacks[].Outputs[?OutputKey==`RepoHttpUrl`].OutputValue')

  git init .
  git remote add origin "${repo_http_url}"
  # -A instead of the original `git add *`: `*` skips dotfiles and is
  # subject to glob expansion surprises.
  git add -A
  git commit -m "first commit"
  git push --set-upstream origin master
}

# Poll CodePipeline until the execution for a given commit finishes.
# Polls every 30s for up to 60 attempts (~30 minutes).
# $1 - pipeline name
# $2 - commit id whose execution status is tracked
# Exits 1 if the execution ends in Failed.
wait_for_pipeline() {
  local pipeline_name=$1
  local commit_id=$2
  local stage_status=""
  local i
  local message="Max attempts reached. Pipeline execution failed for commit: ${commit_id}"

  for i in {1..60}; do
    stage_status=$(aws codepipeline list-pipeline-executions \
      --pipeline-name "${pipeline_name}" \
      --query "pipelineExecutionSummaries[?sourceRevisions[0].revisionId==\`${commit_id}\`].status")

    # Empty status means the execution hasn't appeared in the list yet.
    if [[ "${stage_status}" == "InProgress" || -z "${stage_status}" ]]; then
      printf '.'
      sleep 30
    elif [[ "${stage_status}" == "Succeeded" ]]; then
      message="CodePipeline execution succeeded for commit: ${commit_id}"
      break
    elif [[ "${stage_status}" == "Failed" ]]; then
      message="CodePipeline execution Failed for commit: ${commit_id}"
      break
    fi
  done
  # %s keeps ${message} out of the printf format string.
  printf '\n%s\n' "${message}"
  if [[ "${stage_status}" == "Failed" ]]; then exit 1; fi
}

# Download a zip artifact, unpack it locally, and sync its contents to an
# S3 prefix. Uses a scratch dir/file in the current working directory.
# $1 - source S3 object (zip)
# $2 - destination S3 prefix
copy_unpack_zip() {
  local source_artifact=$1
  local dest_prefix=$2

  echo "Unpacking ${source_artifact} to ${dest_prefix}"
  aws s3 cp "${source_artifact}" ./temporary.zip
  mkdir stage
  pushd stage
  unzip ../temporary.zip
  popd
  aws s3 sync stage/ "${dest_prefix}"
  rm -rf stage temporary.zip
}

# Copy a single S3 object to another S3 location via a local temp file.
# (Reconstructed: the original had unresolved `$(unknown)` placeholders
# where ${filename} clearly belongs, which would have executed a literal
# `unknown` command.)
# $1 - source S3 object
# $2 - destination S3 object
# $3 - local scratch filename used for the round trip
copy_and_upload() {
  local source_artifact=$1
  local dest_artifact=$2
  local filename=$3

  aws s3 cp "${source_artifact}" "${filename}"
  aws s3 cp "${filename}" "${dest_artifact}"
  rm -- "${filename}"
}

# Stage all test datasets (TCGA zips, ClinVar annotation, variant VCFs,
# reference FASTA) from the artifact bucket into the data lake bucket
# exposed by the pipeline stack's DataLakeBucket output.
# $1 - artifact bucket
# $2 - artifact key prefix
# $3 - pipeline stack name
copy_test_data() {
  local artifact_bucket=$1
  local artifact_key_prefix=$2
  local pipe_stackname=$3
  local data_lake_bucket
  data_lake_bucket=$(aws cloudformation describe-stacks \
    --stack-name "${pipe_stackname}" \
    --query 'Stacks[].Outputs[?OutputKey==`DataLakeBucket`].OutputValue' \
    --output text)

  local zip
  for zip in tcga-clinical tcga-cnv tcga-expression tcga-mutation tcia-metadata tcga-summary; do
    copy_unpack_zip \
      "s3://${artifact_bucket}/${artifact_key_prefix}/tcga/${zip}.zip" \
      "s3://${data_lake_bucket}/"
  done

  copy_and_upload \
    "s3://${artifact_bucket}/${artifact_key_prefix}/annotation/clinvar/clinvar.vcf.gz" \
    "s3://${data_lake_bucket}/annotation/vcf/clinvar/clinvar.vcf.gz" \
    clinvar.vcf.gz
  copy_and_upload \
    "s3://${artifact_bucket}/${artifact_key_prefix}/variants/vcf/variants.vcf.gz" \
    "s3://${data_lake_bucket}/variants/vcf/variants.vcf.gz" \
    variants.vcf.gz
  copy_and_upload \
    "s3://${artifact_bucket}/${artifact_key_prefix}/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz" \
    "s3://${data_lake_bucket}/variants/1kg/ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz" \
    ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz
  copy_and_upload \
    "s3://${artifact_bucket}/${artifact_key_prefix}/references/hg38/Homo_sapiens_assembly38.fasta" \
    "s3://${data_lake_bucket}/references/hg38/Homo_sapiens_assembly38.fasta" \
    Homo_sapiens_assembly38.fasta
}

# Orchestrate the end-to-end setup.
# $1 - resource prefix / project name
# $2 - artifact bucket
# $3 - artifact key prefix
setup() {
  local resource_prefix=$1
  local artifact_bucket=$2
  local artifact_key_prefix=$3

  local dir_prefix="GenomicsAnalysis"
  local zone_dir="${dir_prefix}Zone"
  local pipe_dir="${dir_prefix}Pipe"
  local code_dir="${dir_prefix}Code"

  local zone_stackname="${resource_prefix}-LandingZone"
  local pipe_stackname="${resource_prefix}-Pipeline"

  # Create stacks
  create_stack "${zone_stackname}" "${zone_dir}/zone_cfn.yml" "${resource_prefix}"
  create_stack "${pipe_stackname}" "${pipe_dir}/pipe_cfn.yml" "${resource_prefix}"

  # Clone and commit resources: pipe_dir contents go to the zone stack's
  # repo, code_dir contents go to the pipeline stack's repo.
  cd "${pipe_dir}"
  clone_and_commit "${zone_stackname}"
  cd ..
  cd "${code_dir}"
  clone_and_commit "${pipe_stackname}"

  # Last commit id in code_dir — the commit whose pipeline run we await.
  local commit_id
  commit_id=$(git log -1 --pretty=format:%H)
  cd ..

  # Resolve the physical pipeline name from the stack resource.
  local pipeline_name
  pipeline_name=$(aws cloudformation describe-stack-resource \
    --stack-name "${pipe_stackname}" \
    --logical-resource-id CodePipeline \
    --query 'StackResourceDetail.PhysicalResourceId')

  # Copy Test Data
  copy_test_data "${artifact_bucket}" "${artifact_key_prefix}" "${pipe_stackname}"

  # Wait for pipeline execution using commit id
  wait_for_pipeline "${pipeline_name}" "${commit_id}"

  # Run Crawlers for TCGA data
  "${code_dir}/run_crawlers.sh" "${resource_prefix}"
}

# Fail fast with a clear message if required env vars are missing
# (set -u alone would abort, but without guidance).
: "${ARTIFACT_BUCKET:?ARTIFACT_BUCKET must be set}"
: "${ARTIFACT_KEY_PREFIX:?ARTIFACT_KEY_PREFIX must be set}"

project_name=${PROJECT_NAME:-GenomicsAnalysis}

setup "${project_name}" "${ARTIFACT_BUCKET}" "${ARTIFACT_KEY_PREFIX}"