Diff of /source/setup.sh [000000] .. [9271c5]

Switch to side-by-side view

--- /dev/null
+++ b/source/setup.sh
@@ -0,0 +1,136 @@
#!/bin/bash
# Bootstraps the Genomics Analysis environment: creates CloudFormation
# stacks, seeds the CodeCommit repositories, stages test data, and waits
# for the resulting CodePipeline execution.
#
# Shell options go through 'set' rather than the shebang so they survive
# being invoked as 'bash setup.sh' (shebang flags are ignored then).
set -e

# All aws CLI calls in this script expect plain-text output.
export AWS_DEFAULT_OUTPUT=text
+
#######################################
# Create a CloudFormation stack and block until creation completes.
# Arguments:
#   $1 - stack name
#   $2 - path to the CloudFormation template file
#   $3 - resource prefix passed through as template parameters
#######################################
create_stack() {
  local stack_name=$1
  local template_name=$2
  local resource_prefix=$3

  # Lowercased copy for resources (e.g. S3 buckets) that demand lowercase
  # names. Declared separately so a failed substitution is not masked.
  local resource_prefix_lowercase
  resource_prefix_lowercase=$(printf '%s' "${resource_prefix}" | tr '[:upper:]' '[:lower:]')

  # Only wait for completion if the create call itself succeeded.
  aws cloudformation create-stack \
    --stack-name "${stack_name}" \
    --template-body "file://${template_name}" \
    --parameters \
      "ParameterKey=ResourcePrefix,ParameterValue=${resource_prefix}" \
      "ParameterKey=ResourcePrefixLowercase,ParameterValue=${resource_prefix_lowercase}" \
    --capabilities CAPABILITY_NAMED_IAM \
    --no-enable-termination-protection \
  && aws cloudformation wait stack-create-complete --stack-name "${stack_name}"
}
+
#######################################
# Initialise the current directory as a git repo and push its contents to
# the CodeCommit repository exported by the given stack.
# Arguments:
#   $1 - stack name whose 'RepoHttpUrl' output is the remote URL
# Globals:
#   Relies on AWS_DEFAULT_OUTPUT=text for a plain-string query result.
#######################################
clone_and_commit() {
  local stack_name=$1

  # Declared separately so a failed describe-stacks is not masked by 'local'.
  local repo_http_url
  repo_http_url=$(aws cloudformation describe-stacks --stack-name "${stack_name}" \
    --query 'Stacks[].Outputs[?OutputKey==`RepoHttpUrl`].OutputValue')

  git init .
  git remote add origin "${repo_http_url}"

  # 'git add -A' also stages dotfiles, which 'git add *' would miss, and
  # is safe against filenames starting with '-'.
  git add -A
  git commit -m "first commit"
  git push --set-upstream origin master
}
+
#######################################
# Poll CodePipeline until the execution for a given commit finishes.
# Polls every 30s for up to 60 attempts (~30 minutes).
# Arguments:
#   $1 - pipeline name
#   $2 - commit id that triggered the execution
# Returns:
#   0 on success or timeout message; exits 1 if the execution Failed.
#######################################
wait_for_pipeline() {
  local pipeline_name=$1
  local commit_id=$2

  local message="Max attempts reached. Pipeline execution failed for commit: ${commit_id}"
  local stage_status=""
  local i
  for i in {1..60}; do
    # Status of the execution whose source revision matches our commit.
    stage_status=$(aws codepipeline list-pipeline-executions \
      --pipeline-name "${pipeline_name}" \
      --query 'pipelineExecutionSummaries[?sourceRevisions[0].revisionId==`'"${commit_id}"'`].status')

    if [ "${stage_status}" == "InProgress" ] || [ -z "${stage_status}" ]; then
      # Empty status means the execution has not appeared yet; keep waiting.
      printf '.'
      sleep 30
    elif [ "${stage_status}" == "Succeeded" ]; then
      message="CodePipeline execution succeeded for commit: ${commit_id}"
      break
    elif [ "${stage_status}" == "Failed" ]; then
      message="CodePipeline execution Failed for commit: ${commit_id}"
      break
    fi

  done
  # '%s' keeps printf safe even if the message ever contains '%' characters.
  printf '\n%s\n' "${message}"
  if [ "${stage_status}" == "Failed" ]; then exit 1; fi
}
+
#######################################
# Download a zip artifact from S3, unpack it, and sync the contents to a
# destination S3 prefix.
# Arguments:
#   $1 - s3:// URL of the source zip artifact
#   $2 - s3:// destination prefix to sync the unpacked tree into
#######################################
copy_unpack_zip() {
  local source_artifact=$1
  local dest_prefix=$2

  echo "Unpacking ${source_artifact} to ${dest_prefix}"

  # Use a throwaway directory so repeated or concurrent runs do not
  # collide on fixed ./stage and ./temporary.zip paths in the cwd.
  local work_dir
  work_dir=$(mktemp -d)

  aws s3 cp "${source_artifact}" "${work_dir}/artifact.zip"
  unzip "${work_dir}/artifact.zip" -d "${work_dir}/stage"
  aws s3 sync "${work_dir}/stage/" "${dest_prefix}"
  rm -rf -- "${work_dir}"
}
+
#######################################
# Copy a single artifact between S3 locations via a local intermediate
# file, then remove the local copy.
# Arguments:
#   $1 - s3:// URL of the source object
#   $2 - s3:// URL of the destination object
#   $3 - local filename to use for the intermediate download
#######################################
copy_and_upload() {
  local source_artifact=$1
  local dest_artifact=$2
  local filename=$3

  # NOTE(review): the original text had '$(unknown)' placeholders here;
  # the otherwise-unused third parameter is clearly the intended path.
  aws s3 cp "${source_artifact}" "${filename}"
  aws s3 cp "${filename}" "${dest_artifact}"
  rm -- "${filename}"
}
+
#######################################
# Stage the TCGA/TCIA test data and genomics reference files into the
# data-lake bucket exported by the pipeline stack.
# Arguments:
#   $1 - artifact bucket name
#   $2 - key prefix under the artifact bucket
#   $3 - pipeline stack name (provides the 'DataLakeBucket' output)
#######################################
copy_test_data() {
  local artifact_bucket=$1
  local artifact_key_prefix=$2
  local pipe_stackname=$3

  # Resolve the data-lake bucket exported by the pipeline stack.
  local data_lake_bucket
  data_lake_bucket=$(aws cloudformation describe-stacks --stack-name "${pipe_stackname}" \
    --query 'Stacks[].Outputs[?OutputKey==`DataLakeBucket`].OutputValue' --output text)

  local src_prefix="s3://${artifact_bucket}/${artifact_key_prefix}"
  local lake_prefix="s3://${data_lake_bucket}"

  # TCGA/TCIA archives are unpacked into the root of the data lake.
  local archive
  for archive in tcga-clinical tcga-cnv tcga-expression tcga-mutation tcia-metadata tcga-summary; do
    copy_unpack_zip "${src_prefix}/tcga/${archive}.zip" "${lake_prefix}/"
  done

  # Single-file reference data is copied object-for-object.
  local kg_file=ALL.chr22.shapeit2_integrated_snvindels_v2a_27022019.GRCh38.phased.filtNA.vcf.gz
  copy_and_upload "${src_prefix}/annotation/clinvar/clinvar.vcf.gz" "${lake_prefix}/annotation/vcf/clinvar/clinvar.vcf.gz" clinvar.vcf.gz
  copy_and_upload "${src_prefix}/variants/vcf/variants.vcf.gz" "${lake_prefix}/variants/vcf/variants.vcf.gz" variants.vcf.gz
  copy_and_upload "${src_prefix}/variants/1kg/${kg_file}" "${lake_prefix}/variants/1kg/${kg_file}" "${kg_file}"
  copy_and_upload "${src_prefix}/references/hg38/Homo_sapiens_assembly38.fasta" "${lake_prefix}/references/hg38/Homo_sapiens_assembly38.fasta" Homo_sapiens_assembly38.fasta
}
+
#######################################
# End-to-end environment setup: create the landing-zone and pipeline
# stacks, push the seed repositories, stage test data, wait for the
# resulting pipeline execution, then run the TCGA crawlers.
# Arguments:
#   $1 - resource prefix used to name the stacks
#   $2 - S3 bucket holding the source artifacts
#   $3 - key prefix under the artifact bucket
# Globals:
#   Sets commit_id and pipeline_name (intentionally not local, matching
#   the original script's behaviour).
#######################################
setup() {
  local resource_prefix=$1
  local artifact_bucket=$2
  local artifact_key_prefix=$3

  local dir_prefix="GenomicsAnalysis"

  local zone_dir="${dir_prefix}Zone"
  local pipe_dir="${dir_prefix}Pipe"
  local code_dir="${dir_prefix}Code"

  local zone_stackname="${resource_prefix}-LandingZone"
  local pipe_stackname="${resource_prefix}-Pipeline"

  # Create stacks (each call blocks until stack creation completes).
  create_stack "${zone_stackname}" "${zone_dir}/zone_cfn.yml" "${resource_prefix}"
  create_stack "${pipe_stackname}" "${pipe_dir}/pipe_cfn.yml" "${resource_prefix}"

  # Seed the repositories created by the stacks. Guard each cd so a
  # missing directory cannot make git run in the wrong place (the
  # shebang's -e is lost when the script is run via 'bash setup.sh').
  cd "${pipe_dir}" || return 1
  clone_and_commit "${zone_stackname}"
  cd ..
  cd "${code_dir}" || return 1
  clone_and_commit "${pipe_stackname}"

  # The last commit in the code repo identifies the pipeline execution
  # we need to wait on.
  commit_id=$(git log -1 --pretty=format:%H)
  cd ..

  # Resolve the generated CodePipeline name from the stack resources.
  pipeline_name=$(aws cloudformation describe-stack-resource \
    --stack-name "${pipe_stackname}" \
    --logical-resource-id CodePipeline \
    --query 'StackResourceDetail.PhysicalResourceId')

  # Copy test data into the data lake.
  copy_test_data "${artifact_bucket}" "${artifact_key_prefix}" "${pipe_stackname}"

  # Wait for the pipeline execution triggered by the pushed commit.
  wait_for_pipeline "${pipeline_name}" "${commit_id}"

  # Run crawlers for the TCGA data.
  "${code_dir}/run_crawlers.sh" "${resource_prefix}"
}
+
# Entry point. PROJECT_NAME is optional (defaults below); ARTIFACT_BUCKET
# and ARTIFACT_KEY_PREFIX must be provided by the environment.
project_name=${PROJECT_NAME:-GenomicsAnalysis}

# Fail fast with a clear message instead of creating half an environment
# with empty S3 locations.
: "${ARTIFACT_BUCKET:?ARTIFACT_BUCKET must be set}"
: "${ARTIFACT_KEY_PREFIX:?ARTIFACT_KEY_PREFIX must be set}"

setup "$project_name" "${ARTIFACT_BUCKET}" "${ARTIFACT_KEY_PREFIX}"