# CloudFormation template header.
# The format version is quoted so that YAML loaders keep it as a string
# instead of coercing the ISO-looking value into a date object.
AWSTemplateFormatVersion: "2010-09-09"
Description: GenomicsAnalysisCode
# Stack inputs. None of the values are validated by the template itself.
Parameters:
  ResourcePrefix:
    Type: String
    Default: GenomicsAnalysis
    Description: >-
      Mixed-case prefix used to name the Glue security configuration and the
      key ARN export, and to match the IAM role and CloudFormation stack
      names this template grants access to.
  ResourcePrefixLowercase:
    Type: String
    Default: genomicsanalysis
    Description: >-
      Lowercase prefix used to name the Athena workgroup, Glue database and
      SageMaker notebook, and to match the Glue crawler, job and trigger
      names the notebook role may start.
  ResourcesBucket:
    Type: String
    Description: >-
      Name (not ARN) of the S3 bucket the roles read from; notebooks are
      synced from its notebooks/ prefix when the notebook instance starts.
  DataLakeBucket:
    Type: String
    Description: >-
      Name (not ARN) of the S3 bucket the roles read and write; Athena query
      results are written under its results prefix.
  DatabaseAdministrator:
    Type: String
    Description: >-
      ARN of the IAM principal that is granted administrative access in the
      data catalog encryption key policy.
Resources:
  # Role assumed by AWS Glue. On top of the AWS managed Glue service policy it
  # may run Athena queries, read the resources bucket, read/write the data
  # lake bucket and use the data catalog encryption key.
  JobRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - glue.amazonaws.com
            Action:
              - sts:AssumeRole
      Path: /
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole
      Policies:
        - PolicyName: s3_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - athena:StartQueryExecution
                  - athena:GetQueryExecution
                  - athena:GetQueryResults
                Resource:
                  # Well-formed workgroup ARN pattern. The previous value
                  # ended in "<account-id>*" with no ":workgroup/" segment;
                  # it matched the same resources but was malformed.
                  - !Sub arn:aws:athena:${AWS::Region}:${AWS::AccountId}:workgroup/*
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:ListBucket
                Resource:
                  - !Sub arn:aws:s3:::${ResourcesBucket}
                  - !Sub arn:aws:s3:::${ResourcesBucket}/*
              - Effect: Allow
                Action:
                  - s3:PutObject
                  - s3:GetObject
                  - s3:ListBucket
                  - s3:DeleteObject
                Resource:
                  - !Sub arn:aws:s3:::${DataLakeBucket}
                  - !Sub arn:aws:s3:::${DataLakeBucket}/*
        - PolicyName: kms_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - kms:GenerateDataKey
                  - kms:Decrypt
                  - kms:Encrypt
                Resource:
                  - !GetAtt DataCatalogEncryptionKey.Arn

  # Role assumed by the SageMaker notebook instance (see Runbook). Grants
  # logging, bucket access, the ability to start prefixed Glue crawlers, jobs
  # and triggers, Glue catalog access, Athena queries in the solution
  # workgroup, stack description and use of the catalog encryption key.
  RunbookRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - sagemaker.amazonaws.com
            Action:
              - sts:AssumeRole
      Path: /
      Policies:
        - PolicyName: logs_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - logs:CreateLogStream
                  - logs:DescribeLogStreams
                  - logs:CreateLogGroup
                  - logs:PutLogEvents
                Resource:
                  - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/sagemaker/*
                  - !Sub arn:aws:logs:${AWS::Region}:${AWS::AccountId}:log-group:/aws/sagemaker/*:log-stream:aws-glue-*
        - PolicyName: s3_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # Bucket-level actions apply to the bucket ARN itself.
              - Effect: Allow
                Action:
                  - s3:ListBucket
                  - s3:GetBucketLocation
                Resource:
                  - !Sub arn:aws:s3:::${DataLakeBucket}
                  - !Sub arn:aws:s3:::${ResourcesBucket}
              # Read/write on data lake objects.
              - Effect: Allow
                Action:
                  - s3:GetObject
                  - s3:GetObjectAcl
                  - s3:PutObject
                  - s3:DeleteObject
                Resource:
                  - !Sub arn:aws:s3:::${DataLakeBucket}/*
              # Read-only on resources bucket objects.
              - Effect: Allow
                Action:
                  - s3:GetObject
                Resource:
                  - !Sub arn:aws:s3:::${ResourcesBucket}/*
        - PolicyName: glue_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # The KMS statement that used to be duplicated here was removed;
              # the identical grant remains in the kms_access policy below.
              - Effect: Allow
                Action:
                  - glue:StartCrawler
                  - glue:StartJobRun
                  - glue:StartTrigger
                Resource:
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:crawler/${ResourcePrefixLowercase}*
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:job/${ResourcePrefixLowercase}*
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:trigger/${ResourcePrefixLowercase}*
        - PolicyName: glue_table_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - glue:GetDatabases
                  - glue:GetDatabase
                  - glue:GetTables
                  - glue:GetTable
                  - lakeformation:GetDataAccess
                Resource: "*"
              - Effect: Allow
                Action:
                  - glue:CreateDatabase
                Resource:
                  # glue:CreateDatabase is authorized against the catalog as
                  # well as the database, so both ARNs must be listed for the
                  # grant to take effect.
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:catalog
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/default
              - Effect: Allow
                Action:
                  - glue:GetTable
                  - glue:GetTables
                  - glue:CreateTable
                  - glue:UpdateTable
                  - glue:DeleteTable
                  - glue:GetDatabase
                  - glue:GetPartition
                  - glue:GetPartitions
                  - glue:GetDevEndpoint
                  - glue:GetDevEndpoints
                  - glue:UpdateDevEndpoint
                Resource:
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:catalog
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:database/${ResourcePrefixLowercase}
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:table/${ResourcePrefixLowercase}/*
                  - !Sub arn:aws:glue:${AWS::Region}:${AWS::AccountId}:devEndpoint/*
        - PolicyName: athena_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - athena:StartQueryExecution
                  - athena:GetQueryExecution
                  - athena:GetQueryResults
                  - athena:GetWorkGroup
                Resource:
                  # Must stay in sync with the Name of the WorkGroup resource.
                  - !Sub arn:aws:athena:${AWS::Region}:${AWS::AccountId}:workgroup/${ResourcePrefixLowercase}-${AWS::Region}
        - PolicyName: cfn_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - cloudformation:DescribeStacks
                Resource:
                  - !Sub arn:aws:cloudformation:${AWS::Region}:${AWS::AccountId}:stack/${ResourcePrefix}*
        - PolicyName: kms_access
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - kms:GenerateDataKey
                  - kms:Decrypt
                  - kms:Encrypt
                Resource:
                  - !GetAtt DataCatalogEncryptionKey.Arn

  # Athena workgroup named "<lowercase prefix>-<region>"; query results are
  # written under the results prefix of the data lake bucket.
  WorkGroup:
    Type: AWS::Athena::WorkGroup
    Properties:
      Description: !Sub ${ResourcePrefixLowercase}
      Name: !Sub ${ResourcePrefixLowercase}-${AWS::Region}
      RecursiveDeleteOption: true
      WorkGroupConfiguration:
        EngineVersion:
          # EffectiveEngineVersion is a read-only attribute reported by
          # Athena, so only the selected version is declared here.
          SelectedEngineVersion: Athena engine version 3
        ResultConfiguration:
          OutputLocation: !Sub s3://${DataLakeBucket}/results

  # Customer managed KMS key used for Glue data catalog encryption and by the
  # Glue security configuration below. The key is retained both when the
  # stack is deleted and when an update would replace it.
  DataCatalogEncryptionKey:
    Type: AWS::KMS::Key
    DeletionPolicy: Retain
    UpdateReplacePolicy: Retain
    Properties:
      Description: KMS key used to encrypt the Glue data catalog
      Enabled: true
      EnableKeyRotation: true
      KeyPolicy:
        Version: "2012-10-17"
        Id: TestGlueCatalogEncryptionKeyPolicy
        Statement:
          # Full control for the account root and the database administrator.
          # In a key policy, Resource "*" refers to the key the policy is
          # attached to.
          - Sid: Enable IAM User Permissions
            Effect: Allow
            Principal:
              AWS:
                - !Sub arn:aws:iam::${AWS::AccountId}:root
                - !Ref DatabaseAdministrator
            Action: "kms:*"
            Resource: "*"
          - Sid: Allow access for Key Administrators
            Effect: Allow
            Principal:
              AWS:
                - !Sub arn:aws:iam::${AWS::AccountId}:root
                - !Ref DatabaseAdministrator
            Action:
              - "kms:Create*"
              - "kms:Describe*"
              - "kms:Enable*"
              - "kms:List*"
              - "kms:Put*"
              - "kms:Update*"
              - "kms:Revoke*"
              - "kms:Disable*"
              - "kms:Get*"
              - "kms:Delete*"
              - kms:TagResource
              - kms:UntagResource
              - kms:ScheduleKeyDeletion
              - kms:CancelKeyDeletion
            Resource: "*"
          # Lets the regional CloudWatch Logs service principal use the key;
          # the Glue security configuration below selects this key for
          # CloudWatch encryption.
          - Sid: Allow CloudWatch Logs to use the key
            Effect: Allow
            Principal:
              Service: !Sub logs.${AWS::Region}.amazonaws.com
            Action:
              - kms:Encrypt
              - kms:Decrypt
              - "kms:ReEncrypt*"
              - "kms:GenerateDataKey*"
              - kms:DescribeKey
            Resource: "*"
          # Any principal is allowed, but only when its ARN is a role in this
          # account whose name starts with ResourcePrefix. ArnLike is used
          # because the pattern contains a wildcard.
          # NOTE(review): JobRole and RunbookRole set no RoleName, so their
          # generated names presumably start with the stack name; confirm the
          # stack name begins with ResourcePrefix.
          - Sid: Allow solution roles to use the key
            Effect: Allow
            Principal: "*"
            Action:
              - kms:Encrypt
              - kms:Decrypt
              - "kms:ReEncrypt*"
              - "kms:GenerateDataKey*"
              - kms:DescribeKey
            Resource: "*"
            Condition:
              ArnLike:
                "aws:PrincipalARN": !Sub arn:aws:iam::${AWS::AccountId}:role/${ResourcePrefix}*

  # Turns on SSE-KMS encryption at rest for this account's Glue data catalog.
  # The !Ref to the key already orders creation, so no DependsOn is needed.
  DataCatalogEncryptionSettings:
    Type: AWS::Glue::DataCatalogEncryptionSettings
    Properties:
      CatalogId: !Ref AWS::AccountId
      DataCatalogEncryptionSettings:
        EncryptionAtRest:
          CatalogEncryptionMode: SSE-KMS
          SseAwsKmsKeyId: !Ref DataCatalogEncryptionKey

  # Glue security configuration that uses the same key for CloudWatch logs,
  # job bookmarks and S3 output.
  SecurityConfiguration:
    Type: AWS::Glue::SecurityConfiguration
    Properties:
      EncryptionConfiguration:
        CloudWatchEncryption:
          CloudWatchEncryptionMode: SSE-KMS
          KmsKeyArn: !GetAtt DataCatalogEncryptionKey.Arn
        JobBookmarksEncryption:
          JobBookmarksEncryptionMode: CSE-KMS
          KmsKeyArn: !GetAtt DataCatalogEncryptionKey.Arn
        S3Encryptions:
          - S3EncryptionMode: SSE-KMS
            KmsKeyArn: !GetAtt DataCatalogEncryptionKey.Arn
      Name: !Sub ${ResourcePrefix}SecurityConfiguration

  # Glue database named after the lowercase prefix.
  # NOTE(review): DependsOn targets only the key. If the intent is to create
  # the database after catalog encryption is enabled, it should depend on
  # DataCatalogEncryptionSettings instead - confirm.
  DataCatalog:
    Type: AWS::Glue::Database
    DependsOn: DataCatalogEncryptionKey
    Properties:
      CatalogId: !Ref AWS::AccountId
      DatabaseInput:
        Name: !Sub ${ResourcePrefixLowercase}
        Description: Data catalog for Human NGS Tertiary Analysis and Data Lakes solution

  # Script run each time the notebook instance starts: syncs the notebooks
  # from the resources bucket and exports RESOURCE_PREFIX through a conda
  # activation hook.
  RunbookLifecycle:
    Type: AWS::SageMaker::NotebookInstanceLifecycleConfig
    Properties:
      NotebookInstanceLifecycleConfigName: !Sub ${ResourcePrefixLowercase}Runbook
      OnStart:
        - Content:
            Fn::Base64: !Sub |
              #!/bin/bash
              # Abort on the first failure. This is enabled before the cd so
              # that a failed cd cannot cause the sync to run elsewhere.
              set -e
              cd /home/ec2-user/SageMaker
              aws s3 sync s3://${ResourcesBucket}/notebooks .
              chmod 666 *.ipynb
              # Create the activation hook directory if it is missing so the
              # redirect below cannot fail on a missing path.
              mkdir -p /home/ec2-user/anaconda3/envs/python3/etc/conda/activate.d
              echo "export RESOURCE_PREFIX='${ResourcePrefix}'" > /home/ec2-user/anaconda3/envs/python3/etc/conda/activate.d/env_vars.sh

  # SageMaker notebook instance that runs with RunbookRole and the lifecycle
  # configuration above.
  Runbook:
    Type: AWS::SageMaker::NotebookInstance
    Properties:
      NotebookInstanceName: !Sub ${ResourcePrefixLowercase}Runbook
      InstanceType: ml.t2.medium
      LifecycleConfigName: !GetAtt RunbookLifecycle.NotebookInstanceLifecycleConfigName
      RoleArn: !GetAtt RunbookRole.Arn
      PlatformIdentifier: notebook-al2-v2
Outputs:
  # Exports the ARN of the data catalog encryption key under the name
  # "<ResourcePrefix>-DataCatalogEncryptionKeyArn" for other stacks to import.
  DataCatalogEncryptionKeyArn:
    Value:
      Fn::GetAtt:
        - DataCatalogEncryptionKey
        - Arn
    Export:
      Name:
        Fn::Sub: ${ResourcePrefix}-DataCatalogEncryptionKeyArn