a b/source/TCIA_etl.yaml
1
# Copyright Amazon.com, Inc. or its affiliates. All Rights Reserved.
2
# SPDX-License-Identifier: MIT-0
3
#
4
# Permission is hereby granted, free of charge, to any person
5
# obtaining a copy of this software and associated documentation files
6
# (the "Software"), to deal in the Software without restriction,
7
# including without limitation the rights to use, copy, modify, merge,
8
# publish, distribute, sublicense, and/or sell copies of the Software,
9
# and to permit persons to whom the Software is furnished to do so.
10
#
11
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
12
# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
13
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
14
# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
15
# BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
16
# ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
17
# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
18
# SOFTWARE.
19
20
# Template format version declared for CloudFormation.
AWSTemplateFormatVersion: "2010-09-09"
# Stack description (literal block scalar, so the line break is preserved).
Description: |
  This CloudFormation Template deploys Glue jobs and crawlers for TCGA
  data
24
25
Parameters:
  # Bucket the Glue job scripts are read from (under scripts/); also hosts the
  # quicksight_cfn.yml template referenced by the CreateQuicksightLink output.
  ResourcesBucket:
    Type: String

  # Prefix used to build the imported KMS key export name and the QuickSight
  # stack name.
  ResourcePrefix:
    Type: String

  # Prefix used in the crawler and trigger names.
  # NOTE(review): presumably the lowercase form of ResourcePrefix - confirm.
  ResourcePrefixLowercase:
    Type: String

  DatabaseName:
    Description: |
      If not AUTO, references an existing Glue database for crawlers
      to create tables in.
    Type: String
    Default: AUTO

  DataLakeBucket:
    Type: String
    Description: |
      S3 bucket where results will be written. Bucketname needs to be
      unique. The bucket name must respect the S3 bucket naming
      conventions (can contain lowercase letters, numbers, periods and
      hyphens).
    # Accepts either an empty string or a 3-63 character name made of
    # dot-separated labels of lowercase letters, digits and inner hyphens that
    # is not a purely numeric dotted sequence.
    AllowedPattern: "((?=^.{3,63}$)(?!^(\\d+\\.)+\\d+$)(^(([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])\\.)*([a-z0-9]|[a-z0-9][a-z0-9\\-]*[a-z0-9])$)|(^.{0}$))"

  # "No" (the default) makes the stack create the DataLakeBucket itself.
  ExistingBucket:
    Type: String
    Description: Is this an existing bucket?
    Default: "No"
    AllowedValues:
      - "Yes"
      - "No"
58
59
Conditions:
  # True when ExistingBucket is "No", i.e. the stack must create the bucket.
  BucketDoesNotExist: !Equals [!Ref ExistingBucket, "No"]

  # True when DatabaseName is left at AUTO, i.e. the stack creates TcgaDb.
  NeedsGlueDatabase: !Equals [!Ref DatabaseName, "AUTO"]
66
67
Resources:
68
69
  # Data lake bucket. Created only when ExistingBucket is "No"; retained when
  # the stack is deleted or the resource is replaced.
  TCGAS3Bucket:
    Type: AWS::S3::Bucket
    Condition: BucketDoesNotExist
    DeletionPolicy: Retain
    UpdateReplacePolicy: Retain
    Properties:
      BucketName: !Ref DataLakeBucket
      # Default server-side encryption with S3-managed keys.
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
      # Explicitly block every form of public access to the bucket.
      PublicAccessBlockConfiguration:
        BlockPublicAcls: true
        BlockPublicPolicy: true
        IgnorePublicAcls: true
        RestrictPublicBuckets: true
80
81
  # IAM role assumed by the Glue service; used by every Glue job and crawler
  # defined in this template.
  GlueJobRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: "Allow"
            Principal:
              Service: "glue.amazonaws.com"
            Action: "sts:AssumeRole"
      Path: "/"
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/service-role/AWSGlueServiceRole
      Policies:
        # Run and read Athena queries in the "primary" workgroup, which is the
        # workgroup passed to the summary job.
        - PolicyName: athena_access
          PolicyDocument:
            # Quoted so YAML loaders keep the version as a string, not a date.
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - athena:StartQueryExecution
                  - athena:GetQueryExecution
                  - athena:GetQueryResults
                Resource:
                  - !Sub arn:aws:athena:${AWS::Region}:${AWS::AccountId}:workgroup/primary
        # Use the KMS key exported by another stack as
        # <ResourcePrefix>-DataCatalogEncryptionKeyArn.
        - PolicyName: kms_access
          PolicyDocument:
            # Quoted so YAML loaders keep the version as a string, not a date.
            Version: "2012-10-17"
            Statement:
              - Effect: Allow
                Action:
                  - kms:GenerateDataKey
                  - kms:Decrypt
                  - kms:Encrypt
                Resource:
                  # Long-form Fn::Sub is nested inside the short-form
                  # !ImportValue to build the export name.
                  - !ImportValue
                      Fn::Sub: '${ResourcePrefix}-DataCatalogEncryptionKeyArn'
        - PolicyName: "CrawlerAccess"
          PolicyDocument:
            Version: "2012-10-17"
            Statement:
              # Read/write/delete access to the data lake bucket.
              - Effect: "Allow"
                Action:
                  - s3:PutObject
                  - s3:GetObject
                  - s3:ListBucket
                  - s3:DeleteObject
                Resource:
                  - !Sub 'arn:aws:s3:::${DataLakeBucket}'
                  - !Sub 'arn:aws:s3:::${DataLakeBucket}/*'
              # Read-only access to the resources bucket holding the scripts.
              - Effect: "Allow"
                Action:
                  - s3:GetObject
                  - s3:ListBucket
                Resource:
                  - !Sub 'arn:aws:s3:::${ResourcesBucket}'
                  - !Sub 'arn:aws:s3:::${ResourcesBucket}/*'
138
      
139
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUAD with
  # data type "Gene Expression Quantification".
  TcgaLuadExpressionGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUAD"
        "--data_type": "Gene Expression Quantification"
        "--output_bucket": !Ref DataLakeBucket
154
  
155
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUAD with
  # data type "Masked Somatic Mutation".
  TcgaLuadMutationGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUAD"
        "--data_type": "Masked Somatic Mutation"
        "--output_bucket": !Ref DataLakeBucket
170
   
171
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUAD with
  # data type "Gene Level Copy Number".
  TcgaLuadCnvGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUAD"
        "--data_type": "Gene Level Copy Number"
        "--output_bucket": !Ref DataLakeBucket
186
187
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUAD with
  # data type "Clinical Supplement".
  TcgaLuadClinicalGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUAD"
        "--data_type": "Clinical Supplement"
        "--output_bucket": !Ref DataLakeBucket
202
203
  # Glue ETL job running image_api_glue.py for project TCGA-LUAD; unlike the
  # tcga_etl_common_job.py jobs it takes no --data_type argument.
  TcgaLuadImagingMetadataGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/image_api_glue.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUAD"
        "--output_bucket": !Ref DataLakeBucket
217
      
218
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUSC with
  # data type "Gene Expression Quantification".
  TcgaLuscExpressionGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUSC"
        "--data_type": "Gene Expression Quantification"
        "--output_bucket": !Ref DataLakeBucket
233
  
234
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUSC with
  # data type "Masked Somatic Mutation".
  TcgaLuscMutationGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUSC"
        "--data_type": "Masked Somatic Mutation"
        "--output_bucket": !Ref DataLakeBucket
249
   
250
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUSC with
  # data type "Gene Level Copy Number".
  TcgaLuscCnvGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUSC"
        "--data_type": "Gene Level Copy Number"
        "--output_bucket": !Ref DataLakeBucket
265
266
  # Glue ETL job running tcga_etl_common_job.py for project TCGA-LUSC with
  # data type "Clinical Supplement".
  TcgaLuscClinicalGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/tcga_etl_common_job.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUSC"
        "--data_type": "Clinical Supplement"
        "--output_bucket": !Ref DataLakeBucket
281
282
  # Glue ETL job running image_api_glue.py for project TCGA-LUSC; unlike the
  # tcga_etl_common_job.py jobs it takes no --data_type argument.
  TcgaLuscImagingMetadataGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/image_api_glue.py"
      # Arguments handed to the script on every run.
      DefaultArguments:
        "--project": "TCGA-LUSC"
        "--output_bucket": !Ref DataLakeBucket
296
297
  # Glue ETL job running create_tcga_summary.py against the data lake bucket,
  # the "primary" Athena workgroup and the Glue database used by the crawlers.
  TcgaSummaryGlueJob:
    Type: AWS::Glue::Job
    Properties:
      Role: !Ref GlueJobRole
      GlueVersion: "2.0"
      MaxRetries: 0
      ExecutionProperty:
        MaxConcurrentRuns: 2
      Command:
        Name: glueetl
        ScriptLocation: !Sub "s3://${ResourcesBucket}/scripts/create_tcga_summary.py"
      DefaultArguments:
        "--bucket": !Ref DataLakeBucket
        # Does not work when set to the solution workgroup.
        "--workgroup": "primary"
        # TcgaDb when the stack creates the database, else the existing one.
        "--database": !If [NeedsGlueDatabase, !Ref TcgaDb, !Ref DatabaseName]
315
      
316
  # Crawler over s3://<DataLakeBucket>/tcga-mutation. Tables go to TcgaDb when
  # the stack creates it, otherwise to the database named by DatabaseName.
  TCGAMutationCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-mut"
      Description: AWS Glue crawler to crawl TCGA mutation data
      Role: !Ref GlueJobRole
      DatabaseName: !If [NeedsGlueDatabase, !Ref TcgaDb, !Ref DatabaseName]
      Targets:
        S3Targets:
          - Path: !Sub "s3://${DataLakeBucket}/tcga-mutation"
      SchemaChangePolicy:
        DeleteBehavior: "LOG"
        UpdateBehavior: "UPDATE_IN_DATABASE"
      # Crawler configuration, passed as a JSON string.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
333
334
  # Crawler over s3://<DataLakeBucket>/tcga-cnv. Tables go to TcgaDb when the
  # stack creates it, otherwise to the database named by DatabaseName.
  TCGACNVCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-cnv"
      Description: AWS Glue crawler to crawl TCGA copy number data
      Role: !Ref GlueJobRole
      DatabaseName: !If [NeedsGlueDatabase, !Ref TcgaDb, !Ref DatabaseName]
      Targets:
        S3Targets:
          - Path: !Sub "s3://${DataLakeBucket}/tcga-cnv"
      SchemaChangePolicy:
        DeleteBehavior: "LOG"
        UpdateBehavior: "UPDATE_IN_DATABASE"
      # Crawler configuration, passed as a JSON string.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
351
352
  # Crawler over s3://<DataLakeBucket>/tcga-expression. Tables go to TcgaDb
  # when the stack creates it, otherwise to the database named by DatabaseName.
  TCGAExpressionCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-exp"
      Description: AWS Glue crawler to crawl TCGA expression data
      Role: !Ref GlueJobRole
      DatabaseName: !If [NeedsGlueDatabase, !Ref TcgaDb, !Ref DatabaseName]
      Targets:
        S3Targets:
          - Path: !Sub "s3://${DataLakeBucket}/tcga-expression"
      SchemaChangePolicy:
        DeleteBehavior: "LOG"
        UpdateBehavior: "UPDATE_IN_DATABASE"
      # Crawler configuration, passed as a JSON string.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
369
370
  # Crawler over s3://<DataLakeBucket>/tcga-clinical. Tables go to TcgaDb when
  # the stack creates it, otherwise to the database named by DatabaseName.
  TCGAClinicalCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-clin"
      Description: AWS Glue crawler to crawl TCGA clinical data
      Role: !Ref GlueJobRole
      DatabaseName: !If [NeedsGlueDatabase, !Ref TcgaDb, !Ref DatabaseName]
      Targets:
        S3Targets:
          - Path: !Sub "s3://${DataLakeBucket}/tcga-clinical"
      SchemaChangePolicy:
        DeleteBehavior: "LOG"
        UpdateBehavior: "UPDATE_IN_DATABASE"
      # Crawler configuration, passed as a JSON string.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
387
388
  # Crawler over s3://<DataLakeBucket>/tcia-metadata. Tables go to TcgaDb when
  # the stack creates it, otherwise to the database named by DatabaseName.
  TCGAImagingMetadataCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-img"
      Description: AWS Glue crawler to crawl TCGA imaging metadata
      Role: !Ref GlueJobRole
      DatabaseName: !If [NeedsGlueDatabase, !Ref TcgaDb, !Ref DatabaseName]
      Targets:
        S3Targets:
          - Path: !Sub "s3://${DataLakeBucket}/tcia-metadata"
      SchemaChangePolicy:
        DeleteBehavior: "LOG"
        UpdateBehavior: "UPDATE_IN_DATABASE"
      # Crawler configuration, passed as a JSON string.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
405
406
  # Crawler over s3://<DataLakeBucket>/tcga-summary. Tables go to TcgaDb when
  # the stack creates it, otherwise to the database named by DatabaseName.
  TCGASummaryCrawler:
    Type: AWS::Glue::Crawler
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-sum"
      Description: AWS Glue crawler to crawl TCGA summary data
      Role: !Ref GlueJobRole
      DatabaseName: !If [NeedsGlueDatabase, !Ref TcgaDb, !Ref DatabaseName]
      Targets:
        S3Targets:
          - Path: !Sub "s3://${DataLakeBucket}/tcga-summary"
      SchemaChangePolicy:
        DeleteBehavior: "LOG"
        UpdateBehavior: "UPDATE_IN_DATABASE"
      # Crawler configuration, passed as a JSON string.
      Configuration: '{"Version":1.0,"CrawlerOutput":{"Partitions":{"AddOrUpdateBehavior":"InheritFromTable"},"Tables":{"AddOrUpdateBehavior":"MergeNewColumns"}}}'
423
424
  # Glue workflow that every trigger in this template attaches to through its
  # WorkflowName property.
  TCGAWorkflow:
    Type: AWS::Glue::Workflow
    Properties:
      Description: "Workflow that runs the TCGA ETL jobs, crawls their output and builds the TCGA summary"
428
429
  # On-demand entry point of TCGAWorkflow: starts all ten LUAD/LUSC ETL jobs.
  WorkflowStartTrigger:
    Type: AWS::Glue::Trigger
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-start"
      Description: Trigger for starting the workflow
      WorkflowName: !Ref TCGAWorkflow
      Type: ON_DEMAND
      Actions:
        # TCGA-LUAD jobs
        - JobName: !Ref TcgaLuadExpressionGlueJob
        - JobName: !Ref TcgaLuadMutationGlueJob
        - JobName: !Ref TcgaLuadCnvGlueJob
        - JobName: !Ref TcgaLuadClinicalGlueJob
        - JobName: !Ref TcgaLuadImagingMetadataGlueJob
        # TCGA-LUSC jobs
        - JobName: !Ref TcgaLuscExpressionGlueJob
        - JobName: !Ref TcgaLuscMutationGlueJob
        - JobName: !Ref TcgaLuscCnvGlueJob
        - JobName: !Ref TcgaLuscClinicalGlueJob
        - JobName: !Ref TcgaLuscImagingMetadataGlueJob
447
448
  # Starts TCGAExpressionCrawler once both the LUAD and LUSC expression jobs
  # have succeeded.
  ExpCrawlerTrigger:
    Type: AWS::Glue::Trigger
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-exp"
      Description: "Description for a conditional job trigger"
      WorkflowName: !Ref TCGAWorkflow
      Type: "CONDITIONAL"
      StartOnCreation: true
      Predicate:
        Logical: AND
        Conditions:
          - JobName: !Ref TcgaLuadExpressionGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
          - JobName: !Ref TcgaLuscExpressionGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
      Actions:
        - CrawlerName: !Ref TCGAExpressionCrawler
467
468
  # Starts TCGAMutationCrawler once both the LUAD and LUSC mutation jobs have
  # succeeded.
  MutCrawlerTrigger:
    Type: AWS::Glue::Trigger
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-mut"
      Description: "Description for a conditional Mutation crawler job trigger"
      WorkflowName: !Ref TCGAWorkflow
      Type: "CONDITIONAL"
      StartOnCreation: true
      Predicate:
        Logical: AND
        Conditions:
          - JobName: !Ref TcgaLuadMutationGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
          - JobName: !Ref TcgaLuscMutationGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
      Actions:
        - CrawlerName: !Ref TCGAMutationCrawler
487
488
  # Starts TCGACNVCrawler once both the LUAD and LUSC copy-number jobs have
  # succeeded.
  CnvCrawlerTrigger:
    Type: AWS::Glue::Trigger
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-cnv"
      Description: "Description for a conditional CNV crawler job trigger"
      WorkflowName: !Ref TCGAWorkflow
      Type: "CONDITIONAL"
      StartOnCreation: true
      Predicate:
        Logical: AND
        Conditions:
          - JobName: !Ref TcgaLuadCnvGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
          - JobName: !Ref TcgaLuscCnvGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
      Actions:
        - CrawlerName: !Ref TCGACNVCrawler
507
508
  # Starts TCGAClinicalCrawler once both the LUAD and LUSC clinical jobs have
  # succeeded.
  ClinCrawlerTrigger:
    Type: AWS::Glue::Trigger
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-clin"
      Description: "Description for a conditional Clinical crawler job trigger"
      WorkflowName: !Ref TCGAWorkflow
      Type: "CONDITIONAL"
      StartOnCreation: true
      Predicate:
        Logical: AND
        Conditions:
          - JobName: !Ref TcgaLuadClinicalGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
          - JobName: !Ref TcgaLuscClinicalGlueJob
            LogicalOperator: EQUALS
            State: SUCCEEDED
      Actions:
        - CrawlerName: !Ref TCGAClinicalCrawler
527
528
  # Starts TCGAImagingMetadataCrawler once both the LUAD and LUSC imaging
  # metadata jobs have succeeded.
  ImMetaCrawlerTrigger:
    Type: AWS::Glue::Trigger
    Properties:
      Name: !Sub ${ResourcePrefixLowercase}-tcga-img
      Type: "CONDITIONAL"
      # This trigger starts the imaging metadata crawler, so the description
      # names it rather than the Clinical crawler.
      Description: "Description for a conditional Imaging metadata crawler job trigger"
      Actions:
        - CrawlerName: !Ref 'TCGAImagingMetadataCrawler'
      StartOnCreation: true
      Predicate:
        # Both imaging metadata jobs must succeed before the crawler runs.
        Logical: AND
        Conditions:
          - LogicalOperator: EQUALS
            JobName: !Ref 'TcgaLuadImagingMetadataGlueJob'
            State: SUCCEEDED
          - LogicalOperator: EQUALS
            JobName: !Ref 'TcgaLuscImagingMetadataGlueJob'
            State: SUCCEEDED
      WorkflowName: !Ref TCGAWorkflow
547
548
  # Starts TcgaSummaryGlueJob once the mutation, CNV, expression, clinical and
  # imaging metadata crawlers have all succeeded.
  RunSummaryJobTrigger:
    Type: AWS::Glue::Trigger
    Properties:
      Name: !Sub "${ResourcePrefixLowercase}-tcga-sum"
      Description: "Build TCGA summary"
      WorkflowName: !Ref TCGAWorkflow
      Type: "CONDITIONAL"
      StartOnCreation: true
      Predicate:
        Logical: AND
        Conditions:
          - CrawlerName: !Ref TCGAMutationCrawler
            LogicalOperator: EQUALS
            CrawlState: SUCCEEDED
          - CrawlerName: !Ref TCGACNVCrawler
            LogicalOperator: EQUALS
            CrawlState: SUCCEEDED
          - CrawlerName: !Ref TCGAExpressionCrawler
            LogicalOperator: EQUALS
            CrawlState: SUCCEEDED
          - CrawlerName: !Ref TCGAClinicalCrawler
            LogicalOperator: EQUALS
            CrawlState: SUCCEEDED
          - CrawlerName: !Ref TCGAImagingMetadataCrawler
            LogicalOperator: EQUALS
            CrawlState: SUCCEEDED
      Actions:
        - JobName: !Ref TcgaSummaryGlueJob
576
577
  # Glue database for the crawlers' tables, in this account's catalog.
  # Created only when DatabaseName is left at AUTO.
  TcgaDb:
    Condition: NeedsGlueDatabase
    Type: AWS::Glue::Database
    Properties:
      DatabaseInput:
        Description: "AWS Glue container to hold tables for the TCGA crawlers"
      CatalogId: !Ref "AWS::AccountId"
584
        
585
      
586
Outputs:
  # Name of the data lake bucket, echoed from the DataLakeBucket parameter.
  DataLakeBucket:
    Value: !Ref DataLakeBucket

  # Reference to the Glue workflow resource.
  TCGAWorkflow:
    Value: !Ref TCGAWorkflow

  # CloudFormation console link that opens the create-stack review page for
  # quicksight_cfn.yml from the resources bucket. The fragments are joined
  # with an empty separator, so the URL contains no whitespace.
  CreateQuicksightLink:
    Value: !Join
      - ""
      - - !Sub "https://${AWS::Region}.console.aws.amazon.com/cloudformation/home"
        - !Sub "?region=${AWS::Region}#/stacks/create/review"
        - !Sub "?templateURL=https://s3.${AWS::Region}.amazonaws.com/${ResourcesBucket}/quicksight_cfn.yml"
        - !Sub "&stackName=${ResourcePrefix}-Quicksight"
        - !Sub "&param_Project=${ResourcePrefix}"