/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { softwareVersionsToYAML              } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText              } from '../subworkflows/local/utils_nfcore_deepmodeloptim_pipeline'
include { CHECK_MODEL_WF                      } from '../subworkflows/local/check_model'
include { PREPROCESS_IBIS_BEDFILE_TO_STIMULUS } from '../subworkflows/local/preprocess_ibis_bedfile_to_stimulus'
include { SPLIT_DATA_CONFIG_SPLIT_WF          } from '../subworkflows/local/split_data_config_split'
include { SPLIT_DATA_CONFIG_TRANSFORM_WF      } from '../subworkflows/local/split_data_config_transform'
include { SPLIT_CSV_WF                        } from '../subworkflows/local/split_csv'
include { TRANSFORM_CSV_WF                    } from '../subworkflows/local/transform_csv'
include { TUNE_WF                             } from '../subworkflows/local/tune'
include { EVALUATION_WF                       } from '../subworkflows/local/evaluation'

//
// MODULES: Consisting of nf-core/modules
//
include { CUSTOM_GETCHROMSIZES                } from '../modules/nf-core/custom/getchromsizes'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

// Main pipeline workflow: optionally preprocesses input data into stimulus
// format, splits and transforms the CSV data according to the data config,
// sanity-checks the model on a single shard, tunes the model, and evaluates
// the tuned model on the prediction data.
workflow DEEPMODELOPTIM {

    take:
    ch_data                  // channel: input data (CSV or bedfile, depending on preprocessing)
    ch_data_config           // channel: meta yaml data config
    ch_model                 // channel: model definition
    ch_model_config          // channel: model config
    ch_initial_weights       // channel: optional initial model weights
    ch_preprocessing_config  // channel: preprocessing protocol config
    ch_genome                // channel: reference genome (used only when preprocessing)
    tune_trials_range        // value: range of tuning trials
    tune_replicates          // value: number of tuning replicates
    prediction_data          // channel: data to run evaluation/prediction on

    main:

    // TODO collect all the versions files from the different processes
    ch_versions = Channel.empty()

    // ==============================================================================
    // preprocess data
    // ==============================================================================

    if (params.preprocessing_config) {

        // create genome index
        CUSTOM_GETCHROMSIZES(ch_genome)
        ch_genome_sizes = CUSTOM_GETCHROMSIZES.out.sizes

        // preprocess bedfile into stimulus format
        // NOTE: only the 'ibis' protocol is currently handled here
        PREPROCESS_IBIS_BEDFILE_TO_STIMULUS(
            ch_data,
            ch_preprocessing_config.filter{it.protocol == 'ibis'},
            ch_genome,
            ch_genome_sizes
        )

        ch_data = PREPROCESS_IBIS_BEDFILE_TO_STIMULUS.out.data
    }

    // ==============================================================================
    // split meta yaml split config file into individual yaml files
    // ==============================================================================

    SPLIT_DATA_CONFIG_SPLIT_WF( ch_data_config )
    ch_yaml_sub_config_split = SPLIT_DATA_CONFIG_SPLIT_WF.out.sub_config

    // ==============================================================================
    // split csv data file
    // ==============================================================================

    SPLIT_CSV_WF(
        ch_data,
        ch_yaml_sub_config_split
    )
    ch_split_data = SPLIT_CSV_WF.out.split_data

    // ==============================================================================
    // split meta yaml transform config file into individual yaml files
    // ==============================================================================

    SPLIT_DATA_CONFIG_TRANSFORM_WF( ch_yaml_sub_config_split )
    ch_yaml_sub_config = SPLIT_DATA_CONFIG_TRANSFORM_WF.out.sub_config

    // ==============================================================================
    // transform csv file
    // ==============================================================================

    TRANSFORM_CSV_WF(
        ch_split_data,
        ch_yaml_sub_config
    )
    ch_transformed_data = TRANSFORM_CSV_WF.out.transformed_data

    // ==============================================================================
    // check model
    // ==============================================================================

    // pre-step to check everything is fine
    // to do so we only run the first element of the sorted channel, as we don't need
    // to check on each transformed data
    // we sort the channel so that we always get the same input, as the default order
    // of the channel depends on which process finishes first (run in parallel)
    ch_check_input_data   = ch_transformed_data.toSortedList().flatten().buffer(size:2).first()
    ch_check_input_config = ch_yaml_sub_config.toSortedList().flatten().buffer(size:2).first()

    CHECK_MODEL_WF (
        ch_check_input_data,
        ch_check_input_config,
        ch_model,
        ch_model_config,
        ch_initial_weights
    )

    // ==============================================================================
    // tune model
    // ==============================================================================

    // Create a workflow dependency to ensure TUNE_WF runs only after
    // CHECK_MODEL_WF has finished
    ch_transformed_data = CHECK_MODEL_WF.out.concat(ch_transformed_data)
        .filter{it}   // remove the empty element from the check model

    TUNE_WF(
        ch_transformed_data,
        ch_yaml_sub_config,
        ch_model,
        ch_model_config,
        ch_initial_weights,
        tune_trials_range,
        tune_replicates
    )

    // ==============================================================================
    // Evaluation
    // ==============================================================================

    // Now the data config will not work if passed in full
    // We need to pass in the split data config, any of them, for the predict modules
    // This will be changed in the future
    prediction_data = prediction_data.combine(TUNE_WF.out.data_config_tmp.first().map{meta,file -> file})
    EVALUATION_WF(
        TUNE_WF.out.model_tmp,
        prediction_data
    )

    // Collate software versions and save them to the pipeline_info directory
    softwareVersionsToYAML(ch_versions)
        .collectFile(
            storeDir: "${params.outdir}/pipeline_info",
            name: 'nf_core_'  +  'deepmodeloptim_software_'  + 'versions.yml',
            sort: true,
            newLine: true
        ).set { ch_collated_versions }

    emit:
    versions = ch_versions  // channel: [ path(versions.yml) ]

}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/