|
a |
|
b/workflows/deepmodeloptim.nf |
|
|
1 |
/* |
|
|
2 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
3 |
IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS |
|
|
4 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
5 |
*/ |
|
|
6 |
// |
|
|
7 |
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules |
|
|
8 |
// |
|
|
9 |
include { softwareVersionsToYAML } from '../subworkflows/nf-core/utils_nfcore_pipeline' |
|
|
10 |
include { methodsDescriptionText } from '../subworkflows/local/utils_nfcore_deepmodeloptim_pipeline' |
|
|
11 |
include { CHECK_MODEL_WF } from '../subworkflows/local/check_model' |
|
|
12 |
include { PREPROCESS_IBIS_BEDFILE_TO_STIMULUS } from '../subworkflows/local/preprocess_ibis_bedfile_to_stimulus' |
|
|
13 |
include { SPLIT_DATA_CONFIG_SPLIT_WF } from '../subworkflows/local/split_data_config_split' |
|
|
14 |
include { SPLIT_DATA_CONFIG_TRANSFORM_WF } from '../subworkflows/local/split_data_config_transform' |
|
|
15 |
include { SPLIT_CSV_WF } from '../subworkflows/local/split_csv' |
|
|
16 |
include { TRANSFORM_CSV_WF } from '../subworkflows/local/transform_csv' |
|
|
17 |
include { TUNE_WF } from '../subworkflows/local/tune' |
|
|
18 |
include { EVALUATION_WF } from '../subworkflows/local/evaluation' |
|
|
19 |
|
|
|
20 |
// |
|
|
21 |
// MODULES: Consisting of nf-core/modules |
|
|
22 |
// |
|
|
23 |
include { CUSTOM_GETCHROMSIZES } from '../modules/nf-core/custom/getchromsizes' |
|
|
24 |
|
|
|
25 |
/* |
|
|
26 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
27 |
RUN MAIN WORKFLOW |
|
|
28 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
29 |
*/ |
|
|
30 |
|
|
|
31 |
workflow DEEPMODELOPTIM {

    take:
    ch_data                  // input data file(s)
    ch_data_config           // meta data config (split + transform sections)
    ch_model                 // model definition
    ch_model_config          // model config
    ch_initial_weights       // optional initial weights for the model
    ch_preprocessing_config  // preprocessing config; entries carry a 'protocol' field
    ch_genome                // reference genome (used only when preprocessing)
    tune_trials_range        // value: range of tuning trials
    tune_replicates          // value: number of tuning replicates
    prediction_data          // data to run prediction/evaluation on

    main:

    // TODO collect all the versions files from the different processes
    ch_versions = Channel.empty()

    // ==============================================================================
    // preprocess data
    // ==============================================================================

    if (params.preprocessing_config) {

        // create genome index (chromosome sizes) from the reference genome
        CUSTOM_GETCHROMSIZES(ch_genome)
        ch_genome_sizes = CUSTOM_GETCHROMSIZES.out.sizes

        // preprocess bedfile into stimulus format; only config entries with
        // protocol 'ibis' are handled by this subworkflow
        PREPROCESS_IBIS_BEDFILE_TO_STIMULUS(
            ch_data,
            ch_preprocessing_config.filter{ it.protocol == 'ibis' },
            ch_genome,
            ch_genome_sizes
        )

        ch_data = PREPROCESS_IBIS_BEDFILE_TO_STIMULUS.out.data
    }

    // ==============================================================================
    // split meta yaml split config file into individual yaml files
    // ==============================================================================

    SPLIT_DATA_CONFIG_SPLIT_WF( ch_data_config )
    ch_yaml_sub_config_split = SPLIT_DATA_CONFIG_SPLIT_WF.out.sub_config

    // ==============================================================================
    // split csv data file
    // ==============================================================================

    SPLIT_CSV_WF(
        ch_data,
        ch_yaml_sub_config_split
    )
    ch_split_data = SPLIT_CSV_WF.out.split_data

    // ==============================================================================
    // split meta yaml transform config file into individual yaml files
    // ==============================================================================

    SPLIT_DATA_CONFIG_TRANSFORM_WF( ch_yaml_sub_config_split )
    ch_yaml_sub_config = SPLIT_DATA_CONFIG_TRANSFORM_WF.out.sub_config

    // ==============================================================================
    // transform csv file
    // ==============================================================================

    TRANSFORM_CSV_WF(
        ch_split_data,
        ch_yaml_sub_config
    )
    ch_transformed_data = TRANSFORM_CSV_WF.out.transformed_data

    // ==============================================================================
    // check model
    // ==============================================================================

    // pre-step to check everything is fine
    // to do so we only run the first element of the sorted channel, as we don't need
    // to check on each transformed data
    // we sort the channel so that we always get the same input, as the default order
    // of the channel depends on which process finishes first (run in parallel)
    ch_check_input_data   = ch_transformed_data.toSortedList().flatten().buffer(size: 2).first()
    ch_check_input_config = ch_yaml_sub_config.toSortedList().flatten().buffer(size: 2).first()

    CHECK_MODEL_WF (
        ch_check_input_data,
        ch_check_input_config,
        ch_model,
        ch_model_config,
        ch_initial_weights
    )

    // ==============================================================================
    // tune model
    // ==============================================================================

    // Create a workflow dependency so that TUNE_WF only runs after CHECK_MODEL_WF
    // has finished
    ch_transformed_data = CHECK_MODEL_WF.out.concat(ch_transformed_data)
        .filter{ it } // remove the empty element from the check model

    TUNE_WF(
        ch_transformed_data,
        ch_yaml_sub_config,
        ch_model,
        ch_model_config,
        ch_initial_weights,
        tune_trials_range,
        tune_replicates
    )

    // ==============================================================================
    // Evaluation
    // ==============================================================================

    // Now the data config will not work if passed in full.
    // We need to pass in a split data config (any of them) for the predict modules;
    // this will be changed in the future.
    prediction_data = prediction_data.combine(
        TUNE_WF.out.data_config_tmp.first().map{ meta, file -> file }
    )

    EVALUATION_WF(
        TUNE_WF.out.model_tmp,
        prediction_data
    )

    // Collate software versions into a single YAML file under pipeline_info.
    // NOTE(review): ch_versions is still empty (see TODO above), so the collated
    // file will contain no per-process versions until they are collected.
    softwareVersionsToYAML(ch_versions)
        .collectFile(
            storeDir: "${params.outdir}/pipeline_info",
            name: 'nf_core_' + 'deepmodeloptim_software_' + 'versions.yml',
            sort: true,
            newLine: true
        ).set { ch_collated_versions }

    emit:
    versions = ch_versions // channel: [ path(versions.yml) ]

}
|
|
172 |
|
|
|
173 |
/* |
|
|
174 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
175 |
THE END |
|
|
176 |
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ |
|
|
177 |
*/ |