//
// workflows/deepmodeloptim.nf
//

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    IMPORT MODULES / SUBWORKFLOWS / FUNCTIONS
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/
//
// SUBWORKFLOW: Consisting of a mix of local and nf-core/modules
//
include { softwareVersionsToYAML              } from '../subworkflows/nf-core/utils_nfcore_pipeline'
include { methodsDescriptionText              } from '../subworkflows/local/utils_nfcore_deepmodeloptim_pipeline'
include { CHECK_MODEL_WF                      } from '../subworkflows/local/check_model'
include { PREPROCESS_IBIS_BEDFILE_TO_STIMULUS } from '../subworkflows/local/preprocess_ibis_bedfile_to_stimulus'
include { SPLIT_DATA_CONFIG_SPLIT_WF          } from '../subworkflows/local/split_data_config_split'
include { SPLIT_DATA_CONFIG_TRANSFORM_WF      } from '../subworkflows/local/split_data_config_transform'
include { SPLIT_CSV_WF                        } from '../subworkflows/local/split_csv'
include { TRANSFORM_CSV_WF                    } from '../subworkflows/local/transform_csv'
include { TUNE_WF                             } from '../subworkflows/local/tune'
include { EVALUATION_WF                       } from '../subworkflows/local/evaluation'

//
// MODULES: Consisting of nf-core/modules
//
include { CUSTOM_GETCHROMSIZES                } from '../modules/nf-core/custom/getchromsizes'

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    RUN MAIN WORKFLOW
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/

workflow DEEPMODELOPTIM {

    take:
    ch_data                 // channel: input data file(s); CSV in stimulus format, or BED when preprocessing runs (reassigned below)
    ch_data_config          // channel: meta YAML config driving the split/transform subworkflows
    ch_model                // channel: model definition file, forwarded to check/tune subworkflows
    ch_model_config         // channel: model config file, forwarded to check/tune subworkflows
    ch_initial_weights      // channel: initial weights for the model — presumably optional; verify against caller
    ch_preprocessing_config // channel: preprocessing config; expected to carry a `protocol` field (filtered on 'ibis' below)
    ch_genome               // channel: reference genome, indexed via CUSTOM_GETCHROMSIZES
    tune_trials_range       // forwarded to TUNE_WF — presumably the range of tuning trials; confirm in tune subworkflow
    tune_replicates         // forwarded to TUNE_WF — presumably the number of tuning replicates; confirm in tune subworkflow
    prediction_data         // channel: data used by EVALUATION_WF for prediction

    main:

    // TODO collect all the versions files from the different processes
    ch_versions = Channel.empty()

    // ==============================================================================
    // preprocess data
    // ==============================================================================

    if (params.preprocessing_config) {

        // create genome index

        CUSTOM_GETCHROMSIZES(ch_genome)
        ch_genome_sizes = CUSTOM_GETCHROMSIZES.out.sizes

        // preprocess bedfile into stimulus format
        // only entries whose preprocessing protocol is 'ibis' are handled here

        PREPROCESS_IBIS_BEDFILE_TO_STIMULUS(
            ch_data,
            ch_preprocessing_config.filter{it.protocol == 'ibis'},
            ch_genome,
            ch_genome_sizes
        )

        // from here on, ch_data refers to the preprocessed (stimulus-format) data
        ch_data = PREPROCESS_IBIS_BEDFILE_TO_STIMULUS.out.data
    }

    // ==============================================================================
    // split meta yaml split config file into individual yaml files
    // ==============================================================================

    SPLIT_DATA_CONFIG_SPLIT_WF( ch_data_config )
    ch_yaml_sub_config_split = SPLIT_DATA_CONFIG_SPLIT_WF.out.sub_config

    // ==============================================================================
    // split csv data file
    // ==============================================================================

    SPLIT_CSV_WF(
        ch_data,
        ch_yaml_sub_config_split
    )
    ch_split_data = SPLIT_CSV_WF.out.split_data

    // ==============================================================================
    // split meta yaml transform config file into individual yaml files
    // ==============================================================================

    SPLIT_DATA_CONFIG_TRANSFORM_WF( ch_yaml_sub_config_split )
    ch_yaml_sub_config = SPLIT_DATA_CONFIG_TRANSFORM_WF.out.sub_config

    // ==============================================================================
    // transform csv file
    // ==============================================================================

    TRANSFORM_CSV_WF(
        ch_split_data,
        ch_yaml_sub_config
    )
    ch_transformed_data = TRANSFORM_CSV_WF.out.transformed_data

    // ==============================================================================
    // check model
    // ==============================================================================

    // pre-step to check everything is fine
    // to do so we only run the first element of the sorted channel, as we don't need
    // to check on each transformed data
    // we sort the channel so that we always get the same input, as the default order
    // of the channel depends on which process finishes first (run in parallel)
    // buffer(size:2) regroups the flattened elements into pairs — presumably
    // [meta, file] tuples; confirm against the subworkflow's emit shape
    ch_check_input_data = ch_transformed_data.toSortedList().flatten().buffer(size:2).first()
    ch_check_input_config = ch_yaml_sub_config.toSortedList().flatten().buffer(size:2).first()

    CHECK_MODEL_WF (
        ch_check_input_data,
        ch_check_input_config,
        ch_model,
        ch_model_config,
        ch_initial_weights
    )

    // ==============================================================================
    // tune model
    // ==============================================================================

    // Create a dependency so that TUNE_WF only runs after CHECK_MODEL_WF has finished:
    // concat CHECK_MODEL_WF's output in front of the data, then drop the falsy/empty
    // element(s) it contributes with filter{it}
    ch_transformed_data = CHECK_MODEL_WF.out.concat(ch_transformed_data)
        .filter{it}   // remove the empty element from the check model

    TUNE_WF(
        ch_transformed_data,
        ch_yaml_sub_config,
        ch_model,
        ch_model_config,
        ch_initial_weights,
        tune_trials_range,
        tune_replicates
    )

    // ==============================================================================
    // Evaluation
    // ==============================================================================

    // Now the data config will not work if passed in full
    // We need to pass in the split data config, any of them, for the predict modules
    // This will be changed in the future
    // (take the first [meta, file] tuple emitted by TUNE_WF and keep only the file)
    prediction_data = prediction_data.combine(TUNE_WF.out.data_config_tmp.first().map{meta,file -> file})
    EVALUATION_WF(
        TUNE_WF.out.model_tmp,
        prediction_data
    )


    // Collate software versions into a single YAML file under pipeline_info
    // NOTE(review): ch_versions is never populated (see TODO above), so the
    // collated file and the emitted channel are currently empty
    softwareVersionsToYAML(ch_versions)
        .collectFile(
            storeDir: "${params.outdir}/pipeline_info",
            name: 'nf_core_'  +  'deepmodeloptim_software_'  + 'versions.yml',
            sort: true,
            newLine: true
        ).set { ch_collated_versions }

    emit:
    versions = ch_versions  // channel: [ path(versions.yml) ]

}

/*
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
    THE END
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
*/