deepmodeloptim / Git / Diff of /modules/local/awk/extract/main.nf

Models:
MarcoTheBlack/
deepmodeloptim
Downloads: 1
Diff of /modules/local/awk/extract/main.nf [000000] .. [13a70a]
Switch to side-by-side view

--- a
+++ b/modules/local/awk/extract/main.nf
@@ -0,0 +1,67 @@
+/*
+ * AWK_EXTRACT: keep the header line of a delimited text file plus every row
+ * whose value in a named column is one of a given set of values.
+ *
+ * Inputs:
+ *   meta, column_name, values - sample metadata map, the header name of the
+ *                               column to filter on, and a comma-separated
+ *                               string of accepted values for that column.
+ *   meta2, data               - metadata for the table (not referenced in this
+ *                               process) and the delimited file to filter.
+ *
+ * Outputs:
+ *   extracted_data - tuple of meta and the filtered file, named
+ *                    "<prefix>.<extension of data>".
+ *   versions       - versions.yml recording the awk version.
+ *
+ * The field separator is task.ext.args.separator when set; otherwise ',' for
+ * files ending in ".csv" and a tab for everything else.
+ */
+process AWK_EXTRACT {
+    tag "$meta.id"
+    label 'process_single'
+
+    conda "${moduleDir}/environment.yml"
+    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+        'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
+        'biocontainers/gawk:5.3.0' }"
+
+    input:
+    tuple val(meta), val(column_name), val(values)
+    tuple val(meta2), path(data)
+
+    output:
+    tuple val(meta), path("${prefix}.${extension}"), emit: extracted_data
+    path("versions.yml")                           , emit: versions
+
+    when:
+    task.ext.when == null || task.ext.when
+
+    script:
+    // args is read as a map (args.separator), so default to an empty map.
+    def args = task.ext.args ?: [:]
+    def separator = args.separator ?: ( data.getName().endsWith(".csv") ? ',': '\t' )
+    prefix = task.ext.prefix ?: "${meta.id}.extracted"
+    // The output keeps the extension of the input file.
+    extension = data.getName().split("\\.").last()
+    """
+    # Resolve the column and filter the rows in one awk pass. The header is
+    # compared field by field for an exact match, so empty header cells or
+    # column names that are substrings of other names cannot select the
+    # wrong column.
+    awk -v FS="$separator" -v column="$column_name" -v values="$values" '
+        BEGIN {
+            # Build a lookup set from the comma-separated accepted values
+            n = split(values, items, ",")
+            for (i = 1; i <= n; i++) {
+                wanted[items[i]] = 1
+            }
+            col = 0
+        }
+        NR == 1 {
+            # Find the first header field equal to the requested name,
+            # ignoring surrounding blanks and a trailing carriage return
+            for (i = 1; i <= NF; i++) {
+                name = \$i
+                gsub(/^[ \\t\\r]+|[ \\t\\r]+\$/, "", name)
+                if ((name "") == (column "")) {
+                    col = i
+                    break
+                }
+            }
+            if (!col) {
+                # Jump to END, which reports the error
+                exit
+            }
+            print
+            next
+        }
+        (\$col in wanted)
+        END {
+            # col is still 0 for an empty file or an unknown column name
+            if (!col) {
+                print "Column \\047" column "\\047 not found in the input file." > "/dev/stderr"
+                exit 1
+            }
+        }
+    ' "$data" > "${prefix}.${extension}"
+
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+
+    stub:
+    // Stub run: create an empty output file and the versions file only.
+    prefix = task.ext.prefix ?: "${meta.id}.extracted"
+    extension = data.getName().split("\\.").last()
+    """
+    touch ${prefix}.${extension}
+    cat <<-END_VERSIONS > versions.yml
+    "${task.process}":
+        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
+    END_VERSIONS
+    """
+}