a b/qiita_ware/metadata_pipeline.py
1
# -----------------------------------------------------------------------------
2
# Copyright (c) 2014--, The Qiita Development Team.
3
#
4
# Distributed under the terms of the BSD 3-clause License.
5
#
6
# The full license is in the file LICENSE, distributed with this software.
7
# -----------------------------------------------------------------------------
8
from qiita_db.metadata_template.util import load_template_to_dataframe
9
from qiita_db.metadata_template.sample_template import SampleTemplate
10
from qiita_db.metadata_template.prep_template import PrepTemplate
11
from qiita_db.metadata_template.constants import (
12
    PREP_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS,
13
    TARGET_GENE_DATA_TYPES)
14
from qiita_db.util import convert_from_id
15
from qiita_ware.exceptions import QiitaWareError
16
17
18
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    The mapping file is loaded into a single table, the QIIME-specific column
    names are translated to the names used by the templates, and the columns
    are then partitioned: every column named in the prep template column
    restrictions goes to the prep template and every remaining column goes to
    the sample template. Both tables keep the column order of the mapping
    file.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the sample template belongs to
    data_type : str or int
        The data_type of the prep_template. An int is translated to its
        string form with ``convert_from_id`` before it is compared against
        ``TARGET_GENE_DATA_TYPES``; the value is handed to
        ``PrepTemplate.create`` unchanged.

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If the mapping file lacks any of the 'BarcodeSequence',
        'LinkerPrimerSequence' or 'Description' columns
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # 'ReverseLinkerPrimer' is optional: it is only renamed when present, so
    # it never shows up in the list of missing columns below
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # Sorted so the error message does not depend on set iteration order
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _restricted_columns(restriction_set):
        """Return the set of column names named by all the restrictions"""
        return set(col
                   for restriction in restriction_set.values()
                   for col in restriction.columns.keys())

    pt_cols = _restricted_columns(PREP_TEMPLATE_COLUMNS)

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_restricted_columns(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    # Split the columns with a boolean mask instead of going through sets:
    # the mask preserves the column order of the mapping file (set iteration
    # order is arbitrary) and selects every column exactly once
    in_prep = qiime_map.columns.isin(pt_cols)
    st_md = qiime_map.loc[:, ~in_prep]
    pt_md = qiime_map.loc[:, in_prep]

    # The sample template is created before the prep template.
    # NOTE(review): presumably PrepTemplate.create needs the study's sample
    # template to exist already - confirm before reordering these calls
    sample_template = SampleTemplate.create(st_md, study)
    prep_template = PrepTemplate.create(pt_md, study, data_type)

    return sample_template, prep_template