|
a |
|
b/qiita_ware/metadata_pipeline.py |
|
|
1 |
# ----------------------------------------------------------------------------- |
|
|
2 |
# Copyright (c) 2014--, The Qiita Development Team. |
|
|
3 |
# |
|
|
4 |
# Distributed under the terms of the BSD 3-clause License. |
|
|
5 |
# |
|
|
6 |
# The full license is in the file LICENSE, distributed with this software. |
|
|
7 |
# ----------------------------------------------------------------------------- |
|
|
8 |
from qiita_db.metadata_template.util import load_template_to_dataframe |
|
|
9 |
from qiita_db.metadata_template.sample_template import SampleTemplate |
|
|
10 |
from qiita_db.metadata_template.prep_template import PrepTemplate |
|
|
11 |
from qiita_db.metadata_template.constants import ( |
|
|
12 |
PREP_TEMPLATE_COLUMNS, PREP_TEMPLATE_COLUMNS_TARGET_GENE, CONTROLLED_COLS, |
|
|
13 |
TARGET_GENE_DATA_TYPES) |
|
|
14 |
from qiita_db.util import convert_from_id |
|
|
15 |
from qiita_ware.exceptions import QiitaWareError |
|
|
16 |
|
|
|
17 |
|
|
|
18 |
def create_templates_from_qiime_mapping_file(fp, study, data_type):
    """Creates a sample template and a prep template from qiime mapping file

    The mapping file is loaded into a single dataframe indexed by
    '#SampleID'; its columns are then split between the prep template
    and the sample template.

    Parameters
    ----------
    fp : str or file-like object
        Path to the QIIME mapping file
    study : Study
        The study to which the templates belong
    data_type : str or int
        The data_type of the prep_template. An int is translated to its
        string form with `convert_from_id` before being compared against
        TARGET_GENE_DATA_TYPES.

    Returns
    -------
    (SampleTemplate, PrepTemplate)
        The templates created from the QIIME mapping file

    Raises
    ------
    QiitaWareError
        If the mapping file lacks any of the columns 'BarcodeSequence',
        'LinkerPrimerSequence' or 'Description'
    """
    qiime_map = load_template_to_dataframe(fp, index='#SampleID')

    # There are a few columns in the QIIME mapping file that are special and
    # we know how to deal with them
    rename_cols = {
        'BarcodeSequence': 'barcode',
        'LinkerPrimerSequence': 'primer',
        'Description': 'description',
    }

    # ReverseLinkerPrimer is optional: it is only renamed when present, so
    # it can never be reported as missing below
    if 'ReverseLinkerPrimer' in qiime_map:
        rename_cols['ReverseLinkerPrimer'] = 'reverselinkerprimer'

    missing = set(rename_cols).difference(qiime_map.columns)
    if missing:
        # Sort the names so the error message does not depend on set
        # iteration order
        raise QiitaWareError(
            "Error generating the templates from the QIIME mapping file. "
            "Missing QIIME mapping file columns: %s"
            % ', '.join(sorted(missing)))

    qiime_map.rename(columns=rename_cols, inplace=True)

    # Fix the casing in the columns that we control
    qiime_map.columns = [c.lower() if c.lower() in CONTROLLED_COLS else c
                         for c in qiime_map.columns]

    # Figure out which columns belong to the prep template
    def _col_iterator(restriction_set):
        """Yields every column name listed in the given restrictions"""
        for restriction in restriction_set.values():
            for col in restriction.columns:
                yield col

    pt_cols = set(_col_iterator(PREP_TEMPLATE_COLUMNS))

    data_type_str = (convert_from_id(data_type, "data_type")
                     if isinstance(data_type, int) else data_type)

    if data_type_str in TARGET_GENE_DATA_TYPES:
        pt_cols.update(_col_iterator(PREP_TEMPLATE_COLUMNS_TARGET_GENE))
        pt_cols.add('reverselinkerprimer')

    # Only the columns actually present in the mapping file are kept for
    # the prep template; everything else goes to the sample template
    qiime_cols = set(qiime_map.columns)
    pt_cols = qiime_cols.intersection(pt_cols)
    st_cols = qiime_cols.difference(pt_cols)

    st_md = qiime_map.loc[:, list(st_cols)]
    pt_md = qiime_map.loc[:, list(pt_cols)]

    return (SampleTemplate.create(st_md, study),
            PrepTemplate.create(pt_md, study, data_type))