a b/qiita_db/test/test_commands.py
1
# -----------------------------------------------------------------------------
2
# Copyright (c) 2014--, The Qiita Development Team.
3
#
4
# Distributed under the terms of the BSD 3-clause License.
5
#
6
# The full license is in the file LICENSE, distributed with this software.
7
# -----------------------------------------------------------------------------
8
9
from os import remove, close, mkdir
10
from os.path import exists, join
11
from tempfile import mkstemp, mkdtemp
12
from shutil import rmtree
13
from unittest import TestCase, main
14
from six import StringIO
15
from functools import partial
16
17
import pandas as pd
18
19
from qiita_core.util import qiita_test_checker
20
21
import qiita_db as qdb
22
23
import configparser
24
25
26
@qiita_test_checker()
class TestMakeStudyFromCmd(TestCase):
    """Tests for ``qdb.commands.load_study_from_cmd``."""

    def setUp(self):
        # Person and user that the test (and its config fixtures) rely on
        person_args = ('SomeDude', 'somedude@foo.bar', 'some',
                       '111 fake street', '111-121-1313')
        qdb.study.StudyPerson.create(*person_args)
        qdb.user.User.create('test@test.com', 'password')
        self.config1, self.config2 = CONFIG_1, CONFIG_2

    def test_make_study_from_cmd(self):
        owner = 'test@test.com'
        trn = qdb.sql_connection.TRN

        qdb.commands.load_study_from_cmd(
            owner, 'newstudy', StringIO(self.config1))

        # The study just loaded is looked up by owner email and title
        with trn:
            sql = """SELECT study_id
                     FROM qiita.study
                     WHERE email = %s AND study_title = %s"""
            trn.add(sql, [owner, 'newstudy'])
            found = trn.execute_fetchflatten()
        self.assertEqual(found, [2])

        # The second config has no "mixs_compliant" option, so loading it
        # is expected to raise
        incomplete = StringIO(self.config2)
        with self.assertRaises(configparser.NoOptionError):
            qdb.commands.load_study_from_cmd(owner, 'newstudy2', incomplete)
51
52
53
@qiita_test_checker()
class TestLoadArtifactFromCmd(TestCase):
    """Tests for ``qdb.commands.load_artifact_from_cmd``."""

    def setUp(self):
        self.files_to_remove = []
        # Row counts before the test runs; assertions are relative to them
        self.artifact_count = qdb.util.get_count('qiita.artifact')
        self.fp_count = qdb.util.get_count('qiita.filepath')

    def tearDown(self):
        for path in self.files_to_remove:
            if not exists(path):
                continue
            remove(path)

    def _touch_temp_file(self, suffix=''):
        """Create a one-newline temp file and register it for removal"""
        handle, path = mkstemp(suffix=suffix)
        close(handle)
        self.files_to_remove.append(path)
        with open(path, 'w') as out:
            out.write('\n')
        return path

    def test_load_artifact_from_cmd_error(self):
        load = qdb.commands.load_artifact_from_cmd

        # Two filepaths paired with a single filepath type
        with self.assertRaises(ValueError):
            load(["fp1", "fp2"], ["preprocessed_fasta"], "Demultiplexed",
                 parents=[1], dflt_params_id=10,
                 required_params='{"input_data": 1}')

        # Two parents and no required parameters
        with self.assertRaises(ValueError):
            load(["fp1"], ["preprocessed_fasta"], "Demultiplexed",
                 parents=[1, 2], dflt_params_id=10)

    def test_load_artifact_from_cmd_root(self):
        suffixes = ('_forward.fastq.gz', '_reverse.fastq.gz',
                    '_barcodes.fastq.gz')
        fps = [self._touch_temp_file(suffix=sfx) for sfx in suffixes]
        ftypes = ['raw_forward_seqs', 'raw_reverse_seqs', 'raw_barcodes']

        # Single-sample prep information for the new prep template
        sample_md = {'center_name': 'ANL',
                     'primer': 'GTGCCAGCMGCCGCGGTAA',
                     'barcode': 'GTCCGCAAGTTA',
                     'run_prefix': "s_G1_L001_sequences",
                     'platform': 'Illumina',
                     'instrument_model': 'Illumina MiSeq',
                     'library_construction_protocol': 'AAAA',
                     'experiment_design_description': 'BBBB'}
        metadata = pd.DataFrame.from_dict(
            {'SKB8.640193': sample_md}, orient='index', dtype=str)
        create_pt = qdb.metadata_template.prep_template.PrepTemplate.create
        pt = create_pt(metadata, qdb.study.Study(1), "16S")

        obs = qdb.commands.load_artifact_from_cmd(
            fps, ftypes, 'FASTQ', prep_template=pt.id)
        self.files_to_remove.extend([x['fp'] for x in obs.filepaths])

        self.assertEqual(obs.id, self.artifact_count + 1)
        expected_fps = self.fp_count + 4
        self.assertTrue(qdb.util.check_count('qiita.filepath', expected_fps))

    def test_load_artifact_from_cmd_processed(self):
        fps = [self._touch_temp_file(), self._touch_temp_file()]
        ftypes = ['preprocessed_fasta', 'preprocessed_fastq']

        obs = qdb.commands.load_artifact_from_cmd(
            fps, ftypes, 'Demultiplexed', parents=[1], dflt_params_id=1,
            required_params='{"input_data": 1}',
            optional_params='{"min_per_read_length_fraction": 0.80}')
        self.files_to_remove.extend([x['fp'] for x in obs.filepaths])

        self.assertEqual(obs.id, self.artifact_count + 1)
        expected_fps = self.fp_count + 2
        self.assertTrue(qdb.util.check_count('qiita.filepath', expected_fps))

    def test_load_artifact_from_cmd_biom(self):
        fps = [self._touch_temp_file(suffix='_otu_table.biom')]
        ftypes = ['biom']

        obs = qdb.commands.load_artifact_from_cmd(
            fps, ftypes, 'BIOM', parents=[3], dflt_params_id=10,
            required_params='{"input_data": 3}')
        self.files_to_remove.extend([x['fp'] for x in obs.filepaths])

        self.assertEqual(obs.id, self.artifact_count + 1)
        expected_fps = self.fp_count + 1
        self.assertTrue(qdb.util.check_count('qiita.filepath', expected_fps))
148
149
150
@qiita_test_checker()
class TestLoadSampleTemplateFromCmd(TestCase):
    """Tests for ``qdb.commands.load_sample_template_from_cmd``."""

    def setUp(self):
        # Text of the sample template handed to the command
        self.st_contents = SAMPLE_TEMPLATE

        # A brand new study for the sample template to be attached to
        study_info = {
            "timeseries_type_id": 1,
            "metadata_complete": True,
            "mixs_compliant": True,
            "study_alias": "TestStudy",
            "study_description": "Description of a test study",
            "study_abstract": "No abstract right now...",
        }
        study_info["principal_investigator_id"] = qdb.study.StudyPerson(3)
        study_info["lab_person_id"] = qdb.study.StudyPerson(1)

        owner = qdb.user.User('test@foo.bar')
        self.study = qdb.study.Study.create(owner, "Test study", study_info)

    def test_load_sample_template_from_cmd(self):
        """The loaded sample template carries the id of its study"""
        loaded = qdb.commands.load_sample_template_from_cmd(
            StringIO(self.st_contents), self.study.id)
        self.assertEqual(loaded.id, self.study.id)
175
176
177
@qiita_test_checker()
class TestLoadPrepTemplateFromCmd(TestCase):
    """Tests for ``qdb.commands.load_prep_template_from_cmd``."""

    def setUp(self):
        # Text of the prep template handed to the command
        self.pt_contents = PREP_TEMPLATE

    def test_load_prep_template_from_cmd(self):
        """The prep template loaded from a file handle gets id 3"""
        handle = StringIO(self.pt_contents)
        loaded = qdb.commands.load_prep_template_from_cmd(handle, 1, '18S')
        self.assertEqual(loaded.id, 3)
187
188
189
@qiita_test_checker()
class TestLoadParametersFromCmd(TestCase):
    """Fixture holder: writes the two parameter files and removes them.

    Only ``setUp`` and ``tearDown`` are defined on this class.
    """

    def setUp(self):
        def write_params(contents):
            # Temp file with the given contents; returns its path
            handle, path = mkstemp(suffix='_params.txt')
            close(handle)
            with open(path, 'w') as out:
                out.write(contents)
            return path

        self.fp = write_params(PARAMETERS)
        self.fp_wrong = write_params(PARAMETERS_ERROR)
        self.files_to_remove = [self.fp, self.fp_wrong]

    def tearDown(self):
        for path in self.files_to_remove:
            if not exists(path):
                continue
            remove(path)
210
211
212
@qiita_test_checker()
class TestPatch(TestCase):
    """Tests for ``qdb.environment_manager.patch`` using throw-away patches.

    ``setUp`` writes ``2.sql`` and ``10.sql`` into a temporary patches
    directory (next to an empty ``python_patches`` subdirectory). Each test
    sets ``settings.current_patch`` and then checks what ``patch`` applied.
    """

    def setUp(self):
        self.patches_dir = mkdtemp()
        self.py_patches_dir = join(self.patches_dir, 'python_patches')
        mkdir(self.py_patches_dir)
        patch2_fp = join(self.patches_dir, '2.sql')
        patch10_fp = join(self.patches_dir, '10.sql')

        # 2.sql creates qiita.patchtest2 and inserts the rows 1 and 9
        with open(patch2_fp, 'w') as f:
            f.write("CREATE TABLE qiita.patchtest2 (testing integer);\n")
            f.write("INSERT INTO qiita.patchtest2 VALUES (1);\n")
            f.write("INSERT INTO qiita.patchtest2 VALUES (9);\n")

        # 10.sql only creates the (empty) qiita.patchtest10 table
        with open(patch10_fp, 'w') as f:
            f.write("CREATE TABLE qiita.patchtest10 (testing integer);\n")

    def tearDown(self):
        try:
            rmtree(self.patches_dir)
        finally:
            # The tests on this class are really tied up to the status of
            # the database, so we make an exception and reset the DB after
            # each test. The reset sits in ``finally`` so that a failure to
            # remove the patches directory cannot leave a patched database
            # behind for the following tests
            qdb.environment_manager.drop_and_rebuild_tst_database()

    def _check_patchtest2(self, exists=True):
        """Assert the presence (or absence) of the patchtest2 table

        Parameters
        ----------
        exists : bool, optional
            Whether the table is expected to be there. When True, its
            contents are also checked against the rows written by 2.sql.
            Note that inside this method the name shadows the
            ``os.path.exists`` import.
        """
        assertion_fn = self.assertTrue if exists else self.assertFalse

        with qdb.sql_connection.TRN:
            qdb.sql_connection.TRN.add("""SELECT EXISTS(SELECT *
                                          FROM information_schema.tables
                                          WHERE table_name = 'patchtest2')""")
            obs = qdb.sql_connection.TRN.execute_fetchflatten()[0]
        assertion_fn(obs)

        if exists:
            exp = [[1], [9]]
            with qdb.sql_connection.TRN:
                qdb.sql_connection.TRN.add(
                    """SELECT * FROM qiita.patchtest2 ORDER BY testing""")
                obs = qdb.sql_connection.TRN.execute_fetchindex()
            self.assertEqual(obs, exp)

    def _check_patchtest10(self):
        """Assert that the patchtest10 table exists and holds no rows"""
        with qdb.sql_connection.TRN:
            qdb.sql_connection.TRN.add(
                """SELECT EXISTS(SELECT * FROM information_schema.tables
                   WHERE table_name = 'patchtest10')""")
            obs = qdb.sql_connection.TRN.execute_fetchflatten()[0]
        self.assertTrue(obs)

        exp = []
        with qdb.sql_connection.TRN:
            qdb.sql_connection.TRN.add("SELECT * FROM qiita.patchtest10")
            obs = qdb.sql_connection.TRN.execute_fetchindex()
        self.assertEqual(obs, exp)

    def _assert_current_patch(self, patch_to_check):
        """Assert that ``settings.current_patch`` equals `patch_to_check`"""
        with qdb.sql_connection.TRN:
            qdb.sql_connection.TRN.add("SELECT current_patch FROM settings")
            current_patch = qdb.sql_connection.TRN.execute_fetchflatten()[0]
        self.assertEqual(current_patch, patch_to_check)

    def test_unpatched(self):
        """Test patching from unpatched state"""
        # Reset the settings table to the unpatched state
        qdb.sql_connection.perform_as_transaction(
            "UPDATE settings SET current_patch = 'unpatched'")

        self._assert_current_patch('unpatched')
        qdb.environment_manager.patch(self.patches_dir)
        self._check_patchtest2()
        self._check_patchtest10()
        self._assert_current_patch('10.sql')

    def test_skip_patch(self):
        """Test patching from a patched state"""
        qdb.sql_connection.perform_as_transaction(
            "UPDATE settings SET current_patch = '2.sql'")
        self._assert_current_patch('2.sql')

        # Only the patches that come after 2.sql are expected to be applied
        qdb.environment_manager.patch(self.patches_dir)

        self._assert_current_patch('10.sql')
        self._check_patchtest10()

        # Since we "tricked" the system, patchtest2 should not exist
        self._check_patchtest2(exists=False)

    def test_nonexistent_patch(self):
        """Test case where current patch does not exist"""
        qdb.sql_connection.perform_as_transaction(
            "UPDATE settings SET current_patch = 'nope.sql'")
        self._assert_current_patch('nope.sql')

        with self.assertRaises(RuntimeError):
            qdb.environment_manager.patch(self.patches_dir)

    def test_python_patch(self):
        # Write a test python patch
        patch10_py_fp = join(self.py_patches_dir, '10.py')
        with open(patch10_py_fp, 'w') as f:
            f.write(PY_PATCH)

        # Reset the settings table to the unpatched state
        qdb.sql_connection.perform_as_transaction(
            "UPDATE settings SET current_patch = 'unpatched'")

        self._assert_current_patch('unpatched')

        qdb.environment_manager.patch(self.patches_dir)

        # Without ORDER BY the database may return the rows in any order,
        # which would make the comparison against ``exp`` flaky
        with qdb.sql_connection.TRN:
            qdb.sql_connection.TRN.add(
                "SELECT testing FROM qiita.patchtest10 ORDER BY testing")
            obs = qdb.sql_connection.TRN.execute_fetchindex()
        exp = [[1], [100]]
        self.assertEqual(obs, exp)

        self._assert_current_patch('10.sql')
333
334
335
@qiita_test_checker()
class TestUpdateArtifactFromCmd(TestCase):
    """Tests for ``qdb.commands.update_artifact_from_cmd``."""

    def setUp(self):
        handle, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(handle)
        handle, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(handle)

        self.filepaths = [seqs_fp, barcodes_fp]
        self.filepaths_types = ["raw_forward_seqs", "raw_barcodes"]
        self._clean_up_files = [seqs_fp, barcodes_fp]

        # Every file contains its own path; the checksums are stored in
        # sorted-path order
        ordered = sorted(self.filepaths)
        for path in ordered:
            with open(path, 'w') as out:
                out.write("%s\n" % path)
        self.checksums = [qdb.util.compute_checksum(p) for p in ordered]

        # Snapshot of the uploads folder, used by tearDown to spot new files
        self.uploaded_files = qdb.util.get_files_from_uploads_folders("1")

    def tearDown(self):
        current = qdb.util.get_files_from_uploads_folders("1")
        added = set(current).difference(self.uploaded_files)

        # Anything that showed up in the uploads folder gets removed as well
        uploads_root = qdb.util.get_mountpoint("uploads")[0][1]
        to_upload_path = partial(join, uploads_root, '1')
        for _, fname, _ in added:
            self._clean_up_files.append(to_upload_path(fname))

        for path in self._clean_up_files:
            if not exists(path):
                continue
            remove(path)

    def test_update_artifact_from_cmd_error(self):
        update = qdb.commands.update_artifact_from_cmd

        # One filepath against two filepath types
        with self.assertRaises(ValueError):
            update(self.filepaths[1:], self.filepaths_types, 1)

        # Two filepaths against one filepath type
        with self.assertRaises(ValueError):
            update(self.filepaths, self.filepaths_types[1:], 1)

    def test_update_artifact_from_cmd(self):
        # Files for the artifact that is going to be updated
        handle, seqs_fp = mkstemp(suffix='_seqs.fastq')
        close(handle)
        with open(seqs_fp, 'w') as out:
            out.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n"
                      "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n"
                      "+\n"
                      "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n")

        handle, barcodes_fp = mkstemp(suffix='_barcodes.fastq')
        close(handle)
        with open(barcodes_fp, 'w') as out:
            out.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n"
                      "NNNCNNNNNNNNN\n"
                      "+\n"
                      "#############\n")
        filepaths = [(seqs_fp, 1), (barcodes_fp, 3)]

        # A new single-sample prep template to hang the artifact from
        sample_md = {'center_name': 'ANL',
                     'primer': 'GTGCCAGCMGCCGCGGTAA',
                     'barcode': 'GTCCGCAAGTTA',
                     'run_prefix': "s_G1_L001_sequences",
                     'platform': 'Illumina',
                     'instrument_model': 'Illumina MiSeq',
                     'library_construction_protocol': 'AAAA',
                     'experiment_design_description': 'BBBB'}
        metadata_dict = {'SKB8.640193': sample_md}
        metadata = pd.DataFrame.from_dict(
            metadata_dict, orient='index', dtype=str)
        create_pt = qdb.metadata_template.prep_template.PrepTemplate.create
        self.prep_template = create_pt(metadata, qdb.study.Study(1), "16S")

        artifact = qdb.artifact.Artifact.create(
            filepaths, "FASTQ", prep_template=self.prep_template)
        self._clean_up_files.extend([x['fp'] for x in artifact.filepaths])

        new_artifact = qdb.commands.update_artifact_from_cmd(
            self.filepaths, self.filepaths_types, artifact.id)
        self._clean_up_files.extend(
            [x['fp'] for x in new_artifact.filepaths])

        # Files of the artifact, sorted by path, are compared one by one
        # against the checksums computed in setUp
        by_path = sorted(artifact.filepaths, key=lambda x: x['fp'])
        for obs, exp in zip(by_path, self.checksums):
            self.assertEqual(qdb.util.compute_checksum(obs['fp']), exp)
418
419
420
# Study configuration with a [required] and an [optional] section; every
# line of the text, including the last one, ends with a newline.
CONFIG_1 = (
    "[required]\n"
    "timeseries_type_id = 1\n"
    "metadata_complete = True\n"
    "mixs_compliant = True\n"
    "principal_investigator = SomeDude, somedude@foo.bar, some\n"
    "reprocess = False\n"
    "study_alias = 'test study'\n"
    "study_description = 'test study description'\n"
    "study_abstract = 'study abstract'\n"
    "efo_ids = 1,2,3,4\n"
    "[optional]\n"
    "lab_person = SomeDude, somedude@foo.bar, some\n"
    "funding = 'funding source'\n"
    "vamps_id = vamps_id\n"
)
435
436
# Same layout as CONFIG_1 but without the "mixs_compliant" option in the
# [required] section.
CONFIG_2 = (
    "[required]\n"
    "timeseries_type_id = 1\n"
    "metadata_complete = True\n"
    "principal_investigator = SomeDude, somedude@foo.bar, some\n"
    "reprocess = False\n"
    "study_alias = 'test study'\n"
    "study_description = 'test study description'\n"
    "study_abstract = 'study abstract'\n"
    "efo_ids = 1,2,3,4\n"
    "[optional]\n"
    "lab_person = SomeDude, somedude@foo.bar, some\n"
    "funding = 'funding source'\n"
    "vamps_id = vamps_id\n"
)
450
451
# Tab-separated sample template: a header row followed by four samples.
# Rows are separated by newlines and the text has no trailing newline.
SAMPLE_TEMPLATE = "\n".join("\t".join(fields) for fields in (
    ("sample_name", "required_sample_info_status", "collection_timestamp",
     "sample_type", "physical_specimen_remaining",
     "physical_specimen_location", "dna_extracted", "host_subject_id",
     "Treatment", "DOB", "latitude", "longitude", "taxon_id",
     "scientific_name", "Description"),
    ("PC.354", "received", "06/18/14 16:44:00", "type_1", "True",
     "Location_1", "True", "HS_ID_PC.354", "Control", "20061218",
     "1.88401499993", "56.0003871552", "9606", "homo sapiens",
     "Control_mouse_I.D._354"),
    ("PC.593", "received", "06/18/14 16:44:00", "type_1", "True",
     "Location_1", "True", "HS_ID_PC.593", "Control", "20071210",
     "35.4079458313", "83.2595338611", "9606", "homo sapiens",
     "Control_mouse_I.D._593"),
    ("PC.607", "received", "06/18/14 16:44:00", "type_1", "True",
     "Location_1", "True", "HS_ID_PC.607", "Fast", "20071112",
     "18.3175615444", "91.3713989729", "9606", "homo sapiens",
     "Fasting_mouse_I.D._607"),
    ("PC.636", "received", "06/18/14 16:44:00", "type_1", "True",
     "Location_1", "True", "HS_ID_PC.636", "Fast", "20080116",
     "31.0856060708", "4.16781143893", "9606", "homo sapiens",
     "Fasting_mouse_I.D._636"),
))
468
469
# Tab-separated prep template: a header row followed by three samples.
# Every row, including the last one, is terminated by a newline.
PREP_TEMPLATE = "".join("\t".join(fields) + "\n" for fields in (
    ("sample_name", "barcode", "center_name", "center_project_name",
     "description_prep", "ebi_submission_accession", "emp_status", "primer",
     "run_prefix", "str_column", "platform",
     "library_construction_protocol", "experiment_design_description",
     "instrument_model"),
    ("SKB7.640196", "CCTCTGAGAGCT", "ANL", "Test Project", "skb7", "None",
     "EMP", "GTGCCAGCMGCCGCGGTAA", "ts_G1_L001_sequences",
     "Value for sample 3", "A", "B", "C", "Illumina MiSeq"),
    ("SKB8.640193", "GTCCGCAAGTTA", "ANL", "Test Project", "skb8", "None",
     "EMP", "GTGCCAGCMGCCGCGGTAA", "ts_G1_L001_sequences",
     "Value for sample 1", "A", "B", "C", "Illumina MiSeq"),
    ("SKD8.640184", "CGTAGAGCTCTC", "ANL", "Test Project", "skd8", "None",
     "EMP", "GTGCCAGCMGCCGCGGTAA", "ts_G1_L001_sequences",
     "Value for sample 2", "A", "B", "C", "Illumina MiSeq"),
))
483
484
# Source of a python patch: inserts Study(1).id and Study(1).id * 100 into
# qiita.patchtest10. The text starts with an empty line.
PY_PATCH = (
    "\n"
    "from qiita_db.study import Study\n"
    "from qiita_db.sql_connection import TRN\n"
    "study = Study(1)\n"
    "\n"
    "with TRN:\n"
    '    sql = "INSERT INTO qiita.patchtest10 (testing) VALUES (%s)"\n'
    "    TRN.add(sql, [[study.id], [study.id*100]], many=True)\n"
    "    TRN.execute()\n"
)
494
495
# Parameters file contents: one "name<TAB>value" pair per line.
PARAMETERS = (
    "max_bad_run_length\t3\n"
    "min_per_read_length_fraction\t0.75\n"
    "sequence_max_n\t0\n"
    "rev_comp_barcode\tFalse\n"
    "rev_comp_mapping_barcodes\tFalse\n"
    "rev_comp\tFalse\n"
    "phred_quality_threshold\t3\n"
    "barcode_type\thamming_8\n"
    "max_barcode_errors\t1.5\n"
)
505
506
# Like PARAMETERS, except that the first line packs two name/value pairs
# (four tab-separated fields) into a single line.
PARAMETERS_ERROR = (
    "max_bad_run_length\t3\tmin_per_read_length_fraction\t0.75\n"
    "sequence_max_n\t0\n"
    "rev_comp_barcode\tFalse\n"
    "rev_comp_mapping_barcodes\tFalse\n"
    "rev_comp\tFalse\n"
    "phred_quality_threshold\t3\n"
    "barcode_type\thamming_8\n"
    "max_barcode_errors\t1.5\n"
)
515
516
517
# Hand control to unittest's command-line runner when run as a script.
if __name__ == '__main__':
    main()