# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
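"""Tests for the Qiita artifact handlers.

Exercises the /qiita_db/artifacts/<id>/, /qiita_db/artifacts/types/,
/qiita_db/artifact/ and /apitest/artifact/ endpoints: retrieving artifact
information, patching HTML summaries, and creating artifacts and artifact
types.
"""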
from unittest import main, TestCase
from json import loads, dumps
from functools import partial
from os.path import join, exists, isfile
from os import close, remove
from shutil import rmtree
from tempfile import mkstemp, mkdtemp
from time import sleep

from tornado.web import HTTPError
import pandas as pd
from biom import example_table as et
from biom.util import biom_open

from qiita_db.handlers.tests.oauthbase import OauthTestingBase
import qiita_db as qdb
from qiita_db.handlers.artifact import _get_artifact


class UtilTests(TestCase):
    def test_get_artifact(self):
        obs = _get_artifact(1)
        exp = qdb.artifact.Artifact(1)
        self.assertEqual(obs, exp)

        # It does not exist
        with self.assertRaises(HTTPError):
            _get_artifact(100)


class ArtifactHandlerTests(OauthTestingBase):
    def setUp(self):
        super(ArtifactHandlerTests, self).setUp()
        self._clean_up_files = []

    def tearDown(self):
        super(ArtifactHandlerTests, self).tearDown()
        for fp in self._clean_up_files:
            if exists(fp):
                if isfile(fp):
                    remove(fp)
                else:
                    rmtree(fp)

    def test_get_artifact_does_not_exist(self):
        obs = self.get('/qiita_db/artifacts/100/', headers=self.header)
        self.assertEqual(obs.code, 404)

    def test_get_no_header(self):
        obs = self.get('/qiita_db/artifacts/100/')
        self.assertEqual(obs.code, 400)

    def test_get_artifact(self):
        obs = self.get('/qiita_db/artifacts/1/', headers=self.header)
        self.assertEqual(obs.code, 200)

        db_test_raw_dir = qdb.util.get_mountpoint('raw_data')[0][1]
        path_builder = partial(join, db_test_raw_dir)
        exp_fps = {
            "raw_forward_seqs":
                [{'filepath': path_builder('1_s_G1_L001_sequences.fastq.gz'),
                  'size': 58}],
            "raw_barcodes":
                [{'filepath': path_builder(
                    '1_s_G1_L001_sequences_barcodes.fastq.gz'),
                  'size': 58}]
        }
        exp = {
            'name': 'Raw data 1',
            'timestamp': '2012-10-01 09:30:27',
            'visibility': 'private',
            'type': 'FASTQ',
            'data_type': '18S',
            'can_be_submitted_to_ebi': False,
            'ebi_run_accessions': None,
            'can_be_submitted_to_vamps': False,
            'is_submitted_to_vamps': None,
            'prep_information': [1],
            'study': 1,
            'analysis': None,
            'processing_parameters': None,
            'files': exp_fps}
        self.assertEqual(loads(obs.body), exp)

        obs = self.get('/qiita_db/artifacts/9/', headers=self.header)
        self.assertEqual(obs.code, 200)

        db_test_raw_dir = qdb.util.get_mountpoint('analysis')[0][1]
        path_builder = partial(join, db_test_raw_dir)
        exp_fps = {
            "biom": [{'filepath': path_builder('1_analysis_18S.biom'),
                      'size': 1093210}]}
        exp = {
            'name': 'noname',
            'visibility': 'sandbox',
            'type': 'BIOM',
            'data_type': '18S',
            'can_be_submitted_to_ebi': False,
            'ebi_run_accessions': None,
            'can_be_submitted_to_vamps': False,
            'is_submitted_to_vamps': None,
            'prep_information': [],
            'study': None,
            'analysis': 1,
            'processing_parameters': {'biom_table': '8', 'depth': '9000',
                                      'subsample_multinomial': 'False'},
            'files': exp_fps}
        obs = loads(obs.body)
        # The timestamp is generated at patch time, so we can't check for it
        del obs['timestamp']
        self.assertEqual(obs, exp)

    def test_patch(self):
        fd, html_fp = mkstemp(suffix=".html")
        close(fd)
        self._clean_up_files.append(html_fp)

        # Correct argument with a single HTML file
        arguments = {'op': 'add', 'path': '/html_summary/',
                     'value': html_fp}
        artifact = qdb.artifact.Artifact(1)
        self.assertIsNone(artifact.html_summary_fp)
        obs = self.patch('/qiita_db/artifacts/1/',
                         headers=self.header,
                         data=arguments)
        self.assertEqual(obs.code, 200)
        self.assertIsNotNone(artifact.html_summary_fp)

        # Correct argument with an HTML file and a directory
        fd, html_fp = mkstemp(suffix=".html")
        close(fd)
        self._clean_up_files.append(html_fp)
        html_dir = mkdtemp()
        self._clean_up_files.append(html_dir)
        arguments = {'op': 'add', 'path': '/html_summary/',
                     'value': dumps({'html': html_fp, 'dir': html_dir})}
        obs = self.patch('/qiita_db/artifacts/1/',
                         headers=self.header,
                         data=arguments)
        self.assertEqual(obs.code, 200)
        self.assertIsNotNone(artifact.html_summary_fp)
        html_dir = [x['fp'] for x in artifact.filepaths
                    if x['fp_type'] == 'html_summary_dir']
        self.assertEqual(len(html_dir), 1)

        # Wrong operation
        arguments = {'op': 'wrong', 'path': '/html_summary/',
                     'value': html_fp}
        obs = self.patch('/qiita_db/artifacts/1/',
                         headers=self.header,
                         data=arguments)
        self.assertEqual(obs.code, 400)
        self.assertEqual(obs.reason, 'Operation "wrong" not supported. '
                                     'Current supported operations: add')

        # Wrong path parameter
        arguments = {'op': 'add', 'path': '/wrong/',
                     'value': html_fp}
        obs = self.patch('/qiita_db/artifacts/1/',
                         headers=self.header,
                         data=arguments)
        self.assertEqual(obs.code, 400)
        self.assertEqual(obs.reason, 'Incorrect path parameter value')

        # Wrong value parameter
        arguments = {'op': 'add', 'path': '/html_summary/',
                     'value': html_fp}
        obs = self.patch('/qiita_db/artifacts/1/',
                         headers=self.header,
                         data=arguments)
        self.assertEqual(obs.code, 500)
        self.assertIn('No such file or directory', obs.reason)


class ArtifactAPItestHandlerTests(OauthTestingBase):
    def setUp(self):
        super(ArtifactAPItestHandlerTests, self).setUp()

        metadata_dict = {
            'SKB8.640193': {'center_name': 'ANL',
                            'primer': 'GTGCCAGCMGCCGCGGTAA',
                            'barcode': 'GTCCGCAAGTTA',
                            'run_prefix': "s_G1_L001_sequences",
                            'platform': 'Illumina',
                            'instrument_model': 'Illumina MiSeq',
                            'library_construction_protocol': 'AAAA',
                            'experiment_design_description': 'BBBB'}}
        metadata = pd.DataFrame.from_dict(metadata_dict, orient='index')
        self.prep_template = \
            qdb.metadata_template.prep_template.PrepTemplate.create(
                metadata, qdb.study.Study(1), "16S")

        self._clean_up_files = []

    def tearDown(self):
        super(ArtifactAPItestHandlerTests, self).tearDown()
        for f in self._clean_up_files:
            if exists(f):
                remove(f)

    def test_post(self):
        fd, fp1 = mkstemp(suffix='_seqs.fastq')
        close(fd)
        self._clean_up_files.append(fp1)
        with open(fp1, 'w') as f:
            f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n"
                    "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n"
                    "+\n"
                    "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n")

        fd, fp2 = mkstemp(suffix='_barcodes.fastq')
        close(fd)
        self._clean_up_files.append(fp2)
        with open(fp2, 'w') as f:
            f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n"
                    "NNNCNNNNNNNNN\n"
                    "+\n"
                    "#############\n")

        data = {'filepaths': dumps([(fp1, 'raw_forward_seqs'),
                                    (fp2, 'raw_barcodes')]),
                'type': "FASTQ",
                'name': "New test artifact",
                'prep': self.prep_template.id}
        obs = self.post('/apitest/artifact/', headers=self.header, data=data)
        self.assertEqual(obs.code, 200)

        obs = loads(obs.body)
        self.assertCountEqual(obs.keys(), ['artifact'])

        a = qdb.artifact.Artifact(obs['artifact'])
        self._clean_up_files.extend([x['fp'] for x in a.filepaths])
        self.assertEqual(a.name, "New test artifact")

    def test_post_analysis(self):
        fd, fp = mkstemp(suffix='_table.biom')
        close(fd)
        with biom_open(fp, 'w') as f:
            et.to_hdf5(f, "test")
        self._clean_up_files.append(fp)

        data = {'filepaths': dumps([(fp, 'biom')]),
                'type': "BIOM",
                'name': "New biom artifact",
                'analysis': 1,
                'data_type': '16S'}
        obs = self.post('/apitest/artifact/', headers=self.header, data=data)
        self.assertEqual(obs.code, 200)

        obs = loads(obs.body)
        self.assertCountEqual(obs.keys(), ['artifact'])

        a = qdb.artifact.Artifact(obs['artifact'])
        self._clean_up_files.extend([x['fp'] for x in a.filepaths])
        self.assertEqual(a.name, "New biom artifact")

    def test_post_error(self):
        data = {'filepaths': dumps([('Do not exist', 'raw_forward_seqs')]),
                'type': "FASTQ",
                'name': "New test artifact",
                'prep': 1}
        obs = self.post('/apitest/artifact/', headers=self.header, data=data)
        self.assertEqual(obs.code, 500)
        self.assertIn("Prep template 1 already has an artifact associated",
                      obs.body.decode('ascii'))


class ArtifactTypeHandlerTests(OauthTestingBase):
    def test_post_no_header(self):
        obs = self.post('/qiita_db/artifacts/types/', data={})
        self.assertEqual(obs.code, 400)

    def test_post(self):
        data = {'type_name': 'new_type',
                'description': 'some_description',
                'can_be_submitted_to_ebi': False,
                'can_be_submitted_to_vamps': False,
                'is_user_uploadable': False,
                'filepath_types': dumps([("log", False),
                                         ("raw_forward_seqs", True)])}
        obs = self.post('/qiita_db/artifacts/types/', headers=self.header,
                        data=data)
        self.assertEqual(obs.code, 200)
        self.assertIn(['new_type', 'some_description', False, False, False],
                      qdb.artifact.Artifact.types())

        obs = self.post('/qiita_db/artifacts/types/', headers=self.header,
                        data=data)
        self.assertEqual(obs.code, 200)

    def test_get(self):
        obs = self.get('/qiita_db/artifacts/types/', headers=self.header)
        self.assertEqual(obs.code, 200)
        basedir = qdb.util.get_db_files_base_dir()
        exp = {
            "SFF": f"{basedir}/SFF",
            "FASTA": f"{basedir}/FASTA",
            "FASTA_Sanger": f"{basedir}/FASTA_Sanger",
            "Demultiplexed": f"{basedir}/Demultiplexed",
            "FASTQ": f"{basedir}/FASTQ",
            "per_sample_FASTQ": f"{basedir}/per_sample_FASTQ",
            "BIOM": f"{basedir}/BIOM",
            "uploads": f"{basedir}/uploads"}
        self.assertDictEqual(loads(obs.body), exp)


class APIArtifactHandlerTests(OauthTestingBase):
    def setUp(self):
        super(APIArtifactHandlerTests, self).setUp()
        self._clean_up_files = []

    def tearDown(self):
        super(APIArtifactHandlerTests, self).tearDown()
        for f in self._clean_up_files:
            if exists(f):
                remove(f)

    def test_post(self):
        # no header
        obs = self.post('/qiita_db/artifact/', data={})
        self.assertEqual(obs.code, 400)

        fd, fp = mkstemp(suffix='_table.biom')
        close(fd)
        # rename the example table samples to match the test study sample IDs
        et.update_ids({'S1': '1.SKB1.640202',
                       'S2': '1.SKD3.640198',
                       'S3': '1.SKM4.640180'}, inplace=True)
        with biom_open(fp, 'w') as f:
            et.to_hdf5(f, "test")
        self._clean_up_files.append(fp)

        # no job_id or prep_id
        data = {'user_email': 'demo@microbio.me',
                'artifact_type': 'BIOM',
                'command_artifact_name': 'OTU table',
                'files': dumps({'biom': [fp]})}
        obs = self.post('/qiita_db/artifact/', headers=self.header, data=data)
        self.assertEqual(obs.code, 400)
        self.assertIn(
            'You need to specify a job_id or a prep_id', str(obs.error))

        # both job_id and prep_id defined
        data['job_id'] = 'e5609746-a985-41a1-babf-6b3ebe9eb5a9'
        data['prep_id'] = 'prep_id'
        obs = self.post('/qiita_db/artifact/', headers=self.header, data=data)
        self.assertEqual(obs.code, 400)
        self.assertIn(
            'You need to specify only a job_id or a prep_id', str(obs.error))

        # make sure that all the plugins are on
        qdb.util.activate_or_update_plugins(update=True)

        # test success by inserting a new artifact into an existing job
        original_job = qdb.processing_job.ProcessingJob(data['job_id'])
        input_artifact = original_job.input_artifacts[0]
        original_children = input_artifact.children
        self.assertEqual(len(original_children), 3)

        # send the new data
        del data['prep_id']
        obs = self.post('/qiita_db/artifact/', headers=self.header, data=data)
        jid = loads(obs.body)['job_id']
        job = qdb.processing_job.ProcessingJob(jid)
        while job.status not in ('error', 'success'):
            sleep(0.5)

        # the input artifact should now have 4 children; make sure they all
        # share the same parent and parameters
        children = input_artifact.children
        new_children = list(set(children) - set(original_children))[0]
        self.assertEqual(len(children), 4)
        for c in children[1:]:
            self.assertCountEqual(children[0].processing_parameters.values,
                                  c.processing_parameters.values)
            self.assertEqual(children[0].parents, c.parents)
        # make sure the new artifact is also part of the descendants, which
        # is a different method and usage than the children property
        self.assertIn(new_children, input_artifact.descendants.nodes)

        # now let's test adding an artifact directly to a new prep
        new_prep = qdb.metadata_template.prep_template.PrepTemplate.create(
            pd.DataFrame({'new_col': {'1.SKB1.640202': 1,
                                      '1.SKD3.640198': 2,
                                      '1.SKM4.640180': 3}}),
            qdb.study.Study(1), '16S')
        fd, fp = mkstemp(suffix='_table.biom')
        close(fd)
        with biom_open(fp, 'w') as f:
            et.to_hdf5(f, "test")
        self._clean_up_files.append(fp)

        data = {'user_email': 'demo@microbio.me',
                'artifact_type': 'BIOM', 'prep_id': new_prep.id,
                'files': dumps({'biom': [fp]})}
        obs = self.post('/qiita_db/artifact/', headers=self.header, data=data)
        jid = loads(obs.body)['job_id']
        job = qdb.processing_job.ProcessingJob(jid)
        while job.status not in ('error', 'success'):
            sleep(0.5)
        self.assertIsNotNone(new_prep.artifact)

    def test_post_insert_artifact_and_add_default_processing(self):
        # test adding an artifact + its default processing to a new
        # preparation
        new_prep = qdb.metadata_template.prep_template.PrepTemplate.create(
            pd.DataFrame({'new_col': {'1.SKB1.640202': 1,
                                      '1.SKD3.640198': 2,
                                      '1.SKM4.640180': 3}}),
            qdb.study.Study(1), '16S')

        # create the fastq files to be added
        fd, fp1 = mkstemp(suffix='_seqs.fastq')
        close(fd)
        self._clean_up_files.append(fp1)
        with open(fp1, 'w') as f:
            f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 1:N:0:\n"
                    "NACGTAGGGTGCAAGCGTTGTCCGGAATNA\n"
                    "+\n"
                    "#1=DDFFFHHHHHJJJJJJJJJJJJGII#0\n")
        fd, fp2 = mkstemp(suffix='_barcodes.fastq')
        close(fd)
        self._clean_up_files.append(fp2)
        with open(fp2, 'w') as f:
            f.write("@HWI-ST753:189:D1385ACXX:1:1101:1214:1906 2:N:0:\n"
                    "NNNCNNNNNNNNN\n"
                    "+\n"
                    "#############\n")

        data = {'user_email': 'demo@microbio.me',
                'artifact_type': 'FASTQ',
                'prep_id': new_prep.id,
                'files': dumps([(fp1, 'raw_forward_seqs'),
                                (fp2, 'raw_barcodes')]),
                # the default workflow has to be requested for the children
                # jobs checked below to be created
                'add_default_workflow': True}
        obs = self.post('/qiita_db/artifact/', headers=self.header, data=data)
        self.assertEqual(obs.code, 200)
        jid = loads(obs.body)['job_id']

        # if we got to this point, then we should have a job and that job
        # should have children jobs (generated by the default workflow)
        job = qdb.processing_job.ProcessingJob(jid)
        children = [c.command.name for c in job.children]
        grandchildren = [gc.command.name for c in job.children
                         for gc in c.children]
        self.assertEqual('Validate', job.command.name)
        self.assertEqual(['Split libraries FASTQ'], children)
        self.assertEqual(['Pick closed-reference OTUs'], grandchildren)

        # to avoid any lingering issues, wait for the main job to finish
        while job.status not in ('error', 'success'):
            sleep(0.5)


if __name__ == '__main__':
    main()