|
a |
|
b/qiita_pet/handlers/download.py |
|
|
1 |
# ----------------------------------------------------------------------------- |
|
|
2 |
# Copyright (c) 2014--, The Qiita Development Team. |
|
|
3 |
# |
|
|
4 |
# Distributed under the terms of the BSD 3-clause License. |
|
|
5 |
# |
|
|
6 |
# The full license is in the file LICENSE, distributed with this software. |
|
|
7 |
# ----------------------------------------------------------------------------- |
|
|
8 |
|
|
|
9 |
from tornado.web import authenticated, HTTPError |
|
|
10 |
from tornado.gen import coroutine |
|
|
11 |
|
|
|
12 |
from os.path import basename, getsize, join, isdir, getctime |
|
|
13 |
from os import walk |
|
|
14 |
|
|
|
15 |
from .base_handlers import BaseHandler |
|
|
16 |
from qiita_pet.handlers.api_proxy.util import check_access |
|
|
17 |
from qiita_pet.handlers.artifact_handlers.base_handlers \ |
|
|
18 |
import check_artifact_access |
|
|
19 |
from qiita_db.study import Study |
|
|
20 |
from qiita_db.artifact import Artifact |
|
|
21 |
from qiita_db.user import User |
|
|
22 |
from qiita_db.download_link import DownloadLink |
|
|
23 |
from qiita_db.util import (filepath_id_to_rel_path, get_db_files_base_dir, |
|
|
24 |
get_filepath_information, get_mountpoint, |
|
|
25 |
filepath_id_to_object_id, get_data_types, |
|
|
26 |
retrieve_filepaths, get_work_base_dir) |
|
|
27 |
from qiita_db.meta_util import validate_filepath_access_by_user |
|
|
28 |
from qiita_db.metadata_template.sample_template import SampleTemplate |
|
|
29 |
from qiita_db.metadata_template.prep_template import PrepTemplate |
|
|
30 |
from qiita_db.exceptions import QiitaDBUnknownIDError |
|
|
31 |
from qiita_core.util import execute_as_transaction, get_release_info |
|
|
32 |
from qiita_core.qiita_settings import qiita_config |
|
|
33 |
|
|
|
34 |
from jose import jwt as jose_jwt |
|
|
35 |
from uuid import uuid4 |
|
|
36 |
from base64 import b64encode |
|
|
37 |
from datetime import datetime, timedelta, timezone |
|
|
38 |
from tempfile import mkdtemp |
|
|
39 |
from zipfile import ZipFile |
|
|
40 |
from io import BytesIO |
|
|
41 |
|
|
|
42 |
|
|
|
43 |
class BaseHandlerDownload(BaseHandler):
    """Shared helpers for download handlers: permission checks, generated
    text files, and building nginx mod_zip file lists for zipped downloads.
    """

    def _check_permissions(self, sid):
        """Verify the current user can access study `sid`.

        Parameters
        ----------
        sid : int
            The study id

        Returns
        -------
        qiita_db.study.Study
            The study, if access is granted

        Raises
        ------
        HTTPError
            405 if `check_access` reports the user has no access
        """
        # Check general access to study; check_access returns a dict with a
        # 'message' key on failure and something falsy on success
        study_info = check_access(sid, self.current_user.id)
        if study_info:
            raise HTTPError(405, reason="%s: %s, %s" % (
                study_info['message'], self.current_user.email, sid))
        return Study(sid)

    def _finish_generate_files(self, filename, text):
        """Send `text` to the client as an attachment named `filename`."""
        self.set_header('Content-Description', 'text/csv')
        self.set_header('Expires', '0')
        self.set_header('Cache-Control', 'no-cache')
        self.set_header('Content-Disposition', 'attachment; '
                        'filename=%s' % filename)
        self.write(text)
        self.finish()

    def _generate_files(self, header_name, accessions, filename):
        """Build and send a two-column TSV of sample_name/`header_name`.

        Parameters
        ----------
        header_name : str
            Name of the second column
        accessions : dict {str: str}
            Mapping of sample name to accession value
        filename : str
            Download filename presented to the client
        """
        text = "sample_name\t%s\n%s" % (header_name, '\n'.join(
            ["%s\t%s" % (k, v) for k, v in accessions.items()]))

        self._finish_generate_files(filename, text)

    def _list_dir_files_nginx(self, dirpath):
        """Generates a nginx list of files in the given dirpath for nginx

        Parameters
        ----------
        dirpath : str
            Path to the directory

        Returns
        -------
        list of (str, str, str, str)
            Per file: (path served to nginx, name inside the zip, checksum
            placeholder '-', size in bytes as str)
        """
        basedir = get_db_files_base_dir()
        # +1 so the leading path separator is stripped along with basedir
        basedir_len = len(basedir) + 1
        to_download = []
        for dp, _, fps in walk(dirpath):
            for fn in fps:
                fullpath = join(dp, fn)
                spath = fullpath
                # nginx serves paths relative to the protected base dir
                if fullpath.startswith(basedir):
                    spath = fullpath[basedir_len:]
                to_download.append((spath, spath, '-', str(getsize(fullpath))))
        return to_download

    def _list_artifact_files_nginx(self, artifact):
        """Generates a nginx list of files for the given artifact

        Parameters
        ----------
        artifact : qiita_db.artifact.Artifact
            The artifact to retrieve the files

        Returns
        -------
        list of (str, str, str, str)
            Per file: (path served to nginx, name inside the zip, checksum
            placeholder '-', size in bytes as str)
        """
        basedir = get_db_files_base_dir()
        basedir_len = len(basedir) + 1
        to_download = []
        # NOTE(review): the enumerate index `i` is unused
        for i, x in enumerate(artifact.filepaths):
            # ignore if tgz as they could create problems and the
            # raw data is in the folder
            if x['fp_type'] == 'tgz':
                continue
            if isdir(x['fp']):
                # If we have a directory, we actually need to list all the
                # files from the directory so NGINX can actually download all
                # of them
                to_download.extend(self._list_dir_files_nginx(x['fp']))
            elif x['fp'].startswith(basedir):
                spath = x['fp'][basedir_len:]
                to_download.append(
                    (spath, spath, '-', str(x['fp_size'])))
            else:
                to_download.append(
                    (x['fp'], x['fp'], '-', str(x['fp_size'])))

        # also include a "mapping file" (latest prep template file) per prep
        for pt in artifact.prep_templates:
            # the latest prep template file is always the first [0] tuple and
            # we need the filepath [1]
            pt_fp = pt.get_filepaths()
            if pt_fp:
                pt_fp = pt_fp[0][1]
                spt_fp = pt_fp
                if pt_fp.startswith(basedir):
                    spt_fp = pt_fp[basedir_len:]
                fname = 'mapping_files/%s_mapping_file.txt' % artifact.id
                to_download.append((spt_fp, fname, '-', str(getsize(pt_fp))))
        return to_download

    def _write_nginx_file_list(self, to_download):
        """Writes out the nginx file list

        Parameters
        ----------
        to_download : list of (str, str, str, str)
            The file list information: (path, zip name, checksum, size)
        """
        # nginx mod_zip manifest format: "<crc> <size> <uri> <name>" per line;
        # files are served from the /protected internal location
        all_files = '\n'.join(
            ["%s %s /protected/%s %s" % (fp_checksum, fp_size, fp, fp_name)
             for fp, fp_name, fp_checksum, fp_size in to_download])

        self.set_header('X-Archive-Files', 'zip')
        self.write("%s\n" % all_files)

    def _set_nginx_headers(self, fname):
        """Sets common nginx headers

        Parameters
        ----------
        fname : str
            Nginx's output filename
        """
        self.set_header('Content-Description', 'File Transfer')
        self.set_header('Expires', '0')
        self.set_header('Cache-Control', 'no-cache')
        self.set_header('Content-Disposition',
                        'attachment; filename=%s' % fname)

    def _write_nginx_placeholder_file(self, fp):
        """Writes nginx placeholder file in case that nginx is not set up

        Parameters
        ----------
        fp : str
            The path to be downloaded through nginx
        """
        # If we don't have nginx, write a file that indicates this
        self.write("This installation of Qiita was not equipped with "
                   "nginx, so it is incapable of serving files. The file "
                   "you attempted to download is located at %s" % fp)
|
|
181 |
|
|
|
182 |
|
|
|
183 |
class DownloadHandler(BaseHandlerDownload):
    """Download a single filepath (file or directory) by filepath id."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, filepath_id):
        # filepath_id arrives as a URL string; all helpers expect int
        fid = int(filepath_id)

        if not validate_filepath_access_by_user(self.current_user, fid):
            raise HTTPError(
                403, "%s doesn't have access to "
                "filepath_id: %s" % (self.current_user.email, str(fid)))

        relpath = filepath_id_to_rel_path(fid)
        fp_info = get_filepath_information(fid)
        fname = basename(relpath)

        if fp_info['filepath_type'] in ('directory', 'html_summary_dir'):
            # This is a directory, we need to list all the files so NGINX
            # can download all of them as a single zip (mod_zip manifest)
            to_download = self._list_dir_files_nginx(fp_info['fullpath'])
            self._write_nginx_file_list(to_download)
            fname = '%s.zip' % fname
        else:
            # single file: body is a placeholder (only visible if nginx is
            # absent); X-Accel-Redirect tells nginx which file to stream
            self._write_nginx_placeholder_file(relpath)
            self.set_header('Content-Type', 'application/octet-stream')
            self.set_header('Content-Transfer-Encoding', 'binary')
            self.set_header('X-Accel-Redirect', '/protected/' + relpath)
            # prefix the artifact id to the download name when there is one
            aid = filepath_id_to_object_id(fid)
            if aid is not None:
                fname = '%d_%s' % (aid, fname)

        self._set_nginx_headers(fname)
        self.finish()
|
|
216 |
|
|
|
217 |
|
|
|
218 |
class DownloadStudyBIOMSHandler(BaseHandlerDownload):
    """Download all accessible BIOM artifacts of a study as one zip."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, study_id):
        """Stream a zip (via nginx mod_zip) with the study's BIOM artifacts.

        Parameters
        ----------
        study_id : str
            The study id (URL capture; converted to int)

        Raises
        ------
        HTTPError
            405 (from _check_permissions) if the user cannot access the study
        """
        study_id = int(study_id)
        study = self._check_permissions(study_id)
        # loop over artifacts and retrieve those that we have access to
        to_download = []
        # The user has access to the study, but we don't know if the user
        # can do whatever he wants to the study or just access the public
        # data. (1) an admin has access to all the data; (2) if the study
        # is not public, and the user has access, then it has full access
        # to the data; (3) if the study is public and the user is not the
        # owner or the study is shared with him, then the user doesn't have
        # full access to the study data.
        # NOTE: logical `or` (was bitwise `|`) so evaluation short-circuits
        # and the owner/shared_with lookups are skipped when an earlier
        # condition already grants full access; result is identical for
        # boolean operands.
        full_access = (
            self.current_user.level == 'admin' or
            study.status != 'public' or
            self.current_user == study.owner or
            self.current_user in study.shared_with)

        for a in study.artifacts(artifact_type='BIOM'):
            if full_access or (a.visibility == 'public' and not a.has_human):
                to_download.extend(self._list_artifact_files_nginx(a))

        self._write_nginx_file_list(to_download)

        # timestamped name so repeated downloads don't collide client-side
        zip_fn = 'study_%d_%s.zip' % (
            study_id, datetime.now().strftime('%m%d%y-%H%M%S'))

        self._set_nginx_headers(zip_fn)
        self.finish()
|
|
251 |
|
|
|
252 |
|
|
|
253 |
class DownloadRelease(BaseHandlerDownload):
    """Serve one of the pre-built public release files through nginx."""

    @coroutine
    def get(self, extras):
        # get_release_info returns (biom+metadata release, archive release);
        # index [1] of either tuple is the file's relative path
        biom_metadata_release, archive_release = get_release_info()
        chosen = (archive_release if extras == 'archive'
                  else biom_metadata_release)
        relpath = chosen[1]

        # If we don't have nginx, write a file that indicates this
        # Note that this configuration will automatically create and download
        # ("on the fly") the zip file via the contents in all_files
        self._write_nginx_placeholder_file(relpath)
        self._set_nginx_headers(basename(relpath))

        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header('Content-Transfer-Encoding', 'binary')
        self.set_header('X-Accel-Redirect',
                        f'/protected-working_dir/{relpath}')
        self.finish()
|
|
274 |
|
|
|
275 |
|
|
|
276 |
class DownloadRawData(BaseHandlerDownload):
    """Download a study's raw (parent-less) artifacts as one zip."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, study_id):
        study_id = int(study_id)
        study = self._check_permissions(study_id)
        user = self.current_user
        # Checking access options: owners always may download raw data;
        # everyone else needs the study's public_raw_download flag
        is_owner = study.has_access(user, True)
        public_raw_download = study.public_raw_download
        if not is_owner and not public_raw_download:
            raise HTTPError(405, reason="%s: %s, %s" % (
                'No raw data access', self.current_user.email, str(study_id)))

        # loop over artifacts and retrieve raw data (no parents)
        to_download = []
        for a in study.artifacts():
            if not a.parents:
                # non-owners only get public artifacts without possible
                # human sequences
                if not is_owner and (a.visibility != 'public' or a.has_human):
                    continue
                to_download.extend(self._list_artifact_files_nginx(a))

        self._write_nginx_file_list(to_download)

        zip_fn = 'study_raw_data_%d_%s.zip' % (
            study_id, datetime.now().strftime('%m%d%y-%H%M%S'))

        self._set_nginx_headers(zip_fn)
        self.finish()
|
|
306 |
|
|
|
307 |
|
|
|
308 |
class DownloadEBISampleAccessions(BaseHandlerDownload):
    """Download the EBI sample accessions of a study as a TSV file."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, study_id):
        study = int(study_id)
        self._check_permissions(study)

        # one row per sample: sample_name <tab> sample_accession
        accessions = SampleTemplate(study).ebi_sample_accessions
        out_name = 'ebi_sample_accessions_study_%s.tsv' % study
        self._generate_files('sample_accession', accessions, out_name)
|
|
319 |
|
|
|
320 |
|
|
|
321 |
class DownloadEBIPrepAccessions(BaseHandlerDownload):
    """Download the EBI experiment accessions of a preparation as a TSV."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, prep_template_id):
        prep_id = int(prep_template_id)
        prep = PrepTemplate(prep_id)
        study_id = prep.study_id

        # access is checked at the study level, not the prep level
        self._check_permissions(study_id)

        out_name = ('ebi_experiment_accessions_study_%s_prep_%s.tsv'
                    % (study_id, prep_id))
        self._generate_files(
            'experiment_accession', prep.ebi_experiment_accessions, out_name)
|
|
335 |
|
|
|
336 |
|
|
|
337 |
class DownloadSampleInfoPerPrep(BaseHandlerDownload):
    """Download the sample information restricted to one preparation."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, prep_template_id):
        prep_id = int(prep_template_id)
        prep = PrepTemplate(prep_id)
        study_id = prep.study_id

        # access is checked at the study level, not the prep level
        self._check_permissions(study_id)

        sample_info = SampleTemplate(study_id)

        # subset the sample info to only the samples in this prep;
        # to_csv(None, ...) returns the TSV as a string
        tsv = sample_info.to_dataframe(samples=list(prep)).to_csv(
            None, sep='\t')

        self._finish_generate_files(
            'sample_information_from_prep_%s.tsv' % prep_id, tsv)
|
|
354 |
|
|
|
355 |
|
|
|
356 |
class DownloadUpload(BaseHandlerDownload):
    """Download a file from the uploads mountpoint (admin only)."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, path):
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, reason="%s doesn't have access to download "
                            "uploaded files" % user.email)

        # [0] because it returns a list
        # [1] we only need the filepath
        # the slice strips the base dir so the path is relative for nginx
        filepath = get_mountpoint("uploads")[0][1][
            len(get_db_files_base_dir()):]
        # NOTE(review): `path` is user-controlled (URL capture) and joined
        # without sanitization, so '../' segments could escape the uploads
        # dir; mitigated by the admin-only check above — confirm intended
        relpath = join(filepath, path)

        # placeholder body only shows when nginx is absent; otherwise the
        # X-Accel-Redirect makes nginx stream the real file
        self._write_nginx_placeholder_file(relpath)
        self.set_header('Content-Type', 'application/octet-stream')
        self.set_header('Content-Transfer-Encoding', 'binary')
        self.set_header('X-Accel-Redirect', '/protected/' + relpath)
        self._set_nginx_headers(basename(relpath))
        self.finish()
|
|
378 |
|
|
|
379 |
|
|
|
380 |
class DownloadDataReleaseFromPrep(BaseHandlerDownload):
    """Build and stream an in-memory zip with everything needed for a
    data-prep release (sample/prep info, bioms, coverage, README)."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, prep_template_id):
        """ This method constructs an on the fly ZIP with all the files
        required for a data-prep release/data-delivery. Mainly sample, prep
        info, bioms and coverage
        """
        user = self.current_user
        if user.level not in ('admin', 'web-lab admin'):
            raise HTTPError(403, reason="%s doesn't have access to download "
                            "the data release files" % user.email)

        pid = int(prep_template_id)
        pt = PrepTemplate(pid)
        sid = pt.study_id
        st = SampleTemplate(sid)
        date = datetime.now().strftime('%m%d%y-%H%M%S')
        # scratch dir for the generated TSVs and README
        # NOTE(review): this temp dir is never removed here — confirm it is
        # cleaned elsewhere
        td = mkdtemp(dir=get_work_base_dir())

        # files: list of [source path, name inside the zip]
        files = []
        # readme: accumulated lines of the README.txt shipped in the zip
        readme = [
            f'Delivery created on {date}',
            '',
            f'Host (human) removal: {pt.artifact.human_reads_filter_method}',
            '',
            # this is not changing in the near future so just leaving
            # hardcoded for now
            'Main woltka reference: WoLr2, more info visit: '
            'https://ftp.microbio.me/pub/wol2/',
            '',
            f"Qiita's prep: https://qiita.ucsd.edu/study/description/{sid}"
            f"?prep_id={pid}",
            '',
        ]

        # helper dict to add "user/human" friendly names to the bioms
        human_names = {
            'ec.biom': 'KEGG Enzyme (EC)',
            'per-gene.biom': 'Per gene Predictions',
            'none.biom': 'Per genome Predictions',
            'cell_counts.biom': 'Cell counts',
            'pathway.biom': 'KEGG Pathway',
            'ko.biom': 'KEGG Ontology (KO)',
            'rna_copy_counts.biom': 'RNA copy counts'
        }

        # sample-info creation
        fn = join(td, f'sample_information_from_prep_{pid}.tsv')
        readme.append(f'Sample information: {basename(fn)}')
        files.append([fn, basename(fn)])
        st.to_dataframe(samples=list(pt)).to_csv(fn, sep='\t')

        # prep-info creation
        fn = join(td, f'prep_information_{pid}.tsv')
        readme.append(f'Prep information: {basename(fn)}')
        files.append([fn, basename(fn)])
        pt.to_dataframe().to_csv(fn, sep='\t')

        readme.append('')

        # finding the bioms to be added
        bioms = dict()
        coverages = None
        for a in Study(sid).artifacts(artifact_type='BIOM'):
            # only BIOMs that belong to this prep
            if a.prep_templates[0].id != pid:
                continue
            biom = None
            for fp in a.filepaths:
                if fp['fp_type'] == 'biom':
                    biom = fp
                # keep the first coverages.tgz encountered
                if coverages is None and 'coverages.tgz' == basename(fp['fp']):
                    coverages = fp['fp']
            if biom is None:
                continue
            biom_fn = basename(biom['fp'])
            # there is a small but real chance that the same prep has the same
            # artifacts so using the latests
            if biom_fn not in bioms:
                bioms[biom_fn] = [a, biom]
            else:
                if getctime(biom['fp']) > getctime(bioms[biom_fn][1]['fp']):
                    bioms[biom_fn] = [a, biom]

        # once we have all the bioms, we can add them to the list of zips
        # and to the readme the biom details and all the processing
        for fn, (a, fp) in bioms.items():
            aname = basename(fp["fp"])
            # prefix the artifact id to disambiguate equal biom filenames
            nname = f'{a.id}_{aname}'
            files.append([fp['fp'], nname])

            hname = ''
            if aname in human_names:
                hname = human_names[aname]
            readme.append(f'{nname}\t{hname}')

            # record the full processing chain (command/software/parameters)
            # for every ancestor of this artifact
            for an in set(a.ancestors.nodes()):
                p = an.processing_parameters
                if p is not None:
                    c = p.command
                    cn = c.name
                    s = c.software
                    sn = s.name
                    sv = s.version
                    pd = p.dump()
                    readme.append(f'\t{cn}\t{sn}\t{sv}\t{pd}')

        # if a coverage was found, add it to the list of files
        if coverages is not None:
            fn = basename(coverages)
            readme.append(f'{fn}\tcoverage files')
            files.append([coverages, fn])

        fn = join(td, 'README.txt')
        with open(fn, 'w') as fp:
            fp.write('\n'.join(readme))
        files.append([fn, basename(fn)])

        # assemble the zip fully in memory and send it in one response
        zp_fn = f'data_release_{pid}_{date}.zip'
        zp = BytesIO()
        with ZipFile(zp, 'w') as zipf:
            for fp, fn in files:
                zipf.write(fp, fn)

        self.set_header('Content-Type', 'application/zip')
        self.set_header("Content-Disposition", f"attachment; filename={zp_fn}")
        self.write(zp.getvalue())
        zp.close()
        self.finish()
|
|
510 |
|
|
|
511 |
|
|
|
512 |
class DownloadPublicHandler(BaseHandlerDownload):
    """Unauthenticated download endpoint for public data: raw artifacts,
    BIOMs, or sample/prep information files, selected via query arguments
    (data, study_id, prep_id, data_type)."""

    @coroutine
    @execute_as_transaction
    def get(self):
        data = self.get_argument("data", None)
        study_id = self.get_argument("study_id", None)
        prep_id = self.get_argument("prep_id", None)
        data_type = self.get_argument("data_type", None)
        dtypes = get_data_types().keys()

        templates = ['sample_information', 'prep_information']
        valid_data = ['raw', 'biom'] + templates

        to_download = []
        if data is None or (study_id is None and prep_id is None) or \
                data not in valid_data:
            raise HTTPError(422, reason='You need to specify both data (the '
                            'data type you want to download - %s) and '
                            'study_id or prep_id' % '/'.join(valid_data))
        elif data_type is not None and data_type not in dtypes:
            raise HTTPError(422, reason='Not a valid data_type. Valid types '
                            'are: %s' % ', '.join(dtypes))
        # NOTE(review): this branch is unreachable — the first condition
        # already raises when both study_id and prep_id are None
        elif data in templates and prep_id is None and study_id is None:
            raise HTTPError(422, reason='If downloading a sample or '
                            'preparation file you need to define study_id or'
                            ' prep_id')
        elif data in templates:
            # information-file downloads: resolve the template object and
            # build the download filename
            if data_type is not None:
                raise HTTPError(422, reason='If requesting an information '
                                'file you cannot specify the data_type')
            elif prep_id is not None and data == 'prep_information':
                fname = 'preparation_information_%s' % prep_id
                prep_id = int(prep_id)
                try:
                    infofile = PrepTemplate(prep_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Preparation information does not exist')
            elif study_id is not None and data == 'sample_information':
                fname = 'sample_information_%s' % study_id
                study_id = int(study_id)
                try:
                    infofile = SampleTemplate(study_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Sample information does not exist')
            else:
                raise HTTPError(422, reason='Review your parameters, not a '
                                'valid combination')
            # newest filepath first ([0]) thanks to sort='descending'
            x = retrieve_filepaths(
                infofile._filepath_table, infofile._id_column, infofile.id,
                sort='descending')[0]

            basedir = get_db_files_base_dir()
            # +1 strips the path separator along with the base dir
            basedir_len = len(basedir) + 1
            fp = x['fp'][basedir_len:]
            to_download.append((fp, fp, '-', str(x['fp_size'])))
            self._write_nginx_file_list(to_download)

            zip_fn = '%s_%s.zip' % (
                fname, datetime.now().strftime('%m%d%y-%H%M%S'))
            self._set_nginx_headers(zip_fn)
        else:
            # artifact downloads ('raw' or 'biom') for a public study
            study_id = int(study_id)
            try:
                study = Study(study_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(422, reason='Study does not exist')
            else:
                public_raw_download = study.public_raw_download
                if study.status != 'public':
                    raise HTTPError(404, reason='Study is not public. If this '
                                    'is a mistake contact: %s' %
                                    qiita_config.help_email)
                elif data == 'raw' and not public_raw_download:
                    raise HTTPError(422, reason='No raw data access. If this '
                                    'is a mistake contact: %s'
                                    % qiita_config.help_email)
                else:
                    # raw data: artifacts with no parents
                    artifacts = [a for a in study.artifacts(dtype=data_type)
                                 if not a.parents]
                    # bioms: replace the raw selection with BIOM artifacts
                    if data == 'biom':
                        artifacts = study.artifacts(
                            dtype=data_type, artifact_type='BIOM')
                    for a in artifacts:
                        # skip non-public artifacts and those with possible
                        # human sequences
                        if a.visibility != 'public' or a.has_human:
                            continue
                        to_download.extend(self._list_artifact_files_nginx(a))

                    if not to_download:
                        raise HTTPError(422, reason='Nothing to download. If '
                                        'this is a mistake contact: %s'
                                        % qiita_config.help_email)
                    else:
                        self._write_nginx_file_list(to_download)

                        zip_fn = 'study_%d_%s_%s.zip' % (
                            study_id, data, datetime.now().strftime(
                                '%m%d%y-%H%M%S'))

                        self._set_nginx_headers(zip_fn)

        self.finish()
|
|
617 |
|
|
|
618 |
|
|
|
619 |
class DownloadPublicArtifactHandler(BaseHandlerDownload):
    """Unauthenticated download of a single public artifact as a zip."""

    @coroutine
    @execute_as_transaction
    def get(self):
        artifact_id = self.get_argument("artifact_id", None)

        # guard clauses: each failed check raises and ends the request
        if artifact_id is None:
            raise HTTPError(422, reason='You need to specify an artifact id')

        try:
            artifact = Artifact(artifact_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, reason='Artifact does not exist')

        if artifact.visibility != 'public':
            raise HTTPError(404, reason='Artifact is not public. If '
                            'this is a mistake contact: %s'
                            % qiita_config.help_email)
        if artifact.has_human:
            raise HTTPError(404, reason='Artifact has possible human '
                            'sequences. If this is a mistake contact: '
                            '%s' % qiita_config.help_email)

        to_download = self._list_artifact_files_nginx(artifact)
        if not to_download:
            raise HTTPError(422, reason='Nothing to download. If '
                            'this is a mistake contact: %s'
                            % qiita_config.help_email)

        # hand the manifest to nginx mod_zip and name the resulting zip
        self._write_nginx_file_list(to_download)
        zip_fn = 'artifact_%s_%s.zip' % (
            artifact_id, datetime.now().strftime('%m%d%y-%H%M%S'))
        self._set_nginx_headers(zip_fn)
        self.finish()
|
|
656 |
|
|
|
657 |
|
|
|
658 |
class DownloadPrivateArtifactHandler(BaseHandlerDownload):
    """Create (POST) and consume (GET) short-lived signed download links
    for private artifacts, backed by JWTs stored in the database."""

    @authenticated
    @coroutine
    @execute_as_transaction
    def post(self, artifact_id):
        # Generate a new download link:
        # 1. Build a signed jwt specifying the user and
        # the artifact they wish to download
        # 2. Write that jwt to the database keyed by its jti
        # (jwt ID/ json token identifier)
        # 3. Return the jti as a short url to be used for download

        user = self.current_user
        artifact = Artifact(artifact_id)

        # Check that user is currently allowed to access artifact, else throw
        check_artifact_access(user, artifact)

        # Generate a jwt id as a random uuid in base64
        jti = b64encode(uuid4().bytes).decode("utf-8")
        # Sign a jwt allowing access
        utcnow = datetime.now(timezone.utc)
        # NOTE(review): iat/exp are stored in *milliseconds*, not the
        # standard JWT seconds; get() below compares in milliseconds too,
        # so this is internally consistent — keep both sides in sync
        jwt = jose_jwt.encode({
            "artifactId": str(artifact_id),
            "perm": "download",
            "sub": str(user._id),
            "email": str(user.email),
            "iat": int(utcnow.timestamp() * 1000),
            "exp": int((utcnow + timedelta(days=7)).timestamp() * 1000),
            "jti": jti
        },
            qiita_config.jwt_secret,
            algorithm='HS256'
        )

        # Save the jwt to the database
        DownloadLink.create(jwt)

        url = qiita_config.base_url + '/private_download/' + jti
        user_msg = "This link will expire in 7 days on: " + \
            (utcnow + timedelta(days=7)).strftime('%Y-%m-%d')

        self.set_status(200)
        self.finish({"url": url, "msg": user_msg})

    @coroutine
    @execute_as_transaction
    def get(self, jti):
        # Grab the jwt out of the database
        jwt = DownloadLink.get(jti)

        # If no jwt, error response
        if jwt is None:
            raise HTTPError(
                404,
                reason='Download Not Found. Link may have expired.')

        # If jwt doesn't validate, error response
        jwt_data = jose_jwt.decode(jwt, qiita_config.jwt_secret, 'HS256')
        if jwt_data is None:
            raise HTTPError(403, reason='Invalid JWT')

        # Triple check expiration and user permissions
        user = User(jwt_data["sub"])
        artifact = Artifact(jwt_data["artifactId"])

        # current time in milliseconds, matching the claims written in post()
        utc_millis = datetime.now(timezone.utc).timestamp() * 1000

        if utc_millis < jwt_data["iat"]:
            raise HTTPError(403, reason="This download link is not yet valid")
        if utc_millis > jwt_data["exp"]:
            raise HTTPError(403, reason="This download link has expired")
        if jwt_data["perm"] != "download":
            raise HTTPError(403, reason="This download link is invalid")

        # re-check access at download time, not just at link creation
        check_artifact_access(user, artifact)

        # All checks out, let's give them the files then!
        to_download = self._list_artifact_files_nginx(artifact)
        if not to_download:
            raise HTTPError(422, reason='Nothing to download. If '
                            'this is a mistake contact: %s' %
                            qiita_config.help_email)
        else:
            self._write_nginx_file_list(to_download)

            zip_fn = 'artifact_%s_%s.zip' % (
                jwt_data["artifactId"], datetime.now().strftime(
                    '%m%d%y-%H%M%S'))

            self._set_nginx_headers(zip_fn)
        self.finish()