Switch to unified view

a b/qiita_pet/handlers/download.py
1
# -----------------------------------------------------------------------------
2
# Copyright (c) 2014--, The Qiita Development Team.
3
#
4
# Distributed under the terms of the BSD 3-clause License.
5
#
6
# The full license is in the file LICENSE, distributed with this software.
7
# -----------------------------------------------------------------------------
8
9
from tornado.web import authenticated, HTTPError
10
from tornado.gen import coroutine
11
12
from os.path import basename, getsize, join, isdir, getctime
13
from os import walk
14
15
from .base_handlers import BaseHandler
16
from qiita_pet.handlers.api_proxy.util import check_access
17
from qiita_pet.handlers.artifact_handlers.base_handlers \
18
    import check_artifact_access
19
from qiita_db.study import Study
20
from qiita_db.artifact import Artifact
21
from qiita_db.user import User
22
from qiita_db.download_link import DownloadLink
23
from qiita_db.util import (filepath_id_to_rel_path, get_db_files_base_dir,
24
                           get_filepath_information, get_mountpoint,
25
                           filepath_id_to_object_id, get_data_types,
26
                           retrieve_filepaths, get_work_base_dir)
27
from qiita_db.meta_util import validate_filepath_access_by_user
28
from qiita_db.metadata_template.sample_template import SampleTemplate
29
from qiita_db.metadata_template.prep_template import PrepTemplate
30
from qiita_db.exceptions import QiitaDBUnknownIDError
31
from qiita_core.util import execute_as_transaction, get_release_info
32
from qiita_core.qiita_settings import qiita_config
33
34
from jose import jwt as jose_jwt
35
from uuid import uuid4
36
from base64 import b64encode
37
from datetime import datetime, timedelta, timezone
38
from tempfile import mkdtemp
39
from zipfile import ZipFile
40
from io import BytesIO
41
42
43
class BaseHandlerDownload(BaseHandler):
    """Helpers shared by the download handlers

    Groups the study access check, the tab separated text response writer
    and the helpers that write the file list and headers used by nginx.
    """

    def _check_permissions(self, sid):
        """Returns the study if the current user can access it

        Parameters
        ----------
        sid : int
            The study id

        Returns
        -------
        qiita_db.study.Study
            The study with id `sid`

        Raises
        ------
        HTTPError
            405 if `check_access` returns a truthy value for this user
        """
        # a truthy result carries the 'message' explaining the problem
        denied = check_access(sid, self.current_user.id)
        if not denied:
            return Study(sid)
        raise HTTPError(405, reason="%s: %s, %s" % (
            denied['message'], self.current_user.email, sid))

    def _finish_generate_files(self, filename, text):
        """Writes `text` as an attachment and finishes the request

        Parameters
        ----------
        filename : str
            The name used in the Content-Disposition header
        text : str
            The body of the response
        """
        headers = (
            ('Content-Description', 'text/csv'),
            ('Expires', '0'),
            ('Cache-Control', 'no-cache'),
            ('Content-Disposition', 'attachment; filename=%s' % filename),
        )
        for header, value in headers:
            self.set_header(header, value)
        self.write(text)
        self.finish()

    def _generate_files(self, header_name, accessions, filename):
        """Sends a two column, tab separated file built from `accessions`

        Parameters
        ----------
        header_name : str
            The header of the second column; the first is `sample_name`
        accessions : dict
            Mapping whose keys fill the first column and values the second
        filename : str
            The name of the file offered for download
        """
        rows = ["%s\t%s" % (name, accession)
                for name, accession in accessions.items()]
        text = "sample_name\t%s\n%s" % (header_name, '\n'.join(rows))

        self._finish_generate_files(filename, text)

    def _list_dir_files_nginx(self, dirpath):
        """Generates a nginx list of files in the given dirpath

        Parameters
        ----------
        dirpath : str
            Path to the directory

        Returns
        -------
        list of (str, str, str, str)
            For each file found walking the directory: the path, the path
            again (used as name), '-' in the checksum slot and the size
        """
        base = get_db_files_base_dir()
        # presumably the +1 drops the separator after the base directory
        prefix_len = len(base) + 1
        listing = []
        for root, _, names in walk(dirpath):
            for name in names:
                abspath = join(root, name)
                if abspath.startswith(base):
                    shown = abspath[prefix_len:]
                else:
                    shown = abspath
                listing.append((shown, shown, '-', str(getsize(abspath))))
        return listing

    def _list_artifact_files_nginx(self, artifact):
        """Generates a nginx list of files for the given artifact

        Parameters
        ----------
        artifact : qiita_db.artifact.Artifact
            The artifact to retrieve the files

        Returns
        -------
        list of (str, str, str, str)
            The path information needed by nginx for each file in the
            artifact plus the latest file of each of its prep templates
        """
        base = get_db_files_base_dir()
        prefix_len = len(base) + 1
        listing = []
        for entry in artifact.filepaths:
            # tgz files are skipped: they could create problems and the
            # raw data is in the folder
            if entry['fp_type'] == 'tgz':
                continue
            path = entry['fp']
            if isdir(path):
                # directories are expanded so NGINX receives every file
                listing.extend(self._list_dir_files_nginx(path))
                continue
            if path.startswith(base):
                path = path[prefix_len:]
            listing.append((path, path, '-', str(entry['fp_size'])))

        for pt in artifact.prep_templates:
            pt_files = pt.get_filepaths()
            if not pt_files:
                continue
            # the latest prep template file is always the first [0] tuple
            # and we need the filepath [1]
            full = pt_files[0][1]
            shown = full[prefix_len:] if full.startswith(base) else full
            listing.append((
                shown, 'mapping_files/%s_mapping_file.txt' % artifact.id,
                '-', str(getsize(full))))
        return listing

    def _write_nginx_file_list(self, to_download):
        """Writes out the nginx file list

        Parameters
        ----------
        to_download : list of (str, str, str, str)
            The file list information: path, name, checksum and size
        """
        lines = []
        for fp, fp_name, fp_checksum, fp_size in to_download:
            lines.append("%s %s /protected/%s %s" % (
                fp_checksum, fp_size, fp, fp_name))

        self.set_header('X-Archive-Files', 'zip')
        self.write("%s\n" % '\n'.join(lines))

    def _set_nginx_headers(self, fname):
        """Sets common nginx headers

        Parameters
        ----------
        fname : str
            Nginx's output filename
        """
        for header, value in (
                ('Content-Description', 'File Transfer'),
                ('Expires', '0'),
                ('Cache-Control', 'no-cache'),
                ('Content-Disposition', 'attachment; filename=%s' % fname)):
            self.set_header(header, value)

    def _write_nginx_placeholder_file(self, fp):
        """Writes nginx placeholder file in case that nginx is not set up

        Parameters
        ----------
        fp : str
            The path to be downloaded through nginx
        """
        # this text is the response body, shown when nginx is not in place
        message = ("This installation of Qiita was not equipped with "
                   "nginx, so it is incapable of serving files. The file "
                   "you attempted to download is located at %s" % fp)
        self.write(message)
181
182
183
class DownloadHandler(BaseHandlerDownload):
    """Downloads a single filepath, either a file or a directory"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, filepath_id):
        """Serves the filepath with the given id through nginx

        Parameters
        ----------
        filepath_id : str
            The id of the filepath to download

        Raises
        ------
        HTTPError
            403 if the current user cannot access the filepath
        """
        fid = int(filepath_id)

        allowed = validate_filepath_access_by_user(self.current_user, fid)
        if not allowed:
            # NOTE(review): the message is passed positionally, not as
            # reason= like the other handlers in this file - confirm intent
            raise HTTPError(
                403, "%s doesn't have access to "
                "filepath_id: %s" % (self.current_user.email, str(fid)))

        relpath = filepath_id_to_rel_path(fid)
        fp_info = get_filepath_information(fid)
        fname = basename(relpath)

        is_directory = fp_info['filepath_type'] in (
            'directory', 'html_summary_dir')
        if is_directory:
            # list every file of the directory so NGINX can download all
            # of them
            self._write_nginx_file_list(
                self._list_dir_files_nginx(fp_info['fullpath']))
            fname = '%s.zip' % fname
        else:
            self._write_nginx_placeholder_file(relpath)
            for header, value in (
                    ('Content-Type', 'application/octet-stream'),
                    ('Content-Transfer-Encoding', 'binary'),
                    ('X-Accel-Redirect', '/protected/' + relpath)):
                self.set_header(header, value)
            # prefix the name with the owning object id when there is one
            aid = filepath_id_to_object_id(fid)
            if aid is not None:
                fname = '%d_%s' % (aid, fname)

        self._set_nginx_headers(fname)
        self.finish()
216
217
218
class DownloadStudyBIOMSHandler(BaseHandlerDownload):
    """Downloads the BIOM artifacts of a study as a single zip"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, study_id):
        """Lists for nginx the BIOM artifact files the user can access

        Parameters
        ----------
        study_id : str
            The id of the study

        Raises
        ------
        HTTPError
            405 (from `_check_permissions`) if the user cannot access the
            study
        """
        study_id = int(study_id)
        study = self._check_permissions(study_id)
        user = self.current_user

        # The user has access to the study, but we don't know if the user
        # has full access or can only reach the public data:
        # (1) an admin has access to all the data;
        # (2) if the study is not public and the user has access, then the
        #     user has full access to the data;
        # (3) if the study is public, only the owner and the users the study
        #     is shared with have full access.
        # `or` short-circuits, so the owner / shared_with lookups only run
        # when the cheaper checks did not already grant access.
        full_access = (
            user.level == 'admin' or
            study.status != 'public' or
            user == study.owner or
            user in study.shared_with)

        # loop over artifacts and retrieve those that we have access to
        to_download = []
        for a in study.artifacts(artifact_type='BIOM'):
            if full_access or (a.visibility == 'public' and not a.has_human):
                to_download.extend(self._list_artifact_files_nginx(a))

        self._write_nginx_file_list(to_download)

        zip_fn = 'study_%d_%s.zip' % (
            study_id, datetime.now().strftime('%m%d%y-%H%M%S'))

        self._set_nginx_headers(zip_fn)
        self.finish()
251
252
253
class DownloadRelease(BaseHandlerDownload):
    """Downloads one of the release files reported by get_release_info"""

    @coroutine
    def get(self, extras):
        """Redirects nginx to the requested release file

        Parameters
        ----------
        extras : str
            'archive' selects the archive release; any other value selects
            the biom/metadata release
        """
        biom_metadata_release, archive_release = get_release_info()
        release = (archive_release if extras == 'archive'
                   else biom_metadata_release)
        # [1] is the path of the release file
        relpath = release[1]

        # If we don't have nginx, the body written here tells the user so
        self._write_nginx_placeholder_file(relpath)

        self._set_nginx_headers(basename(relpath))

        for header, value in (
                ('Content-Type', 'application/octet-stream'),
                ('Content-Transfer-Encoding', 'binary'),
                ('X-Accel-Redirect', f'/protected-working_dir/{relpath}')):
            self.set_header(header, value)
        self.finish()
274
275
276
class DownloadRawData(BaseHandlerDownload):
    """Downloads the raw data (artifacts without parents) of a study"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, study_id):
        """Lists for nginx the raw data files the user can access

        Parameters
        ----------
        study_id : str
            The id of the study

        Raises
        ------
        HTTPError
            405 if the user cannot access the study, or if the user fails
            the `has_access(user, True)` check and the study does not allow
            public raw download
        """
        study_id = int(study_id)
        study = self._check_permissions(study_id)
        user = self.current_user

        # Checking access options
        is_owner = study.has_access(user, True)
        public_raw_download = study.public_raw_download
        if not (is_owner or public_raw_download):
            raise HTTPError(405, reason="%s: %s, %s" % (
                'No raw data access', self.current_user.email, str(study_id)))

        # loop over artifacts and retrieve raw data (no parents)
        to_download = []
        for artifact in study.artifacts():
            if artifact.parents:
                continue
            # non owners only get public artifacts without human data
            if is_owner or (artifact.visibility == 'public' and
                            not artifact.has_human):
                to_download.extend(
                    self._list_artifact_files_nginx(artifact))

        self._write_nginx_file_list(to_download)

        timestamp = datetime.now().strftime('%m%d%y-%H%M%S')
        zip_fn = 'study_raw_data_%d_%s.zip' % (study_id, timestamp)

        self._set_nginx_headers(zip_fn)
        self.finish()
306
307
308
class DownloadEBISampleAccessions(BaseHandlerDownload):
    """Downloads the EBI sample accessions of a study as a tsv file"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, study_id):
        """Sends the sample_name / sample_accession table of the study

        Parameters
        ----------
        study_id : str
            The id of the study
        """
        sid = int(study_id)
        self._check_permissions(sid)

        accessions = SampleTemplate(sid).ebi_sample_accessions
        filename = 'ebi_sample_accessions_study_%s.tsv' % sid
        self._generate_files('sample_accession', accessions, filename)
319
320
321
class DownloadEBIPrepAccessions(BaseHandlerDownload):
    """Downloads the EBI experiment accessions of a prep as a tsv file"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, prep_template_id):
        """Sends the sample_name / experiment_accession table of the prep

        Parameters
        ----------
        prep_template_id : str
            The id of the prep template
        """
        pid = int(prep_template_id)
        pt = PrepTemplate(pid)
        sid = pt.study_id

        # access is checked against the study the prep belongs to
        self._check_permissions(sid)

        accessions = pt.ebi_experiment_accessions
        filename = 'ebi_experiment_accessions_study_%s_prep_%s.tsv' % (
            sid, pid)
        self._generate_files('experiment_accession', accessions, filename)
335
336
337
class DownloadSampleInfoPerPrep(BaseHandlerDownload):
    """Downloads the sample information of the samples in a prep"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, prep_template_id):
        """Sends, as a tsv file, the sample information for the prep

        Parameters
        ----------
        prep_template_id : str
            The id of the prep template
        """
        pid = int(prep_template_id)
        pt = PrepTemplate(pid)
        sid = pt.study_id

        # access is checked against the study the prep belongs to
        self._check_permissions(sid)

        # only the samples present in the prep are exported
        sample_info = SampleTemplate(sid).to_dataframe(samples=list(pt))
        text = sample_info.to_csv(None, sep='\t')

        filename = 'sample_information_from_prep_%s.tsv' % pid
        self._finish_generate_files(filename, text)
354
355
356
class DownloadUpload(BaseHandlerDownload):
    """Downloads a file from the uploads mountpoint (admins only)"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, path):
        """Redirects nginx to `path` inside the uploads folder

        Parameters
        ----------
        path : str
            The path of the file, relative to the uploads folder

        Raises
        ------
        HTTPError
            403 if the current user is not an admin
        """
        user = self.current_user
        if user.level != 'admin':
            raise HTTPError(403, reason="%s doesn't have access to download "
                            "uploaded files" % user.email)

        # [0] because it returns a list
        # [1] we only need the filepath
        uploads_dir = get_mountpoint("uploads")[0][1]
        base_len = len(get_db_files_base_dir())
        # NOTE(review): `path` is joined as received - confirm the route
        # pattern keeps it inside the uploads folder
        relpath = join(uploads_dir[base_len:], path)

        self._write_nginx_placeholder_file(relpath)
        for header, value in (
                ('Content-Type', 'application/octet-stream'),
                ('Content-Transfer-Encoding', 'binary'),
                ('X-Accel-Redirect', '/protected/' + relpath)):
            self.set_header(header, value)
        self._set_nginx_headers(basename(relpath))
        self.finish()
378
379
380
class DownloadDataReleaseFromPrep(BaseHandlerDownload):
    """Builds the data-release zip of a prep (admins / web-lab admins)"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def get(self, prep_template_id):
        """ This method constructs an on the fly ZIP with all the files
            required for a data-prep release/data-delivery. Mainly sample, prep
            info, bioms and coverage

        Parameters
        ----------
        prep_template_id : str
            The id of the prep template

        Raises
        ------
        HTTPError
            403 if the user is neither 'admin' nor 'web-lab admin'
        """
        # function-scope import: only used to remove the scratch folder
        from shutil import rmtree

        user = self.current_user
        if user.level not in ('admin', 'web-lab admin'):
            raise HTTPError(403, reason="%s doesn't have access to download "
                            "the data release files" % user.email)

        pid = int(prep_template_id)
        pt = PrepTemplate(pid)
        date = datetime.now().strftime('%m%d%y-%H%M%S')

        # the zip is assembled in memory, so the scratch folder is only
        # needed while building it and is always removed afterwards
        td = mkdtemp(dir=get_work_base_dir())
        try:
            payload = self._build_release_zip(pt, pid, date, td)
        finally:
            rmtree(td, ignore_errors=True)

        zp_fn = f'data_release_{pid}_{date}.zip'
        self.set_header('Content-Type', 'application/zip')
        self.set_header("Content-Disposition", f"attachment; filename={zp_fn}")
        self.write(payload)
        self.finish()

    def _build_release_zip(self, pt, pid, date, td):
        """Creates the release files in `td` and returns the zipped bytes

        Parameters
        ----------
        pt : qiita_db.metadata_template.prep_template.PrepTemplate
            The prep template being released
        pid : int
            The id of the prep template
        date : str
            The timestamp written in the README
        td : str
            Existing folder where the generated files are written

        Returns
        -------
        bytes
            The content of the zip file
        """
        sid = pt.study_id
        st = SampleTemplate(sid)

        # list of [path on disk, name inside the zip]
        files = []
        readme = [
            f'Delivery created on {date}',
            '',
            f'Host (human) removal: {pt.artifact.human_reads_filter_method}',
            '',
            # this is not changing in the near future so just leaving
            # hardcoded for now
            'Main woltka reference: WoLr2, more info visit: '
            'https://ftp.microbio.me/pub/wol2/',
            '',
            f"Qiita's prep: https://qiita.ucsd.edu/study/description/{sid}"
            f"?prep_id={pid}",
            '',
        ]

        # helper dict to add "user/human" friendly names to the bioms
        human_names = {
            'ec.biom': 'KEGG Enzyme (EC)',
            'per-gene.biom': 'Per gene Predictions',
            'none.biom': 'Per genome Predictions',
            'cell_counts.biom': 'Cell counts',
            'pathway.biom': 'KEGG Pathway',
            'ko.biom': 'KEGG Ontology (KO)',
            'rna_copy_counts.biom': 'RNA copy counts'
        }

        # sample-info creation
        fn = join(td, f'sample_information_from_prep_{pid}.tsv')
        readme.append(f'Sample information: {basename(fn)}')
        files.append([fn, basename(fn)])
        st.to_dataframe(samples=list(pt)).to_csv(fn, sep='\t')

        # prep-info creation
        fn = join(td, f'prep_information_{pid}.tsv')
        readme.append(f'Prep information: {basename(fn)}')
        files.append([fn, basename(fn)])
        pt.to_dataframe().to_csv(fn, sep='\t')

        readme.append('')

        # finding the bioms to be added; keyed by the biom file name
        bioms = dict()
        coverages = None
        for a in Study(sid).artifacts(artifact_type='BIOM'):
            if a.prep_templates[0].id != pid:
                continue
            biom = None
            for fp in a.filepaths:
                if fp['fp_type'] == 'biom':
                    biom = fp
                # only the first coverages.tgz found is kept
                if coverages is None and 'coverages.tgz' == basename(fp['fp']):
                    coverages = fp['fp']
            if biom is None:
                continue
            biom_fn = basename(biom['fp'])
            # there is a small but real chance that the same prep has more
            # than one artifact with the same biom name, so keep the latest
            known = bioms.get(biom_fn)
            if known is None or \
                    getctime(biom['fp']) > getctime(known[1]['fp']):
                bioms[biom_fn] = [a, biom]

        # once we have all the bioms, we can add them to the list of zips
        # and to the readme the biom details and all the processing
        for a, biom in bioms.values():
            aname = basename(biom['fp'])
            nname = f'{a.id}_{aname}'
            files.append([biom['fp'], nname])

            hname = human_names.get(aname, '')
            readme.append(f'{nname}\t{hname}')

            for an in set(a.ancestors.nodes()):
                p = an.processing_parameters
                if p is None:
                    continue
                cmd = p.command
                software = cmd.software
                readme.append(f'\t{cmd.name}\t{software.name}'
                              f'\t{software.version}\t{p.dump()}')

        # if a coverage was found, add it to the list of files
        if coverages is not None:
            fn = basename(coverages)
            readme.append(f'{fn}\tcoverage files')
            files.append([coverages, fn])

        fn = join(td, 'README.txt')
        with open(fn, 'w') as readme_fh:
            readme_fh.write('\n'.join(readme))
        files.append([fn, basename(fn)])

        # the buffer is closed even if adding a file fails
        with BytesIO() as buf:
            with ZipFile(buf, 'w') as zipf:
                for src, arcname in files:
                    zipf.write(src, arcname)
            return buf.getvalue()
510
511
512
class DownloadPublicHandler(BaseHandlerDownload):
    """Downloads public data without requiring a logged in user"""

    @coroutine
    @execute_as_transaction
    def get(self):
        """Serves public raw/biom data or an information file

        Query arguments
        ---------------
        data : str
            One of 'raw', 'biom', 'sample_information', 'prep_information'
        study_id : str, optional
            Required for 'raw', 'biom' and 'sample_information'
        prep_id : str, optional
            Required for 'prep_information'
        data_type : str, optional
            Restricts 'raw'/'biom' to one data type; not allowed for the
            information files

        Raises
        ------
        HTTPError
            422 on invalid/missing arguments, unknown ids, no raw data
            access or nothing to download; 404 if the study is not public
        """
        data = self.get_argument("data", None)
        study_id = self.get_argument("study_id",  None)
        prep_id = self.get_argument("prep_id",  None)
        data_type = self.get_argument("data_type",  None)
        dtypes = get_data_types().keys()

        templates = ['sample_information', 'prep_information']
        valid_data = ['raw', 'biom'] + templates

        if data is None or (study_id is None and prep_id is None) or \
                data not in valid_data:
            raise HTTPError(422, reason='You need to specify both data (the '
                            'data type you want to download - %s) and '
                            'study_id or prep_id' % '/'.join(valid_data))
        if data_type is not None and data_type not in dtypes:
            raise HTTPError(422, reason='Not a valid data_type. Valid types '
                            'are: %s' % ', '.join(dtypes))

        if data in templates:
            # NOTE(review): unlike the study branch below, this branch does
            # not check any public status - confirm this is intended
            if data_type is not None:
                raise HTTPError(422, reason='If requesting an information '
                                'file you cannot specify the data_type')
            if prep_id is not None and data == 'prep_information':
                fname = 'preparation_information_%s' % prep_id
                prep_id = int(prep_id)
                try:
                    infofile = PrepTemplate(prep_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Preparation information does not exist')
            elif study_id is not None and data == 'sample_information':
                fname = 'sample_information_%s' % study_id
                study_id = int(study_id)
                try:
                    infofile = SampleTemplate(study_id)
                except QiitaDBUnknownIDError:
                    raise HTTPError(
                        422, reason='Sample information does not exist')
            else:
                raise HTTPError(422, reason='Review your parameters, not a '
                                'valid combination')

            # sorted descending, so [0] is the first file in that order
            x = retrieve_filepaths(
                infofile._filepath_table, infofile._id_column, infofile.id,
                sort='descending')[0]

            basedir_len = len(get_db_files_base_dir()) + 1
            fp = x['fp'][basedir_len:]
            self._write_nginx_file_list([(fp, fp, '-', str(x['fp_size']))])

            zip_fn = '%s_%s.zip' % (
                fname, datetime.now().strftime('%m%d%y-%H%M%S'))
            self._set_nginx_headers(zip_fn)
        else:
            # raw/biom requests need a study; only prep_id may have been
            # sent, and int(None) would otherwise end in a server error
            if study_id is None:
                raise HTTPError(422, reason='You need to specify study_id '
                                'to download %s data' % data)
            study_id = int(study_id)
            try:
                study = Study(study_id)
            except QiitaDBUnknownIDError:
                raise HTTPError(422, reason='Study does not exist')

            public_raw_download = study.public_raw_download
            if study.status != 'public':
                raise HTTPError(404, reason='Study is not public. If this '
                                'is a mistake contact: %s' %
                                qiita_config.help_email)
            if data == 'raw' and not public_raw_download:
                raise HTTPError(422, reason='No raw data access. If this '
                                'is a mistake contact: %s'
                                % qiita_config.help_email)

            if data == 'biom':
                artifacts = study.artifacts(
                    dtype=data_type, artifact_type='BIOM')
            else:
                # raw data: the artifacts without parents
                artifacts = [a for a in study.artifacts(dtype=data_type)
                             if not a.parents]

            # only public artifacts without human data are offered
            to_download = []
            for a in artifacts:
                if a.visibility != 'public' or a.has_human:
                    continue
                to_download.extend(self._list_artifact_files_nginx(a))

            if not to_download:
                raise HTTPError(422, reason='Nothing to download. If '
                                'this is a mistake contact: %s'
                                % qiita_config.help_email)

            self._write_nginx_file_list(to_download)

            zip_fn = 'study_%d_%s_%s.zip' % (
                study_id, data, datetime.now().strftime(
                    '%m%d%y-%H%M%S'))

            self._set_nginx_headers(zip_fn)

        self.finish()
617
618
619
class DownloadPublicArtifactHandler(BaseHandlerDownload):
    """Downloads a public artifact without requiring a logged in user"""

    @coroutine
    @execute_as_transaction
    def get(self):
        """Lists for nginx the files of the artifact in `artifact_id`

        Raises
        ------
        HTTPError
            422 if `artifact_id` is missing or there is nothing to download;
            404 if the artifact does not exist, is not public or is flagged
            as having human sequences
        """
        artifact_id = self.get_argument("artifact_id", None)
        if artifact_id is None:
            raise HTTPError(422, reason='You need to specify an artifact id')

        try:
            artifact = Artifact(artifact_id)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, reason='Artifact does not exist')

        if artifact.visibility != 'public':
            raise HTTPError(404, reason='Artifact is not public. If '
                            'this is a mistake contact: %s'
                            % qiita_config.help_email)
        if artifact.has_human:
            raise HTTPError(404, reason='Artifact has possible human '
                            'sequences. If this is a mistake contact: '
                            '%s' % qiita_config.help_email)

        to_download = self._list_artifact_files_nginx(artifact)
        if not to_download:
            raise HTTPError(422, reason='Nothing to download. If '
                            'this is a mistake contact: %s'
                            % qiita_config.help_email)

        self._write_nginx_file_list(to_download)

        timestamp = datetime.now().strftime('%m%d%y-%H%M%S')
        zip_fn = 'artifact_%s_%s.zip' % (artifact_id, timestamp)

        self._set_nginx_headers(zip_fn)
        self.finish()
656
657
658
class DownloadPrivateArtifactHandler(BaseHandlerDownload):
    """Creates (POST) and serves (GET) signed artifact download links"""

    @authenticated
    @coroutine
    @execute_as_transaction
    def post(self, artifact_id):
        """Generates a new download link for the artifact

        Steps:
          1. Build a signed jwt specifying the user and the artifact they
             wish to download
          2. Write that jwt to the database keyed by its jti
             (jwt ID/ json token identifier)
          3. Return the jti as a short url to be used for download

        Parameters
        ----------
        artifact_id : str
            The id of the artifact
        """
        user = self.current_user
        artifact = Artifact(artifact_id)

        # Check that user is currently allowed to access artifact, else throw
        check_artifact_access(user, artifact)

        # Generate a jwt id as a random uuid in base64
        jti = b64encode(uuid4().bytes).decode("utf-8")
        # Sign a jwt allowing access; iat/exp are stored in milliseconds and
        # `get` compares them against milliseconds too
        utcnow = datetime.now(timezone.utc)
        expires = utcnow + timedelta(days=7)
        jwt = jose_jwt.encode({
                "artifactId": str(artifact_id),
                "perm": "download",
                "sub": str(user._id),
                "email": str(user.email),
                "iat": int(utcnow.timestamp() * 1000),
                "exp": int(expires.timestamp() * 1000),
                "jti": jti
            },
            qiita_config.jwt_secret,
            algorithm='HS256'
        )

        # Save the jwt to the database
        DownloadLink.create(jwt)

        url = qiita_config.base_url + '/private_download/' + jti
        user_msg = "This link will expire in 7 days on: " + \
                   expires.strftime('%Y-%m-%d')

        self.set_status(200)
        self.finish({"url": url, "msg": user_msg})

    @coroutine
    @execute_as_transaction
    def get(self, jti):
        """Serves the artifact files referenced by a download link

        Parameters
        ----------
        jti : str
            The identifier of the stored jwt

        Raises
        ------
        HTTPError
            404 if there is no jwt stored for `jti`; 403 if the jwt does not
            validate, is not yet valid, has expired or does not grant
            'download'; 422 if there is nothing to download
        """
        # function-scope import: jose reports an invalid token by raising
        from jose import JWTError

        # Grab the jwt out of the database
        jwt = DownloadLink.get(jti)

        # If no jwt, error response
        if jwt is None:
            raise HTTPError(
                404,
                reason='Download Not Found.  Link may have expired.')

        # If jwt doesn't validate, error response. decode raises instead of
        # returning None, so the error is mapped to the 403 below
        try:
            jwt_data = jose_jwt.decode(jwt, qiita_config.jwt_secret, 'HS256')
        except JWTError:
            jwt_data = None
        if jwt_data is None:
            raise HTTPError(403, reason='Invalid JWT')

        # Triple check expiration and user permissions
        user = User(jwt_data["sub"])
        artifact = Artifact(jwt_data["artifactId"])

        utc_millis = datetime.now(timezone.utc).timestamp() * 1000

        if utc_millis < jwt_data["iat"]:
            raise HTTPError(403, reason="This download link is not yet valid")
        if utc_millis > jwt_data["exp"]:
            raise HTTPError(403, reason="This download link has expired")
        if jwt_data["perm"] != "download":
            raise HTTPError(403, reason="This download link is invalid")

        # the user that created the link must still have access
        check_artifact_access(user, artifact)

        # All checks out, let's give them the files then!
        to_download = self._list_artifact_files_nginx(artifact)
        if not to_download:
            raise HTTPError(422, reason='Nothing to download. If '
                                        'this is a mistake contact: %s' %
                                        qiita_config.help_email)

        self._write_nginx_file_list(to_download)

        zip_fn = 'artifact_%s_%s.zip' % (
            jwt_data["artifactId"], datetime.now().strftime(
                '%m%d%y-%H%M%S'))

        self._set_nginx_headers(zip_fn)
        self.finish()