a b/qiita_pet/handlers/upload.py
1
# -----------------------------------------------------------------------------
2
# Copyright (c) 2014--, The Qiita Development Team.
3
#
4
# Distributed under the terms of the BSD 3-clause License.
5
#
6
# The full license is in the file LICENSE, distributed with this software.
7
# -----------------------------------------------------------------------------
8
9
from tornado.web import authenticated, HTTPError
10
11
from os.path import join, exists
12
from os import remove, chmod
13
from json import loads, dumps
14
15
from collections import defaultdict
16
from shutil import rmtree, move
17
18
from .util import check_access
19
from .base_handlers import BaseHandler
20
21
from qiita_core.qiita_settings import qiita_config, r_client
22
from qiita_core.util import execute_as_transaction
23
from qiita_db.util import (get_files_from_uploads_folders,
24
                           get_mountpoint, move_upload_files_to_trash)
25
from qiita_db.study import Study
26
from qiita_db.processing_job import ProcessingJob
27
from qiita_db.software import Software, Parameters
28
from qiita_db.exceptions import QiitaDBUnknownIDError
29
from qiita_db.util import create_nested_path
30
31
32
# Key template, filled with the study id, under which ``r_client`` keeps the
# JSON-encoded information of the remote-upload job submitted for a study.
UPLOAD_STUDY_FORMAT = 'upload_study_%s'
33
34
35
class StudyUploadFileHandler(BaseHandler):
    """Show the upload page of a study and trash files selected on it."""

    @authenticated
    @execute_as_transaction
    def display_template(self, study_id, msg):
        """Render ``upload.html`` for a study.

        ``study_id`` is converted with ``int`` and used to load the study.
        ``msg`` is not used by this method; it is kept so that existing
        callers keep working.

        ``check_access`` is called with ``raise_error=True``, so users
        without access to the study are expected to be rejected there.
        """
        study_id = int(study_id)
        study = Study(study_id)
        user = self.current_user
        check_access(user, study, no_public=True, raise_error=True)

        # values shown when no remote-upload job is registered for the study
        level = 'info'
        message = ''
        remote_url = ''
        remote_files = []

        job_info = r_client.get(UPLOAD_STUDY_FORMAT % study_id)
        if job_info:
            # keys missing from the stored JSON read as ''
            job_info = defaultdict(str, loads(job_info))
            job = ProcessingJob(job_info['job_id'])
            job_status = job.status
            url = job.parameters.values['url']
            if job_status not in ('success', 'error'):
                # any other status is reported as still in progress
                if job.command.name == 'list_remote_files':
                    message = 'Retrieving remote files: listing %s' % url
                else:
                    message = 'Retrieving remote files: download %s' % url
            elif job_status == 'error':
                level = 'danger'
                message = job.log.msg.replace('\n', '</br>')
                # making errors nicer for users
                # NOTE(review): url is user supplied and ends up inside
                # markup; confirm the template escapes it.
                if 'No such file' in message:
                    message = 'URL not valid: <i>%s</i>, please review.' % url
            else:
                # successful job: show whatever was stored with the job info
                remote_url = job_info['url']
                remote_files = job_info['files']
                level = job_info['alert_type']
                message = job_info['alert_msg'].replace('\n', '</br>')

        # the list of uploaded files is read at render time
        self.render('upload.html',
                    study_title=study.title, study_info=study.info,
                    study_id=study_id, is_admin=user.level == 'admin',
                    extensions=','.join(qiita_config.valid_upload_extension),
                    max_upload_size=qiita_config.max_upload_size, level=level,
                    message=message, remote_url=remote_url,
                    remote_files=remote_files,
                    files=get_files_from_uploads_folders(str(study_id)))

    @authenticated
    @execute_as_transaction
    def get(self, study_id):
        """Display the upload page; 404 if the study does not exist."""
        try:
            study = Study(int(study_id))
        except QiitaDBUnknownIDError:
            raise HTTPError(404, reason="Study %s does not exist" % study_id)
        check_access(self.current_user, study, no_public=True,
                     raise_error=True)
        self.display_template(study_id, "")

    @authenticated
    @execute_as_transaction
    def post(self, study_id):
        """Move the files listed in ``files_to_erase`` to the trash.

        Each ``files_to_erase`` value has the form ``<dir_id>-<filename>``.
        A ``dir_id`` of ``undefined`` is replaced by the id of the first
        "uploads" mountpoint. Malformed values are answered with HTTP 400
        and nothing is moved. The upload page is rendered afterwards.
        """
        try:
            study = Study(int(study_id))
        except QiitaDBUnknownIDError:
            raise HTTPError(404, reason="Study %s does not exist" % study_id)
        check_access(self.current_user, study, no_public=True,
                     raise_error=True)

        files_to_move = []
        for entry in self.get_arguments('files_to_erase', strip=True):
            # only the first '-' separates the id; names may contain more
            dir_id, separator, filename = entry.partition('-')
            if not separator or not filename:
                raise HTTPError(400, reason="Malformed files_to_erase value")

            # if the file was just uploaded JS will not know which id the
            # current upload folder has so we need to retrieve it
            if dir_id == 'undefined':
                dir_id, _ = get_mountpoint("uploads")[0]

            try:
                dir_id = int(dir_id)
            except (TypeError, ValueError):
                raise HTTPError(400, reason="Malformed files_to_erase value")

            files_to_move.append((dir_id, filename))

        move_upload_files_to_trash(study.id, files_to_move)

        self.display_template(study_id, "")
118
119
120
class StudyUploadViaRemote(BaseHandler):
    """Submit a job that lists or downloads the files of a remote URL."""

    @authenticated
    @execute_as_transaction
    def post(self, study_id):
        """Store the uploaded SSH key and submit the requested remote job.

        Request arguments: ``remote-request-type`` (``list`` or
        ``transfer``), ``inputURL`` and the uploaded file ``ssh-key``.

        The response is a JSON object with ``status`` and ``message``. An
        unknown request type is answered with ``status`` ``error`` and
        nothing is written to disk. On success the id of the submitted job
        is stored in ``r_client`` under the study's upload key.

        Raises ``HTTPError`` 400 if the ``ssh-key`` file is missing and 404
        if the study does not exist.
        """
        # local import: only this method needs the low-level os calls
        import os

        method = self.get_argument('remote-request-type')
        url = self.get_argument('inputURL')
        try:
            ssh_key = self.request.files['ssh-key'][0]['body']
        except (KeyError, IndexError):
            raise HTTPError(400, reason="Missing ssh-key file")

        try:
            study = Study(int(study_id))
        except QiitaDBUnknownIDError:
            raise HTTPError(404, reason="Study %s does not exist" % study_id)
        check_access(
            self.current_user, study, no_public=True, raise_error=True)

        # reject unknown request types before the key touches the disk
        if method not in ('list', 'transfer'):
            self.write({'status': 'error', 'message': 'Not a valid method'})
            return

        _, upload_folder = get_mountpoint("uploads")[0]
        upload_folder = join(upload_folder, study_id)
        ssh_key_fp = join(upload_folder, '.key.txt')

        create_nested_path(upload_folder)

        # create the key file already restricted to its owner so that the
        # private key is never readable with umask-default permissions
        key_fd = os.open(
            ssh_key_fp, os.O_WRONLY | os.O_CREAT | os.O_TRUNC, 0o600)
        with os.fdopen(key_fd, 'wb') as f:
            f.write(ssh_key)
        # the creation mode is ignored if the file already existed
        chmod(ssh_key_fp, 0o600)

        qiita_plugin = Software.from_name_and_version('Qiita', 'alpha')
        if method == 'list':
            cmd = qiita_plugin.get_command('list_remote_files')
            values = {
                'url': url, 'private_key': ssh_key_fp, 'study_id': study_id}
        else:
            cmd = qiita_plugin.get_command('download_remote_files')
            values = {
                'url': url, 'private_key': ssh_key_fp,
                'destination': upload_folder}
        params = Parameters.load(cmd, values_dict=values)

        job = ProcessingJob.create(self.current_user, params, True)
        job.submit()
        # remember the job so the upload page can report its progress
        r_client.set(
            UPLOAD_STUDY_FORMAT % study_id, dumps({'job_id': job.id}))

        self.write({'status': 'success', 'message': ''})
168
169
170
class UploadFileHandler(BaseHandler):
    """Main upload class.

    Based on
    https://github.com/23/resumable.js/blob/master/samples/Backend%20on%20PHP.md
    """

    def validate_file_extension(self, filename):
        """Reject file names without an allowed upload extension.

        This validation is server side in case they can go around the client
        side validation. Raises ``HTTPError`` 415 when ``filename`` does not
        end with one of ``qiita_config.valid_upload_extension``.
        """
        if not filename.endswith(tuple(qiita_config.valid_upload_extension)):
            self.set_status(415)
            raise HTTPError(415, reason="User %s is trying to upload %s" %
                            (self.current_user, str(filename)))

    @staticmethod
    def _check_path_component(value, argument):
        """Raise ``HTTPError`` 400 unless ``value`` is a single path component.

        ``value`` comes from the client and is joined into filesystem paths,
        so separators, ``.``/``..`` and empty values are refused to keep the
        resulting path inside the folder it is joined to.
        """
        if (value in ('', '.', '..') or '/' in value or '\\' in value or
                '\x00' in value):
            raise HTTPError(400, reason="Invalid %s" % argument)

    def _check_study_access(self, study_id):
        """Return ``study_id`` as ``int`` after checking the user's access.

        Raises ``HTTPError`` 400 if ``study_id`` is not an integer and 404 if
        the study does not exist.
        """
        try:
            sid = int(study_id)
        except ValueError:
            raise HTTPError(400, reason="Invalid study_id")
        try:
            study = Study(sid)
        except QiitaDBUnknownIDError:
            raise HTTPError(404, reason="Study %s does not exist" % sid)
        check_access(self.current_user, study, no_public=True,
                     raise_error=True)
        return sid

    @authenticated
    @execute_as_transaction
    def post(self):
        """Append one uploaded chunk and finalize the file on the last one.

        Chunks are appended to a temporary file inside
        ``<uploads>/<study_id>/<resumableIdentifier>/``. When the chunk number
        equals the total number of chunks the file is moved to
        ``<uploads>/<study_id>/`` (replacing a file of the same name), the
        temporary folder is removed and the status is set to 200.
        """
        resumable_identifier = self.get_argument('resumableIdentifier')
        resumable_filename = self.get_argument('resumableFilename')
        resumable_chunk_number = int(self.get_argument('resumableChunkNumber'))
        resumable_total_chunks = int(self.get_argument('resumableTotalChunks'))
        study_id = self.get_argument('study_id')
        data = self.request.files['file'][0]['body']

        # use the normalized id as folder name instead of the raw argument
        study_dir = str(self._check_study_access(study_id))

        self.validate_file_extension(resumable_filename)

        # both values are joined into paths that are written and deleted
        self._check_path_component(resumable_identifier,
                                   'resumableIdentifier')
        self._check_path_component(resumable_filename, 'resumableFilename')

        _, base_fp = get_mountpoint("uploads")[0]

        # creating temporal folder for upload of the file
        temp_dir = join(base_fp, study_dir, resumable_identifier)
        create_nested_path(temp_dir)

        # location of the file as it is transmitted
        temporary_location = join(temp_dir, resumable_filename)

        # this is the result of a failed upload
        if resumable_chunk_number == 1 and exists(temporary_location):
            remove(temporary_location)

        # append every transmitted chunk
        with open(temporary_location, 'ab') as tmp_file:
            tmp_file.write(bytes(data))

        if resumable_chunk_number == resumable_total_chunks:
            final_location = join(base_fp, study_dir, resumable_filename)

            if exists(final_location):
                remove(final_location)

            move(temporary_location, final_location)
            rmtree(temp_dir)
            self.set_status(200)

    @authenticated
    @execute_as_transaction
    def get(self):
        """ this is the first point of entry into the upload service

        this should either set the status as 400 (error) so the file/chunk is
        sent via post or 200 (valid) to not send the file
        """
        study_id = self.get_argument('study_id')
        resumable_filename = self.get_argument('resumableFilename')

        self._check_study_access(study_id)

        self.validate_file_extension(resumable_filename)

        # in the original version we used to check if a chunk was already
        # uploaded and if it was we would send self.set_status(200). Now, as
        # we are not chunking by file we can simply pass the no exists
        # response
        self.set_status(400)