[973924]: / qiita_pet / handlers / qiita_redbiom.py

Download this file

150 lines (130 with data), 6.1 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
# -----------------------------------------------------------------------------
# Copyright (c) 2014--, The Qiita Development Team.
#
# Distributed under the terms of the BSD 3-clause License.
#
# The full license is in the file LICENSE, distributed with this software.
# -----------------------------------------------------------------------------
from requests import ConnectionError
from collections import defaultdict
import redbiom.summarize
import redbiom.search
import redbiom._requests
import redbiom.util
import redbiom.fetch
import redbiom.admin
from tornado.gen import coroutine, Task
from tornado.web import HTTPError
from requests.exceptions import HTTPError as rHTTPError
from qiita_core.util import execute_as_transaction
from qiita_db.util import generate_study_list_without_artifacts
from .base_handlers import BaseHandler
class RedbiomPublicSearch(BaseHandler):
    """Handler for the redbiom search page and its search endpoint.

    ``GET`` renders the ``redbiom.html`` template; ``POST`` runs a search
    (by metadata, feature or taxon) and writes the matching studies back
    as a JSON-serializable dict.
    """

    @execute_as_transaction
    def get(self, search):
        """Render the redbiom search page.

        Any request whose URI is not exactly ``/redbiom/`` is redirected
        to ``/redbiom/`` instead of being rendered.

        Parameters
        ----------
        search : str
            Positional argument supplied by the caller (presumably captured
            by the URL route, which is not visible here); it is not used.
        """
        # making sure that if someone from a portal forces entry to this URI
        # we go to the main portal
        if self.request.uri != '/redbiom/':
            self.redirect('/redbiom/')
            # redirect() finishes the response, so we must stop here:
            # calling render() afterwards raises a RuntimeError in Tornado
            return

        try:
            timestamps = redbiom.admin.get_timestamps()
        except rHTTPError:
            # best effort: the page is still useful without the release date
            timestamps = []

        latest_release = timestamps[0] if timestamps else 'Not reported'
        self.render('redbiom.html', latest_release=latest_release)

    def _redbiom_metadata_search(self, query, contexts):
        """Search redbiom by sample metadata.

        Parameters
        ----------
        query : str
            The metadata query, passed as is to
            ``redbiom.search.metadata_full``.
        contexts : iterable of str
            The redbiom context names to look the found samples up in.

        Returns
        -------
        str
            An error message for the user; empty if the query worked.
        defaultdict of defaultdict of list
            ``{study_id: {artifact_id: [sample_id, ...]}}``; empty if the
            query failed or no samples were found.
        """
        study_artifacts = defaultdict(lambda: defaultdict(list))

        try:
            redbiom_samples = redbiom.search.metadata_full(query, False)
        except ValueError:
            return (
                'Not a valid search: "%s", your query is too small '
                '(too few letters), try a longer query' % query,
                study_artifacts)
        except Exception:
            # deliberately broad: any other failure is reported to the user
            # as a malformed query instead of becoming a server error
            return (
                'The query ("%s") did not work and may be malformed. Please '
                'check the search help for more information on the queries.'
                % query,
                study_artifacts)

        if not redbiom_samples:
            return '', study_artifacts

        for ctx in contexts:
            # redbiom.fetch.data_from_samples returns a biom, which we
            # will ignore, and a dict: {sample_id_in_table: original_id}
            try:
                # if redbiom can't find a valid sample in the context it
                # will raise a ValueError: max() arg is an empty sequence
                _, data = redbiom.fetch.data_from_samples(
                    ctx, redbiom_samples)
            except ValueError:
                continue

            for idx in data:
                # idx is "<sample_id>.<artifact_id>" and the sample_id
                # itself starts with "<study_id>."
                sample_id, aid = idx.rsplit('.', 1)
                sid = sample_id.split('.', 1)[0]
                study_artifacts[sid][aid].append(sample_id)

        return '', study_artifacts

    @staticmethod
    def _add_feature_hits(study_artifacts, features, ctx):
        """Add the ids redbiom returns for `features` in `ctx`.

        Parameters
        ----------
        study_artifacts : defaultdict of defaultdict of list
            ``{study_id: {artifact_id: [sample_id, ...]}}``; updated in
            place.
        features : iterable of str
            The features passed to ``redbiom.util.ids_from``.
        ctx : str
            The redbiom context name to search in.
        """
        for idx in redbiom.util.ids_from(features, False, 'feature', ctx):
            # idx is "<artifact_id>_<sample_id>" and the sample_id starts
            # with "<study_id>."
            aid, sample_id = idx.split('_', 1)
            sid = sample_id.split('.', 1)[0]
            study_artifacts[sid][aid].append(sample_id)

    def _redbiom_feature_search(self, query, contexts):
        """Search redbiom by feature.

        Parameters
        ----------
        query : str
            Space separated features to search for.
        contexts : iterable of str
            The redbiom context names to search in.

        Returns
        -------
        str
            Always an empty message.
        defaultdict of defaultdict of list
            ``{study_id: {artifact_id: [sample_id, ...]}}``
        """
        study_artifacts = defaultdict(lambda: defaultdict(list))
        features = query.split(' ')
        for ctx in contexts:
            self._add_feature_hits(study_artifacts, features, ctx)

        return '', study_artifacts

    def _redbiom_taxon_search(self, query, contexts):
        """Search redbiom by taxon.

        Parameters
        ----------
        query : str
            The taxon, passed as is to ``redbiom.fetch.taxon_descendents``.
        contexts : iterable of str
            The redbiom context names to search in.

        Returns
        -------
        str
            Always an empty message.
        defaultdict of defaultdict of list
            ``{study_id: {artifact_id: [sample_id, ...]}}``

        Raises
        ------
        tornado.web.HTTPError
            504, if the taxon has more than 600 features in a context.
        """
        study_artifacts = defaultdict(lambda: defaultdict(list))
        for ctx in contexts:
            # find the features with those taxonomies and then search
            # those features in the samples
            features = redbiom.fetch.taxon_descendents(ctx, query)
            # from empirical evidence we saw that when we return more than 600
            # features we'll reach issue #2312 so avoiding saturating the
            # workers and raise this error quickly
            if len(features) > 600:
                raise HTTPError(504)

            self._add_feature_hits(study_artifacts, features, ctx)

        return '', study_artifacts

    @execute_as_transaction
    def _redbiom_search(self, query, search_on, callback):
        """Run a redbiom search and hand the outcome to `callback`.

        Parameters
        ----------
        query : str
            The user query.
        search_on : str
            What to search on: 'metadata', 'feature' or 'taxon'.
        callback : callable
            Called once with the tuple ``(results, message)``: ``results``
            is the list returned by ``generate_study_list_without_artifacts``
            with an extra 'artifact_biom_ids' key per study (empty list if
            nothing was found) and ``message`` is the text for the user
            (empty if the search found studies).
        """
        search_f = {'metadata': self._redbiom_metadata_search,
                    'feature': self._redbiom_feature_search,
                    'taxon': self._redbiom_taxon_search}

        message = ''
        results = []

        try:
            df = redbiom.summarize.contexts()
        except ConnectionError:
            message = 'Redbiom is down - contact admin, thanks!'
        else:
            contexts = df.ContextName.values
            if search_on in search_f:
                message, study_artifacts = search_f[search_on](
                    query, contexts)
                if not message:
                    studies = study_artifacts.keys()
                    if studies:
                        results = generate_study_list_without_artifacts(
                            studies)
                        # inserting the artifact_biom_ids to the results
                        for study in results:
                            # study_artifacts is keyed by str study ids
                            study['artifact_biom_ids'] = study_artifacts[
                                str(study['study_id'])]
                    else:
                        message = "No samples were found! Try again ..."
            else:
                message = ('Incorrect search by: you can use metadata, '
                           'features or taxon and you passed: %s' % search_on)

        callback((results, message))

    @coroutine
    @execute_as_transaction
    def post(self, search):
        """Run the requested search and write the result to the response.

        Reads the 'search' (the query) and 'search_on' (the kind of search)
        request arguments and writes a dict with the keys 'status',
        'message' and 'data'.

        Parameters
        ----------
        search : str
            Positional argument supplied by the caller; it is replaced by
            the 'search' request argument.
        """
        search = self.get_argument('search')
        search_on = self.get_argument('search_on')

        data, msg = yield Task(self._redbiom_search, search, search_on)

        self.write({'status': 'success', 'message': msg, 'data': data})