--- a +++ b/qiita_pet/handlers/qiita_redbiom.py @@ -0,0 +1,149 @@ +# ----------------------------------------------------------------------------- +# Copyright (c) 2014--, The Qiita Development Team. +# +# Distributed under the terms of the BSD 3-clause License. +# +# The full license is in the file LICENSE, distributed with this software. +# ----------------------------------------------------------------------------- + +from requests import ConnectionError +from collections import defaultdict +import redbiom.summarize +import redbiom.search +import redbiom._requests +import redbiom.util +import redbiom.fetch +import redbiom.admin +from tornado.gen import coroutine, Task +from tornado.web import HTTPError +from requests.exceptions import HTTPError as rHTTPError + +from qiita_core.util import execute_as_transaction +from qiita_db.util import generate_study_list_without_artifacts + +from .base_handlers import BaseHandler + + +class RedbiomPublicSearch(BaseHandler): + @execute_as_transaction + def get(self, search): + # making sure that if someone from a portal forces entry to this URI + # we go to the main portal + try: + timestamps = redbiom.admin.get_timestamps() + except (rHTTPError): + timestamps = [] + + if timestamps: + latest_release = timestamps[0] + else: + latest_release = 'Not reported' + if self.request.uri != '/redbiom/': + self.redirect('/redbiom/') + self.render('redbiom.html', latest_release=latest_release) + + def _redbiom_metadata_search(self, query, contexts): + study_artifacts = defaultdict(lambda: defaultdict(list)) + message = '' + try: + redbiom_samples = redbiom.search.metadata_full(query, False) + except ValueError: + message = ( + 'Not a valid search: "%s", your query is too small ' + '(too few letters), try a longer query' % query) + except Exception: + message = ( + 'The query ("%s") did not work and may be malformed. Please ' + 'check the search help for more information on the queries.' + % query) + if not message and redbiom_samples: + study_artifacts = defaultdict(lambda: defaultdict(list)) + for ctx in contexts: + # redbiom.fetch.data_from_samples returns a biom, which we + # will ignore, and a dict: {sample_id_in_table: original_id} + try: + # if redbiom can't find a valid sample in the context it + # will raise a ValueError: max() arg is an empty sequence + _, data = redbiom.fetch.data_from_samples( + ctx, redbiom_samples) + except ValueError: + continue + for idx in data.keys(): + sample_id, aid = idx.rsplit('.', 1) + sid = sample_id.split('.', 1)[0] + study_artifacts[sid][aid].append(sample_id) + + return message, study_artifacts + + def _redbiom_feature_search(self, query, contexts): + study_artifacts = defaultdict(lambda: defaultdict(list)) + query = [f for f in query.split(' ')] + for ctx in contexts: + for idx in redbiom.util.ids_from(query, False, 'feature', ctx): + aid, sample_id = idx.split('_', 1) + sid = sample_id.split('.', 1)[0] + study_artifacts[sid][aid].append(sample_id) + + return '', study_artifacts + + def _redbiom_taxon_search(self, query, contexts): + study_artifacts = defaultdict(lambda: defaultdict(list)) + for ctx in contexts: + # find the features with those taxonomies and then search + # those features in the samples + features = redbiom.fetch.taxon_descendents(ctx, query) + # from empirical evidence we saw that when we return more than 600 + # features we'll reach issue #2312 so avoiding saturating the + # workers and raise this error quickly + if len(features) > 600: + raise HTTPError(504) + for idx in redbiom.util.ids_from(features, False, 'feature', ctx): + aid, sample_id = idx.split('_', 1) + sid = sample_id.split('.', 1)[0] + study_artifacts[sid][aid].append(sample_id) + + return '', study_artifacts + + @execute_as_transaction + def _redbiom_search(self, query, search_on, callback): + search_f = {'metadata': self._redbiom_metadata_search, + 'feature': self._redbiom_feature_search, + 'taxon': self._redbiom_taxon_search} + + message = '' + results = [] + + try: + df = redbiom.summarize.contexts() + except ConnectionError: + message = 'Redbiom is down - contact admin, thanks!' + else: + contexts = df.ContextName.values + if search_on in search_f: + message, study_artifacts = search_f[search_on](query, contexts) + if not message: + studies = study_artifacts.keys() + if studies: + results = generate_study_list_without_artifacts( + studies) + # inserting the artifact_biom_ids to the results + for i in range(len(results)): + results[i]['artifact_biom_ids'] = study_artifacts[ + str(results[i]['study_id'])] + else: + message = "No samples were found! Try again ..." + else: + message = ('Incorrect search by: you can use metadata, ' + 'features or taxon and you passed: %s' % search_on) + + callback((results, message)) + + @coroutine + @execute_as_transaction + def post(self, search): + search = self.get_argument('search') + search_on = self.get_argument('search_on') + + data, msg = yield Task(self._redbiom_search, search, search_on) + + self.write({'status': 'success', 'message': msg, 'data': data})