# openomics/database/base.py
import copy
import difflib
import logging
import os
import warnings
from abc import ABC, abstractmethod
from os.path import exists, join
from typing import IO, Any, Callable, Dict, List, Union
from urllib.error import URLError

import dask.dataframe as dd
import filetype
import networkx as nx
import pandas as pd
import tqdm
import validators
from logzero import logger

from ..io.files import get_pkg_data_filename, decompress_file
from ..transforms.agg import get_multi_aggregators, merge_concat
from ..transforms.df import drop_duplicate_columns, match_iterable_keys, has_iterables

__all__ = ['Database', 'Annotatable']


class Database(object):
    """Base class used to instantiate an external Database given a set of files
    from either local paths or URLs. When a Database subclass is created, the
    `load_dataframe()` function is called, where the file_resources are used to
    load (Pandas or Dask) DataFrames, followed by data wrangling to yield the
    dataframe at `self.data`. This class also provides an interface for -omics
    tables, e.g. `ExpressionData`, to be annotated with attributes,
    expressions, sequences, and disease associations.
    """
    data: pd.DataFrame
    COLUMNS_RENAME_DICT = None  # Initialized to None since subclasses may use this field to rename columns in dataframes.

    def __init__(self, path: str, file_resources: Dict = None, index_col=None, keys=None, usecols=None,
                 col_rename: Dict[str, str] = None, blocksize: int = None, write_uncompressed: bool = False,
                 verbose=False,
                 **kwargs):
        """
        Args:
            path:
                The folder or URL path containing the data file resources. If a
                URL path, the files will be downloaded and cached to the user's
                home folder (at ~/.astropy/).
            file_resources:
                Used to list the required files for preprocessing of the
                database. A dictionary where keys are required filenames and
                values are file paths. If None, then the class constructor
                should automatically build the required file resources dict.
            index_col (str): column name, default None.
                If provided, then set_index() the dataframe at self.data by this
                column name.
            keys (pd.Index or List[str]): default None.
                If provided, then filter the rows in self.data with `index_col`
                containing these values.
            col_rename (dict): default None.
                A dictionary to rename columns in the data table. If None, then
                automatically load defaults.
            usecols (list, optional): list of strings, default None.
                If provided, load only this subset of columns when loading the
                dataframes; otherwise load all columns.
            blocksize (str, int, optional): default None.
                Number of bytes by which to cut up larger files. Can be a
                number like 64000000 or a string like "64MB". If None, a single
                block is used for each file.
            write_uncompressed (bool): default False.
                Whether to write the uncompressed file to disk.
            verbose (bool): Default False.
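
        Example:
            A minimal usage sketch with a hypothetical subclass; the file names
            and columns here are illustrative, not part of the API::

                class MyDatabase(Database):
                    def load_dataframe(self, file_resources, blocksize=None):
                        # Parse a tab-delimited annotation file into a DataFrame
                        return pd.read_table(file_resources["genes.txt"])

                db = MyDatabase(path="https://example.org/mydb/",
                                file_resources={"genes.txt": "genes.txt.gz"},
                                col_rename={"geneName": "gene_name"})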
        """
        self.data_path = path
        self.index_col = index_col
        self.keys = keys.compute() if isinstance(keys, (dd.Index, dd.Series)) else keys
        self.usecols = usecols
        self.blocksize = blocksize
        self.verbose = verbose

        self.file_resources = self.load_file_resources(path, file_resources=file_resources,
                                                       write_uncompressed=write_uncompressed, verbose=verbose)

        self.data = self.load_dataframe(self.file_resources, blocksize=blocksize)
        if self.data is not None and col_rename is not None:
            self.data = self.data.rename(columns=col_rename)
            if self.data.index.name in col_rename:
                self.data.index = self.data.index.rename(col_rename[self.data.index.name])

    def __repr__(self):
        out = []
        if hasattr(self, "data") and isinstance(self.data, (pd.DataFrame, dd.DataFrame)):
            out.append("{}: {} {}".format(self.name(), self.data.index.name, self.data.columns.tolist()))
        if hasattr(self, "network") and isinstance(self.network, nx.Graph):
            out.append("{} {}".format(self.network.name, str(self.network)))
        return "\n".join(out)

    def load_file_resources(self, base_path: str, file_resources: Dict[str, str], write_uncompressed: bool = False,
                            verbose=False) -> Dict[str, Any]:
        """For each file in file_resources, download the file if path+file is a
        URL, or load it from disk if it is a local path. Additionally, unzip or
        unrar the file if it is compressed.

        Args:
            base_path (str): The folder or URL path containing the data file
                resources. If a URL path, the files will be downloaded and
                cached to the user's home folder (at ~/.astropy/).
            file_resources (dict): default None. Used to list the required
                files for preprocessing of the database. A dictionary where
                keys are required filenames and values are file paths. If None,
                then the class constructor should automatically build the
                required file resources dict.
            write_uncompressed (bool): default False.
                Whether to write the uncompressed file to disk.
            verbose: default False.
                Whether to show a progress bar for the files being loaded.
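
        Example:
            A hedged sketch of the expected input; the URL and file names are
            illustrative::

                file_resources = self.load_file_resources(
                    base_path="https://example.org/gencode/",
                    file_resources={"annotation.gtf": "annotation.gtf.gz"},
                )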
        """
        file_resources_new = copy.copy(file_resources)
        if base_path.startswith("~"):
            base_path = os.path.expanduser(base_path)

        files_prog = tqdm.tqdm(file_resources.items(), desc='Loading file_resources', disable=not verbose)
        for name, filepath in files_prog:
            if filepath.startswith("~"):
                filepath = os.path.expanduser(filepath)

            if verbose:
                files_prog.set_description("Loading file_resources['{}']".format(name))

            # Remote database file URL
            if validators.url(filepath) or validators.url(join(base_path, filepath)):
                try:
                    filepath = get_pkg_data_filename(base_path, filepath)
                    filepath_ext = filetype.guess(filepath)
                except URLError as ue:
                    logger.error(f"{ue}. Skipping {filepath}")
                    continue
                except TypeError:
                    filepath_ext = None

            # Local database path
            elif isinstance(filepath, str) and (exists(filepath) or exists(join(base_path, filepath))):
                if not exists(filepath):
                    if exists(join(base_path, filepath)):
                        filepath = join(base_path, filepath)
                    else:
                        warnings.warn(f"`base_path` is a local file directory, so all file_resources must be local. "
                                      f"Cannot use `filepath` = {filepath} with `base_path` = {base_path}")
                        continue
                try:
                    filepath_ext = filetype.guess(filepath)
                except Exception:
                    filepath_ext = None

            else:
                # `filepath` is an external file outside of `base_path`
                filepath_ext = None

            # Update filepath on the uncompressed file
            file_resources_new[name] = filepath

            if filepath_ext:
                filestream, new_filename = decompress_file(filepath, name, file_ext=filepath_ext,
                                                           write_uncompressed=write_uncompressed)
                file_resources_new[new_filename] = filestream

        logging.info(f"{self.name()} file_resources: {file_resources_new}")
        return file_resources_new

    @abstractmethod
    def load_dataframe(self, file_resources: Dict[str, str], blocksize: int = None) -> pd.DataFrame:
        """Handles data preprocessing given the file_resources input, and
        returns a DataFrame.

        Args:
            file_resources (dict): A dict with keys as filenames and values as
                full file paths.
            blocksize (int): Number of bytes by which to partition larger
                files with Dask, if provided.
        """

    def close(self):
        # Close file readers on file resources (from decompress_file)
        for filename, filepath in self.file_resources.items():
            if isinstance(filepath, IO) or hasattr(filepath, 'close'):
                filepath.close()

    @classmethod
    def name(cls):
        return cls.__name__

    @staticmethod
    def list_databases():
        return DEFAULT_LIBRARIES

    def get_mapper(self, col_a: str, col_b: str) -> pd.Series:
        """
        Create a mapping between values in self.data[col_a] and values in
        self.data[col_b]. If either `col_a` or `col_b` contains list-like
        data, then the values in those lists are expanded.

        Args:
            col_a (str): column name whose values become the mapping's keys.
            col_b (str): column name whose values become the mapping's values.

        Returns:
            pd.Series
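
        Example:
            A hedged sketch; `gene_id` and `gene_name` are illustrative column
            names::

                mapper = db.get_mapper("gene_id", "gene_name")
                expressions.index = expressions.index.map(mapper)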
        """
        df: Union[pd.DataFrame, dd.DataFrame] = self.data.reset_index()[[col_a, col_b]].dropna()
        if has_iterables(df[col_a]):
            df = df.explode(col_a).dropna()
        if has_iterables(df[col_b]):
            df = df.explode(col_b).dropna()

        mapping = df.set_index(col_a)[col_b]

        return mapping

    def get_annotations(self, on: Union[str, List[str]],
                        columns: List[str],
                        agg: str = "unique",
                        agg_for: Dict[str, Union[str, Callable, dd.Aggregation]] = None,
                        keys: pd.Index = None):
        """Returns the Database's DataFrame grouped by the key(s) in `on`, with
        all other selected columns aggregated by concatenating their unique
        values.

        Args:
            on (str, list): The column name(s) of the DataFrame to group by.
            columns (list): a list of column names to aggregate.
            agg (str): Function to aggregate when there is more than one value
                for each index key. E.g. ['first', 'last', 'sum', 'mean',
                'size', 'unique', 'concat'], default 'unique'.
            agg_for (Dict[str, Any]): Bypass the `agg` function for certain
                columns with functions specified in this dict of column names
                and the `agg` function to aggregate for that column.
            keys (pd.Index): The values on the `on` column to filter rows by
                before performing the groupby-agg operations.

        Returns:
            values: A filtered, grouped, and aggregated dataframe to be used
                for annotation.
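
        Example:
            A hedged sketch, assuming `self.data` contains the illustrative
            columns `gene_name` and `go_id`::

                go_annotations = db.get_annotations(
                    on="gene_name", columns=["go_id"], agg="unique")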
        """
        if not set(columns).issubset(set(self.data.columns).union([self.data.index.name])):
            raise Exception(
                f"The `columns` argument must be a subset of the columns in the dataframe. "
                f"These columns don't exist in `self.data`: {list(set(columns) - set(self.data.columns.tolist()))}"
            )
        elif len(set(columns)) < len(columns):
            raise Exception(f"Duplicate values in `columns`: {columns}")

        # The `columns` list to aggregate shouldn't contain the groupby key(s) in `on`
        if on in columns:
            columns = [col for col in columns if col not in on]

        # All columns, including `on` and `columns`
        select_cols = columns + ([on] if not isinstance(on, list) else on)
        if self.data.index.name in select_cols:
            # Remove self.data's index_col since we can't select the index from the df
            index_col = select_cols.pop(select_cols.index(self.data.index.name))
        else:
            index_col = None

        if isinstance(self.data, pd.DataFrame):
            df = self.data.filter(select_cols, axis="columns")
        elif isinstance(self.data, dd.DataFrame):
            df = self.data[select_cols]
        else:
            raise Exception(f"{self} must have self.data as a pd.DataFrame or dd.DataFrame")

        if index_col and df.index.name != index_col:
            df[index_col] = self.data.index

        # Filter rows in the database if `keys` are provided for the `on` column.
        if keys is not None:
            if isinstance(keys, (dd.Series, dd.Index)):
                keys = keys.compute()

            if not has_iterables(keys.head()):
                if on in df.columns:
                    df = df.loc[df[on].isin(keys)]
                elif on == df.index.name:
                    df = df.loc[df.index.isin(keys)]

        df = drop_duplicate_columns(df)

        # Groupby includes a column that was in the index
        if on != df.index.name and df.index.name in columns:
            groupby = df.reset_index().groupby(on)

        # Groupby on index
        elif on == df.index.name:
            groupby = df.groupby(lambda x: x)

        # Groupby on other columns
        else:
            groupby = df.groupby(on)

        # Aggregate all columns by concatenating unique values
        agg_funcs = get_multi_aggregators(agg, agg_for=agg_for, use_dask=isinstance(df, dd.DataFrame))
        values = groupby.agg({col: agg_funcs[col] for col in columns})

        return values

    def get_expressions(self, index):
        """
        Args:
            index (str): the column name to group the expressions by.
        """
        # TODO if indexed by gene, aggregate medians of transcript-level expressions
        return self.data.groupby(index).median()


class Annotatable(ABC):
    """This abstract class provides an interface for the -omics
    (:class:`Expression`) classes to annotate their gene lists with external
    data downloaded from various databases. The database is imported as
    attribute information added to the genes' annotations, or as interactions
    between the genes.
    """
    SEQUENCE_COL = "sequence"
    DISEASE_ASSOCIATIONS_COL = "disease_associations"

    def get_annotations(self):
        if hasattr(self, "annotations"):
            return self.annotations
        else:
            raise Exception(
                "{} must run init_annotations() first.".format(
                    self.name()))

    def get_annotation_expressions(self):
        if hasattr(self, "annotation_expressions"):
            return self.annotation_expressions
        else:
            raise Exception("{} must run annotate_expressions() first.".format(
                self.name()))

    def init_annotations(self, index=None):
        """
        Args:
            index: a list or pd.Index of gene names to initialize the
                annotations DataFrame with. If None, defaults to
                self.get_genes_list().
        """
        if hasattr(self, 'annotations') and isinstance(self.annotations,
                                                       (pd.DataFrame, dd.DataFrame)) and not self.annotations.empty:
            warnings.warn("Cannot initialize annotations because annotations already exist.")
            return

        if index is None:
            index = self.get_genes_list()

        self.annotations: pd.DataFrame = pd.DataFrame(index=index)

    def annotate_attributes(self, database: Union[Database, pd.DataFrame], on: Union[str, List[str]],
                            columns: List[str], agg: str = "unique", agg_for: Dict[str, Any] = None,
                            fuzzy_match: bool = False, list_match=False):
        """Performs a left outer join between the annotation and the Database's
        DataFrame, on the keys in the `on` column. The `on` argument must be a
        column present in both DataFrames. If the join yields overlapping
        columns, then .fillna() is used to fill NaN values in the old column
        with non-NaN values from the new column.

        Args:
            database (Database): Database which contains a dataframe.
            on (str): The column name which exists in both the annotations and
                the Database dataframe to perform the join on.
            columns ([str]): a list of column names to join to the annotation.
            agg (str): Function to aggregate when there is more than one value
                for each index instance. E.g. ['first', 'last', 'sum', 'mean',
                'unique', 'concat'], default 'unique'.
            agg_for (Dict[str, Any]): Bypass the `agg` function for certain
                columns with functions specified in this dict of column names
                and the `agg` function to aggregate for that column.
            fuzzy_match (bool): default False.
                Whether to join the annotation by applying a fuzzy match on the
                string value index with difflib.get_close_matches(). It can be
                slow and thus should only be used sparingly.
            list_match (bool): default False.
                Whether to match keys with list-like values, when either the
                annotation keys or the database index contain lists. See the
                sketch in the Example below.
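
        Example:
            A hedged usage sketch; `gencode` stands for any Database instance
            and the column names are illustrative::

                mrna.init_annotations(index=mrna.get_genes_list())
                mrna.annotate_attributes(database=gencode, on="gene_id",
                                         columns=["gene_name", "transcript_id"])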
        """
        if not hasattr(self, "annotations"):
            raise Exception(f"Must run .init_annotations() on {self.__class__.__name__} first.")

        left_df = self.annotations
        if isinstance(on, str) and on in left_df.columns:
            keys = left_df[on]
        elif isinstance(on, list) and set(on).issubset(left_df.columns):
            keys = left_df[on]
        elif on == left_df.index.name:
            keys = left_df.index
        elif hasattr(left_df.index, 'names') and on in left_df.index.names:
            # MultiIndex
            keys = left_df.index.get_level_values(on)
        else:
            keys = None

        # Get grouped values from `database`
        if isinstance(database, (pd.DataFrame, dd.DataFrame)):
            df = database
            if on == df.index.name and on in df.columns:
                df.pop(on)  # Avoid ambiguous groupby col error
            agg_funcs = get_multi_aggregators(agg=agg, agg_for=agg_for, use_dask=isinstance(df, dd.DataFrame))
            if on == df.index.name or df.index.name in columns:
                groupby = df.reset_index().groupby(on)
            else:
                groupby = df.groupby(on)
            right_df = groupby.agg({col: agg_funcs[col] for col in columns})
        else:
            right_df = database.get_annotations(on, columns=columns, agg=agg, agg_for=agg_for, keys=keys)

        # Match values between `self.annotations[on]` and `right_df.index`.
        left_on = right_on = on
        orig_keys = left_df.index
        # Fuzzy match between string key values
        if fuzzy_match:
            left_on = right_df.index.map(
                lambda x: difflib.get_close_matches(x, self.annotations.index, n=1)[0])
        # Join on keys of 'list' values if they exist in either `keys` or `right_df.index`
        elif list_match:
            left_on, right_on = match_iterable_keys(left=keys, right=right_df.index)

        # Set whether to join on index
        left_index = isinstance(left_on, str) and left_df.index.name == left_on
        right_index = isinstance(right_on, str) and right_df.index.name == right_on
        if left_index:
            left_on = None
        if right_index:
            right_on = None

        # Perform a join if `on` is already left_df's index
        try:
            if isinstance(left_df, type(right_df)) and left_index:
                merged = left_df.join(right_df, on=on, how="left", rsuffix="_")

            # Perform a merge if `on` is not the index, choosing the appropriate merge func for Dask or Pandas DFs
            else:
                if isinstance(left_df, pd.DataFrame) and isinstance(right_df, dd.DataFrame):
                    merged = dd.merge(left_df, right_df, how="left", left_on=left_on, left_index=left_index,
                                      right_on=right_on, right_index=right_index, suffixes=("", "_"))
                else:
                    merged = left_df.merge(right_df, how="left", left_on=left_on, left_index=left_index,
                                           right_on=right_on, right_index=right_index, suffixes=("", "_"))
        except Exception:
            logger.error(f"Join failed with left_index={left_index}, left_on={left_on}, "
                         f"right_index={right_index}, right_on={right_on}")
            raise

        if list_match:
            if 'key_0' in merged.columns:
                merged = merged.drop(columns=['key_0'])
            # If `left_on` was modified keys, then revert to the original keys
            merged.index = orig_keys

        # Merge columns if the database DataFrame has columns overlapping with existing columns
        duplicate_cols = {col: col.rstrip("_") for col in merged.columns if col.endswith("_")}

        if duplicate_cols:
            new_annotations = merged[list(duplicate_cols.keys())].rename(columns=duplicate_cols)
            logger.info(f"merging {new_annotations.columns}")

            # Combine new values with old values in overlapping columns
            assign_fn = {old_col: merged[old_col].combine(merged[new_col], func=merge_concat) \
                         for new_col, old_col in duplicate_cols.items()}
            merged = merged.assign(**assign_fn)
            # then drop duplicate columns with "_" suffix
            merged = merged.drop(columns=list(duplicate_cols.keys()))

        # Revert back to a Pandas DF if not previously a Dask DF
        if isinstance(left_df, pd.DataFrame) and isinstance(merged, dd.DataFrame):
            merged = merged.compute()

        # Assign the new results
        self.annotations = merged

        return self

    def annotate_sequences(self,
                           database,
                           on: Union[str, List[str]],
                           agg="longest",
                           omic=None,
                           **kwargs):
        """Annotate the gene list (based on the index) with a dictionary of
        <gene_name: sequence>. If there are multiple sequences per gene name,
        then performs an aggregation.

        Args:
            database (SequenceDatabase): The database to fetch sequences from.
            on (str): The gene index column name.
            agg (str): The aggregation method, one of ["longest", "shortest",
                "all"]. Default "longest".
            omic (str): Default None. Declare the omic type to fetch sequences
                for.
            **kwargs:
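
        Example:
            A hedged sketch; `gencode` stands for any SequenceDatabase and is
            illustrative::

                mrna.annotate_sequences(database=gencode, on="gene_id", agg="longest")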
        """
        if omic is None:
            omic = self.name()

        sequences = database.get_sequences(index=on, omic=omic, agg=agg, **kwargs)

        # Map sequences to the keys of the `on` column(s).
        if isinstance(self.annotations.index, pd.MultiIndex) and on in self.annotations.index.names:
            seqs = self.annotations.index.get_level_values(on).map(sequences)

        elif self.annotations.index.name == on:
            seqs = self.annotations.index.map(sequences)

        elif isinstance(on, list):
            # The key spans multiple columns
            seqs = pd.MultiIndex.from_frame(self.annotations.reset_index()[on]).map(sequences)
        else:
            seqs = pd.Index(self.annotations.reset_index()[on]).map(sequences)

        if isinstance(self.annotations, dd.DataFrame) and isinstance(seqs, pd.Series):
            seqs = dd.from_pandas(seqs, npartitions=self.annotations.npartitions)

        self.annotations = self.annotations.assign(**{Annotatable.SEQUENCE_COL: seqs})

        return self

    def annotate_expressions(self, database, index, fuzzy_match=False):
        """
        Args:
            database: a Database whose expressions to join.
            index (str): the index name to join the expressions on; must match
                the annotations index name.
            fuzzy_match (bool): currently unused.
        """
        self.annotation_expressions = pd.DataFrame(index=self.annotations.index)

        if self.annotations.index.name == index:
            self.annotation_expressions = self.annotation_expressions.join(
                database.get_expressions(index=index))
        else:
            raise Exception(f"`index` must match the annotations index name: {self.annotations.index.name}")

        return self

    def annotate_interactions(self, database, index):
        """
        Args:
            database (Interactions):
            index (str):
        """
        raise NotImplementedError("Use HeteroNetwork from `moge` package instead")

    def annotate_diseases(self, database, on: Union[str, List[str]]):
        """
        Args:
            database (DiseaseAssociation):
            on (str):
        """
        if on == self.annotations.index.name or (
            hasattr(self.annotations.index, 'names') and on == self.annotations.index.names):
            keys = self.annotations.index
        else:
            keys = self.annotations[on]

        groupby_agg = database.get_disease_assocs(index=on)

        if isinstance(keys, (pd.DataFrame, pd.Series, pd.Index)):
            if isinstance(keys, pd.DataFrame):
                keys = pd.MultiIndex.from_frame(keys)

            values = keys.map(groupby_agg)

        elif isinstance(keys, dd.DataFrame):
            values = keys.apply(lambda x: groupby_agg.loc[x], axis=1, meta=pd.Series([['']]))
        elif isinstance(keys, dd.Series):
            values = keys.map(groupby_agg)
        else:
            raise Exception(f"Unsupported type for `keys`: {type(keys)}")

        self.annotations = self.annotations.assign(**{Annotatable.DISEASE_ASSOCIATIONS_COL: values})

    def set_index(self, new_index):
        """Sets the index of `self.annotations` to the `new_index` column,
        filling missing values in that column from the current index.

        Args:
            new_index (str): the column name to set as the new index.
        """
        self.annotations[new_index] = self.annotations[new_index].fillna(
            self.annotations.index.to_series())
        self.annotations = self.annotations.reset_index().set_index(new_index)

    def get_rename_dict(self, from_index, to_index):
        """
        Utility function used to retrieve a lookup dictionary to convert from one
        index to another, e.g., gene_id to gene_name, obtained from two columns
        in the dataframe.

        Args:
            from_index (str): the column whose values are the lookup keys.
            to_index (str): the column whose values are the lookup values.

        Returns:
            Dict[str, str]: the lookup dictionary.
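
        Example:
            A hedged sketch; `gene_id` and `gene_name` are illustrative column
            names::

                rename_dict = self.get_rename_dict("gene_id", "gene_name")
                expressions = expressions.rename(columns=rename_dict)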
        """
        if self.annotations.index.name in [from_index, to_index]:
            dataframe = self.annotations.reset_index()
        else:
            dataframe = self.annotations

        dataframe = dataframe[dataframe[to_index].notnull()]
        return pd.Series(dataframe[to_index].values,
                         index=dataframe[from_index]).to_dict()


DEFAULT_LIBRARIES = [
    "10KImmunomes",
    "BioGRID",
    "CCLE",
    "DisGeNET",
    "ENSEMBL",
    "GENCODE",
    "GeneMania",
    "GeneOntology",
    "GlobalBiobankEngine",
    "GTEx",
    "HMDD_miRNAdisease",
    "HPRD_PPI",
    "HUGO_Gene_names",
    "HumanBodyMapLincRNAs",
    "IntAct",
    "lncBase",
    "LNCipedia",
    "LncReg",
    "lncRInter",
    "lncrna2target",
    "lncRNA_data_repository",
    "lncrnadisease",
    "lncRNome",
    "mirbase",
    "miRTarBase",
    "NHLBI_Exome_Sequencing_Project",
    "NONCODE",
    "NPInter",
    "PIRD",
    "RegNetwork",
    "RISE_RNA_Interactions",
    "RNAcentral",
    "StarBase_v2.0",
    "STRING_PPI",
    "TargetScan",
]