Switch to unified view

a b/scripts/analysis_zebrafish.py
1
import os, sys
2
3
import pandas as pd
4
from loguru import logger
5
6
from pyMultiOmics.base import SingleOmicsData, MultiOmicsData
7
8
sys.path.append('..')
9
10
from pyMultiOmics.constants import *
11
from pyMultiOmics.mapping import Mapper
12
from pyMultiOmics.common import set_log_level_info
13
from pyMultiOmics.analysis import *
14
from pyMultiOmics.query import *
15
from pyMultiOmics.pipelines import *
16
17
18
DATA_FOLDER = os.path.abspath(os.path.join('..', 'notebooks', 'zebrafish_data'))
19
gene_data = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_data_combined.csv'), index_col='Identifier')
20
gene_design = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_design.csv'), index_col='sample')
21
protein_data = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Uniprot')
22
protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')
23
compound_data = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data_kegg.csv'), index_col='Identifier')
24
compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')
25
26
set_log_level_info()
27
28
gene_so = SingleOmicsData(GENES, gene_data, gene_design)
29
prot_so = SingleOmicsData(GENES, protein_data, protein_design)
30
comp_so = SingleOmicsData(GENES, compound_data, compound_design)
31
32
mo = MultiOmicsData()
33
mo.add_data([gene_so, prot_so, comp_so])
34
35
m = Mapper(mo, DANIO_RERIO, metabolic_pathway_only=True) \
36
        .build()
37
38
ap = AnalysisPipeline(mo, m)
39
40
# method = INFERENCE_DESEQ
41
method = INFERENCE_T_TEST
42
ap.run_de(method, GENES, 'Distal', 'Proximal')
43
ap.run_de(method, GENES, 'Distal', 'Middle')
44
ap.run_de(method, GENES, 'Proximal', 'Middle')
45
46
method = INFERENCE_T_TEST
47
ap.run_de(method, PROTEINS, 'Distal', 'Proximal')
48
ap.run_de(method, PROTEINS, 'Distal', 'Middle')
49
ap.run_de(method, PROTEINS, 'Proximal', 'Middle')
50
ap.run_de(method, COMPOUNDS, 'Distal', 'Proximal')
51
ap.run_de(method, COMPOUNDS, 'Distal', 'Middle')
52
ap.run_de(method, COMPOUNDS, 'Proximal', 'Middle')
53
54
# Retrieve a single node
55
node_id = '15366'
56
res = QueryBuilder(ap) \
57
        .add(Entity(node_id)) \
58
        .run()
59
logger.info(res)
60
61
# Retrieve multiple nodes
62
node_id = ['15366', 'ENSDARG00000037781', 'F1QAA7']
63
res = QueryBuilder(ap) \
64
        .add(Entity(node_id)) \
65
        .run()
66
logger.info(res)
67
68
# Retrieve nodes connected to a single node
69
query_id = 'F1QAA7'
70
res = QueryBuilder(ap) \
71
        .add(Entity(query_id)) \
72
        .add(Connected()) \
73
        .run()
74
logger.info(res)
75
76
# Retrieve top-10 significantly changing genes
77
case = 'Distal'
78
control = 'Proximal'
79
pval = 0.05
80
fc_lte = -2
81
fc_gte = 2
82
N = 20
83
res = QueryBuilder(ap) \
84
        .add(Select(GENES)) \
85
        .add(SignificantDE(case, control, pval, fc_lte=fc_lte, fc_gte=fc_gte, N=N)) \
86
        .run()
87
logger.info(res)
88
89
# Find the compounds that are connected to the DE genes above
90
91
res = QueryBuilder(ap) \
92
        .add(Select(GENES)) \
93
        .add(SignificantDE(case, control, pval, fc_lte=fc_lte, fc_gte=fc_gte, N=N)) \
94
        .add(Connected(data_type=COMPOUNDS)) \
95
        .run()
96
logger.info(res)
97
98
# Plot some heatmap using Plotly
99
100
res = QueryBuilder(ap) \
101
        .add(Select(GENES)) \
102
        .add(SignificantDE(case, control, pval, fc_lte=fc_lte, fc_gte=fc_gte, N=N)) \
103
        .run()
104
logger.info(res)
105
106
data_type = GENES
107
analysis = ap.get_de_analysis(data_type, case, control)
108
wi = analysis.wi
109
data_df, design_df = wi.data_df, wi.design_df
110
111
case_group = design_df[design_df['group'] == case].index.values.tolist()
112
control_group = design_df[design_df['group'] == control].index.values.tolist()
113
selection = case_group + control_group
114
115
heatmap_df = wi.data_df.loc[res.index.values]
116
heatmap_df = heatmap_df[selection]
117
heatmap_df
118
119
from plotly import express as px
120
px.imshow(wi._normalise_df(heatmap_df, log=True))