Switch to side-by-side view

--- a
+++ b/scripts/analysis_zebrafish.py
@@ -0,0 +1,120 @@
+import os, sys
+
+import pandas as pd
+from loguru import logger
+
+from pyMultiOmics.base import SingleOmicsData, MultiOmicsData
+
+sys.path.append('..')
+
+from pyMultiOmics.constants import *
+from pyMultiOmics.mapping import Mapper
+from pyMultiOmics.common import set_log_level_info
+from pyMultiOmics.analysis import *
+from pyMultiOmics.query import *
+from pyMultiOmics.pipelines import *
+
+
+DATA_FOLDER = os.path.abspath(os.path.join('..', 'notebooks', 'zebrafish_data'))
+gene_data = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_data_combined.csv'), index_col='Identifier')
+gene_design = pd.read_csv(os.path.join(DATA_FOLDER, 'gene_design.csv'), index_col='sample')
+protein_data = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_data.csv'), index_col='Uniprot')
+protein_design = pd.read_csv(os.path.join(DATA_FOLDER, 'protein_design.csv'), index_col='sample')
+compound_data = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_data_kegg.csv'), index_col='Identifier')
+compound_design = pd.read_csv(os.path.join(DATA_FOLDER, 'compound_design.csv'), index_col='sample')
+
+set_log_level_info()
+
+gene_so = SingleOmicsData(GENES, gene_data, gene_design)
+prot_so = SingleOmicsData(GENES, protein_data, protein_design)
+comp_so = SingleOmicsData(GENES, compound_data, compound_design)
+
+mo = MultiOmicsData()
+mo.add_data([gene_so, prot_so, comp_so])
+
+m = Mapper(mo, DANIO_RERIO, metabolic_pathway_only=True) \
+        .build()
+
+ap = AnalysisPipeline(mo, m)
+
+# method = INFERENCE_DESEQ
+method = INFERENCE_T_TEST
+ap.run_de(method, GENES, 'Distal', 'Proximal')
+ap.run_de(method, GENES, 'Distal', 'Middle')
+ap.run_de(method, GENES, 'Proximal', 'Middle')
+
+method = INFERENCE_T_TEST
+ap.run_de(method, PROTEINS, 'Distal', 'Proximal')
+ap.run_de(method, PROTEINS, 'Distal', 'Middle')
+ap.run_de(method, PROTEINS, 'Proximal', 'Middle')
+ap.run_de(method, COMPOUNDS, 'Distal', 'Proximal')
+ap.run_de(method, COMPOUNDS, 'Distal', 'Middle')
+ap.run_de(method, COMPOUNDS, 'Proximal', 'Middle')
+
+# Retrieve a single node
+node_id = '15366'
+res = QueryBuilder(ap) \
+        .add(Entity(node_id)) \
+        .run()
+logger.info(res)
+
+# Retrieve multiple nodes
+node_id = ['15366', 'ENSDARG00000037781', 'F1QAA7']
+res = QueryBuilder(ap) \
+        .add(Entity(node_id)) \
+        .run()
+logger.info(res)
+
+# Retrieve nodes connected to a single node
+query_id = 'F1QAA7'
+res = QueryBuilder(ap) \
+        .add(Entity(query_id)) \
+        .add(Connected()) \
+        .run()
+logger.info(res)
+
+# Retrieve top-10 significantly changing genes
+case = 'Distal'
+control = 'Proximal'
+pval = 0.05
+fc_lte = -2
+fc_gte = 2
+N = 20
+res = QueryBuilder(ap) \
+        .add(Select(GENES)) \
+        .add(SignificantDE(case, control, pval, fc_lte=fc_lte, fc_gte=fc_gte, N=N)) \
+        .run()
+logger.info(res)
+
+# Find the compounds that are connected to the DE genes above
+
+res = QueryBuilder(ap) \
+        .add(Select(GENES)) \
+        .add(SignificantDE(case, control, pval, fc_lte=fc_lte, fc_gte=fc_gte, N=N)) \
+        .add(Connected(data_type=COMPOUNDS)) \
+        .run()
+logger.info(res)
+
+# Plot some heatmap using Plotly
+
+res = QueryBuilder(ap) \
+        .add(Select(GENES)) \
+        .add(SignificantDE(case, control, pval, fc_lte=fc_lte, fc_gte=fc_gte, N=N)) \
+        .run()
+logger.info(res)
+
+data_type = GENES
+analysis = ap.get_de_analysis(data_type, case, control)
+wi = analysis.wi
+data_df, design_df = wi.data_df, wi.design_df
+
+case_group = design_df[design_df['group'] == case].index.values.tolist()
+control_group = design_df[design_df['group'] == control].index.values.tolist()
+selection = case_group + control_group
+
+heatmap_df = wi.data_df.loc[res.index.values]
+heatmap_df = heatmap_df[selection]
+heatmap_df
+
+from plotly import express as px
+px.imshow(wi._normalise_df(heatmap_df, log=True))
\ No newline at end of file