[28d9d2]: / tests / test_main.py

Download this file

150 lines (115 with data), 3.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
from context import models, pl, tl, score
import mudata as md
import anndata as ad
import torch
import numpy as np
# Ensembl gene identifiers. They size the RNA modality and are also used as
# its variable names, so that gene-based tools (e.g. the enrichment test) see
# gene IDs instead of AnnData's default positional names.
# NOTE(review): presumably the enrichment tool queries by variable name - confirm.
gene_names = [
    "ENSG00000125877",
    "ENSG00000184840",
    "ENSG00000164440",
    "ENSG00000177144",
    "ENSG00000186815",
    "ENSG00000079974",
    "ENSG00000136159",
    "ENSG00000177243",
    "ENSG00000163932",
    "ENSG00000112799",
    "ENSG00000075618",
    "ENSG00000092531",
    "ENSG00000171408",
    "ENSG00000150527",
    "ENSG00000202429",
    "ENSG00000140807",
    "ENSG00000154589",
    "ENSG00000166263",
    "ENSG00000205268",
    "ENSG00000115008",
]

# Dimensions of the toy dataset and of the latent space.
n_cells, n_genes, n_peaks = 20, len(gene_names), 5
latent_dim = 5

# The fixtures are random; a seeded generator makes any failure reproducible
# from one run to the next without touching NumPy's global random state.
rng = np.random.default_rng(0)

# Create a random anndata object for RNA, with variables named after the genes.
rna = ad.AnnData(rng.random((n_cells, n_genes)))
rna.var_names = gene_names
rna.var["highly_variable"] = True

# Create a random anndata object for ATAC.
atac = ad.AnnData(rng.random((n_cells, n_peaks)))
atac.var["highly_variable"] = True

# Create a MuData object combining RNA and ATAC.
mdata = md.MuData({"rna": rna, "atac": atac})

# Equal per-cell weights for the two modalities, plus a random categorical
# label used by the tests as a grouping key.
mdata.obs["rna:mod_weight"] = 0.5
mdata.obs["atac:mod_weight"] = 0.5
mdata.obs["label"] = rng.choice(["A", "B", "C"], size=n_cells)
def test_default_params():
    """Train a Mowgli model with only the required arguments and check shapes."""
    cost_files = {"rna": "cost_rna.npy", "atac": "cost_atac.npy"}
    mowgli = models.MowgliModel(latent_dim=latent_dim, cost_path=cost_files)

    # Fit on the shared fixture; the assertions below read the results
    # back from `mdata`.
    mowgli.train(mdata)

    # The cell embedding has one row per cell and one column per latent factor.
    embedding_shape = mdata.obsm["W_OT"].shape
    assert embedding_shape == (n_cells, latent_dim)

    # Each modality's dictionary has one row per feature of that modality.
    for modality, n_features in (("rna", n_genes), ("atac", n_peaks)):
        assert mdata[modality].uns["H_OT"].shape == (n_features, latent_dim)
def test_custom_params():
    """Train a Mowgli model with non-default options and check output shapes."""
    cost_files = {"rna": "cost_rna.npy", "atac": "cost_atac.npy"}
    regularization = {"rna": 0.1, "atac": 0.1}
    mowgli = models.MowgliModel(
        latent_dim=latent_dim,
        h_regularization=regularization,
        use_mod_weight=True,
        pca_cost=True,
        cost_path=cost_files,
    )

    # Explicitly initialise the parameters before training, on CPU and in
    # single precision.
    mowgli.init_parameters(
        mdata,
        force_recompute=True,
        normalize_rows=True,
        dtype=torch.float,
        device="cpu",
    )

    # Fit with the Adam optimiser selected by name.
    mowgli.train(mdata, optim_name="adam")

    # The cell embedding has one row per cell and one column per latent factor.
    embedding_shape = mdata.obsm["W_OT"].shape
    assert embedding_shape == (n_cells, latent_dim)

    # Each modality's dictionary has one row per feature of that modality.
    for modality, n_features in (("rna", n_genes), ("atac", n_peaks)):
        assert mdata[modality].uns["H_OT"].shape == (n_features, latent_dim)
def test_plotting():
    """Smoke-test the plotting helpers; each call is made with ``show=False``."""
    # Both grouped plots use the random categorical column of `mdata.obs`.
    group_key = "label"

    # Clustermap of the fixture.
    pl.clustermap(mdata, show=False)

    # Violin plot of dimension 0, split by label.
    pl.factor_violin(mdata, groupby=group_key, dim=0, show=False)

    # Heatmap grouped by label.
    pl.heatmap(mdata, groupby=group_key, show=False)
def test_tools():
    """Smoke-test the analysis tools: top features per modality, then enrichment."""
    # Top genes first, then top peaks, for dimension 0 with the same threshold.
    for modality in ("rna", "atac"):
        tl.top_features(mdata, mod=modality, dim=0, threshold=0.2)

    # Enrichment on 10 genes, unordered.
    tl.enrich(mdata, n_genes=10, ordered=False)
def test_score():
    """Smoke-test the scoring helpers on the `W_OT` embedding and the labels.

    The return values are not checked; the test only verifies that each call
    completes. NOTE(review): `mdata.obsm["W_OT"]` is not created by the fixture
    setup in this file, so this presumably relies on a training test having
    run first - confirm.
    """
    embedding = mdata.obsm["W_OT"]
    labels = mdata.obs["label"]

    # Silhouette score of the labels in the embedding.
    score.embedding_silhouette_score(
        embedding=embedding,
        labels=labels,
        metric="euclidean",
    )

    # Leiden clustering across several resolutions.
    score.embedding_leiden_across_resolutions(
        embedding=embedding,
        labels=labels,
        n_neighbors=10,
        resolutions=[0.1, 0.5, 1.0],
    )

    # Build a knn from the embedding, then score its purity against the labels.
    neighbors = score.embedding_to_knn(embedding=embedding, k=15, metric="euclidean")
    score.knn_purity_score(knn=neighbors, labels=labels)