|
a |
|
b/tests/fig2_test.py |
|
|
1 |
import os |
|
|
2 |
import sys |
|
|
3 |
|
|
|
4 |
# Put the directory one level above this test file on the import path.
# This runs before the third-party imports that follow it in the file;
# presumably so `multivelo` resolves to the local checkout - TODO confirm.
current_path = os.path.dirname(__file__)
src_path = os.path.join(current_path, os.pardir)
sys.path.append(src_path)
|
|
7 |
|
|
|
8 |
import pytest |
|
|
9 |
import numpy as np |
|
|
10 |
import sys |
|
|
11 |
import multivelo as mv |
|
|
12 |
import scanpy as sc |
|
|
13 |
import scvelo as scv |
|
|
14 |
|
|
|
15 |
# numpy: print floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

# scvelo: module-wide settings applied once at import time, before any test
# in this file runs.
scv.settings.verbosity = 3
scv.settings.presenter_view = True
scv.set_figure_params('scvelo')
|
|
19 |
|
|
|
20 |
|
|
|
21 |
@pytest.fixture(scope="session")
def result_data_2():
    """Provide the fig2 test ATAC data after peak aggregation.

    The fixture is session-scoped, so the files are read and aggregated once
    and the same object is handed to every test that requests it.  All paths
    are relative to the current working directory.

    Returns:
        Whatever ``mv.aggregate_peaks_10x`` returns for the fig2 test data,
        the peak annotation file and the feature linkage file.
    """
    # load the raw ATAC counts
    raw_atac = sc.read("test_files/fig2_for_test.h5ad")

    # auxiliary 10x files needed for the aggregation step
    peak_annotation_file = 'test_files/peak_annotation.tsv'
    feature_linkage_file = 'test_files/feature_linkage.bedpe'

    return mv.aggregate_peaks_10x(raw_atac,
                                  peak_annotation_file,
                                  feature_linkage_file)
|
|
33 |
|
|
|
34 |
|
|
|
35 |
def test_agg_peaks(result_data_2):
    """Spot-check the output of ``aggregate_peaks_10x``.

    Converts the fixture's ``.X`` matrix to COO form, verifies the number of
    stored entries, and compares the value, row and column of five sampled
    entries against known-good results.
    """
    # (position in the COO arrays, expected value, expected row, expected col)
    expected_entries = [
        (0, 8.0, 0, 9),
        (100000, 4.0, 1157, 276),
        (200000, 2.0, 2333, 291),
        (300000, 2.0, 3531, 78),
        (400000, 2.0, 4724, 201),
    ]

    coo = result_data_2.X.tocoo()

    # total number of stored entries
    assert len(coo.data) == 412887

    # value / row / column of each sampled entry
    for position, value, row, col in expected_entries:
        assert coo.data[position] == pytest.approx(value)
        assert coo.row[position] == row
        assert coo.col[position] == col
|
|
58 |
|
|
|
59 |
|
|
|
60 |
def test_tfidf(result_data_2):
    """Spot-check ``.X`` after running ``mv.tfidf_norm``.

    Works on a copy of the session fixture so that other tests still see the
    un-normalized data.  Verifies the number of stored entries and compares
    the value, row and column of five sampled entries against known-good
    results.
    """
    normalized = result_data_2.copy()
    mv.tfidf_norm(normalized)

    # positions in the COO arrays to sample, and what we expect to find there
    sample_positions = [0, 100000, 200000, 300000, 400000]
    expected_values = [66.66449, 29.424345, 85.36392, 42.239613, 26.855387]
    expected_rows = np.array([0, 1157, 2333, 3531, 4724])
    expected_cols = np.array([9, 276, 291, 78, 201])

    coo = normalized.X.tocoo()

    # total number of stored entries
    assert len(coo.data) == 412887

    # value / row / column of each sampled entry
    samples = zip(sample_positions, expected_values,
                  expected_rows, expected_cols)
    for position, value, row, col in samples:
        assert coo.data[position] == pytest.approx(value)
        assert coo.row[position] == row
        assert coo.col[position] == col
|
|
87 |
|
|
|
88 |
|
|
|
89 |
def test_smooth(result_data_2):
    """Spot-check ``.X`` after running ``mv.knn_smooth_chrom``.

    Works on a copy of the session fixture.  The neighbor index and distance
    matrices are loaded from comma-delimited text files, and the ATAC data is
    cut down to the first ``nn_idx.shape[0]`` observations before smoothing.
    The test then verifies the number of stored entries and compares the
    value, row and column of five sampled entries against known-good results.

    NOTE(review): the assertions inspect ``.X`` of the subset; confirm that
    this is where the effect of ``knn_smooth_chrom`` is expected to show.
    """
    working_copy = result_data_2.copy()

    # precomputed nearest-neighbor matrices
    nn_idx = np.loadtxt("test_files/nn_idx.txt", delimiter=',')
    nn_dist = np.loadtxt("test_files/nn_dist.txt", delimiter=',')

    # keep only as many observations as the neighbor matrices have rows
    n_obs = nn_idx.shape[0]
    atac_subset = working_copy[:n_obs, :]

    mv.knn_smooth_chrom(atac_subset, nn_idx, nn_dist)

    # positions in the COO arrays to sample, and what we expect to find there
    sample_positions = [0, 70000, 140000, 210000, 280000]
    expected_values = [8.0, 4.0, 2.0, 2.0, 2.0]
    expected_rows = [0, 809, 1615, 2453, 3295]
    expected_cols = [9, 327, 56, 25, 137]

    coo = atac_subset.X.tocoo()

    # total number of stored entries
    assert len(coo.data) == 285810

    # value / row / column of each sampled entry
    samples = zip(sample_positions, expected_values,
                  expected_rows, expected_cols)
    for position, value, row, col in samples:
        assert coo.data[position] == pytest.approx(value)
        assert coo.row[position] == row
        assert coo.col[position] == col