# tests/fig2_test.py
1
import os
2
import sys
3
4
# Append the parent of this file's directory to the import search path.
# NOTE(review): presumably this lets the tests import the in-repo package
# without installing it -- confirm against the repository layout.
current_path = os.path.dirname(__file__)
src_path = os.path.join(current_path, os.pardir)
sys.path.append(src_path)
7
8
import pytest
9
import numpy as np
10
import sys
11
import multivelo as mv
12
import scanpy as sc
13
import scvelo as scv
14
15
# Print NumPy floats in fixed-point notation instead of scientific notation.
np.set_printoptions(suppress=True)

# Module-wide scVelo logging / plotting configuration used by these tests.
scv.settings.verbosity = 3
scv.settings.presenter_view = True
scv.set_figure_params('scvelo')
19
20
21
@pytest.fixture(scope="session")
def result_data_2():
    """Build the aggregated-peak ATAC object shared by the tests below.

    Reads ``test_files/fig2_for_test.h5ad`` and passes it, together with the
    peak annotation and feature linkage files, to ``mv.aggregate_peaks_10x``.
    All three paths are relative, so they resolve against the working
    directory pytest is started from.

    The fixture is session-scoped, so the returned object is created once and
    shared; tests that modify it should work on a copy.

    Returns:
        Whatever ``mv.aggregate_peaks_10x`` returns for the loaded data.
    """
    peak_annotation_path = 'test_files/peak_annotation.tsv'
    feature_linkage_path = 'test_files/feature_linkage.bedpe'

    # load the raw ATAC data from disk
    raw_atac = sc.read("test_files/fig2_for_test.h5ad")

    # aggregate the peaks and hand the result straight to the tests
    return mv.aggregate_peaks_10x(raw_atac,
                                  peak_annotation_path,
                                  feature_linkage_path)
33
34
35
# test the aggregate_peaks_10x function
36
def test_agg_peaks(result_data_2):
    """Spot-check the sparse matrix produced by the peak-aggregation fixture.

    Converts ``result_data_2.X`` to COO form, checks the number of stored
    entries, and compares five sampled entries (value, row and column)
    against hard-coded reference values.
    """
    # one tuple per sampled entry:
    # (position in the COO arrays, expected value, expected row, expected col)
    expected_entries = [
        (0, 8.0, 0, 9),
        (100000, 4.0, 1157, 276),
        (200000, 2.0, 2333, 291),
        (300000, 2.0, 3531, 78),
        (400000, 2.0, 4724, 201),
    ]

    # COO form exposes parallel data / row / col arrays
    coo = result_data_2.X.tocoo()

    # the total number of stored entries must match the reference
    assert len(coo.data) == 412887

    # every sampled entry must match in value, row, and column
    for position, value, row, col in expected_entries:
        assert coo.data[position] == pytest.approx(value)
        assert coo.row[position] == row
        assert coo.col[position] == col
58
59
60
def test_tfidf(result_data_2):
    """Spot-check ``mv.tfidf_norm`` against hard-coded reference values.

    The normalisation is applied to a copy of the fixture so the shared
    session-scoped object is left untouched. The return value of
    ``mv.tfidf_norm`` is ignored; the assertions read ``.X`` of the copy
    afterwards.
    """
    # work on a private copy of the shared fixture
    normalized = result_data_2.copy()
    mv.tfidf_norm(normalized)

    # positions in the COO arrays to sample, and the values expected there
    sample_positions = [0, 100000, 200000, 300000, 400000]
    expected_values = [66.66449, 29.424345, 85.36392, 42.239613, 26.855387]
    expected_rows = np.array([0, 1157, 2333, 3531, 4724])
    expected_cols = np.array([9, 276, 291, 78, 201])

    # COO form exposes parallel data / row / col arrays
    coo = normalized.X.tocoo()

    # the number of stored entries must match the reference
    assert len(coo.data) == 412887

    # every sampled entry must match in value, row, and column
    samples = zip(sample_positions, expected_values,
                  expected_rows, expected_cols)
    for position, value, row, col in samples:
        assert coo.data[position] == pytest.approx(value)
        assert coo.row[position] == row
        assert coo.col[position] == col
87
88
89
def test_smooth(result_data_2):
    """Run ``mv.knn_smooth_chrom`` on a subset and spot-check ``.X``.

    Neighbor indices and distances are loaded from comma-delimited text
    files, a copy of the fixture is cut down to as many observations as the
    index matrix has rows, and ``mv.knn_smooth_chrom`` is called on that
    subset. The assertions then sample the subset's ``.X`` in COO form.

    NOTE(review): the expected values here equal the un-smoothed values
    checked in ``test_agg_peaks``, and only ``.X`` is inspected; whether
    ``knn_smooth_chrom`` writes its result somewhere other than ``.X`` is not
    visible from this file -- confirm that this test exercises the smoothed
    output.
    """
    # precomputed neighbor matrices (paths relative to the working directory)
    nn_idx = np.loadtxt("test_files/nn_idx.txt", delimiter=',')
    nn_dist = np.loadtxt("test_files/nn_dist.txt", delimiter=',')

    # copy the shared fixture, then keep only the observations that the
    # neighbor matrices have rows for
    full_copy = result_data_2.copy()
    n_obs = nn_idx.shape[0]
    atac_smooth = full_copy[:n_obs, :]

    # run knn_smooth_chrom on the subset
    mv.knn_smooth_chrom(atac_smooth, nn_idx, nn_dist)

    # one tuple per sampled entry:
    # (position in the COO arrays, expected value, expected row, expected col)
    expected_entries = [
        (0, 8.0, 0, 9),
        (70000, 4.0, 809, 327),
        (140000, 2.0, 1615, 56),
        (210000, 2.0, 2453, 25),
        (280000, 2.0, 3295, 137),
    ]

    # COO form exposes parallel data / row / col arrays
    coo = atac_smooth.X.tocoo()

    # the number of stored entries must match the reference
    assert len(coo.data) == 285810

    # every sampled entry must match in value, row, and column
    for position, value, row, col in expected_entries:
        assert coo.data[position] == pytest.approx(value)
        assert coo.row[position] == row
        assert coo.col[position] == col