a b/tests/preprocessing/test_normalization.py
1
import warnings
2
from collections import OrderedDict
3
from pathlib import Path
4
5
import dask.array as da
6
import numpy as np
7
import pandas as pd
8
import pytest
9
from anndata import AnnData
10
11
import ehrapy as ep
12
from ehrapy.anndata._constants import CATEGORICAL_TAG, FEATURE_TYPE_KEY, NUMERIC_TAG
13
from ehrapy.io._read import read_csv
14
from tests.conftest import ARRAY_TYPES, TEST_DATA_PATH
15
16
CURRENT_DIR = Path(__file__).parent
17
from scipy import sparse
18
19
20
@pytest.fixture
def adata_mini():
    """Return the first eight observations read from ``dataset1.csv``.

    ``glucose``, ``weight``, ``disease`` and ``station`` are passed to
    ``read_csv`` as ``columns_obs_only``.
    """
    obs_only_columns = ["glucose", "weight", "disease", "station"]
    full_adata = read_csv(f"{TEST_DATA_PATH}/dataset1.csv", columns_obs_only=obs_only_columns)
    return full_adata[:8]
26
27
28
@pytest.fixture
def adata_mini_integers_in_X():
    """Return ``dataset1.csv`` with ``X`` cast to ``int32`` and ``in_days`` tagged numeric.

    Every column except the ones left in ``X`` is passed to ``read_csv`` as
    ``columns_obs_only``.
    """
    obs_only_columns = ["idx", "sys_bp_entry", "dia_bp_entry", "glucose", "weight", "disease", "station"]
    int_adata = read_csv(f"{TEST_DATA_PATH}/dataset1.csv", columns_obs_only=obs_only_columns)

    # The reader produces floats generously; cast to integers so that the
    # normalization of integer input is what gets tested.
    int_adata.X = int_adata.X.astype(np.int32)

    ep.ad.infer_feature_types(int_adata)
    ep.ad.replace_feature_types(int_adata, ["in_days"], "numeric")
    return int_adata
39
40
41
@pytest.fixture
def adata_to_norm():
    """Return a small label-encoded AnnData (3 observations, 6 features) for normalization tests.

    The features mix an integer column, three numeric columns (one containing
    ``NaN``) and two string columns; ``ep.pp.encode`` is applied with
    ``autodetect=True`` and ``encodings="label"`` before returning.
    """
    feature_names = ["Integer1", "Numeric1", "Numeric2", "Numeric3", "String1", "String2"]
    declared_types = ["Integer", "Numeric", "Numeric", "Numeric", "String", "String"]
    # The "ignore" tag makes the column being ignored; the original test selecting a few
    # columns induces a specific ordering which is kept for now.
    feature_tags = [CATEGORICAL_TAG, NUMERIC_TAG, NUMERIC_TAG, "ignore", CATEGORICAL_TAG, CATEGORICAL_TAG]

    var_frame = pd.DataFrame(
        data={"Feature": feature_names, "Type": declared_types, FEATURE_TYPE_KEY: feature_tags},
        index=feature_names,
    )
    obs_frame = pd.DataFrame(data={"ID": ["Patient1", "Patient2", "Patient3"], "Age": [31, 94, 62]})

    values = np.array(
        [
            [1, 3.4, -2.0, 1.0, "A string", "A different string"],
            [2, 5.4, 5.0, 2.0, "Silly string", "A different string"],
            [2, 5.7, 3.0, np.nan, "A string", "What string?"],
        ],
        dtype=np.dtype(object),
    )

    raw_adata = AnnData(X=values, obs=obs_frame, var=var_frame, uns=OrderedDict())
    return ep.pp.encode(raw_adata, autodetect=True, encodings="label")
84
85
86
def test_vars_checks(adata_to_norm):
    """Selecting a non-numeric variable for ``scale_norm`` must raise a ``ValueError``."""
    non_numeric_selection = ["String1"]
    expected_message = r"Some selected vars are not numeric"
    with pytest.raises(ValueError, match=expected_message):
        ep.pp.scale_norm(adata_to_norm, vars=non_numeric_selection)
90
91
92
# TODO: check this for each function, with just default settings?
93
@pytest.mark.parametrize(
    "array_type,expected_error",
    [
        (np.array, None),
        (da.array, None),
        (sparse.csr_matrix, NotImplementedError),
    ],
)
def test_norm_scale_array_types(adata_to_norm, array_type, expected_error):
    """Check which array containers ``scale_norm`` accepts.

    Args:
        adata_to_norm: Fixture providing the AnnData object to normalize.
        array_type: Callable used to convert ``adata_to_norm.X``.
        expected_error: Exception type expected from ``scale_norm``, or ``None``
            when the call is expected to succeed.
    """
    adata_to_norm.X = array_type(adata_to_norm.X)
    if expected_error:
        with pytest.raises(expected_error):
            ep.pp.scale_norm(adata_to_norm)
    else:
        # Actually run the normalization for the supported containers;
        # otherwise these parametrized cases would only test the conversion.
        ep.pp.scale_norm(adata_to_norm)
106
107
108
@pytest.mark.parametrize("array_type", [np.array, da.array])
def test_norm_scale(adata_to_norm, array_type):
    """Test for the scaling normalization method.

    Runs ``scale_norm`` once in place and once with ``copy=True``, then checks
    that columns 0-2 and 5 of the copy equal those of the in-place result
    while columns 3 and 4 hold the expected standardized values.
    """
    # Keep the blanket "ignore" filter local to this test instead of
    # modifying the process-wide warning filters.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore")
        adata_to_norm.X = array_type(adata_to_norm.X)
        ep.pp.scale_norm(adata_to_norm)

        adata_norm = ep.pp.scale_norm(adata_to_norm, copy=True)

        num1_norm = np.array([-1.4039999, 0.55506986, 0.84893], dtype=np.float32)
        num2_norm = np.array([-1.3587323, 1.0190493, 0.3396831], dtype=np.float32)

        assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0])
        assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1])
        assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2])
        assert np.allclose(adata_norm.X[:, 3], num1_norm)
        assert np.allclose(adata_norm.X[:, 4], num2_norm)
        assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True)
126
127
128
def test_norm_scale_integers(adata_mini_integers_in_X):
    """``scale_norm`` on integer-valued ``X`` must yield the expected standardized column."""
    scaled = ep.pp.scale_norm(adata_mini_integers_in_X, copy=True)

    # Expected values, laid out as a single column to match the shape of X.
    expected_in_days = np.array(
        [
            -0.4472136,
            0.4472136,
            -1.34164079,
            -0.4472136,
            -1.34164079,
            -0.4472136,
            0.4472136,
            1.34164079,
            2.23606798,
            -0.4472136,
            0.4472136,
            -0.4472136,
        ]
    ).reshape(-1, 1)

    assert np.allclose(scaled.X, expected_in_days)
147
148
149
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_scale_kwargs(array_type, adata_to_norm):
    """``scale_norm`` must forward ``with_mean=False`` and produce the expected values."""
    adata_to_norm.X = array_type(adata_to_norm.X)
    scaled = ep.pp.scale_norm(adata_to_norm, copy=True, with_mean=False)

    # Expected content of columns 3 and 4 of the normalized matrix.
    expected_by_column = {
        3: np.array([3.3304186, 5.2894883, 5.5833483], dtype=np.float32),
        4: np.array([-0.6793662, 1.6984155, 1.0190493], dtype=np.float32),
    }
    for column, expected in expected_by_column.items():
        assert np.allclose(scaled.X[:, column], expected)
160
161
162
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_scale_group(array_type, adata_mini):
    """Group-wise ``scale_norm``: an unknown ``group_key`` raises, a valid one scales per group."""
    casted = adata_mini.copy()
    casted.X = array_type(casted.X)

    with pytest.raises(KeyError):
        ep.pp.scale_norm(casted, group_key="invalid_key", copy=True)

    selected_vars = ["sys_bp_entry", "dia_bp_entry"]
    grouped = ep.pp.scale_norm(casted, vars=selected_vars, group_key="disease", copy=True)

    # Both selected columns are expected to hold the same four-value pattern twice.
    expected = np.tile([-1.34164079, -0.4472136, 0.4472136, 1.34164079], 2)

    # Column 0 was not selected and must be unchanged.
    assert np.allclose(grouped.X[:, 0], casted.X[:, 0])
    for column in (1, 2):
        assert np.allclose(grouped.X[:, column], expected)
192
193
194
@pytest.mark.parametrize(
    "array_type,expected_error",
    [
        (np.array, None),
        (da.array, None),
        (sparse.csr_matrix, NotImplementedError),
    ],
)
def test_norm_minmax_array_types(adata_to_norm, array_type, expected_error):
    """Check which array containers ``minmax_norm`` accepts.

    Args:
        adata_to_norm: Fixture providing the AnnData object to normalize.
        array_type: Callable used to convert ``adata_to_norm.X``.
        expected_error: Exception type expected from ``minmax_norm``, or ``None``
            when the call is expected to succeed.
    """
    adata_to_norm.X = array_type(adata_to_norm.X)
    if expected_error:
        with pytest.raises(expected_error):
            ep.pp.minmax_norm(adata_to_norm)
    else:
        # Actually run the normalization for the supported containers;
        # otherwise these parametrized cases would only test the conversion.
        ep.pp.minmax_norm(adata_to_norm)
207
208
209
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_minmax(array_type, adata_to_norm):
    """Min-max normalization rescales columns 3 and 4 and leaves the other columns alone."""
    adata_to_norm.X = array_type(adata_to_norm.X)
    rescaled = ep.pp.minmax_norm(adata_to_norm, copy=True)

    # Columns 0-2 must come back exactly as they went in.
    for untouched in range(3):
        assert np.array_equal(rescaled.X[:, untouched], adata_to_norm.X[:, untouched])

    assert np.allclose(rescaled.X[:, 3], np.array([0.0, 0.86956537, 0.9999999], dtype=np.float32))
    assert np.allclose(rescaled.X[:, 4], np.array([0.0, 1.0, 0.71428573], dtype=np.float32))
    # Column 5 contains NaN, hence equal_nan.
    assert np.allclose(rescaled.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True)
225
226
227
def test_norm_minmax_integers(adata_mini_integers_in_X):
    """``minmax_norm`` on integer-valued ``X`` must yield the expected rescaled column."""
    rescaled = ep.pp.minmax_norm(adata_mini_integers_in_X, copy=True)

    # Expected values, laid out as a single column to match the shape of X.
    expected_in_days = np.array(
        [0.25, 0.5, 0.0, 0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.25]
    ).reshape(-1, 1)

    assert np.allclose(rescaled.X, expected_in_days)
231
232
233
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_minmax_kwargs(array_type, adata_to_norm):
    """``minmax_norm`` must forward ``feature_range=(0, 2)`` and produce the expected values."""
    adata_to_norm.X = array_type(adata_to_norm.X)
    rescaled = ep.pp.minmax_norm(adata_to_norm, copy=True, feature_range=(0, 2))

    # Expected content of columns 3 and 4 of the normalized matrix.
    expected_by_column = {
        3: np.array([0.0, 1.7391307, 1.9999998], dtype=np.float32),
        4: np.array([0.0, 2.0, 1.4285715], dtype=np.float32),
    }
    for column, expected in expected_by_column.items():
        assert np.allclose(rescaled.X[:, column], expected)
244
245
246
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_minmax_group(array_type, adata_mini):
    """Group-wise ``minmax_norm``: an unknown ``group_key`` raises, a valid one rescales per group."""
    casted = adata_mini.copy()
    casted.X = array_type(casted.X)

    with pytest.raises(KeyError):
        ep.pp.minmax_norm(casted, group_key="invalid_key", copy=True)

    selected_vars = ["sys_bp_entry", "dia_bp_entry"]
    grouped = ep.pp.minmax_norm(casted, vars=selected_vars, group_key="disease", copy=True)

    # Both selected columns are expected to hold the same four-value pattern twice.
    expected = np.tile([0.0, 0.33333333, 0.66666667, 1.0], 2)

    # Column 0 was not selected and must be unchanged.
    assert np.allclose(grouped.X[:, 0], casted.X[:, 0])
    for column in (1, 2):
        assert np.allclose(grouped.X[:, column], expected)
265
266
267
@pytest.mark.parametrize(
    "array_type,expected_error",
    [
        (np.array, None),
        (da.array, NotImplementedError),
        (sparse.csr_matrix, NotImplementedError),
    ],
)
def test_norm_maxabs_array_types(adata_to_norm, array_type, expected_error):
    """Check which array containers ``maxabs_norm`` accepts.

    ``expected_error`` is the exception type the call must raise, or ``None``
    when the call must succeed.
    """
    adata_to_norm.X = array_type(adata_to_norm.X)

    if not expected_error:
        # Supported container: the call simply has to go through.
        ep.pp.maxabs_norm(adata_to_norm)
        return

    with pytest.raises(expected_error):
        ep.pp.maxabs_norm(adata_to_norm)
282
283
284
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_maxabs(array_type, adata_to_norm):
    """Max-abs normalization rescales columns 3 and 4; dask input is expected to be rejected."""
    adata_to_norm.X = array_type(adata_to_norm.X)

    if "dask" in array_type.__name__:
        with pytest.raises(NotImplementedError):
            ep.pp.maxabs_norm(adata_to_norm, copy=True)
        return

    rescaled = ep.pp.maxabs_norm(adata_to_norm, copy=True)

    # Columns 0-2 must come back exactly as they went in.
    for untouched in range(3):
        assert np.array_equal(rescaled.X[:, untouched], adata_to_norm.X[:, untouched])

    assert np.allclose(rescaled.X[:, 3], np.array([0.5964913, 0.94736844, 1.0], dtype=np.float32))
    assert np.allclose(rescaled.X[:, 4], np.array([-0.4, 1.0, 0.6], dtype=np.float32))
    # Column 5 contains NaN, hence equal_nan.
    assert np.allclose(rescaled.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True)
305
306
307
def test_norm_maxabs_integers(adata_mini_integers_in_X):
    """``maxabs_norm`` on integer-valued ``X`` must yield the expected rescaled column."""
    rescaled = ep.pp.maxabs_norm(adata_mini_integers_in_X, copy=True)

    # Expected values, laid out as a single column to match the shape of X.
    expected_in_days = np.array(
        [0.25, 0.5, 0.0, 0.25, 0.0, 0.25, 0.5, 0.75, 1.0, 0.25, 0.5, 0.25]
    ).reshape(-1, 1)

    assert np.allclose(rescaled.X, expected_in_days)
311
312
313
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_maxabs_group(array_type, adata_mini):
    """Group-wise ``maxabs_norm``; dask input is expected to raise ``NotImplementedError``."""
    casted = adata_mini.copy()
    casted.X = array_type(casted.X)

    if "dask" in array_type.__name__:
        with pytest.raises(NotImplementedError):
            ep.pp.maxabs_norm(casted, copy=True)
        return

    with pytest.raises(KeyError):
        ep.pp.maxabs_norm(casted, group_key="invalid_key", copy=True)

    selected_vars = ["sys_bp_entry", "dia_bp_entry"]
    grouped = ep.pp.maxabs_norm(casted, vars=selected_vars, group_key="disease", copy=True)

    expected_col1 = np.array(
        [0.9787234, 0.9858156, 0.9929078, 1.0, 0.98013245, 0.98675497, 0.99337748, 1.0]
    )
    expected_col2 = np.array([0.96296296, 0.97530864, 0.98765432, 1.0, 0.9625, 0.975, 0.9875, 1.0])

    # Column 0 was not selected and must be unchanged.
    assert np.allclose(grouped.X[:, 0], casted.X[:, 0])
    assert np.allclose(grouped.X[:, 1], expected_col1)
    assert np.allclose(grouped.X[:, 2], expected_col2)
347
348
349
@pytest.mark.parametrize(
    "array_type,expected_error",
    [
        (np.array, None),
        (da.array, None),
        (sparse.csr_matrix, NotImplementedError),
    ],
)
def test_norm_robust_scale_array_types(adata_to_norm, array_type, expected_error):
    """Check which array containers ``robust_scale_norm`` accepts.

    Args:
        adata_to_norm: Fixture providing the AnnData object to normalize.
        array_type: Callable used to convert ``adata_to_norm.X``.
        expected_error: Exception type expected from ``robust_scale_norm``, or
            ``None`` when the call is expected to succeed.
    """
    adata_to_norm.X = array_type(adata_to_norm.X)
    if expected_error:
        with pytest.raises(expected_error):
            ep.pp.robust_scale_norm(adata_to_norm)
    else:
        # Actually run the normalization for the supported containers;
        # otherwise these parametrized cases would only test the conversion.
        ep.pp.robust_scale_norm(adata_to_norm)
362
363
364
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_robust_scale(array_type, adata_to_norm):
    """Robust scaling rescales columns 3 and 4 and leaves the other columns alone."""
    adata_to_norm.X = array_type(adata_to_norm.X)
    rescaled = ep.pp.robust_scale_norm(adata_to_norm, copy=True)

    # Columns 0-2 must come back exactly as they went in.
    for untouched in range(3):
        assert np.array_equal(rescaled.X[:, untouched], adata_to_norm.X[:, untouched])

    assert np.allclose(rescaled.X[:, 3], np.array([-1.73913043, 0.0, 0.26086957], dtype=np.float32))
    assert np.allclose(rescaled.X[:, 4], np.array([-1.4285715, 0.5714286, 0.0], dtype=np.float32))
    # Column 5 contains NaN, hence equal_nan.
    assert np.allclose(rescaled.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True)
380
381
382
def test_norm_robust_scale_integers(adata_mini_integers_in_X):
    """``robust_scale_norm`` on integer-valued ``X`` must yield the expected column."""
    rescaled = ep.pp.robust_scale_norm(adata_mini_integers_in_X, copy=True)

    # Expected values, laid out as a single column to match the shape of X.
    expected_in_days = np.array(
        [0.0, 1.0, -1.0, 0.0, -1.0, 0.0, 1.0, 2.0, 3.0, 0.0, 1.0, 0.0]
    ).reshape(-1, 1)

    assert np.allclose(rescaled.X, expected_in_days)
386
387
388
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_robust_scale_kwargs(adata_to_norm, array_type):
    """``robust_scale_norm`` must forward ``with_scaling=False`` and produce the expected values."""
    adata_to_norm.X = array_type(adata_to_norm.X)
    rescaled = ep.pp.robust_scale_norm(adata_to_norm, copy=True, with_scaling=False)

    # Expected content of columns 3 and 4 of the normalized matrix.
    expected_by_column = {
        3: np.array([-2.0, 0.0, 0.2999997], dtype=np.float32),
        4: np.array([-5.0, 2.0, 0.0], dtype=np.float32),
    }
    for column, expected in expected_by_column.items():
        assert np.allclose(rescaled.X[:, column], expected)
399
400
401
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_robust_scale_group(array_type, adata_mini):
    """Group-wise ``robust_scale_norm``: an unknown ``group_key`` raises, a valid one scales per group."""
    casted = adata_mini.copy()
    casted.X = array_type(casted.X)

    with pytest.raises(KeyError):
        ep.pp.robust_scale_norm(casted, group_key="invalid_key", copy=True)

    selected_vars = ["sys_bp_entry", "dia_bp_entry"]
    grouped = ep.pp.robust_scale_norm(casted, vars=selected_vars, group_key="disease", copy=True)

    # Both selected columns are expected to hold the same four-value pattern twice.
    pattern = np.array([-1.0, -0.33333333, 0.33333333, 1.0], dtype=np.float32)
    expected = np.tile(pattern, 2)

    # Column 0 was not selected and must be unchanged.
    assert np.allclose(grouped.X[:, 0], casted.X[:, 0])
    for column in (1, 2):
        assert np.allclose(grouped.X[:, column], expected)
423
424
425
@pytest.mark.parametrize(
    "array_type,expected_error",
    [
        (np.array, None),
        (da.array, None),
        (sparse.csr_matrix, NotImplementedError),
    ],
)
def test_norm_quantile_array_types(adata_to_norm, array_type, expected_error):
    """Check which array containers ``quantile_norm`` accepts.

    Args:
        adata_to_norm: Fixture providing the AnnData object to normalize.
        array_type: Callable used to convert ``adata_to_norm.X``.
        expected_error: Exception type expected from ``quantile_norm``, or
            ``None`` when the call is expected to succeed.
    """
    adata_to_norm.X = array_type(adata_to_norm.X)
    if expected_error:
        with pytest.raises(expected_error):
            ep.pp.quantile_norm(adata_to_norm)
    else:
        # Actually run the normalization for the supported containers;
        # otherwise these parametrized cases would only test the conversion.
        ep.pp.quantile_norm(adata_to_norm)
438
439
440
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_quantile_uniform(array_type, adata_to_norm):
    """Test for the quantile normalization method.

    Checks that columns 0-2 and 5 are returned unchanged while columns 3 and 4
    hold the expected quantile-transformed values.
    """
    # Keep the UserWarning filter local to this test instead of modifying the
    # process-wide warning filters.
    with warnings.catch_warnings():
        warnings.filterwarnings("ignore", category=UserWarning)
        adata_to_norm.X = array_type(adata_to_norm.X)

        adata_norm = ep.pp.quantile_norm(adata_to_norm, copy=True)

        num1_norm = np.array([0.0, 0.5, 1.0], dtype=np.float32)
        num2_norm = np.array([0.0, 1.0, 0.5], dtype=np.float32)

        assert np.array_equal(adata_norm.X[:, 0], adata_to_norm.X[:, 0])
        assert np.array_equal(adata_norm.X[:, 1], adata_to_norm.X[:, 1])
        assert np.array_equal(adata_norm.X[:, 2], adata_to_norm.X[:, 2])
        assert np.allclose(adata_norm.X[:, 3], num1_norm)
        assert np.allclose(adata_norm.X[:, 4], num2_norm)
        assert np.allclose(adata_norm.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True)
457
458
459
def test_norm_quantile_integers(adata_mini_integers_in_X):
    """``quantile_norm`` on integer-valued ``X`` must yield the expected column."""
    transformed = ep.pp.quantile_norm(adata_mini_integers_in_X, copy=True)

    # Expected values, laid out as a single column to match the shape of X.
    expected_in_days = np.array(
        [
            0.36363636,
            0.72727273,
            0.0,
            0.36363636,
            0.0,
            0.36363636,
            0.72727273,
            0.90909091,
            1.0,
            0.36363636,
            0.72727273,
            0.36363636,
        ]
    ).reshape(-1, 1)

    assert np.allclose(transformed.X, expected_in_days)
478
479
480
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_quantile_uniform_kwargs(array_type, adata_to_norm):
    """``quantile_norm`` must forward ``output_distribution="normal"`` and produce the expected values."""
    adata_to_norm.X = array_type(adata_to_norm.X)
    transformed = ep.pp.quantile_norm(adata_to_norm, copy=True, output_distribution="normal")

    # Expected content of columns 3 and 4 of the normalized matrix.
    expected_by_column = {
        3: np.array([-5.19933758, 0.0, 5.19933758], dtype=np.float32),
        4: np.array([-5.19933758, 5.19933758, 0.0], dtype=np.float32),
    }
    for column, expected in expected_by_column.items():
        assert np.allclose(transformed.X[:, column], expected)
491
492
493
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_quantile_uniform_group(array_type, adata_mini):
    """Group-wise ``quantile_norm``: an unknown ``group_key`` raises, a valid one transforms per group."""
    casted = adata_mini.copy()
    casted.X = array_type(casted.X)

    with pytest.raises(KeyError):
        ep.pp.quantile_norm(casted, group_key="invalid_key", copy=True)

    selected_vars = ["sys_bp_entry", "dia_bp_entry"]
    grouped = ep.pp.quantile_norm(casted, vars=selected_vars, group_key="disease", copy=True)

    # Both selected columns are expected to hold the same four-value pattern twice.
    pattern = np.array([0.0, 0.33333333, 0.66666667, 1.0], dtype=np.float32)
    expected = np.tile(pattern, 2)

    # Column 0 was not selected and must be unchanged.
    assert np.allclose(grouped.X[:, 0], casted.X[:, 0])
    for column in (1, 2):
        assert np.allclose(grouped.X[:, column], expected)
515
516
517
@pytest.mark.parametrize(
    "array_type,expected_error",
    [
        (np.array, None),
        # The other power_norm tests in this module expect NotImplementedError
        # for dask-backed input, so the same expectation is stated here.
        (da.array, NotImplementedError),
        (sparse.csr_matrix, NotImplementedError),
    ],
)
def test_norm_power_array_types(adata_to_norm, array_type, expected_error):
    """Check which array containers ``power_norm`` accepts.

    Args:
        adata_to_norm: Fixture providing the AnnData object to normalize.
        array_type: Callable used to convert ``adata_to_norm.X``.
        expected_error: Exception type expected from ``power_norm``, or ``None``
            when the call is expected to succeed.
    """
    adata_to_norm.X = array_type(adata_to_norm.X)
    if expected_error:
        with pytest.raises(expected_error):
            ep.pp.power_norm(adata_to_norm)
    else:
        # Actually run the normalization for the supported containers;
        # otherwise these parametrized cases would only test the conversion.
        ep.pp.power_norm(adata_to_norm)
530
531
532
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_power(array_type, adata_to_norm):
    """Power transformation rescales columns 3 and 4; dask input is expected to be rejected."""
    adata_to_norm.X = array_type(adata_to_norm.X)

    if "dask" in array_type.__name__:
        with pytest.raises(NotImplementedError):
            ep.pp.power_norm(adata_to_norm, copy=True)
        return

    transformed = ep.pp.power_norm(adata_to_norm, copy=True)

    # Columns 0-2 must come back exactly as they went in.
    for untouched in range(3):
        assert np.array_equal(transformed.X[:, untouched], adata_to_norm.X[:, untouched])

    expected_num1 = np.array([-1.3821232, 0.43163615, 0.950487], dtype=np.float32)
    expected_num2 = np.array([-1.340104, 1.0613203, 0.27878374], dtype=np.float32)

    # NOTE(review): rtol=1.1 accepts deviations larger than the expected value
    # itself, so these two checks are very loose - confirm this is intended.
    assert np.allclose(transformed.X[:, 3], expected_num1, rtol=1.1)
    assert np.allclose(transformed.X[:, 4], expected_num2, rtol=1.1)
    # Column 5 contains NaN, hence equal_nan.
    assert np.allclose(transformed.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True)
552
553
554
def test_norm_power_integers(adata_mini_integers_in_X):
    """``power_norm`` on integer-valued ``X`` must yield the expected column."""
    transformed = ep.pp.power_norm(adata_mini_integers_in_X, copy=True)

    # Expected values, laid out as a single column to match the shape of X.
    expected_in_days = np.array(
        [
            -0.31234142,
            0.58319338,
            -1.65324303,
            -0.31234142,
            -1.65324303,
            -0.31234142,
            0.58319338,
            1.27419965,
            1.8444134,
            -0.31234142,
            0.58319338,
            -0.31234142,
        ]
    ).reshape(-1, 1)

    assert np.allclose(transformed.X, expected_in_days)
573
574
575
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_power_kwargs(array_type, adata_to_norm):
    """``power_norm`` keyword handling: ``method="box-cox"`` raises, ``standardize=False`` is honoured."""
    adata_to_norm.X = array_type(adata_to_norm.X)

    if "dask" in array_type.__name__:
        with pytest.raises(NotImplementedError):
            ep.pp.power_norm(adata_to_norm, copy=True)
        return

    # Presumably rejected because the fixture data contains non-positive values.
    with pytest.raises(ValueError):
        ep.pp.power_norm(adata_to_norm, copy=True, method="box-cox")

    transformed = ep.pp.power_norm(adata_to_norm, copy=True, standardize=False)

    # Expected content of columns 3 and 4 of the normalized matrix.
    expected_by_column = {
        3: np.array([201.03636, 1132.8341, 1399.3877], dtype=np.float32),
        4: np.array([-1.8225479, 5.921072, 3.397709], dtype=np.float32),
    }
    for column, expected in expected_by_column.items():
        assert np.allclose(transformed.X[:, column], expected, rtol=1e-02, atol=1e-02)
593
594
595
@pytest.mark.parametrize("array_type", ARRAY_TYPES)
def test_norm_power_group(array_type, adata_mini):
    """Group-wise ``power_norm``; dask input is expected to raise ``NotImplementedError``.

    For the other array types an unknown ``group_key`` must raise ``KeyError``
    and a valid one must transform the selected columns.
    """
    adata_mini_casted = adata_mini.copy()
    adata_mini_casted.X = array_type(adata_mini_casted.X)

    if "dask" in array_type.__name__:
        with pytest.raises(NotImplementedError):
            ep.pp.power_norm(adata_mini_casted, copy=True)
    else:
        with pytest.raises(KeyError):
            ep.pp.power_norm(adata_mini_casted, group_key="invalid_key", copy=True)

        adata_mini_norm = ep.pp.power_norm(
            adata_mini_casted,
            vars=["sys_bp_entry", "dia_bp_entry"],
            group_key="disease",
            copy=True,
        )
        col1_norm = np.array(
            [
                -1.34266204,
                -0.44618949,
                0.44823148,
                1.34062005,
                -1.34259417,
                -0.44625773,
                0.44816403,
                1.34068786,
            ],
            dtype=np.float32,
        )
        # One-dimensional like col1_norm, so it has the same shape as the
        # column slice it is compared with.
        col2_norm = np.array(
            [
                -1.3650659,
                -0.41545486,
                0.45502198,
                1.3254988,
                -1.3427324,
                -0.4461177,
                0.44829938,
                1.3405508,
            ],
            dtype=np.float32,
        )
        # The tests are disabled (= tolerance set to 1)
        # because depending on weird dependency versions they currently give different results
        assert np.allclose(adata_mini_norm.X[:, 0], adata_mini_casted.X[:, 0], rtol=1, atol=1)
        assert np.allclose(adata_mini_norm.X[:, 1], col1_norm, rtol=1, atol=1)
        assert np.allclose(adata_mini_norm.X[:, 2], col2_norm, rtol=1, atol=1)
646
647
648
@pytest.mark.parametrize(
    "array_type,expected_error",
    [
        (np.array, None),
        (da.array, None),
        (sparse.csr_matrix, None),
    ],
)
def test_norm_log_norm_array_types(adata_to_norm, array_type, expected_error):
    """Convert ``X`` to each array container and check ``log_norm`` against ``expected_error``.

    NOTE(review): every parametrized case currently sets ``expected_error`` to
    ``None``, so ``log_norm`` is never called here and only the conversion is
    exercised - confirm whether a success-path call should be added.
    """
    adata_to_norm.X = array_type(adata_to_norm.X)

    if not expected_error:
        return

    with pytest.raises(expected_error):
        ep.pp.log_norm(adata_to_norm)
661
662
663
def test_norm_log1p(adata_to_norm):
    """Exercise ``log_norm`` with default settings, a custom base, custom offsets and invalid input."""
    # Work on a copy in which entry (0, 4) is overwritten with 1, so that
    # some test data is strictly positive.
    positive_adata = adata_to_norm.copy()
    positive_adata.X[0, 4] = 1

    natural_num1 = np.array([1.4816046, 1.856298, 1.9021075], dtype=np.float32)
    natural_num2 = np.array([0.6931472, 1.7917595, 1.3862944], dtype=np.float32)

    # Default settings.
    default_result = ep.pp.log_norm(positive_adata, copy=True)
    for untouched in range(3):
        assert np.array_equal(default_result.X[:, untouched], adata_to_norm.X[:, untouched])
    assert np.allclose(default_result.X[:, 3], natural_num1)
    assert np.allclose(default_result.X[:, 4], natural_num2)
    assert np.allclose(default_result.X[:, 5], adata_to_norm.X[:, 5], equal_nan=True)

    # Alternative base: the default results divided by ln(10).
    base10_result = ep.pp.log_norm(positive_adata, base=10, copy=True)
    assert np.allclose(base10_result.X[:, 3], natural_num1 / np.log(10))
    assert np.allclose(base10_result.X[:, 4], natural_num2 / np.log(10))

    # Alternative offset.
    offset_result = ep.pp.log_norm(positive_adata, offset=0.5, copy=True)
    assert np.allclose(offset_result.X[:, 3], np.array([1.3609766, 1.7749524, 1.8245492], dtype=np.float32))
    assert np.allclose(offset_result.X[:, 4], np.array([0.4054651, 1.7047482, 1.252763], dtype=np.float32))

    # On the unmodified fixture, an offset of 3 must be accepted for "Numeric2" ...
    try:
        ep.pp.log_norm(adata_to_norm, vars="Numeric2", offset=3, copy=True)
    except ValueError:
        pytest.fail("Unexpected ValueError exception was raised.")

    # ... while the default call and an offset of 1 must be rejected.
    with pytest.raises(ValueError):
        ep.pp.log_norm(adata_to_norm, copy=True)

    with pytest.raises(ValueError):
        ep.pp.log_norm(adata_to_norm, vars="Numeric2", offset=1, copy=True)
709
710
711
def test_norm_record(adata_to_norm):
    """Applied normalizations must be recorded per variable in ``uns["normalization"]``."""
    after_minmax = ep.pp.minmax_norm(adata_to_norm, copy=True)
    assert after_minmax.uns["normalization"] == {"Numeric1": ["minmax"], "Numeric2": ["minmax"]}

    # A second method restricted to one variable is appended to that variable's record only.
    after_maxabs = ep.pp.maxabs_norm(after_minmax, vars=["Numeric1"], copy=True)
    assert after_maxabs.uns["normalization"] == {"Numeric1": ["minmax", "maxabs"], "Numeric2": ["minmax"]}
726
727
728
def test_offset_negative_values():
    """``offset_negative_values`` must turn the sample matrix into the expected non-negative one."""
    raw_values = np.array([[-1, -5, -10], [5, 6, -20]], dtype=np.float32)
    wanted_values = np.array([[19, 15, 10], [25, 26, 0]], dtype=np.float32)

    source_adata = AnnData(X=raw_values)
    wanted_adata = AnnData(X=wanted_values)

    shifted = ep.pp.offset_negative_values(source_adata, copy=True)
    assert np.array_equal(wanted_adata.X, shifted.X)
734
735
736
def test_norm_numerical_only():
    """``log_norm`` on an AnnData built from a purely numerical matrix must yield the expected values."""
    raw_values = np.array([[1, 0, 0], [0, 0, 1]], dtype=np.float32)
    wanted_values = np.array([[0.6931472, 0, 0], [0, 0, 0.6931472]], dtype=np.float32)

    source_adata = AnnData(X=raw_values)
    wanted_adata = AnnData(X=wanted_values)

    normalized = ep.pp.log_norm(source_adata, copy=True)
    assert np.array_equal(wanted_adata.X, normalized.X)