Diff of /tests/test_coverage.py [000000] .. [d7cf27]

Switch to unified view

a b/tests/test_coverage.py
1
import os
2
from itertools import product
3
4
import matplotlib
5
matplotlib.use('AGG')  # pylint: disable=
6
7
import numpy as np
8
import pandas
9
import pkg_resources
10
import pytest
11
from pybedtools import BedTool
12
13
from janggu.data import Bioseq
14
from janggu.data import Cover
15
from janggu.data import Transpose
16
from janggu.data import GenomicIndexer
17
from janggu.data import plotGenomeTrack
18
from janggu.data import LineTrack
19
from janggu.data import SeqTrack
20
from janggu.data import HeatTrack
21
22
23
def test_channel_last_first():
    """Check that ``Transpose`` swaps axes 1 and 3 of a bigwig-based Cover.

    The same coverage is loaded twice from identical arguments: once plain and
    once wrapped in ``Transpose(..., axis=(0, 3, 2, 1))``. Applying the same
    permutation to a transposed batch must give back the plain batch.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    def load_cover():
        # Both variants are built from exactly the same arguments.
        return Cover.create_from_bigwig(
            'test',
            bigwigfiles=bigwig_path,
            resolution=1,
            binsize=200,
            roi=roi_path,
            store_whole_genome=True,
            storage='ndarray')

    plain = load_cover()
    assert plain.shape == (100, 200, 1, 1)
    assert plain[0].shape == (1, 200, 1, 1)

    swapped = Transpose(load_cover(), axis=(0, 3, 2, 1))
    assert swapped.shape == (100, 1, 1, 200)
    assert swapped[0].shape == (1, 1, 1, 200)

    # The permutation (0, 3, 2, 1) is its own inverse.
    np.testing.assert_equal(plain[0],
                            np.transpose(swapped[0], (0, 3, 2, 1)))
53
54
55
def test_cover_roi_binsize_padding(tmpdir):
    """Check Cover length/shape when ROI lengths are not multiples of binsize.

    The end coordinates of the first two regions of ``sample.bed`` are
    extended by 12 and 111 bases. Covers are then built from bed, bigwig and
    bam input with ``binsize=300``. The test expects 68 windows with the
    default zero padding and 66 windows with ``zero_padding=False``, for every
    combination of ``store_whole_genome`` and storage backend.

    It also checks that ``Cover.create_from_bed`` raises ``ValueError`` when
    binsize is not a multiple of the resolution, and when interval starts do
    not align with the resolution.

    Parameters
    ----------
    tmpdir
        pytest temporary-directory fixture. Unused here; kept so the test
        signature stays unchanged.
    """
    data_path = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(data_path, 'sample_equalsize.bed')
    bed_columns = ['chrom', 'start', 'end', 'name', 'score', 'strand']
    print(pandas.read_csv(bed_file,
                          sep='\t', header=None,
                          names=bed_columns))

    roi_file = os.path.join(data_path, "sample.bed")
    roi = pandas.read_csv(roi_file,
                          sep='\t', header=None,
                          names=bed_columns)

    # Single-step positional assignment. The chained form
    # ``roi.end.iloc[0] += 12`` writes into an intermediate Series and is not
    # guaranteed to update ``roi`` (it is a no-op under pandas Copy-on-Write).
    end_col = roi.columns.get_loc('end')
    roi.iloc[0, end_col] += 12
    roi.iloc[1, end_col] += 111
    print(roi)

    with pytest.raises(ValueError):
        # error due to binsize not being a multiple of resolution
        Cover.create_from_bed('test',
                              bedfiles=bed_file,
                              roi=roi, binsize=30,
                              stepsize=30,
                              store_whole_genome=True,
                              cache=False, resolution=7)

    # Prepare the misaligned ROI outside of ``pytest.raises`` so that only the
    # call under test can satisfy the expected ValueError.
    rroi = roi.copy()
    rroi['start'] += 1
    with pytest.raises(ValueError):
        # interval starts must align with resolution intervals
        Cover.create_from_bed('test',
                              bedfiles=bed_file,
                              roi=rroi, binsize=30,
                              stepsize=30,
                              store_whole_genome=True,
                              cache=False, resolution=30)

    storage_grid = list(product([True, False], ['ndarray', 'sparse']))

    def exhaust(cover):
        # Iterating must work for every configuration; the items are unused.
        for _ in cover:
            pass

    def bed_cover(swg, resolution, **extra):
        # All bed-based covers share these arguments.
        return Cover.create_from_bed('test',
                                     bedfiles=bed_file,
                                     roi=roi, binsize=300,
                                     stepsize=300,
                                     store_whole_genome=swg,
                                     cache=False, resolution=resolution,
                                     **extra)

    # Default zero padding: (resolution, expected bins per window).
    for resolution, nbins in [(10, 30), (3, 100), (100, 3)]:
        for swg, store in storage_grid:
            cov = bed_cover(swg, resolution, storage=store)
            assert len(cov) == 68
            assert cov.shape == (68, nbins, 1, 1)
            exhaust(cov)

    # Default storage backend (no ``storage`` argument given).
    for swg in [True, False]:
        cov = bed_cover(swg, 3)
        assert len(cov) == 68
        assert cov.shape == (68, 100, 1, 1)
        exhaust(cov)

    # Without zero padding two windows fewer are expected.
    for swg, store in storage_grid:
        cov = bed_cover(swg, 100, storage=store, zero_padding=False)
        assert len(cov) == 66
        assert cov.shape == (66, 3, 1, 1)
        exhaust(cov)

    # (extra keyword arguments, expected number of windows)
    padding_cases = [({}, 68), ({'zero_padding': False}, 66)]

    bwfile_ = os.path.join(data_path, "sample.bw")
    for padding, nwindows in padding_cases:
        for swg, store in storage_grid:
            cover = Cover.create_from_bigwig(
                'test',
                bigwigfiles=bwfile_,
                resolution=100,
                binsize=300,
                roi=roi,
                storage=store,
                store_whole_genome=swg,
                **padding)
            assert len(cover) == nwindows
            assert cover.shape == (nwindows, 3, 1, 1)
            exhaust(cover)

    bamfile_ = os.path.join(data_path, "sample.bam")
    for padding, nwindows in padding_cases:
        for swg, store in storage_grid:
            cover = Cover.create_from_bam(
                'test',
                bamfile_,
                resolution=100,
                binsize=300,
                roi=roi,
                stranded=False,
                storage=store,
                store_whole_genome=swg,
                **padding)
            assert len(cover) == nwindows
            assert cover.shape == (nwindows, 3, 1, 1)
            exhaust(cover)
219
220
def test_cover_export_bigwig(tmpdir):
    """Round-trip a bigwig-based Cover through ``export_to_bigwig``.

    For each combination of resolution (1, 50) and ``store_whole_genome``, a
    Cover is loaded from ``sample.bw``, exported into ``tmpdir`` and loaded
    again from the exported file. Both covers must have the same shape and
    the same total signal (``1044.0 / resolution``).
    """
    out_dir = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "sample.bed")
    source_bigwig = os.path.join(resources, "sample.bw")

    for resolution, whole_genome in product([1, 50], [True, False]):
        print('resolution=', resolution)
        print('store_whole_genome', whole_genome)

        # Arguments shared by the original and the re-imported cover.
        common = dict(resolution=resolution,
                      binsize=200,
                      roi=roi_path,
                      store_whole_genome=whole_genome,
                      storage='ndarray')

        original = Cover.create_from_bigwig(
            'test', bigwigfiles=source_bigwig, **common)
        original.export_to_bigwig(output_dir=out_dir)

        exported_path = '{path}/{name}.{sample}.bigwig'.format(
            path=out_dir, name=original.name,
            sample=original.conditions[0])
        reloaded = Cover.create_from_bigwig(
            'test', bigwigfiles=exported_path, **common)

        expected_total = 1044.0 / resolution
        assert original.shape == (100, 200 // resolution, 1, 1)
        assert original.shape == reloaded.shape
        np.testing.assert_allclose(original[:].sum(), expected_total)
        np.testing.assert_allclose(reloaded[:].sum(), expected_total)
256
257
258
def test_bam_genomic_interval_access():
    """Compare integer indexing with genomic-interval indexing for bam covers.

    For every region ``i`` the test checks that ``cover[i]``, repeated
    ``resolution`` times along axis 1, equals the cover accessed through the
    interval object and through ``(chrom, start, end, strand)``. It also
    checks that widening the interval by one base on each side only adds one
    position per side. With a non-zero shift, the overlapping part of the
    shifted interval must agree with the correspondingly trimmed ``cover[i]``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "sample.bed")
    bam_path = os.path.join(resources, "sample.bam")

    for reso, shift, whole_genome in product([1, 50], [0, 1], [True, False]):
        cover = Cover.create_from_bam(
            'test',
            bamfiles=bam_path,
            roi=roi_path,
            flank=0,
            storage='ndarray',
            store_whole_genome=whole_genome,
            resolution=reso)

        for i in range(len(cover)):
            print('storage :', whole_genome, '/ resolution :', reso,
                  '/ shift :', shift)

            interval = cover.gindexer[i]
            upsampled = np.repeat(cover[i], cover.garray.resolution, axis=1)
            np.testing.assert_equal(upsampled, cover[interval])

            chrom = interval.chrom
            start = interval.start
            end = interval.end
            strand = interval.strand

            by_coords = cover[chrom, start, end, strand]
            np.testing.assert_equal(upsampled, by_coords)

            # One extra base on each side must leave the inner part unchanged.
            widened = cover[chrom, start - 1, end + 1, strand]
            np.testing.assert_equal(by_coords, widened[:, 1:-1, :, :])

            if shift == 0:
                continue

            offset = shift * reso
            start += offset
            end += offset
            shifted = cover[chrom, start, end, strand]

            if strand != '-':
                gicov = shifted[:, :-offset, :, :]
                expected = cover[i][:, shift:, :, :]
            else:
                gicov = shifted[:, offset:, :, :]
                expected = cover[i][:, :-shift, :, :]

            # Keep the first base of every resolution-sized bin (2 channels).
            per_bin = gicov.reshape(
                (1, gicov.shape[1] // reso, reso, 2, 1))[:, :, 0, :, :]
            np.testing.assert_equal(expected, per_bin)
304
305
306
def test_bigwig_genomic_interval_access():
    """Compare integer indexing with genomic-interval indexing for bigwig covers.

    For every region ``i`` the test checks that ``cover[i]``, repeated
    ``resolution`` times along axis 1, equals the cover accessed through the
    interval object and through ``(chrom, start, end, strand)``. With a
    non-zero shift, the overlapping part of the shifted interval must agree
    with the correspondingly trimmed ``cover[i]``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    for reso, shift, whole_genome in product([1, 50], [0, 1], [True, False]):
        cover = Cover.create_from_bigwig(
            'test',
            bigwigfiles=bigwig_path,
            roi=roi_path,
            flank=0,
            storage='ndarray',
            store_whole_genome=whole_genome,
            resolution=reso)

        for i in range(len(cover)):
            print('storage :', whole_genome, '/ resolution :', reso,
                  '/ shift :', shift)

            interval = cover.gindexer[i]
            upsampled = np.repeat(cover[i], cover.garray.resolution, axis=1)
            np.testing.assert_equal(upsampled, cover[interval])

            chrom = interval.chrom
            start = interval.start
            end = interval.end
            strand = interval.strand

            np.testing.assert_equal(upsampled,
                                    cover[chrom, start, end, strand])

            if shift == 0:
                continue

            offset = shift * reso
            start += offset
            end += offset
            shifted = cover[chrom, start, end, strand]

            if strand != '-':
                gicov = shifted[:, :-offset, :, :]
                expected = cover[i][:, shift:, :, :]
            else:
                gicov = shifted[:, offset:, :, :]
                expected = cover[i][:, :-shift, :, :]

            # Keep the first base of every resolution-sized bin (1 channel).
            per_bin = gicov.reshape(
                (1, gicov.shape[1] // reso, reso, 1, 1))[:, :, 0, :, :]
            np.testing.assert_equal(expected, per_bin)
350
351
352
def test_bed_genomic_interval_access():
    """Compare integer indexing with genomic-interval indexing for bed covers.

    ``sample.bed`` is used both as the coverage source and as the ROI. For
    every region ``i`` the test checks that ``cover[i]``, repeated
    ``resolution`` times along axis 1, equals the cover accessed through the
    interval object and through ``(chrom, start, end, strand)``. With a
    non-zero shift, the overlapping part of the shifted interval must agree
    with the correspondingly trimmed ``cover[i]``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "sample.bed")
    bed_source = os.path.join(resources, "sample.bed")

    for reso, shift, whole_genome in product([1, 50], [0, 1], [True, False]):
        cover = Cover.create_from_bed(
            'test',
            bedfiles=bed_source,
            roi=roi_path,
            flank=0,
            storage='ndarray',
            store_whole_genome=whole_genome,
            resolution=reso)

        for i in range(len(cover)):
            print('storage :', whole_genome, '/ resolution :', reso,
                  '/ shift :', shift)

            interval = cover.gindexer[i]
            upsampled = np.repeat(cover[i], cover.garray.resolution, axis=1)
            np.testing.assert_equal(upsampled, cover[interval])

            chrom = interval.chrom
            start = interval.start
            end = interval.end
            strand = interval.strand

            np.testing.assert_equal(upsampled,
                                    cover[chrom, start, end, strand])

            if shift == 0:
                continue

            offset = shift * reso
            start += offset
            end += offset
            shifted = cover[chrom, start, end, strand]

            if strand != '-':
                gicov = shifted[:, :-offset, :, :]
                expected = cover[i][:, shift:, :, :]
            else:
                gicov = shifted[:, offset:, :, :]
                expected = cover[i][:, :-shift, :, :]

            # Keep the first base of every resolution-sized bin (1 channel).
            per_bin = gicov.reshape(
                (1, gicov.shape[1] // reso, reso, 1, 1))[:, :, 0, :, :]
            np.testing.assert_equal(expected, per_bin)
397
398
399
def test_bam_inferred_binsize():
    """Build a bam cover without an explicit binsize and check its shape.

    No ``binsize`` is passed; with ``positive.bed`` as ROI the test expects
    25 regions of shape ``(200, 2, 1)``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "positive.bed")
    bam_path = os.path.join(resources, "sample.bam")

    options = dict(bamfiles=bam_path,
                   roi=roi_path,
                   flank=0,
                   storage='ndarray')
    cover = Cover.create_from_bam('test', **options)

    n_regions = 25
    assert len(cover) == n_regions
    assert cover.shape == (n_regions, 200, 2, 1)
413
414
415
def test_bigwig_inferred_binsize():
    """Build a bigwig cover without an explicit binsize and check its shape.

    No ``binsize`` is passed; with ``positive.bed`` as ROI and resolution 1
    the test expects 25 regions of shape ``(200, 1, 1)``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "positive.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    options = dict(bigwigfiles=bigwig_path,
                   resolution=1,
                   roi=roi_path,
                   storage='ndarray')
    cover = Cover.create_from_bigwig('test', **options)

    n_regions = 25
    assert len(cover) == n_regions
    assert cover.shape == (n_regions, 200, 1, 1)
429
430
431
def test_bed_unsync_roi_targets():
    """Check bed covers whose intervals are shifted relative to the ROI.

    ``positive_shift.bed`` is loaded against the ROI ``positive.bed`` with
    several resolution / ``store_whole_genome`` / ``minoverlap`` settings,
    and the resulting length, shape and summed signal are compared with
    fixed expected values.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "positive.bed")
    shifted_path = os.path.join(resources, "positive_shift.bed")

    def load(roi=roi_path, **options):
        # Every cover in this test shares name, bed file and storage backend.
        return Cover.create_from_bed(
            'test',
            bedfiles=shifted_path,
            roi=roi,
            storage='ndarray',
            **options)

    def check_dims(cover, width):
        assert len(cover) == 25
        assert cover.shape == (25, width, 1, 1)

    collapsed = load(resolution=None)
    check_dims(collapsed, 1)
    assert collapsed[:].sum() == 1

    binned = load(resolution=50)
    check_dims(binned, 4)
    assert binned[:].sum() == 1

    binned_genome = load(resolution=50, store_whole_genome=True)
    check_dims(binned_genome, 4)
    assert binned_genome[:].sum() == 1

    per_base = load(resolution=1, store_whole_genome=False)
    check_dims(per_base, 200)
    assert per_base[0].sum() == 49

    per_base_genome = load(resolution=1, store_whole_genome=True)
    check_dims(per_base_genome, 200)
    assert per_base_genome[:].sum() == 49

    overlap_filtered = load(resolution=1, store_whole_genome=True,
                            minoverlap=.5)
    check_dims(overlap_filtered, 200)
    assert overlap_filtered[:].sum() == 0

    # check bed file loading without roi
    without_roi = load(roi=None, resolution=1, store_whole_genome=True,
                       minoverlap=.5)
    without_roi.gindexer = overlap_filtered.gindexer

    # NOTE(review): as in the original, the assertions below re-check the
    # ROI-based cover and never inspect ``without_roi`` -- confirm whether
    # they were meant to target the cover loaded without ROI.
    check_dims(overlap_filtered, 200)
    assert overlap_filtered[:].sum() == 0
514
515
def test_bed_inferred_binsize():
    """Build bed covers without an explicit binsize and check their shapes.

    Each bed file serves as both coverage source and ROI. ``positive.bed``
    is expected to give 25 regions of length 200; ``positive_gap.bed``
    (loaded with ``store_whole_genome=True``) 2 regions of length 50.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')

    positive_path = os.path.join(resources, "positive.bed")
    positive_cover = Cover.create_from_bed(
        'test',
        bedfiles=positive_path,
        roi=positive_path,
        resolution=1,
        storage='ndarray')
    assert len(positive_cover) == 25
    assert positive_cover.shape == (25, 200, 1, 1)

    gap_path = os.path.join(resources, "positive_gap.bed")
    gap_cover = Cover.create_from_bed(
        'test',
        bedfiles=gap_path,
        roi=gap_path,
        resolution=1,
        store_whole_genome=True,
        storage='ndarray')
    assert len(gap_cover) == 2
    assert gap_cover.shape == (2, 50, 1, 1)
539
540
def test_bed_overreaching_ends_whole_genome():
    """Check flanked bed covers with ``store_whole_genome=True``.

    ``bed_test.bed`` is loaded with ``binsize=2`` and ``flank=20`` for both
    the 'ndarray' and 'sparse' backends. Each of the 9 regions is expected
    to have length ``2 + 2*20`` and a summed signal of 18.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_path = os.path.join(resources, "bed_test.bed")

    binsize, flank, n_regions = 2, 20, 9

    for backend in ('ndarray', 'sparse'):
        print(backend)
        cover = Cover.create_from_bed(
            'test',
            bedfiles=bed_path,
            roi=bed_path,
            binsize=binsize,
            flank=flank,
            resolution=1,
            store_whole_genome=True,
            storage=backend)

        assert len(cover) == n_regions
        assert cover.shape == (n_regions, binsize + 2 * flank, 1, 1)
        np.testing.assert_equal(cover[0].sum(), 18)
        np.testing.assert_equal(cover[:].sum(), n_regions * 18)
559
560
561
def test_bed_overreaching_ends_part_genome():
    """Check flanked bed covers with ``store_whole_genome=False``.

    ``bed_test.bed`` is loaded with ``binsize=2`` and ``flank=2`` for both
    the 'ndarray' and 'sparse' backends. The 9 regions are expected to have
    length ``2 + 2*2``, the first region a summed signal of 4 and the whole
    cover ``6*7 + 8``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_path = os.path.join(resources, "bed_test.bed")

    binsize, flank, n_regions = 2, 2, 9

    for backend in ('ndarray', 'sparse'):
        print(backend)
        cover = Cover.create_from_bed(
            'test',
            bedfiles=bed_path,
            roi=bed_path,
            binsize=binsize,
            flank=flank,
            resolution=1,
            store_whole_genome=False,
            storage=backend)

        assert len(cover) == n_regions
        assert cover.shape == (n_regions, binsize + 2 * flank, 1, 1)
        np.testing.assert_equal(cover[0].sum(), 4)
        np.testing.assert_equal(cover[:].sum(), 6 * 7 + 8)
580
581
582
def test_bed_store_whole_genome_option():
    """Check that ``store_whole_genome`` does not change a bed cover.

    ``positive_shift.bed`` is used as coverage source and ROI. Both storage
    modes are expected to give a single region of length 49 filled with ones.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_path = os.path.join(resources, "positive_shift.bed")

    def load(name, whole_genome):
        return Cover.create_from_bed(
            name,
            bedfiles=bed_path,
            roi=bed_path,
            store_whole_genome=whole_genome,
            storage='ndarray')

    genome_wide = load('test', True)
    roi_only = load('test2', False)

    assert len(genome_wide) == 1
    assert len(roi_only) == len(genome_wide)
    assert genome_wide.shape == (1, 49, 1, 1)
    assert genome_wide.shape == roi_only.shape

    all_ones = np.ones(genome_wide.shape)
    np.testing.assert_equal(genome_wide[:], all_ones)
    np.testing.assert_equal(roi_only[:], all_ones)
605
606
607
def test_bed_store_whole_genome_option_dataframe(tmpdir):
    """Check bed covers for ROIs given as DataFrame, BedTool and interval list.

    For each ROI representation a cover with ``store_whole_genome=True`` and
    a cached one with ``store_whole_genome=False`` are built from
    ``sample.bed``. Both are expected to hold 100 regions of length 200
    filled with ones. ``JANGGU_OUTPUT`` is pointed at ``tmpdir`` first.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_path = os.path.join(resources, "sample.bed")

    def build_pair(roi, **whole_genome_extra):
        # Creation order (whole genome first, then ROI-only) is kept as-is.
        whole = Cover.create_from_bed(
            'test',
            bedfiles=bed_path,
            roi=roi,
            binsize=200, stepsize=200,
            store_whole_genome=True,
            storage='ndarray',
            **whole_genome_extra)
        partial = Cover.create_from_bed(
            'test2',
            bedfiles=bed_path,
            roi=roi,
            binsize=200, stepsize=200,
            store_whole_genome=False,
            cache=True,
            storage='ndarray')
        return whole, partial

    def check_pair(whole, partial):
        assert len(whole) == 100
        assert len(partial) == len(whole)
        assert whole.shape == (100, 200, 1, 1)
        assert whole.shape == partial.shape
        np.testing.assert_equal(whole[:], partial[:])
        np.testing.assert_equal(whole[:], np.ones(whole.shape))

    # as pd.dataframe
    frame_roi = pandas.read_csv(bed_path,
                                sep='\t', header=None,
                                names=['chrom', 'start', 'end',
                                       'name', 'score', 'strand'])
    print(frame_roi.head())
    whole, partial = build_pair(frame_roi, cache=False)
    print(whole.gindexer[0])
    check_pair(whole, partial)

    # as bedtool
    bedtool_roi = BedTool(bed_path)
    print(bedtool_roi)
    check_pair(*build_pair(bedtool_roi))

    # as interval list
    interval_roi = [iv for iv in BedTool(bed_path)]
    print(interval_roi)
    check_pair(*build_pair(interval_roi))
697
698
699
def test_bigwig_store_whole_genome_option():
    """Check that ``store_whole_genome`` does not change a bigwig cover.

    Three covers are built from ``sample.bw``: whole genome, ROI-only, and
    ROI-only with ``nan_to_num=False``. The first two must be identical, and
    the first and third are expected to sum to 1044.0.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    def load(name, whole_genome, **extra):
        return Cover.create_from_bigwig(
            name,
            bigwigfiles=bigwig_path,
            roi=roi_path,
            store_whole_genome=whole_genome,
            binsize=200, stepsize=200,
            storage='ndarray',
            **extra)

    genome_wide = load('test', True)
    roi_only = load('test2', False)
    roi_only_raw = load('test3', False, nan_to_num=False)

    assert len(genome_wide) == 100
    assert len(roi_only) == len(genome_wide)
    assert genome_wide.shape == (100, 200, 1, 1)
    assert genome_wide.shape == roi_only.shape
    np.testing.assert_equal(genome_wide[:], roi_only[:])
    assert genome_wide[:].sum() == 1044.0
    assert roi_only_raw[:].sum() == 1044.0
734
735
736
def test_bigwig_store_whole_genome_option_dataframe(tmpdir):
    """Check bigwig covers for ROIs given as DataFrame, BedTool and interval list.

    For each ROI representation three covers are built from ``sample.bw``:
    whole genome, cached ROI-only, and ROI-only with ``nan_to_num=False``.
    The first two must be identical, and the first and third are expected to
    sum to 1044.0. ``JANGGU_OUTPUT`` is pointed at ``tmpdir`` first.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_path = os.path.join(resources, "sample.bed")
    bigwig_path = os.path.join(resources, "sample.bw")

    def load(name, roi, whole_genome, **extra):
        return Cover.create_from_bigwig(
            name,
            bigwigfiles=bigwig_path,
            roi=roi,
            store_whole_genome=whole_genome,
            binsize=200, stepsize=200,
            storage='ndarray',
            **extra)

    def check(roi):
        # Creation order matches the original: test, test2 (cached), test3.
        genome_wide = load('test', roi, True)
        roi_only = load('test2', roi, False, cache=True)
        roi_only_raw = load('test3', roi, False, nan_to_num=False)

        assert len(genome_wide) == 100
        assert len(roi_only) == len(genome_wide)
        assert genome_wide.shape == (100, 200, 1, 1)
        assert genome_wide.shape == roi_only.shape
        np.testing.assert_equal(genome_wide[:], roi_only[:])
        assert genome_wide[:].sum() == 1044.0
        assert roi_only_raw[:].sum() == 1044.0

    # as dataframe
    frame_roi = pandas.read_csv(
        bed_path, sep='\t', header=None,
        names=['chrom', 'start', 'end', 'name', 'score', 'strand'])
    check(frame_roi)

    # as bedtool
    bedtool_roi = BedTool(bed_path)
    check(bedtool_roi)

    # as list of intervals (taken from the same BedTool object)
    interval_roi = [iv for iv in bedtool_roi]
    check(interval_roi)
847
848
849
850
def test_bam_store_whole_genome_option_dataframe(tmpdir):
    """Compare whole-genome and ROI-only BAM coverage for three ROI containers.

    The same regions are handed to ``Cover.create_from_bam`` as a pandas
    DataFrame, as a ``BedTool`` and as a list of intervals.  For each
    container, the dataset built with ``store_whole_genome=True`` must equal
    the one built with ``store_whole_genome=False`` and ``cache=True``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(resources, "sample.bed")
    bamfile_ = os.path.join(resources, "sample.bam")

    def check_roi(regions):
        # Everything except the dataset name and the storage mode is shared.
        common = dict(bamfiles=bamfile_, roi=regions,
                      binsize=200, stepsize=200, storage='ndarray')
        genome_wide = Cover.create_from_bam(
            'test', store_whole_genome=True, **common)
        roi_only = Cover.create_from_bam(
            'test2', store_whole_genome=False, cache=True, **common)

        assert len(genome_wide) == 100
        assert len(roi_only) == len(genome_wide)
        assert genome_wide.shape == (100, 200, 2, 1)
        assert genome_wide.shape == roi_only.shape
        np.testing.assert_equal(genome_wide[:], roi_only[:])
        assert genome_wide[:].sum() == 29.

    # as dataframe
    check_roi(pandas.read_csv(
        bed_file, sep='\t', header=None,
        names=['chrom', 'start', 'end', 'name', 'score', 'strand']))

    # as bedtool
    bedtool = BedTool(bed_file)
    check_roi(bedtool)

    # as list of intervals (materialised only after the BedTool run above)
    check_roi([interval for interval in bedtool])
934
935
936
def test_bam_store_whole_genome_option():
    """Whole-genome and ROI-only BAM coverage must agree for a BED-file ROI."""
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(resources, "sample.bed")
    bamfile_ = os.path.join(resources, "sample.bam")

    # Arguments shared by both datasets; only name and storage mode differ.
    common = dict(bamfiles=bamfile_, roi=bed_file,
                  binsize=200, stepsize=200, storage='ndarray')
    genome_wide = Cover.create_from_bam(
        'test', store_whole_genome=True, **common)
    roi_only = Cover.create_from_bam(
        'test2', store_whole_genome=False, **common)

    assert len(genome_wide) == 100
    assert len(roi_only) == len(genome_wide)
    assert genome_wide.shape == (100, 200, 2, 1)
    assert genome_wide.shape == roi_only.shape
    np.testing.assert_equal(genome_wide[:], roi_only[:])
    assert genome_wide[:].sum() == 29.
962
963
964
def test_cover_from_bam_sanity():
    """Exercise basic use and argument validation of ``Cover.create_from_bam``.

    Covers integer indexing, rejection of a float index, whole-genome
    storage without an ROI, a series of calls that are each expected to
    raise, and two keyword arguments expected to emit ``FutureWarning``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(resources, "sample.bed")
    bamfile_ = os.path.join(resources, "sample.bam")

    cover = Cover.create_from_bam(
        'test',
        bamfiles=bamfile_,
        roi=bed_file,
        binsize=200, stepsize=200,
        flank=0,
        storage='ndarray')
    cover[0]

    # a float is not accepted as an index
    with pytest.raises(IndexError):
        cover[1.2]

    whole_genome = Cover.create_from_bam(
        'test',
        bamfiles=bamfile_,
        storage='ndarray',
        store_whole_genome=True)

    assert len(cover.gindexer) == len(cover.garray.handle['data'])
    assert len(whole_genome.garray.handle) != len(cover.garray.handle['data'])

    # (name, keyword arguments) of calls that must raise; roi and storage
    # are identical for all of them and are added in the loop below.
    failing_calls = [
        # name must be a string
        (1.2, dict(bamfiles=bamfile_, binsize=1, stepsize=1)),
        # empty bamfile path (flank is negative as well)
        ('test', dict(bamfiles="", binsize=1, stepsize=1, flank=-1)),
        # empty bamfile list (flank is negative as well)
        ('test', dict(bamfiles=[], binsize=1, stepsize=1, flank=-1)),
        # negative flank
        ('test', dict(bamfiles=bamfile_, binsize=1, stepsize=1, flank=-1)),
        # negative stepsize
        ('test', dict(bamfiles=bamfile_, binsize=1, stepsize=-1, flank=0)),
        # negative binsize
        ('test', dict(bamfiles=bamfile_, binsize=-1, stepsize=1, flank=0)),
    ]
    for name, kwargs in failing_calls:
        with pytest.raises(Exception):
            Cover.create_from_bam(
                name, roi=bed_file, storage='ndarray', **kwargs)

    # keyword arguments that are expected to trigger a FutureWarning
    for deprecated in (dict(overwrite=True), dict(datatags=['asdf'])):
        with pytest.warns(FutureWarning):
            Cover.create_from_bam(
                'test',
                bamfiles=bamfile_,
                roi=bed_file,
                binsize=200, stepsize=200,
                storage='ndarray',
                **deprecated)
1060
1061
1062
def test_cover_from_bigwig_sanity():
    """Exercise basic use and argument validation of ``Cover.create_from_bigwig``.

    Checks dataset sizes with and without whole-genome storage, interval
    indexing on a dataset created without an ROI, a series of calls that
    are each expected to raise, and two keyword arguments expected to emit
    ``FutureWarning``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(resources, "sample.bed")
    bwfile_ = os.path.join(resources, "sample.bw")

    # Overlapping windows at 50 bp resolution, stored per region.
    windowed = dict(bigwigfiles=bwfile_, roi=bed_file,
                    binsize=200, stepsize=50,
                    resolution=50, flank=0, storage='ndarray')

    cover = Cover.create_from_bigwig('test', **windowed)
    cover[0]
    assert len(cover.gindexer) == 394
    assert len(cover.garray.handle['data']) == 394

    # Same windows, but backed by whole-genome storage.
    cover = Cover.create_from_bigwig(
        'test', store_whole_genome=True, **windowed)
    cover[0]
    assert len(cover.gindexer) == 394
    assert len(cover.garray.handle) == 2

    # No ROI at all: only interval-based access is expected to work.
    no_roi = Cover.create_from_bigwig(
        'test',
        bigwigfiles=bwfile_,
        resolution=7,
        storage='ndarray',
        store_whole_genome=True)

    assert len(no_roi.garray.handle) == 2
    assert no_roi['chr1', 100, 200].shape == (1, 100, 1, 1)

    with pytest.raises(Exception):
        no_roi.shape
    with pytest.raises(Exception):
        no_roi[0]

    # (name, keyword arguments) of calls that must raise; the bigwig file,
    # roi and storage are identical for all of them.
    failing_calls = [
        # name must be a string
        (1.2, dict(binsize=1, stepsize=1)),
        # negative flank
        ('test', dict(binsize=1, stepsize=1, flank=-1)),
        # negative stepsize
        ('test', dict(binsize=1, stepsize=-1, flank=0)),
        # negative binsize
        ('test', dict(binsize=-1, stepsize=1, flank=0)),
    ]
    for name, kwargs in failing_calls:
        with pytest.raises(Exception):
            Cover.create_from_bigwig(
                name,
                bigwigfiles=bwfile_,
                roi=bed_file,
                storage='ndarray',
                **kwargs)

    # keyword arguments that are expected to trigger a FutureWarning
    for deprecated in (dict(flank=0, overwrite=True),
                       dict(datatags=['asdf'])):
        with pytest.warns(FutureWarning):
            Cover.create_from_bigwig(
                'test',
                bigwigfiles=bwfile_,
                roi=bed_file,
                binsize=200, stepsize=200,
                storage='ndarray',
                **deprecated)
1157
1158
1159
def test_cover_from_bed_sanity():
    """Exercise basic use and argument validation of ``Cover.create_from_bed``.

    Builds two valid datasets, then runs a series of calls that are each
    expected to raise, and finally two keyword arguments expected to emit
    ``FutureWarning``.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bed_file = os.path.join(resources, 'sample.bed')
    scored_bed = os.path.join(resources, "scored_sample.bed")
    csvfile = os.path.join(resources, 'ctcf_sample.csv')

    cover = Cover.create_from_bed(
        'test',
        bedfiles=scored_bed,
        roi=bed_file,
        binsize=200, stepsize=50,
        resolution=50,
        flank=0,
        storage='ndarray')
    cover[0]

    # same call relying on the default flank
    Cover.create_from_bed(
        'test',
        bedfiles=scored_bed,
        roi=bed_file,
        binsize=200, stepsize=50,
        resolution=50,
        storage='ndarray')

    # (name, keyword arguments) of calls that must raise; roi and storage
    # are identical for all of them.
    failing_calls = [
        # name must be a string
        (1.2, dict(bedfiles=scored_bed, binsize=1, stepsize=1)),
        # negative flank
        ('test', dict(bedfiles=scored_bed, binsize=1, stepsize=1, flank=-1)),
        # negative stepsize
        ('test', dict(bedfiles=scored_bed, binsize=1, stepsize=-1, flank=0)),
        # negative binsize
        ('test', dict(bedfiles=scored_bed, binsize=-1, stepsize=1, flank=0)),
        # must be a bed file
        ('test', dict(bedfiles=csvfile, binsize=1, stepsize=1)),
    ]
    for name, kwargs in failing_calls:
        with pytest.raises(Exception):
            Cover.create_from_bed(
                name, roi=bed_file, storage='ndarray', **kwargs)

    # keyword arguments that are expected to trigger a FutureWarning
    for deprecated in (dict(flank=0, overwrite=True),
                       dict(datatags=['asdf'])):
        with pytest.warns(FutureWarning):
            Cover.create_from_bed(
                'test',
                bedfiles=scored_bed,
                roi=bed_file,
                binsize=200, stepsize=200,
                storage='ndarray',
                **deprecated)
1240
1241
1242
def test_cover_bam_unstranded():
    """Check read positions of BAM coverage built with ``stranded=False``.

    With strandedness disabled the dataset has a single strand channel
    (shape ``(100, 200, 1, 1)``); the counts and positions of the non-zero
    entries are verified for regions 4, 13, 52 and 96.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bamfile_ = os.path.join(resources, "sample.bam")
    gsfile_ = os.path.join(resources, 'sample.chrom.sizes')
    bed_file = os.path.join(resources, "sample.bed")

    # chromosome name -> chromosome length
    gsize = pandas.read_csv(gsfile_, sep='\t', names=['chr', 'length'],
                            index_col='chr').to_dict()['length']

    cover = Cover.create_from_bam(
        "yeast_I_II_III.bam",
        bamfiles=bamfile_,
        roi=bed_file,
        binsize=200, stepsize=200,
        genomesize=gsize,
        stranded=False)

    np.testing.assert_equal(len(cover), 100)
    np.testing.assert_equal(cover.shape, (100, 200, 1, 1))

    # region 4: a single count, located at position 179
    hits = np.where(cover[4] == 1)
    np.testing.assert_equal(cover[4].sum(), 1.)
    np.testing.assert_equal(hits[1][0], 179)  # pos

    # (region index, total count, positions holding a count of one)
    expectations = [
        (13, 2., [162, 178]),
        (52, 2., [9, 89]),
        (96, 1., [25]),
    ]
    for region, total, positions in expectations:
        hits = np.where(cover[region] == 1)
        np.testing.assert_equal(cover[region].sum(), total)
        np.testing.assert_equal(hits[1], np.asarray(positions))  # pos
1288
1289
1290
def test_cover_bam_paired_5pend():
    """Count paired-end reads at their 5' ends (``pairedend='5pend'``).

    According to the original author's notes, sample2.bam contains
    paired-end examples, unmapped examples, an unmapped mate and a low
    quality example.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bamfile_ = os.path.join(resources, "sample2.bam")

    cover = Cover.create_from_bam(
        "yeast_I_II_III.bam",
        bamfiles=bamfile_,
        stranded=False,
        pairedend='5pend',
        min_mapq=30,
        store_whole_genome=True)

    ref = cover.garray.handle['ref']
    assert ref.sum() == 4, ref

    # author's note: one read starts at index 6 (tlen 39) ...
    assert ref[6, 0, 0] == 1
    # ... and another read maps to index 24
    assert ref[24, 0, 0] == 1
1310
1311
1312
def test_cover_bam_paired_midpoint():
    """Count paired-end reads at the fragment midpoint (``pairedend='midpoint'``).

    According to the original author's notes, sample2.bam contains
    paired-end examples, unmapped examples, an unmapped mate and a low
    quality example.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bamfile_ = os.path.join(resources, "sample2.bam")

    cover = Cover.create_from_bam(
        "yeast_I_II_III.bam",
        bamfiles=bamfile_,
        stranded=False,
        pairedend='midpoint',
        min_mapq=30,
        store_whole_genome=True)

    ref = cover.garray.handle['ref']
    assert ref.sum() == 2, ref
    print(ref)

    # author's note: the read starts at index 6 and tlen is 39, so the
    # midpoint is expected half a template length further downstream
    midpoint = 6 + 39 // 2
    assert ref[midpoint, 0, 0] == 1
    # another fragment is counted at index 34
    assert ref[34, 0, 0] == 1
1333
1334
1335
def test_cover_bam_list(tmpdir):
    """Smoke test: ``bamfiles`` given as a list with a condition and TPM normalizer.

    Only verifies that dataset creation does not raise; nothing is asserted
    about the resulting values.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bamfile_ = os.path.join(resources, "sample.bam")
    bed_file = os.path.join(resources, "sample.bed")

    # NOTE(review): the storage name is never forwarded to create_from_bam,
    # so the same default-storage call runs three times. Presumably
    # ``storage=...`` (plus ``cache=True``, as in the sibling tests) was
    # intended -- confirm before changing.
    for _unused_store in ['ndarray', 'hdf5', 'sparse']:
        # base pair binsize
        cover = Cover.create_from_bam(
            "yeast_I_II_III.bam",
            bamfiles=[bamfile_],
            roi=bed_file,
            conditions=['condition2'],
            normalizer='tpm',
            binsize=200, stepsize=200)
1351
1352
def test_cover_bam(tmpdir):
    """Check stranded BAM coverage at base-pair resolution for every storage.

    For each of the 'ndarray', 'hdf5' and 'sparse' back ends the dataset
    must have shape ``(100, 200, 2, 1)`` and the non-zero entries of regions
    4, 13, 52 and 96 must sit at the expected positions and strand channels.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bamfile_ = os.path.join(resources, "sample.bam")
    gsfile_ = os.path.join(resources, 'sample.chrom.sizes')
    bed_file = os.path.join(resources, "sample.bed")

    # chromosome name -> chromosome length
    gsize = pandas.read_csv(gsfile_, sep='\t', names=['chr', 'length'],
                            index_col='chr').to_dict()['length']

    # (region index, total count, positions, strand channels)
    expectations = [
        (13, 2., [162, 178], [0, 0]),
        (52, 2., [9, 89], [0, 0]),
        (96, 1., [25], [1]),
    ]

    for store in ['ndarray', 'hdf5', 'sparse']:
        # base pair binsize
        cover = Cover.create_from_bam(
            "yeast_I_II_III.bam",
            bamfiles=bamfile_,
            roi=bed_file,
            binsize=200, stepsize=200,
            genomesize=gsize,
            storage=store, cache=True)

        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 200, 2, 1))

        # region 4: a single count at position 179 in strand channel 1
        hits = np.where(cover[4] == 1)
        np.testing.assert_equal(cover[4].sum(), 1.)
        np.testing.assert_equal(hits[1][0], 179)  # pos
        np.testing.assert_equal(hits[2][0], 1)  # strand

        for region, total, positions, strands in expectations:
            hits = np.where(cover[region] == 1)
            np.testing.assert_equal(cover[region].sum(), total)
            np.testing.assert_equal(hits[1], np.asarray(positions))  # pos
            np.testing.assert_equal(hits[2], np.asarray(strands))  # strand
1404
1405
1406
def test_load_bam_resolution10(tmpdir):
    """Check stranded BAM coverage at ``resolution=10``.

    Runs over every combination of storage back end and
    ``store_whole_genome`` setting.  The 200 bp regions collapse to 20 bins
    (shape ``(100, 20, 2, 1)``) and the non-zero bins of regions 4, 13, 52
    and 96 must sit at the expected bin indices and strand channels.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bamfile_ = os.path.join(resources, "sample.bam")
    gsfile_ = os.path.join(resources, 'sample.chrom.sizes')
    bed_file = os.path.join(resources, "sample.bed")

    # chromosome name -> chromosome length
    gsize = pandas.read_csv(gsfile_, sep='\t', names=['chr', 'length'],
                            index_col='chr').to_dict()['length']

    # (region index, total count, bin indices, strand channels)
    expectations = [
        (13, 2., [16, 17], [0, 0]),
        (52, 2., [0, 8], [0, 0]),
        (96, 1., [2], [1]),
    ]

    storages = ['ndarray', 'hdf5', 'sparse']
    for store, store_genome in product(storages, [True, False]):
        # base pair binsize
        cover = Cover.create_from_bam(
            "yeast_I_II_III.bam",
            bamfiles=bamfile_,
            roi=bed_file,
            binsize=200, stepsize=200,
            genomesize=gsize,
            resolution=10,
            store_whole_genome=store_genome,
            storage=store, cache=True)

        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 20, 2, 1))

        # region 4: a single count in bin 17 of strand channel 1
        hits = np.where(cover[4] == 1)
        np.testing.assert_equal(cover[4].sum(), 1.)
        np.testing.assert_equal(hits[1][0], 17)  # pos
        np.testing.assert_equal(hits[2][0], 1)  # strand

        for region, total, bins, strands in expectations:
            hits = np.where(cover[region] == 1)
            np.testing.assert_equal(cover[region].sum(), total)
            np.testing.assert_equal(hits[1], np.asarray(bins))  # pos
            np.testing.assert_equal(hits[2], np.asarray(strands))  # strand
1460
1461
1462
def test_load_bam_resolutionNone(tmpdir):
    """``resolution=None`` must yield one bin per region for BAM coverage.

    For every storage back end, the per-region totals of the
    ``resolution=None`` dataset (shape ``(100, 1, 2, 1)``) are compared with
    the totals of the same data loaded at ``resolution=1``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bamfile_ = os.path.join(resources, "sample.bam")
    gsfile_ = os.path.join(resources, 'sample.chrom.sizes')
    bed_file = os.path.join(resources, "sample.bed")

    # chromosome name -> chromosome length
    gsize = pandas.read_csv(gsfile_, sep='\t', names=['chr', 'length'],
                            index_col='chr').to_dict()['length']

    for store in ['ndarray', 'hdf5', 'sparse']:
        # Both datasets share everything but the resolution.
        common = dict(bamfiles=bamfile_, roi=bed_file,
                      binsize=200, stepsize=200,
                      genomesize=gsize,
                      storage=store, cache=True)

        # base pair binsize
        per_base = Cover.create_from_bam(
            "yeast_I_II_III.bam", resolution=1, **common)
        cover = Cover.create_from_bam(
            "yeast_I_II_III.bam", resolution=None, **common)

        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 1, 2, 1))

        np.testing.assert_equal(per_base[:].sum(axis=1),
                                cover[:].sum(axis=1))
1498
1499
1500
def test_load_cover_bigwig_default(tmpdir):
    """Check bigwig coverage with default resolution.

    Each storage back end is loaded with ``store_whole_genome=True``;
    afterwards the default storage is loaded once more with
    ``store_whole_genome=False``.  In all cases the shape and the signal
    totals of regions 4 and 52 are verified.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bwfile_ = os.path.join(resources, "sample.bw")
    gsfile_ = os.path.join(resources, 'sample.chrom.sizes')
    bed_file = os.path.join(resources, "sample.bed")

    # chromosome name -> chromosome length
    sizes = pandas.read_csv(gsfile_, sep='\t', names=['chr', 'length'],
                            index_col='chr')
    gsize = sizes.to_dict()['length']

    def verify(dataset):
        np.testing.assert_equal(len(dataset), 100)
        np.testing.assert_equal(dataset.shape, (100, 200, 1, 1))

        # region 4 sums to 36, region 52 to twice that
        np.testing.assert_allclose(dataset[4].sum(), 36.)
        np.testing.assert_allclose(dataset[52].sum(), 2*36.)

    for store in ['ndarray', 'hdf5', 'sparse']:
        # base pair binsize
        print(store)
        verify(Cover.create_from_bigwig(
            "cov",
            bigwigfiles=bwfile_,
            roi=bed_file,
            binsize=200, stepsize=200,
            genomesize=gsize,
            storage=store,
            store_whole_genome=True,
            cache=True))

    # default storage, regions stored individually
    verify(Cover.create_from_bigwig(
        "cov",
        bigwigfiles=bwfile_,
        roi=bed_file,
        binsize=200, stepsize=200,
        genomesize=gsize,
        store_whole_genome=False, cache=True))
1546
1547
def test_load_cover_bigwig_resolution1(tmpdir):
    """Check the per-base bigwig signal of regions 4 and 52 at ``resolution=1``.

    For every storage back end the complete 200 bp profile of both regions
    is compared against the expected pattern of 36 bp wide blocks of ones.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bwfile_ = os.path.join(resources, "sample.bw")
    bed_file = os.path.join(resources, "sample.bed")

    # region 4: a single block of ones covering positions 143..178
    profile_4 = np.zeros(200)
    profile_4[143:179] = 1.

    # region 52: two blocks of ones, positions 10..45 and 90..125
    profile_52 = np.zeros(200)
    profile_52[10:46] = 1.
    profile_52[90:126] = 1.

    for store in ['ndarray', 'hdf5', 'sparse']:
        # base pair binsize
        print(store)
        cover = Cover.create_from_bigwig(
            "cov",
            bigwigfiles=bwfile_,
            roi=bed_file,
            binsize=200, stepsize=200,
            resolution=1,
            storage=store, cache=True)

        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 200, 1, 1))

        # one 36 bp block in region 4
        np.testing.assert_allclose(cover[4].sum(), 36)
        np.testing.assert_equal(cover[4][0, :, 0, 0], profile_4)

        # and two 36 bp blocks in region 52
        np.testing.assert_allclose(cover[52].sum(), 2*36)
        np.testing.assert_equal(cover[52][0, :, 0, 0], profile_52)
1601
1602
1603
1604
def test_load_cover_bigwig_resolutionNone(tmpdir):
    """``resolution=None`` with ``collapser='sum'`` must yield one bin per region.

    For every storage back end, the per-region totals of the collapsed
    dataset (shape ``(100, 1, 1, 1)``) are compared with the totals of the
    same bigwig signal loaded at ``resolution=1``.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    bwfile_ = os.path.join(resources, "sample.bw")
    bed_file = os.path.join(resources, "sample.bed")

    for store in ['ndarray', 'hdf5', 'sparse']:
        # base pair binsize
        print(store)

        # Both datasets share everything but resolution and collapser.
        common = dict(bigwigfiles=bwfile_, roi=bed_file,
                      binsize=200, stepsize=200,
                      storage=store, cache=True)

        per_base = Cover.create_from_bigwig(
            "cov", resolution=1, **common)
        cover = Cover.create_from_bigwig(
            "cov", resolution=None, collapser='sum', **common)

        np.testing.assert_equal(len(cover), 100)
        np.testing.assert_equal(cover.shape, (100, 1, 1, 1))

        np.testing.assert_equal(per_base[:].sum(axis=1),
                                cover[:].sum(axis=1))
1635
1636
1637
def test_load_cover_bed_binary(tmpdir):
    """Check BED coverage in ``mode='binary'`` for several resolutions.

    For every storage back end four configurations are loaded (resolution
    200, resolution 50, ``resolution=None`` and whole-genome storage at
    resolution 200).  Region 0 must be empty and region 4 must hold a one
    in each of its bins.
    """
    os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
    bed_file = pkg_resources.resource_filename('janggu', 'resources/sample.bed')
    score_file = pkg_resources.resource_filename('janggu',
                                                 'resources/scored_sample.bed')

    # (dataset name, configuration-specific kwargs, bins per region)
    configurations = [
        ("cov", dict(resolution=200), 1),
        ("cov50", dict(resolution=50, collapser='max'), 4),
        ("cov50_firstdim", dict(resolution=None, collapser='max'), 1),
        ("cov50_firstdim", dict(store_whole_genome=True,
                                resolution=200,
                                collapser='max'), 1),
    ]

    for store in ['ndarray', 'hdf5', 'sparse']:
        print('store', store)
        for name, specific, nbins in configurations:
            cover = Cover.create_from_bed(
                name,
                bedfiles=score_file,
                roi=bed_file,
                binsize=200, stepsize=200,
                storage=store,
                mode='binary', cache=True,
                **specific)
            np.testing.assert_equal(len(cover), 100)
            np.testing.assert_equal(cover.shape, (100, nbins, 1, 1))
            np.testing.assert_equal(cover[0].sum(), 0)
            # every bin of region 4 is flagged
            np.testing.assert_equal(cover[4].sum(), nbins*1)
1700
1701
1702
def test_load_cover_bed_scored():
    """Load a scored BED file with ``mode='score'`` for both storage backends.

    For each of the 'ndarray' and 'sparse' storages three datasets are
    created (resolution 200 with ``store_whole_genome=True``, resolution 50,
    and ``resolution=None`` with a 'max' collapser) and their length, shape
    and selected per-region sums are compared with fixed expected values.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    scored_path = pkg_resources.resource_filename(
        'janggu', 'resources/scored_sample.bed')

    def expect(dataset, shape, region_sums):
        # Every dataset in this test is expected to hold 100 regions.
        np.testing.assert_equal(len(dataset), 100)
        np.testing.assert_equal(dataset.shape, shape)
        for region, total in region_sums:
            np.testing.assert_equal(dataset[region].sum(), total)

    for backend in ('ndarray', 'sparse'):
        shared = dict(bedfiles=scored_path, roi=roi_path,
                      binsize=200, stepsize=200,
                      storage=backend, mode='score')

        dataset = Cover.create_from_bed(
            "cov", resolution=200, store_whole_genome=True, **shared)
        expect(dataset, (100, 1, 1, 1),
               [(0, 0), (4, 5), (50, 0), (54, 4)])

        dataset = Cover.create_from_bed("cov50", resolution=50, **shared)
        # Four 50-bp bins per region, each expected to carry the score 5.
        expect(dataset, (100, 4, 1, 1), [(0, 0), (4, 4 * 5)])

        dataset = Cover.create_from_bed(
            "cov50", resolution=None, collapser='max', **shared)
        expect(dataset, (100, 1, 1, 1), [(0, 0), (4, 5)])
1753
1754
1755
def test_load_cover_bed_categorical():
    """Load a scored BED file with ``mode='categorical'``.

    Passing two BED files must raise ``ValueError``.  With a single file,
    three datasets per storage backend are created and their length, shape,
    per-region sums and (where checked) the values of region 4 are compared
    with fixed expected values.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    scored_path = pkg_resources.resource_filename(
        'janggu', 'resources/scored_sample.bed')

    with pytest.raises(ValueError):
        # Only one bed file allowed.
        Cover.create_from_bed(
            "cov",
            bedfiles=[scored_path] * 2,
            roi=roi_path,
            binsize=200, stepsize=200,
            resolution=200,
            mode='categorical')

    for store in ['ndarray', 'sparse']:
        print(store)
        # (name, extra keyword arguments, expected shape,
        #  expected sum of region 4, expected values of region 4 or None)
        variants = (
            ("cov", dict(resolution=200),
             (100, 1, 1, 4), 1, [[[[0., 0., 0., 1.]]]]),
            ("cov50", dict(resolution=50),
             (100, 4, 1, 4), 4 * 1, None),
            ("cov50", dict(resolution=None, collapser='max'),
             (100, 1, 1, 4), 1, [[[[0., 0., 0., 1.]]]]),
        )
        for name, extra, shape, region4_sum, region4_values in variants:
            dataset = Cover.create_from_bed(
                name,
                bedfiles=scored_path,
                roi=roi_path,
                binsize=200, stepsize=200,
                storage=store,
                mode='categorical',
                **extra)

            np.testing.assert_equal(len(dataset), 100)
            np.testing.assert_equal(dataset.shape, shape)
            np.testing.assert_equal(dataset[0].sum(), 0)
            np.testing.assert_equal(dataset[4].sum(), region4_sum)
            if region4_values is not None:
                np.testing.assert_equal(dataset[4], region4_values)
1816
1817
1818
def test_load_cover_bed_score_category():
    """Load a scored BED file with ``mode='score_category'``.

    Passing two BED files must raise ``ValueError``.  With a single file,
    four datasets per storage backend are created (one of them with the
    conditions given explicitly); each must report the conditions
    ``['1', '2', '4', '5']`` and match fixed expectations for length, shape,
    per-region sums and (where checked) the values of region 4.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    scored_path = pkg_resources.resource_filename(
        'janggu', 'resources/scored_sample.bed')

    with pytest.raises(ValueError):
        # Only one bed file allowed.
        Cover.create_from_bed(
            "cov",
            bedfiles=[scored_path] * 2,
            roi=roi_path,
            binsize=200, stepsize=200,
            resolution=200,
            mode='score_category')

    for store in ['ndarray', 'sparse']:
        print(store)
        # Built inside the loop so each call receives fresh list objects.
        # (name, extra keyword arguments, expected shape,
        #  expected sum of region 4, expected values of region 4 or None)
        variants = (
            ("cov", dict(resolution=200),
             (100, 1, 1, 4), 1, [[[[0., 0., 0., 1.]]]]),
            ("cov", dict(resolution=200, conditions=['1', '2', '4', '5']),
             (100, 1, 1, 4), 1, [[[[0., 0., 0., 1.]]]]),
            ("cov50", dict(resolution=50),
             (100, 4, 1, 4), 4 * 1, None),
            ("cov50", dict(resolution=None, collapser='max'),
             (100, 1, 1, 4), 1, [[[[0., 0., 0., 1.]]]]),
        )
        for name, extra, shape, region4_sum, region4_values in variants:
            dataset = Cover.create_from_bed(
                name,
                bedfiles=scored_path,
                roi=roi_path,
                binsize=200, stepsize=200,
                storage=store,
                mode='score_category',
                **extra)

            assert dataset.conditions == ['1', '2', '4', '5']
            np.testing.assert_equal(len(dataset), 100)
            np.testing.assert_equal(dataset.shape, shape)
            np.testing.assert_equal(dataset[0].sum(), 0)
            np.testing.assert_equal(dataset[4].sum(), region4_sum)
            if region4_values is not None:
                np.testing.assert_equal(dataset[4], region4_values)
1899
1900
1901
def test_load_cover_bedgraph():
    """Load a bedgraph file through ``create_from_bed`` with ``mode='bedgraph'``.

    For each storage backend three datasets are created (resolution 200,
    resolution 50, and ``resolution=None`` with a 'max' collapser) and their
    length, shape, per-region sums and (where checked) the values of region 4
    are compared with fixed expected values.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    bedgraph_path = pkg_resources.resource_filename(
        'janggu', 'resources/sample.bedgraph')

    # (name, extra keyword arguments, expected shape,
    #  expected sum of region 4, expected values of region 4 or None)
    variants = (
        ("cov", dict(resolution=200),
         (100, 1, 1, 1), .5, [[[[.5]]]]),
        ("cov50", dict(resolution=50),
         (100, 4, 1, 1), 4*.5, None),
        ("cov50", dict(resolution=None, collapser='max'),
         (100, 1, 1, 1), .5, [[[[.5]]]]),
    )

    for store in ['ndarray', 'sparse']:
        print(store)
        for name, extra, shape, region4_sum, region4_values in variants:
            dataset = Cover.create_from_bed(
                name,
                bedfiles=bedgraph_path,
                roi=roi_path,
                binsize=200, stepsize=200,
                storage=store,
                mode='bedgraph',
                **extra)

            np.testing.assert_equal(len(dataset), 100)
            np.testing.assert_equal(dataset.shape, shape)
            np.testing.assert_equal(dataset[0].sum(), 0)
            np.testing.assert_equal(dataset[4].sum(), region4_sum)
            if region4_values is not None:
                np.testing.assert_equal(dataset[4], region4_values)
1952
1953
1954
def test_load_cover_bed_name_category():
    """Load a scored BED file with ``mode='name_category'``.

    Passing two BED files must raise ``ValueError``.  With a single file,
    four datasets per storage backend are created (one of them with the
    conditions given explicitly); each must report the conditions
    ``['state1', 'state2']`` and match fixed expectations for length, shape,
    per-region sums and (where checked) the values of regions 3 and 4.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    scored_path = pkg_resources.resource_filename(
        'janggu', 'resources/scored_sample.bed')

    with pytest.raises(ValueError):
        # Only one bed file allowed.
        Cover.create_from_bed(
            "cov",
            bedfiles=[scored_path] * 2,
            roi=roi_path,
            binsize=200, stepsize=200,
            resolution=200,
            mode='name_category')

    for store in ['ndarray', 'sparse']:
        print(store)
        # Built inside the loop so each call receives fresh list objects.
        # (name, extra keyword arguments, expected shape,
        #  expected sum of region 4, whether regions 3 and 4 are compared
        #  value by value)
        variants = (
            ("cov", dict(resolution=200),
             (100, 1, 1, 2), 1, True),
            ("cov", dict(resolution=200, conditions=['state1', 'state2']),
             (100, 1, 1, 2), 1, True),
            ("cov50", dict(resolution=50),
             (100, 4, 1, 2), 4 * 1, False),
            ("cov50", dict(resolution=None, collapser='max'),
             (100, 1, 1, 2), 1, True),
        )
        for name, extra, shape, region4_sum, compare_values in variants:
            dataset = Cover.create_from_bed(
                name,
                bedfiles=scored_path,
                roi=roi_path,
                binsize=200, stepsize=200,
                storage=store,
                mode='name_category',
                **extra)

            assert dataset.conditions == ['state1', 'state2']
            np.testing.assert_equal(len(dataset), 100)
            np.testing.assert_equal(dataset.shape, shape)
            np.testing.assert_equal(dataset[0].sum(), 0)
            np.testing.assert_equal(dataset[4].sum(), region4_sum)
            if compare_values:
                # Region 3 is expected in the first condition, region 4
                # in the second.
                np.testing.assert_equal(dataset[3], [[[[1., 0.]]]])
                np.testing.assert_equal(dataset[4], [[[[0., 1.]]]])
2038
2039
2040
def test_filter_by_region():
    """Check ``GenomicIndexer.filter_by_region`` on a 2-bp tiling.

    The indexer is built from ``bed_test.bed`` with ``binsize=2`` and
    ``stepsize=2`` and is expected to hold 9 intervals, the first being
    ('chr1', 0, 2) and the last ('chr1', 16, 18).  Several query windows on
    'chr1' are then filtered and the returned intervals compared with the
    expected ones.
    """
    roi_file = pkg_resources.resource_filename('janggu',
                                               'resources/bed_test.bed')

    roi = GenomicIndexer.create_from_file(regions=roi_file,
                                          binsize=2, stepsize=2)
    np.testing.assert_equal(len(roi), 9)

    def coords(interval):
        # Reduce an interval to the fields the assertions compare.
        return (interval.chrom, interval.start, interval.end)

    np.testing.assert_equal(coords(roi[0]), ('chr1', 0, 2))
    np.testing.assert_equal(coords(roi[-1]), ('chr1', 16, 18))

    # A window spanning every interval must return all of them.  The length
    # is checked explicitly: without it the element-wise loop below would
    # pass vacuously if the filter returned an empty or truncated result.
    everything = roi.filter_by_region(include='chr1', start=0, end=18)
    np.testing.assert_equal(len(everything), len(roi))
    for i in range(len(everything)):
        np.testing.assert_equal(everything[i], roi[i])

    # (query start, query end) -> expected (start, end) of returned intervals
    cases = (
        ((5, 10), [(4, 6), (6, 8), (8, 10)]),
        ((5, 11), [(4, 6), (6, 8), (8, 10), (10, 12)]),
        ((6, 10), [(6, 8), (8, 10)]),
        ((6, 11), [(6, 8), (8, 10), (10, 12)]),
        ((20, 30), []),
    )
    for (query_start, query_end), expected in cases:
        hits = roi.filter_by_region(include='chr1',
                                    start=query_start, end=query_end)
        np.testing.assert_equal(len(hits), len(expected))
        for pos, (start, end) in enumerate(expected):
            np.testing.assert_equal(coords(hits[pos]), ('chr1', start, end))
2082
2083
2084
def test_plotgenometracks_bigwigs():
    """Exercise ``plotGenomeTrack`` with coverage loaded from bigwig files.

    Two datasets are created (one bigwig file, and the same file four times)
    and plotted over chr1:16000-18000 as raw datasets, as a ``LineTrack``
    and with explicit heatmap plot types.  Invalid ``plottypes`` arguments
    must raise.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    bigwig_path = pkg_resources.resource_filename('janggu',
                                                  'resources/sample.bw')

    shared = dict(roi=roi_path, binsize=200, stepsize=200, resolution=50)
    single = Cover.create_from_bigwig('coverage2',
                                      bigwigfiles=bigwig_path, **shared)
    multi = Cover.create_from_bigwig('morecoverage',
                                     bigwigfiles=[bigwig_path] * 4, **shared)

    window = ('chr1', 16000, 18000)

    # line plots
    plotGenomeTrack([single, multi], *window)
    plotGenomeTrack(single, *window)

    plotGenomeTrack(LineTrack(single), *window)

    plotGenomeTrack([single, multi], *window, plottypes=['heatmap'] * 2)

    with pytest.raises(AssertionError):
        # differing number of plottypes and coverage objects raises an error
        plotGenomeTrack(single, *window, plottypes=['heatmap'] * 2)

    for dataset in (single, multi):
        with pytest.raises(ValueError):
            # coverage not a sequence
            plotGenomeTrack(dataset, *window, plottypes=['seqplot'])
2120
2121
2122
def test_plotgenometracks_bams():
    """Exercise ``plotGenomeTrack`` with coverage loaded from a BAM file.

    The dataset is plotted over chr1:16000-18000 as a raw dataset, as two
    heatmaps selected via ``plottypes``, and wrapped in ``HeatTrack`` and
    ``LineTrack`` objects.  The test only checks that no call raises.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    bam_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bam')

    coverage = Cover.create_from_bam('coverage',
                                     bamfiles=bam_path,
                                     roi=roi_path,
                                     binsize=200,
                                     stepsize=200,
                                     resolution=50)

    window = ('chr1', 16000, 18000)

    # line plots
    plotGenomeTrack(coverage, *window)

    plotGenomeTrack([coverage, coverage], *window,
                    plottypes=['heatmap'] * 2)

    plotGenomeTrack([HeatTrack(coverage), HeatTrack(coverage)], *window)
    plotGenomeTrack([LineTrack(coverage)], *window)
2142
2143
2144
def test_plotgenometracks_seqplot():
    """Exercise ``plotGenomeTrack`` with a sequence dataset.

    A ``Bioseq`` dataset is created from the sample reference genome and
    plotted over chr1:16000-18000, once via ``plottypes=['seqplot']`` and
    once wrapped in a ``SeqTrack``.  The test only checks that no call
    raises.
    """
    roi_path = pkg_resources.resource_filename('janggu',
                                               'resources/sample.bed')
    genome_path = pkg_resources.resource_filename(
        'janggu', 'resources/sample_genome.fa')

    sequence = Bioseq.create_from_refgenome('dna', refgenome=genome_path,
                                            storage='ndarray',
                                            roi=roi_path, order=1,
                                            store_whole_genome=True)

    window = ('chr1', 16000, 18000)

    plotGenomeTrack(sequence, *window, plottypes=['seqplot'])

    plotGenomeTrack(SeqTrack(sequence), *window)
2159
2160
def test_padding_value_nan():
    """Check NaN padding of ``Cover.create_from_array``.

    A zero array with one row per variant in ``pseudo_snps.vcf`` is wrapped
    with ``padding_value=np.nan``.  For both ``store_whole_genome`` settings
    the dataset shape and the values returned for pseudo1:650-670 (first
    condition) are compared with the same expected pattern of zeros and NaNs.
    """
    vcf_path = pkg_resources.resource_filename('janggu',
                                               'resources/pseudo_snps.vcf')
    indexer = GenomicIndexer.create_from_file(vcf_path, None, None)
    values = np.zeros((len(indexer), 3))

    nan = np.nan
    # 20 positions: zeros where the array supplies a value, NaN elsewhere.
    expected = np.array([nan] * 6
                        + [0., nan, 0., nan, 0., 0., 0.]
                        + [nan] * 7)

    for whole_genome in (True, False):
        snpcov = Cover.create_from_array('snps', values,
                                         indexer,
                                         store_whole_genome=whole_genome,
                                         padding_value=np.nan)

        assert snpcov.shape == (6, 1, 1, 3)

        np.testing.assert_equal(snpcov['pseudo1', 650, 670][0, :, 0, 0],
                                expected)
2188
2189
2190
def test_bedgraph():
    """Compare bedgraph loading with and without ``store_whole_genome``.

    Both datasets are created from ``positive.bedgraph`` over the regions in
    ``positive.bed``; they must have 25 regions, shape (25, 200, 1, 1) and
    identical content.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, "positive.bed")
    bedgraph = os.path.join(resources, "positive.bedgraph")

    shared = dict(bedfiles=bedgraph, roi=regions, mode='bedgraph')
    whole = Cover.create_from_bed('test', store_whole_genome=True, **shared)
    partial = Cover.create_from_bed('test2', store_whole_genome=False,
                                    **shared)

    assert len(whole) == 25
    assert len(partial) == len(whole)
    assert whole.shape == (25, 200, 1, 1)
    assert whole.shape == partial.shape
    np.testing.assert_equal(whole[:], partial[:])
2213
2214
def test_fulltilebigwig():
    """Compare bigwig loading with and without ``store_whole_genome``.

    Uses ``sample_fulltile.bed`` as region of interest.  Three scenarios are
    run (default binning, ``binsize=200``, and ``binsize=200`` with
    ``flank=150``); in each, both datasets must have the expected shape and
    identical content.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, "sample_fulltile.bed")
    bigwig = os.path.join(resources, "sample.bw")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (2, 30000, 1, 1)),
        ({'binsize': 200}, (300, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (300, 500, 1, 1)),
    )

    for extra, shape in scenarios:
        whole = Cover.create_from_bigwig(
            'test', bigwigfiles=bigwig, roi=regions,
            store_whole_genome=True, **extra)
        partial = Cover.create_from_bigwig(
            'test2', bigwigfiles=bigwig, roi=regions,
            store_whole_genome=False, **extra)

        assert len(whole) == shape[0]
        assert len(partial) == len(whole)
        assert whole.shape == shape
        assert whole.shape == partial.shape
        np.testing.assert_equal(whole[:], partial[:])
2276
2277
def test_fulltilebigwig2():
    """Compare bigwig loading with and without ``store_whole_genome``.

    Uses ``sample_fulltile2.bed`` as region of interest.  Three scenarios
    are run (default binning, ``binsize=200``, and ``binsize=200`` with
    ``flank=150``); in each, both datasets must have the expected shape and
    identical content.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, "sample_fulltile2.bed")
    bigwig = os.path.join(resources, "sample.bw")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (3, 30000, 1, 1)),
        ({'binsize': 200}, (450, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (450, 500, 1, 1)),
    )

    for extra, shape in scenarios:
        whole = Cover.create_from_bigwig(
            'test', bigwigfiles=bigwig, roi=regions,
            store_whole_genome=True, **extra)
        partial = Cover.create_from_bigwig(
            'test2', bigwigfiles=bigwig, roi=regions,
            store_whole_genome=False, **extra)

        assert len(whole) == shape[0]
        assert len(partial) == len(whole)
        assert whole.shape == shape
        assert whole.shape == partial.shape
        np.testing.assert_equal(whole[:], partial[:])
2339
2340
2341
def test_fulltilebam():
    """Compare unstranded BAM loading with and without ``store_whole_genome``.

    Uses ``sample_fulltile.bed`` as region of interest.  Three scenarios are
    run (default binning, ``binsize=200``, and ``binsize=200`` with
    ``flank=150``); in each, both datasets must have the expected shape and
    identical content.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, "sample_fulltile.bed")
    bam = os.path.join(resources, "sample.bam")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (2, 30000, 1, 1)),
        ({'binsize': 200}, (300, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (300, 500, 1, 1)),
    )

    for extra, shape in scenarios:
        whole = Cover.create_from_bam(
            'test', bamfiles=bam, roi=regions, stranded=False,
            store_whole_genome=True, **extra)
        partial = Cover.create_from_bam(
            'test2', bamfiles=bam, roi=regions, stranded=False,
            store_whole_genome=False, **extra)

        assert len(whole) == shape[0]
        assert len(partial) == len(whole)
        assert whole.shape == shape
        assert whole.shape == partial.shape
        np.testing.assert_equal(whole[:], partial[:])
2409
2410
def test_fulltilebam2():
    """Compare unstranded BAM loading with and without ``store_whole_genome``.

    Uses ``sample_fulltile2.bed`` as region of interest.  Three scenarios
    are run (default binning, ``binsize=200``, and ``binsize=200`` with
    ``flank=150``); in each, both datasets must have the expected shape and
    identical content.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, "sample_fulltile2.bed")
    bam = os.path.join(resources, "sample.bam")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (3, 30000, 1, 1)),
        ({'binsize': 200}, (450, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (450, 500, 1, 1)),
    )

    for extra, shape in scenarios:
        whole = Cover.create_from_bam(
            'test', bamfiles=bam, roi=regions, stranded=False,
            store_whole_genome=True, **extra)
        partial = Cover.create_from_bam(
            'test2', bamfiles=bam, roi=regions, stranded=False,
            store_whole_genome=False, **extra)

        assert len(whole) == shape[0]
        assert len(partial) == len(whole)
        assert whole.shape == shape
        assert whole.shape == partial.shape
        np.testing.assert_equal(whole[:], partial[:])
2478
2479
2480
def test_fulltilebed():
    """Compare BED loading with and without ``store_whole_genome``.

    Uses ``sample_fulltile.bed`` as region of interest and ``sample.bed`` as
    data.  Three scenarios are run (default binning, ``binsize=200``, and
    ``binsize=200`` with ``flank=150``); in each, both datasets must have
    the expected shape and identical content.
    """
    resources = pkg_resources.resource_filename('janggu', 'resources/')
    regions = os.path.join(resources, "sample_fulltile.bed")
    bed = os.path.join(resources, "sample.bed")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (2, 30000, 1, 1)),
        ({'binsize': 200}, (300, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (300, 500, 1, 1)),
    )

    for extra, shape in scenarios:
        whole = Cover.create_from_bed(
            'test', bedfiles=bed, roi=regions,
            store_whole_genome=True, **extra)
        partial = Cover.create_from_bed(
            'test2', bedfiles=bed, roi=regions,
            store_whole_genome=False, **extra)

        assert len(whole) == shape[0]
        assert len(partial) == len(whole)
        assert whole.shape == shape
        assert whole.shape == partial.shape
        np.testing.assert_equal(whole[:], partial[:])
2542
2543
def test_fulltilebed2():
    """Compare whole-genome and ROI-only storage for ``sample_fulltile2.bed``.

    For each scenario a ``Cover`` is built twice from ``sample.bed`` via
    ``Cover.create_from_bed`` -- once with ``store_whole_genome=True`` and
    once with ``store_whole_genome=False`` -- and the two datasets must agree
    in length, shape and content.  The scenarios differ only in the optional
    ``binsize`` / ``flank`` keyword arguments forwarded to the constructor.
    """
    import pkg_resources
    import os
    from janggu.data import Cover

    resources = pkg_resources.resource_filename('janggu', 'resources/')
    roi_path = os.path.join(resources, "sample_fulltile2.bed")
    bed_path = os.path.join(resources, "sample.bed")

    # (extra keyword arguments, expected dataset shape)
    scenarios = (
        ({}, (3, 30000, 1, 1)),
        ({'binsize': 200}, (450, 200, 1, 1)),
        ({'binsize': 200, 'flank': 150}, (450, 500, 1, 1)),
    )

    for extra_kwargs, expected_shape in scenarios:
        # Build the whole-genome variant first, then the ROI-only variant,
        # mirroring the construction order of the hand-written version.
        whole_genome = Cover.create_from_bed(
            'test',
            bedfiles=bed_path,
            roi=roi_path,
            store_whole_genome=True,
            **extra_kwargs)
        roi_only = Cover.create_from_bed(
            'test2',
            bedfiles=bed_path,
            roi=roi_path,
            store_whole_genome=False,
            **extra_kwargs)

        # The first axis of the expected shape is also the expected length.
        assert len(whole_genome) == expected_shape[0]
        assert len(roi_only) == len(whole_genome)
        assert whole_genome.shape == expected_shape
        assert whole_genome.shape == roi_only.shape
        np.testing.assert_equal(whole_genome[:], roi_only[:])
2605