[0473b3]: / tests / test_data.py

Download this file

112 lines (76 with data), 3.8 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
import os
import matplotlib
import pkg_resources
import pytest
from janggu.data import Bioseq
from janggu.data import split_train_test
from janggu.data import subset
from janggu.data import view
from janggu.data.data import _data_props
matplotlib.use('AGG')
def test_dna_props_extraction(tmpdir):
os.environ['JANGGU_OUTPUT'] = tmpdir.strpath
data_path = pkg_resources.resource_filename('janggu', 'resources/')
bed_file = os.path.join(data_path, 'sample.bed')
refgenome = os.path.join(data_path, 'sample_genome.fa')
dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
storage='ndarray',
roi=bed_file,
binsize=200, stepsize=200,
order=1)
props = _data_props(dna)
assert 'dna' in props
assert props['dna']['shape'] == (200, 1, 4)
with pytest.raises(Exception):
_data_props((0,))
def test_split_train_test():
data_path = pkg_resources.resource_filename('janggu', 'resources/')
bed_file = os.path.join(data_path, 'sample.bed')
refgenome = os.path.join(data_path, 'sample_genome.fa')
dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
storage='ndarray',
roi=bed_file,
binsize=200, stepsize=200,
order=1, store_whole_genome=True)
traindna, testdna = split_train_test(dna, holdout_chroms='chr2')
assert len(traindna) == 50
assert len(testdna) == 50
assert len(dna) == len(traindna) + len(testdna)
traindna, testdna = split_train_test([dna, dna], holdout_chroms='chr2')
assert len(traindna[0]) == 50
assert len(testdna[0]) == 50
assert len(dna) == len(traindna[0]) + len(testdna[0])
def test_subset_include_chrname_test():
data_path = pkg_resources.resource_filename('janggu', 'resources/')
bed_file = os.path.join(data_path, 'sample.bed')
refgenome = os.path.join(data_path, 'sample_genome.fa')
dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
storage='ndarray',
roi=bed_file,
binsize=200, stepsize=200,
order=1, store_whole_genome=True)
subdna = subset(dna, include_regions='chr2')
assert len(subdna) == 50
def test_subset_exclude_chrname_test():
data_path = pkg_resources.resource_filename('janggu', 'resources/')
bed_file = os.path.join(data_path, 'sample.bed')
refgenome = os.path.join(data_path, 'sample_genome.fa')
dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
storage='ndarray',
roi=bed_file,
binsize=200, stepsize=200,
order=1, store_whole_genome=True)
subdna = subset(dna, exclude_regions='chr2')
assert len(subdna) == 50
def test_view_bed_test():
data_path = pkg_resources.resource_filename('janggu', 'resources/')
bed_file = os.path.join(data_path, 'sample.bed')
bedsub_file = os.path.join(data_path, 'scored_sample.bed')
refgenome = os.path.join(data_path, 'sample_genome.fa')
dna = Bioseq.create_from_refgenome('dna', refgenome=refgenome,
storage='ndarray',
roi=bed_file,
binsize=200, stepsize=200,
order=1, store_whole_genome=True)
subdna = view(dna, use_regions=bedsub_file)
assert len(subdna) == 4