# Tests for the sequence encoders (tests/testthat/test-preprocess.R).
context("preprocess")

test_that("Check preprocessing", {

  # Skip unless the Python module `tensorflow` can be found via reticulate.
  # (An earlier variant used testthat::skip_if_not_installed("tensorflow").)
  testthat::skip_if_not(reticulate::py_module_available("tensorflow"))

  # Inputs shared by the encoder calls below.
  nucleotides <- c("a", "c", "g", "t")
  window_starts <- c(1, 3)
  int_tokens <- c(1, 0, 5, 1, 3, 4, 3, 1, 4, 1, 2)
  raw_text <- "ACTaaTNTNaZ"

  # Expected per-position encodings over the vocabulary (a, c, g, t).
  enc_a <- c(1, 0, 0, 0)
  enc_g <- c(0, 0, 1, 0)
  enc_t <- c(0, 0, 0, 1)
  enc_zero <- c(0, 0, 0, 0)
  enc_equal <- rep(0.25, 4)

  # Index of the largest entry in each row; which.max() reports the first
  # index when several entries share the maximum.
  argmax_rows <- function(m) apply(m, 1, which.max)

  # ---- seq_encoding_lm(): integer input, ambiguous_nuc = "equal" ----
  lm_int <- seq_encoding_lm(
    sequence = int_tokens,
    maxlen = 5,
    vocabulary = nucleotides,
    start_ind = window_starts,
    ambiguous_nuc = "equal",
    target_len = 1,
    output_format = "target_right"
  )
  inputs <- lm_int[[1]]
  targets <- lm_int[[2]]

  # Window starting at 1 holds tokens 1, 0, 5, 1, 3; the following token is 4.
  expect_equivalent(inputs[1, 1, ], enc_a)
  expect_equivalent(inputs[1, 2, ], enc_zero)
  expect_equivalent(inputs[1, 3, ], enc_equal)
  expect_equivalent(inputs[1, 4, ], enc_a)
  expect_equivalent(inputs[1, 5, ], enc_g)
  expect_equivalent(targets[1, ], enc_t)

  # Window starting at 3 holds tokens 5, 1, 3, 4, 3; the following token is 1.
  expect_equivalent(inputs[2, 1, ], enc_equal)
  expect_equivalent(inputs[2, 2, ], enc_a)
  expect_equivalent(inputs[2, 3, ], enc_g)
  expect_equivalent(inputs[2, 4, ], enc_t)
  expect_equivalent(inputs[2, 5, ], enc_g)
  expect_equivalent(targets[2, ], enc_a)

  # ---- seq_encoding_lm(): character input, ambiguous_nuc = "zero" ----
  lm_chr <- seq_encoding_lm(
    sequence = NULL,
    maxlen = 5,
    vocabulary = nucleotides,
    start_ind = window_starts,
    ambiguous_nuc = "zero",
    target_len = 1,
    output_format = "target_right",
    char_sequence = raw_text
  )
  inputs <- lm_chr[[1]]
  targets <- lm_chr[[2]]

  # Windows are "ACTaa" and "TaaTN"; both are followed by "T".
  # NOTE(review): the expected 1 at the trailing "N" presumably comes from
  # which.max() on a row without a unique maximum - confirm against the encoder.
  expect_equivalent(argmax_rows(inputs[1, , ]), c(1, 2, 4, 1, 1))
  expect_equivalent(argmax_rows(inputs[2, , ]), c(4, 1, 1, 4, 1))
  expect_equivalent(targets[1, ], enc_t)
  expect_equivalent(targets[2, ], enc_t)

  # ---- seq_encoding_label(): integer input, ambiguous_nuc = "equal" ----
  label_int <- seq_encoding_label(
    sequence = int_tokens,
    maxlen = 5,
    vocabulary = nucleotides,
    start_ind = window_starts,
    ambiguous_nuc = "equal"
  )

  # Same windows as in the first case, without a target array.
  expect_equivalent(label_int[1, 1, ], enc_a)
  expect_equivalent(label_int[1, 2, ], enc_zero)
  expect_equivalent(label_int[1, 3, ], enc_equal)
  expect_equivalent(label_int[1, 4, ], enc_a)
  expect_equivalent(label_int[1, 5, ], enc_g)

  expect_equivalent(label_int[2, 1, ], enc_equal)
  expect_equivalent(label_int[2, 2, ], enc_a)
  expect_equivalent(label_int[2, 3, ], enc_g)
  expect_equivalent(label_int[2, 4, ], enc_t)
  expect_equivalent(label_int[2, 5, ], enc_g)

  # ---- seq_encoding_label(): character input, ambiguous_nuc = "equal" ----
  label_chr <- seq_encoding_label(
    maxlen = 5,
    vocabulary = nucleotides,
    start_ind = window_starts,
    ambiguous_nuc = "equal",
    char_sequence = raw_text
  )

  # The trailing "N" of the second window is expected to be all 0.25, so its
  # argmax is 1 (first index on ties); the explicit check below pins the row.
  expect_equivalent(argmax_rows(label_chr[1, , ]), c(1, 2, 4, 1, 1))
  expect_equivalent(argmax_rows(label_chr[2, , ]), c(4, 1, 1, 4, 1))
  expect_equivalent(label_chr[2, 5, ], enc_equal)

})