--- a +++ b/deepvariant/pileup_image_native_test.cc @@ -0,0 +1,260 @@ +/* + * Copyright 2024 Google LLC. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its + * contributors may be used to endorse or promote products derived from this + * software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include "deepvariant/pileup_image_native.h" + +#include <memory> +#include <string> +#include <vector> + +#include "deepvariant/protos/deepvariant.pb.h" +#include "deepvariant/testing_utils.h" +#include <gmock/gmock-generated-matchers.h> +#include <gmock/gmock-matchers.h> +#include <gmock/gmock-more-matchers.h> + +#include "tensorflow/core/platform/test.h" +#include "absl/log/log.h" +#include "third_party/nucleus/protos/reads.pb.h" + +namespace learning { +namespace genomics { +namespace deepvariant { + +using Read = nucleus::genomics::v1::Read; + +struct BuildPileupForOneSampleTestData { + std::string test_name; + DeepVariantCall dv_call; + std::string ref_bases; + std::vector<Read> reads; + int image_start_pos; + std::vector<std::string> alt_alleles; + std::vector<ImageRow> expected_image_rows; +}; + +class BuildPileupForOneSampleTest + : public testing::TestWithParam<BuildPileupForOneSampleTestData> {}; + +TEST_P(BuildPileupForOneSampleTest, BuildPileupForOneSampleTestCases) { + const BuildPileupForOneSampleTestData& param = GetParam(); + PileupImageOptions pileup_image_options = MakeDefaultPileupImageOptions( + /*width=*/ 11, /*height=*/ 4, /*ref_band_height=*/ 1); + auto channels = pileup_image_options.mutable_channels(); + channels->Add("read_base"); + channels->Add("base_quality"); + channels->Add("mapping_quality"); + pileup_image_options.set_num_channels(channels->size()); + + std::vector<const Read*> read_ptrs; + for (const Read& read : param.reads) { + read_ptrs.push_back(&read); + } + + std::vector<std::unique_ptr<ImageRow>> image_rows = + PileupImageEncoderNative(pileup_image_options).BuildPileupForOneSample( + param.dv_call, + param.ref_bases, + read_ptrs, + param.image_start_pos, + param.alt_alleles, + SampleOptions(), + 0.0, nullptr, {}); + // Convert to a vector of ImageRow to make it easier to compare with the + // expected image rows. + std::vector<ImageRow> image_rows_vec; + for (const std::unique_ptr<ImageRow>& image_row : image_rows) { + image_rows_vec.push_back(*image_row); + } + + // Output image rows in human readable format. This is useful for debugging + // failed tests. + for (const ImageRow& image_row : image_rows_vec) { + for (int channel = 0; channel < image_row.num_channels; ++channel) { + std::string channel_row; + for (int pos = 0; pos < image_row.width; ++pos) { + channel_row += ", " + + std::to_string(image_row.channel_data[channel][pos]); + } + LOG(INFO) << channel_row; + } + } + + EXPECT_THAT(image_rows_vec, + testing::UnorderedElementsAreArray(param.expected_image_rows)); +}; + +// For all test cases the pileup image options are the same. Width is 11, height +// is 4, ref_band_height is 1. +// Tests compare the image rows in the same order as in the expected_image_rows. +// expected image rows are created using MakeImageRow, the first row is the +// reference band, the rest are the main part of the image. +INSTANTIATE_TEST_SUITE_P( + BuildPileupForOneSampleTests, BuildPileupForOneSampleTest, + testing::ValuesIn(std::vector<BuildPileupForOneSampleTestData>({ + { + .test_name = "simple_case", + .dv_call = MakeDeepVariantCall(MakeVariant("A", {"G"}, 5)), + .ref_bases = "ACGTACTCCCA", + .reads = { + MakeRead("chr1", 0, "ACGTGCTCCCA", {"11M"}, "read_2"), + MakeRead("chr1", 0, "ACGTGCTCCCA", {"11M"}, "read_3"), + }, + .image_start_pos = 0, + .alt_alleles = {"G"}, + .expected_image_rows = { + *MakeImageRow({ + {250, 30, 180, 100, 250, 30, 100, 30, 30, 30, 250}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}}, + 11, 3).get(), + *MakeImageRow({ + {250, 30, 180, 100, 180, 30, 100, 30, 30, 30, 250}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}}, + 11, 3).get(), + *MakeImageRow({ + {250, 30, 180, 100, 180, 30, 100, 30, 30, 30, 250}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 190}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}}, + 11, 3).get(), + *MakeImageRow({ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + 11, 3).get(), + } + } + , { + .test_name = "no_reads", + .dv_call = MakeDeepVariantCall(MakeVariant("A", {"G"}, 5)), + .ref_bases = "ACGTACTCCCA", + .reads = {}, + .image_start_pos = 0, + .alt_alleles = {"G"}, + .expected_image_rows = { + *MakeImageRow({ + {250, 30, 180, 100, 250, 30, 100, 30, 30, 30, 250}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}}, + 11, 3).get(), + *MakeImageRow({ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + 11, 3).get(), + *MakeImageRow({ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + 11, 3).get(), + *MakeImageRow({ + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}}, + 11, 3).get(), + } + } + , { + .test_name = "numer_of_reads_greater_than_max_reads", + .dv_call = MakeDeepVariantCall(MakeVariant("A", {"AGG"}, 5)), + .ref_bases = "ACGTACTCCCA", + .reads = { + MakeRead("chr1", 0, "ACGTAGGCTCCCA", {"5M", "2I", "5M"}, "read_2"), + MakeRead("chr1", 0, "ACGTAGGCTCCCA", {"5M", "2I", "5M"}, "read_3"), + MakeRead("chr1", 0, "ACGTAGGGCTCCCA", {"5M", "3I", "5M"}, "read_4"), + MakeRead("chr1", 0, "ACGTAGGGCTCCCA", {"5M", "3I", "5M"}, "read_5"), + }, + .image_start_pos = 0, + .alt_alleles = {"G"}, + .expected_image_rows = { + *MakeImageRow({ + {250, 30, 180, 100, 250, 30, 100, 30, 30, 30, 250}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}}, + 11, 3).get(), + *MakeImageRow({ + {250, 30, 180, 100, 0, 30, 100, 30, 30, 30, 0}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 0}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 0}}, + 11, 3).get(), + *MakeImageRow({ + {250, 30, 180, 100, 0, 30, 100, 30, 30, 30, 0}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 0}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 0}}, + 11, 3).get(), + *MakeImageRow({ + {250, 30, 180, 100, 0, 30, 100, 30, 30, 30, 0}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 0}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 0}}, + 11, 3).get(), + } + } + , { + .test_name = "image_creation_with_haplotype_sorting", + .dv_call = MakeDeepVariantCall(MakeVariant("A", {"AGG", "AGGG"}, 5)), + .ref_bases = "ACGTACTCCCA", + .reads = { + MakeRead("chr1", 0, "ACGTAGGCTCCCA", {"5M", "2I", "5M"}, "read_2", 2), + MakeRead("chr1", 0, "TCGTAGGCTCCCA", {"5M", "2I", "5M"}, "read_3", 0), + MakeRead("chr1", 0, "CCGTAGGGCTCCCA", {"5M", "3I", "5M"}, "read_4", + 1), + }, + .image_start_pos = 0, + .alt_alleles = {"G"}, + .expected_image_rows = { + *MakeImageRow({ + {250, 30, 180, 100, 250, 30, 100, 30, 30, 30, 250}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 254}}, + 11, 3).get(), + *MakeImageRow({ + {250, 30, 180, 100, 0, 30, 100, 30, 30, 30, 0}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 0}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 0}}, + 11, 3).get(), + *MakeImageRow({ + {100, 30, 180, 100, 0, 30, 100, 30, 30, 30, 0}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 0}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 0}}, + 11, 3).get(), + *MakeImageRow({ + {30, 30, 180, 100, 0, 30, 100, 30, 30, 30, 0}, + {190, 190, 190, 190, 190, 190, 190, 190, 190, 190, 0}, + {254, 254, 254, 254, 254, 254, 254, 254, 254, 254, 0}}, + 11, 3).get(), + } + } +}))); + +} // namespace deepvariant +} // namespace genomics +} // namespace learning