Diff of /scripts/count_ashm_freq.R [000000] .. [fcb5af]

Switch to unified view

a b/scripts/count_ashm_freq.R
1
library(GAMBLR)
2
library(tidyverse)
3
4
setwd("/projects/rmorin/projects/gambl-repos/gambl-kdreval/")
5
6
pathologies <- c("DLBCL", "FL", "CLL", "BL")
7
8
9
metadata <- get_gambl_metadata(seq_type_filter = "genome") %>%
10
    filter(pathology %in% pathologies)
11
12
generate_table <- function(
13
    this_meta,
14
    projection = "grch37"
15
){
16
    maf <- get_ssm_by_samples(
17
        these_samples_metadata = this_meta,
18
        projection = projection,
19
        subset_from_merge = TRUE
20
    )
21
22
    if(projection == "grch37"){
23
        regions <- grch37_ashm_regions %>%
24
            mutate(
25
                chr_name = gsub("chr", "", chr_name)
26
            )
27
    }else{
28
        regions <- hg38_ashm_regions
29
    }
30
31
    annotated <- cool_overlaps(
32
        maf,
33
        regions %>%
34
            mutate(
35
                name = paste(gene, region, sep = "_")
36
            ),
37
        columns2 = colnames(regions)[1:3]
38
    )
39
40
    mutated_counts <- annotated %>%
41
        count(
42
            gene, name, Tumor_Sample_Barcode
43
        )
44
45
    pathology_counts <- metadata %>%
46
        select(Tumor_Sample_Barcode, pathology) %>%
47
        group_by(pathology) %>%
48
        mutate(total = n()) %>%
49
        ungroup
50
51
    all_counts <- left_join(
52
        mutated_counts,
53
        pathology_counts
54
    )
55
56
    all_counts <- all_counts %>%
57
        group_by(pathology, name) %>%
58
        mutate(mut = n()) %>%
59
        ungroup %>%
60
        select(-c(Tumor_Sample_Barcode, n)) %>%
61
        distinct
62
63
    all_counts <- all_counts %>%
64
        mutate(pc = round((mut/total)*100, 2)) %>%
65
        pivot_wider(
66
            names_from = pathology,
67
            names_glue = "{pathology}_{.value}",
68
            values_from = c(total, mut, pc)
69
        ) %>% select(-gene)
70
71
    output <- left_join(
72
        regions %>%
73
            mutate(
74
                name = paste(gene, region, sep = "_")
75
            ) %>%
76
            select(all_of(c("name", colnames(regions)[1:3]))),
77
        all_counts
78
    ) %>%
79
        replace_na(
80
            list(
81
                DLBCL_total = pull(unique(pathology_counts[pathology_counts$pathology=="DLBCL", "total"])),
82
                FL_total = pull(unique(pathology_counts[pathology_counts$pathology=="FL", "total"])),
83
                CLL_total = pull(unique(pathology_counts[pathology_counts$pathology=="CLL", "total"])),
84
                BL_total = pull(unique(pathology_counts[pathology_counts$pathology=="BL", "total"]))
85
            )
86
        ) %>%
87
        replace(is.na(.), 0)
88
89
    output <- output %>%
90
        select(
91
            name,
92
            colnames(regions)[1:3],
93
            contains("DLBCL"),
94
            contains("FL"),
95
            contains("CLL"),
96
            contains("BL")
97
        )
98
}
99
100
grch37_table <- generate_table(
101
    this_meta = metadata
102
)
103
104
hg38_table <- generate_table(
105
    this_meta = metadata,
106
    projection = "hg38"
107
)
108
109
write_tsv(
110
    grch37_table,
111
    "~/my_dir/repos/LLMPP/resources/curated/somatic_hypermutation_locations_with_DLBCL_frequencies_grch37.tsv"
112
)
113
114
write_tsv(
115
    hg38_table,
116
    "~/my_dir/repos/LLMPP/resources/curated/somatic_hypermutation_locations_with_DLBCL_frequencies_hg38.tsv"
117
)