Diff of /R/amaretto_initialize.R [000000] .. [2ab972]

Switch to unified view

a b/R/amaretto_initialize.R
1
#' CreateRegulatorData
2
#'
3
#' Determine potential regulator genes.
4
#' @return result
5
#' @keywords internal
6
CreateRegulatorData <- function(MA_matrix = MA_matrix, CNV_matrix = NULL, MET_matrix = NULL, Driver_list = NULL, PvalueThreshold = 0.001, RsquareThreshold = 0.1, method = "union") {
7
    if (is.null(Driver_list)) 
8
        DriversList <- NULL
9
    if (is.null(CNV_matrix)) 
10
        CNV_matrix <- matrix(0, nrow = 0, ncol = 0)
11
    if (nrow(CNV_matrix) > 1) {
12
        GeneVariances = rowVars(CNV_matrix)
13
        CNV_matrix = CNV_matrix[GeneVariances >= 1e-04,]
14
    }
15
    if (nrow(CNV_matrix) > 0) {
16
        CNV_matrix = FindTranscriptionallyPredictive_CNV(MA_matrix, CNV_matrix, PvalueThreshold = PvalueThreshold, RsquareThreshold = RsquareThreshold)
17
    }
18
    cat("\tFound", length(rownames(CNV_matrix)), "CNV driver genes.\n")
19
    if (is.null(MET_matrix)) 
20
        MET_matrix <- matrix(0, nrow = 0, ncol = 0)
21
    if (nrow(MET_matrix) > 1) {
22
        METcounts = apply(MET_matrix, 1, function(x) length(unique(x)))
23
        MET_matrix = MET_matrix[METcounts == 2, ]
24
        GeneVariances = rowVars(MET_matrix)
25
        MET_matrix = MET_matrix[GeneVariances >= 1e-04,]
26
    }
27
    MET_drivers <- intersect(rownames(MET_matrix),rownames(MA_matrix))
28
    cat("\tFound", length(MET_drivers), "MethylMix driver genes.\n")
29
    if (!is.null(Driver_list)) {
30
        DriversList <- Driver_list
31
        DriversList <- intersect(DriversList, rownames(MA_matrix))
32
        cat("\tFound", length(DriversList), "driver genes from the input list.\n")
33
    }
34
    if (is.null(Driver_list)) 
35
        Drivers <- unique(c(rownames(CNV_matrix), MET_drivers,DriversList))
36
    dataset_drivers <- unique(c(rownames(CNV_matrix),MET_drivers))
37
    if (!is.null(Driver_list) & method == "union") 
38
        Drivers <- union(dataset_drivers, DriversList)
39
    if (!is.null(Driver_list) & method == "intersect") 
40
        Drivers <- intersect(dataset_drivers, DriversList)
41
    cat("\tFound a total of", length(Drivers), "unique drivers with your selected method.\n")
42
    if (length(Drivers) == 0) {
43
        cat("AMARETTO doesn't find any driver genes.")
44
        return("No driver")
45
    } else {
46
        if (length(Drivers) == 1) {
47
            RegulatorData_temp <- matrix(0, 1, ncol(MA_matrix))
48
            colnames(RegulatorData_temp) <- colnames(MA_matrix)
49
            rownames(RegulatorData_temp) <- Drivers
50
            RegulatorData_temp[1, ] <- MA_matrix[Drivers,]
51
            RegulatorData <- RegulatorData_temp
52
        } else {
53
            RegulatorData = MA_matrix[Drivers, ]
54
        }
55
        RegulatorData = t(scale(t(RegulatorData)))
56
        MET_aberrations <- matrix(0, ncol = 3, nrow = length(MET_drivers))
57
        rownames(MET_aberrations) <- MET_drivers
58
        colnames(MET_aberrations)<-c("Hyper-methylated","Hypo-methylated","No_change")
59
        if (length(MET_drivers) > 0) {
60
            MET_aberrations[, "Hyper-methylated"] <- rowSums(MET_matrix[MET_drivers,] > 0)/ncol(MET_matrix)
61
            MET_aberrations[, "Hypo-methylated"] <- rowSums(MET_matrix[MET_drivers,] < 0)/ncol(MET_matrix)
62
            MET_aberrations[, "No_change"] <- rowSums(MET_matrix[MET_drivers,] == 0)/ncol(MET_matrix)
63
        }
64
        CNV_alterations <- matrix(0, ncol = 3, nrow = nrow(CNV_matrix))
65
        rownames(CNV_alterations) <- rownames(CNV_matrix)
66
        colnames(CNV_alterations)<-c("Amplification","Deletion","No_change")
67
        if (nrow(CNV_alterations) > 0) {
68
            CNV_alterations[, "Amplification"] <- rowSums(CNV_matrix > 0)/ncol(CNV_matrix)
69
            CNV_alterations[, "Deletion"] <- rowSums(CNV_matrix < 0)/ncol(CNV_matrix)
70
            CNV_alterations[, "No_change"] <- rowSums(CNV_matrix == 0)/ncol(CNV_matrix)
71
        }
72
        driverList_alterations <- matrix(1, ncol = 1, nrow = length(DriversList))
73
        rownames(driverList_alterations) <- DriversList
74
        Alterations <- matrix(0, nrow = length(Drivers), ncol = 3)
75
        colnames(Alterations) <- c("CNV", "MET", "Driver List")
76
        rownames(Alterations) <- Drivers
77
        Alterations[which(Drivers %in% rownames(CNV_matrix)), 1] <- rep(1, length(which(Drivers %in% rownames(CNV_matrix))))
78
        Alterations[which(Drivers %in% rownames(MET_matrix)), 2] <- rep(1, length(which(Drivers %in% rownames(MET_matrix))))
79
        Alterations[which(Drivers %in% rownames(driverList_alterations)), 3] <- rep(1, length(which(Drivers %in% rownames(driverList_alterations))))
80
        Alterations <- list(MET = MET_aberrations, 
81
                            CNV = CNV_alterations,
82
                            Driver_list = driverList_alterations, 
83
                            Summary = Alterations)
84
        return(list(RegulatorData = RegulatorData, Alterations = Alterations))
85
    }
86
}
87
88
#' FindTranscriptionallyPredictive_CNV
89
#'
90
#' Function to identify which genes CNV significantly predict expression of that gene.
91
#' @return result
92
#' @keywords internal
93
FindTranscriptionallyPredictive_CNV <- function(MA_matrix, 
94
    CNV_matrix, PvalueThreshold = 0.001, RsquareThreshold = 0.1) {
95
    OverlapGenes = Reduce(intersect, list(rownames(MA_matrix), 
96
        rownames(CNV_matrix)))
97
    OverlapSamples = Reduce(intersect, list(colnames(MA_matrix), 
98
        colnames(CNV_matrix)))
99
    if (length(OverlapGenes) == 1) {
100
        CNV_TCGA_temp = MA_matrix_temp <- matrix(0, 
101
            length(OverlapGenes), length(OverlapSamples))
102
        rownames(CNV_TCGA_temp) = rownames(MA_matrix_temp) <- OverlapGenes
103
        colnames(CNV_TCGA_temp) = colnames(MA_matrix_temp) <- OverlapSamples
104
        CNV_TCGA_temp[1, ] <- CNV_matrix[OverlapGenes, 
105
            OverlapSamples]
106
        MA_matrix_temp[1, ] <- MA_matrix[OverlapGenes, 
107
            OverlapSamples]
108
        CNV_matrix = CNV_TCGA_temp
109
        MA_matrix = MA_matrix_temp
110
    } else {
111
        CNV_matrix = CNV_matrix[OverlapGenes, OverlapSamples]
112
        MA_matrix = MA_matrix[OverlapGenes, OverlapSamples]
113
    }
114
    if (length(OverlapGenes) > 0 && length(OverlapSamples) > 
115
        0) {
116
        CNVdrivers = c()
117
        for (i in 1:length(rownames(CNV_matrix))) {
118
            res = lm(MA_matrix[i, ] ~ CNV_matrix[i, 
119
                ])
120
            res.summary = summary(res)
121
            if (res$coefficients[2] > 0 & res.summary$coefficients[2, 
122
                4] < PvalueThreshold & res.summary$r.squared > 
123
                RsquareThreshold) {
124
                CNVdrivers = c(CNVdrivers, rownames(CNV_matrix)[i])
125
            }
126
        }
127
        if (length(CNVdrivers) == 1) {
128
            CNV_matrix_temp <- matrix(0, 1, ncol(CNV_matrix))
129
            colnames(CNV_matrix_temp) = colnames(CNV_matrix)
130
            rownames(CNV_matrix_temp) = CNVdrivers
131
            CNV_matrix_temp[1, ] <- CNV_matrix[CNVdrivers, 
132
                ]
133
            CNV_matrix = CNV_matrix_temp
134
        } else {
135
            CNV_matrix = CNV_matrix[CNVdrivers, ]
136
        }
137
    }
138
    return(CNV_matrix = CNV_matrix)
139
}
140
141
#' geneFiltering
142
#'
143
#' Function to filter gene expression matrix
144
#' @return result
145
#' @keywords internal
146
geneFiltering <- function(Type, MAdata, Percentage) {
147
    switch(Type, Variance = {
148
        GeneVariances = rowVars(MAdata)
149
        tmpResult = sort(GeneVariances, decreasing = TRUE)
150
        SortedGenes = names(tmpResult)
151
        tmpNrGenes = round(length(rownames(MAdata)) * 
152
            Percentage/100)
153
        MAdata_Filtered = MAdata[SortedGenes[1:tmpNrGenes], 
154
            ]
155
    }, MAD = {
156
        GeneVariances = rowMads(MAdata)
157
        names(GeneVariances) = rownames(MAdata)
158
        tmpResult = sort(GeneVariances, decreasing = TRUE)
159
        SortedGenes = names(tmpResult)
160
        tmpNrGenes = round(length(rownames(MAdata)) * 
161
            Percentage/100)
162
        MAdata_Filtered = MAdata[SortedGenes[1:tmpNrGenes], 
163
            ]
164
    })
165
    return(MAdata_Filtered)
166
}
167
168
#' read_gct
169
#' 
170
#' Function to turn a .gct data files into a matrix format
171
#'
172
#' @param file_address Address of the input gct file. 
173
#'
174
#' @importFrom utils read.delim
175
#' @return result
176
#' @export
177
#' @examples
178
#' data_matrix<-read_gct(file_address="")
179
read_gct <- function(file_address){
180
  if(file_address==""){
181
    print("No gct file address is provided.")
182
    return(NULL)
183
  }
184
  else{
185
    data_fr <-read.delim(file_address, skip=2, sep="\t", header=TRUE, row.names=1)
186
    data_fr <- as.matrix(subset(data_fr, select=-c(Description)))
187
    return(data_fr)
188
  }
189
}
190
191
#' printf
192
#'
193
#' Wrapper function for C-style formatted output.
194
#' @return result
195
#' @keywords internal
196
printf <- function(...) {
197
    cat(sprintf(...))
198
}