|
a |
|
b/common_scripts/featurematrix/compute.pairwise.R |
|
|
1 |
regulon.feats=function(fm, genelist, cnv_annot=NULL, filter.val=3, filtertypes=NULL){ |
|
|
2 |
fmname=rownames(fm) |
|
|
3 |
|
|
|
4 |
if(!is.null(filtertypes)){ |
|
|
5 |
fmname=fmname[!substr(fmname, 1,6)%in%filtertypes] |
|
|
6 |
} |
|
|
7 |
feature.gene=suppressWarnings(do.call(rbind, strsplit(fmname, ":"))[,3]) |
|
|
8 |
|
|
|
9 |
if(any(grepl(":", genelist)))genelist[grepl(":", genelist)]=do.call(rbind, strsplit(genelist[grepl(":", genelist)], ":"))[,3] |
|
|
10 |
|
|
|
11 |
if(!is.null(cnv_annot)){ |
|
|
12 |
l.genes=strsplit(cnv_annot[,2], ",") |
|
|
13 |
names(l.genes)=cnv_annot[,1] |
|
|
14 |
st=stack(l.genes) |
|
|
15 |
st=st[st[,1]%in%genelist,] |
|
|
16 |
st[,2]=as.character(st[,2]) |
|
|
17 |
} |
|
|
18 |
|
|
|
19 |
nested.features=function(gene, fmname, feature.gene){ |
|
|
20 |
direct=fmname[feature.gene%in%gene] |
|
|
21 |
if(!is.null(cnv_annot))direct=c(direct, st[st[,1]%in%gene,2]) |
|
|
22 |
|
|
|
23 |
# order this to numeric, then to gexp-cnv-meth-gnab |
|
|
24 |
n=unique(direct) |
|
|
25 |
types=substr(n, 1, 6) |
|
|
26 |
ord=c("N:GEXP","B:GEXP","B:GNAB", "N:CNVR", "B:CNVR","N:METH", "N:MIRN", "N:SAMP", "B:SAMP", "N:CLIN", "B:CLIN", "N:DRUG") |
|
|
27 |
ord=ord[ord%in%types] |
|
|
28 |
|
|
|
29 |
r=unlist(lapply(ord, function(o) n[types%in%o])) |
|
|
30 |
|
|
|
31 |
return(r) |
|
|
32 |
} |
|
|
33 |
feats=lapply(genelist, nested.features, fmname, feature.gene) |
|
|
34 |
|
|
|
35 |
# Filter if not enough data: |
|
|
36 |
m=fm[rownames(fm)%in%unlist(feats),] |
|
|
37 |
s=is.na(m) |
|
|
38 |
m=m[!rowSums(s)==dim(m)[1]&rowSums(!s)>=filter.val, !colSums(s)==dim(m)[2]] |
|
|
39 |
|
|
|
40 |
feats=lapply(feats, function(n)n[n%in%rownames(m)]) |
|
|
41 |
names(feats)=genelist |
|
|
42 |
|
|
|
43 |
feats=feats[!unlist(lapply(feats, length))<1] |
|
|
44 |
|
|
|
45 |
return(feats) |
|
|
46 |
} |
|
|
47 |
|
|
|
48 |
get.feat.within.region=function(INPUT, FEAT){ |
|
|
49 |
|
|
|
50 |
temp=tempfile() |
|
|
51 |
temp2=tempfile() |
|
|
52 |
|
|
|
53 |
# write out bed format temp file: |
|
|
54 |
write.table(INPUT, temp, quote = F, row.names = F, col.names = F, sep="\t") |
|
|
55 |
|
|
|
56 |
# intersect with features |
|
|
57 |
system(paste0("bedtools intersect -a ",FEAT, " -b ",temp, " -wa -wb > ", temp2)) # genes in region |
|
|
58 |
|
|
|
59 |
# read in R |
|
|
60 |
d = read.delim(temp2, header=F, stringsAsFactors = F) |
|
|
61 |
|
|
|
62 |
d[,4]=gsub(":.*.", "", d[,4]) |
|
|
63 |
|
|
|
64 |
d2=d[,c(10, 4)] |
|
|
65 |
|
|
|
66 |
unlink(temp) |
|
|
67 |
unlink(temp2) |
|
|
68 |
|
|
|
69 |
return(d2) |
|
|
70 |
} |
|
|
71 |
|
|
|
72 |
fun.correlation.regulon=function(i, l.regulon.gene, fm, filter.val=3, method.cor="spearman"){ |
|
|
73 |
feats=l.regulon.gene[[i]] |
|
|
74 |
genename=names(l.regulon.gene)[i] |
|
|
75 |
x=data.matrix(t(as.matrix(fm[match(feats,rownames(fm)),,drop=F]))) |
|
|
76 |
class(x) <- "numeric" |
|
|
77 |
|
|
|
78 |
if(dim(x)[2]<2)return(NULL) # this one did not have pairs to test |
|
|
79 |
|
|
|
80 |
# filter if binary has less than n samples |
|
|
81 |
if(any(grepl("^B:", colnames(x)))){ |
|
|
82 |
check=colSums(x[,grepl("^B:", colnames(x)),drop=F], na.rm = T) |
|
|
83 |
x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F] |
|
|
84 |
} |
|
|
85 |
|
|
|
86 |
# filter if numeric has less than n non-zero samples |
|
|
87 |
if(any(grepl("^N:", colnames(x)))){ |
|
|
88 |
check=colSums(x[,grepl("^N:", colnames(x)),drop=F]!=0, na.rm = T) |
|
|
89 |
x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F] |
|
|
90 |
} |
|
|
91 |
|
|
|
92 |
if(dim(x)[2]<2)return(NULL) # this one did not have pairs to test |
|
|
93 |
|
|
|
94 |
cor_rho=cor(x,use = "pairwise.complete.obs", method = method.cor) |
|
|
95 |
cor_pval=cor.test.p.m(x, method = method.cor, use = "pairwise.complete.obs") |
|
|
96 |
results=flat_cor_mat(cor_rho, cor_pval) |
|
|
97 |
|
|
|
98 |
# respect original order: |
|
|
99 |
results=do.call(rbind, lapply(colnames(x), function(n)results[results[,1]%in%n,])) |
|
|
100 |
|
|
|
101 |
rownames(results)=NULL |
|
|
102 |
|
|
|
103 |
# filter pairwise result: |
|
|
104 |
results=results[!duplicated(t(apply(results[,1:2], 1, sort))),] |
|
|
105 |
|
|
|
106 |
# add gene to cnvr: |
|
|
107 |
# results[grepl(":CNVR:", results[,1]),1]=gsub(":CNVR:", paste0(":CNVR:",genename, "@"), results[grepl(":CNVR:", results[,1]),1]) |
|
|
108 |
# results[grepl(":CNVR:", results[,2]),2]=gsub(":CNVR:", paste0(":CNVR:",genename, "@"), results[grepl(":CNVR:", results[,2]),2]) |
|
|
109 |
|
|
|
110 |
return(results) |
|
|
111 |
} |
|
|
112 |
|
|
|
113 |
fun.correlation.extrafeatures=function(i, l.regulon.gene, fm, extrafeatures, filter.val=3, method.cor="spearman"){ |
|
|
114 |
feats=l.regulon.gene[[i]] |
|
|
115 |
genename=names(l.regulon.gene)[i] |
|
|
116 |
x=data.matrix(t(as.matrix(fm[match(feats,rownames(fm)),,drop=F]))) |
|
|
117 |
class(x) <- "numeric" |
|
|
118 |
|
|
|
119 |
# filter if binary has less than n samples |
|
|
120 |
if(any(grepl("^B:", colnames(x)))){ |
|
|
121 |
check=colSums(x[,grepl("^B:", colnames(x)),drop=F], na.rm = T) |
|
|
122 |
x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F] |
|
|
123 |
} |
|
|
124 |
|
|
|
125 |
# filter if numeric has less than n non-zero samples |
|
|
126 |
if(any(grepl("^N:", colnames(x)))){ |
|
|
127 |
check=colSums(x[,grepl("^N:", colnames(x)),drop=F]!=0, na.rm = T) |
|
|
128 |
x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F] |
|
|
129 |
} |
|
|
130 |
|
|
|
131 |
# remove if in both lists |
|
|
132 |
extrafeatures=extrafeatures[!extrafeatures%in%feats] |
|
|
133 |
|
|
|
134 |
# Filter if not enough data (less than 3 not NA): |
|
|
135 |
m2=fm[rownames(fm)%in%extrafeatures,] |
|
|
136 |
s=is.na(m2) |
|
|
137 |
m2=m2[!rowSums(s)==dim(m2)[2]&rowSums(!s)>=filter.val, ] |
|
|
138 |
y=data.matrix(t(as.matrix(m2))) |
|
|
139 |
class(y) <- "numeric" |
|
|
140 |
|
|
|
141 |
y=cbind(x,y) |
|
|
142 |
|
|
|
143 |
# filter if binary has less than n 3 samples |
|
|
144 |
if(any(grepl("^B:", colnames(y)))){ |
|
|
145 |
check=colSums(y[,grepl("^B:", colnames(y)),drop=F], na.rm = T) |
|
|
146 |
y=y[,!colnames(y)%in%names(check[check<filter.val])] |
|
|
147 |
} |
|
|
148 |
|
|
|
149 |
# filter if numeric has less than n non-zero samples |
|
|
150 |
if(any(grepl("^N:", colnames(y)))){ |
|
|
151 |
check=colSums(y[,grepl("^N:", colnames(y)),drop=F]!=0, na.rm = T) |
|
|
152 |
y=y[,!colnames(y)%in%names(check[check<filter.val]),drop=F] |
|
|
153 |
} |
|
|
154 |
|
|
|
155 |
if(dim(x)[2]==0|dim(y)[2]==0)return(NULL) # no pairs to test |
|
|
156 |
|
|
|
157 |
cor_rho=cor(x,y,use = "pairwise.complete.obs", method = method.cor) |
|
|
158 |
|
|
|
159 |
# compute cor pval |
|
|
160 |
cor_pval=t(apply(x, 2, function(v1){ |
|
|
161 |
apply(y, 2, function(v2){ |
|
|
162 |
if(sum(complete.cases(cbind(v1, v2)))<filter.val)return(NA) |
|
|
163 |
cor.test.p(v1, v2, method = method.cor, use = "pairwise.complete.obs") |
|
|
164 |
}) |
|
|
165 |
})) |
|
|
166 |
|
|
|
167 |
results=flat_cor_mat(data.matrix(cor_rho), data.matrix(cor_pval)) |
|
|
168 |
|
|
|
169 |
results=do.call(rbind, lapply(colnames(x), function(n)results[results[,1]%in%n,])) |
|
|
170 |
|
|
|
171 |
rownames(results)=NULL |
|
|
172 |
|
|
|
173 |
# remove regulonfeatures, they are coming from another test: |
|
|
174 |
results=results[!(results[,1]%in%colnames(x)&results[,2]%in%colnames(x)),] |
|
|
175 |
|
|
|
176 |
# filter pairwise result: |
|
|
177 |
results=results[!duplicated(t(apply(results[,1:2], 1, sort))),] |
|
|
178 |
|
|
|
179 |
# add gene to cnvr: BUG for wilcox |
|
|
180 |
# results[grepl(":CNVR:", results[,1]),1]=gsub(":CNVR:", paste0(":CNVR:",genename, "@"), results[grepl(":CNVR:", results[,1]),1]) |
|
|
181 |
|
|
|
182 |
return(results) |
|
|
183 |
} |
|
|
184 |
|
|
|
185 |
|
|
|
186 |
p.adjust.datatype=function(results, adjust.method="BH", log10=F, prettyNumbers=T, orderdata=T){ |
|
|
187 |
|
|
|
188 |
# number of features tested: |
|
|
189 |
datatypes=suppressWarnings(do.call(rbind, strsplit(results[,2], ":"))[,2]) # sometimes more than 3 columns |
|
|
190 |
datatypes2=suppressWarnings(do.call(rbind, strsplit(results[,1], ":"))[,2]) |
|
|
191 |
datatypes3=apply(cbind(datatypes2,datatypes),1,paste, collapse=":") |
|
|
192 |
datatypes=datatypes3 |
|
|
193 |
|
|
|
194 |
results=results[order(datatypes),] |
|
|
195 |
datatypes=datatypes[order(datatypes)] |
|
|
196 |
|
|
|
197 |
vals=seq(datatypes) |
|
|
198 |
|
|
|
199 |
vals=unlist(lapply(unique(datatypes), function(d)p.adjust(results$p[datatypes%in%d], method = adjust.method))) |
|
|
200 |
|
|
|
201 |
if(log10){ |
|
|
202 |
vals=abs(log10(vals)) |
|
|
203 |
} |
|
|
204 |
if(prettyNumbers){ |
|
|
205 |
vals=prettyNum(signif(vals,2)) |
|
|
206 |
results[,3]=prettyNum(signif(results[,3],2)) |
|
|
207 |
results[,4]=prettyNum(signif(results[,4],2)) |
|
|
208 |
}else{ |
|
|
209 |
vals=as.numeric(vals) |
|
|
210 |
results[,3]=as.numeric(results[,3]) |
|
|
211 |
results[,4]=as.numeric(results[,4]) |
|
|
212 |
} |
|
|
213 |
|
|
|
214 |
results$adj.p=vals |
|
|
215 |
results$signifCode="" |
|
|
216 |
results$signifCode[as.numeric(vals)<0.1]="." |
|
|
217 |
results$signifCode[as.numeric(vals)<0.05]="*" |
|
|
218 |
results$signifCode[as.numeric(vals)<0.01]="**" |
|
|
219 |
results$signifCode[as.numeric(vals)<0.001]="***" |
|
|
220 |
results$signifCode[as.numeric(vals)<0.0001]="****" |
|
|
221 |
|
|
|
222 |
results$datapairs=datatypes |
|
|
223 |
|
|
|
224 |
if(orderdata)results=results[order(results[,1], datatypes, -as.numeric(results$cor)>0, as.numeric(results$adj.p), decreasing = F),] |
|
|
225 |
|
|
|
226 |
return(results) |
|
|
227 |
} |
|
|
228 |
|
|
|
229 |
|
|
|
230 |
ft.alt <- function(a, b, c, d, alternative="greater") { |
|
|
231 |
as.numeric(fisher.test(matrix(c(a, b, c, d), 2), alternative = alternative)$p.value) |
|
|
232 |
} |
|
|
233 |
|
|
|
234 |
fisher.test.pwpw=function(results, find, fm, fisher.alternative, cores){ |
|
|
235 |
results.o=results |
|
|
236 |
# remove gene anno if CNV type, to match with FM |
|
|
237 |
results.o[,1]=gsub("CNVR:.*.@", "CNVR:", results.o[,1]) |
|
|
238 |
results.o[,2]=gsub("CNVR:.*.@", "CNVR:", results.o[,2]) |
|
|
239 |
|
|
|
240 |
featsA=as.list(as.data.frame(t(fm[results.o[find,1],]))) |
|
|
241 |
featsB=as.list(as.data.frame(t(fm[results.o[find,2],]))) |
|
|
242 |
|
|
|
243 |
grid=data.frame(do.call(rbind, mclapply(seq(find), function(i)c(table(featsA[[i]], featsB[[i]])), mc.cores=cores))) |
|
|
244 |
colnames(grid)=c("a", "b", "c", "d") |
|
|
245 |
|
|
|
246 |
if(fisher.alternative=="two.sided"){ |
|
|
247 |
p.ft <- with(grid, HighSpeedStats::ultrafastfet(a, b, c, d)) |
|
|
248 |
results$p[find]=p.ft |
|
|
249 |
|
|
|
250 |
}else{ |
|
|
251 |
p.ft <- with(grid, mapply(ft.alt, a, b, c, d, fisher.alternative)) |
|
|
252 |
results$p[find]=p.ft |
|
|
253 |
} |
|
|
254 |
results$test.method[find]=paste0("Fisher´s exact (", fisher.alternative, ")") |
|
|
255 |
return(results) |
|
|
256 |
} |
|
|
257 |
|
|
|
258 |
# pairwise.correlation=function(l.regulon.gene, fm, extrafeatures=NULL, filter.val=3, cores=5, adjust.method="BH", method.cor="spearman", prettyNumbers=T, use.fisher=T, fisher.alternative="two.sided"){ |
|
|
259 |
# |
|
|
260 |
# #****************************** Test within regulon ****************************** |
|
|
261 |
# r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.regulon,l.regulon.gene, fm, filter.val=filter.val,method.cor=method.cor, mc.cores=cores) |
|
|
262 |
# results=do.call(rbind, r) |
|
|
263 |
# results=results[!is.na(results$p),,drop=F] |
|
|
264 |
# |
|
|
265 |
# if(!is.null(results)){ |
|
|
266 |
# results$test.method=paste(method.cor, "correlation") |
|
|
267 |
# results$test.group="gene.features" |
|
|
268 |
# |
|
|
269 |
# # if any pairs are binary-binary, add fisher test p-value |
|
|
270 |
# fa=substr(results[,1], 0, 1)=="B" |
|
|
271 |
# fb=substr(results[,2], 0, 1)=="B" |
|
|
272 |
# find=which(fa&fb) |
|
|
273 |
# findl=fa&fb |
|
|
274 |
# |
|
|
275 |
# if(use.fisher&any(findl)){ |
|
|
276 |
# results=fisher.test.pwpw(results, find, fm, fisher.alternative, cores) |
|
|
277 |
# results1=p.adjust.datatype(results[findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
278 |
# results2=p.adjust.datatype(results[!findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
279 |
# results=rbind(results1, results2) |
|
|
280 |
# }else{ |
|
|
281 |
# results=p.adjust.datatype(results, orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
282 |
# } |
|
|
283 |
# } |
|
|
284 |
# #********************************************************************************* |
|
|
285 |
# |
|
|
286 |
# # Test against extrafeatures if they exist: |
|
|
287 |
# if(!is.null(extrafeatures)){ |
|
|
288 |
# r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.extrafeatures, l.regulon.gene, fm, extrafeatures=extrafeatures, filter.val=filter.val,method.cor=method.cor, mc.cores=cores) |
|
|
289 |
# results.e=do.call(rbind, r) |
|
|
290 |
# results.e=results.e[!is.na(results.e$p),] |
|
|
291 |
# results.e$test.method=paste(method.cor, "correlation") |
|
|
292 |
# results.e$test.group="extra.features" |
|
|
293 |
# |
|
|
294 |
# # if any pairs are binary-binary, add fisher test p-value |
|
|
295 |
# fa=substr(results.e[,1], 0, 1)=="B" |
|
|
296 |
# fb=substr(results.e[,2], 0, 1)=="B" |
|
|
297 |
# find=which(fa&fb) |
|
|
298 |
# findl=fa&fb |
|
|
299 |
# |
|
|
300 |
# if(use.fisher&any(findl)){ |
|
|
301 |
# results.e=fisher.test.pwpw(results.e, find,fm, fisher.alternative, cores) |
|
|
302 |
# results1=p.adjust.datatype(results.e[findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
303 |
# if(sum(!findl)>0){ |
|
|
304 |
# results2=p.adjust.datatype(results.e[!findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
305 |
# results.e=rbind(results1, results2) |
|
|
306 |
# }else{ |
|
|
307 |
# results.e=results1 |
|
|
308 |
# } |
|
|
309 |
# }else{ |
|
|
310 |
# results.e=p.adjust.datatype(results.e, orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
311 |
# } |
|
|
312 |
# results=rbind(results, results.e) |
|
|
313 |
# } |
|
|
314 |
# |
|
|
315 |
# if(adjust.method=="BH")adj="FDR" |
|
|
316 |
# if(adjust.method=="bonferroni")adj="FWER" |
|
|
317 |
# colnames(results)[7]=adj |
|
|
318 |
# results[grepl("Fisher", results$test.method),3]="" |
|
|
319 |
# |
|
|
320 |
# # order columnts: |
|
|
321 |
# cols=c("featureA", "featureB", "cor", "p", adj, "signifCode", "test.method", "test.group", "datapairs") |
|
|
322 |
# results=results[,cols] |
|
|
323 |
# return(results) |
|
|
324 |
# } |
|
|
325 |
|
|
|
326 |
filter.pairwise.res=function(results, filter.table=NULL, FDR=0.1){ |
|
|
327 |
if(is.null(filter.table)){ |
|
|
328 |
filter.table=data.frame("pair"=unique(results$datapairs), "FDR"=rep(FDR, length(unique(results$datapairs))), stringsAsFactors = F) |
|
|
329 |
type1=gsub(":.*.", "", filter.table[,1]) |
|
|
330 |
type2=gsub(".*.:", "", filter.table[,1]) |
|
|
331 |
|
|
|
332 |
# usually fewer features |
|
|
333 |
filter.table$FDR[type1=="GNAB"|type2=="GNAB"]=0.25 # mutations can be interesting, high P-value kept |
|
|
334 |
filter.table$FDR[type1=="SAMP"&type2=="SAMP"]=0.1 # these can be very different |
|
|
335 |
filter.table$FDR[type1=="CLIN"&type2=="CLIN"]=0.1 # these can be very different |
|
|
336 |
|
|
|
337 |
# exclude, not usually meaningful |
|
|
338 |
filter.table$FDR[type1=="CNVR"&type2=="CNVR"]=0 # remove, highly correlated |
|
|
339 |
filter.table$FDR[type1=="METH"&type2=="METH"]=0 # remove, highly correlated |
|
|
340 |
|
|
|
341 |
# SAMP special rules: |
|
|
342 |
filter.table$FDR[type1=="SAMP"&type2=="GEXP"]=1e-30 |
|
|
343 |
filter.table$FDR[type1=="SAMP"&type2=="MIRN"]=1e-30 |
|
|
344 |
filter.table$FDR[type1=="SAMP"&type2=="METH"]=1e-8 |
|
|
345 |
|
|
|
346 |
print("default table used: ") |
|
|
347 |
print(filter.table) |
|
|
348 |
} |
|
|
349 |
|
|
|
350 |
pairs=results$datapairs |
|
|
351 |
p=unique(pairs) |
|
|
352 |
|
|
|
353 |
# make sure all pairs are found in filter.table |
|
|
354 |
if(!all(p%in%filter.table[,1]))stop(paste("Not all pairs were found: ", p[!p%in%filter.table[,1],])) |
|
|
355 |
|
|
|
356 |
filter.table=filter.table[match(p, filter.table[,1]),] |
|
|
357 |
|
|
|
358 |
# p-value per pairs: |
|
|
359 |
vals=do.call(rbind, lapply(seq(p), function(i){ |
|
|
360 |
|
|
|
361 |
r=results[pairs%in%p[i],] |
|
|
362 |
|
|
|
363 |
filt=as.numeric(r[,5])<as.numeric(filter.table[i,2]) |
|
|
364 |
|
|
|
365 |
r=r[filt,] |
|
|
366 |
})) |
|
|
367 |
|
|
|
368 |
return(vals) |
|
|
369 |
} |
|
|
370 |
|
|
|
371 |
pairwise.correlation=function(l.regulon.gene, fm, extrafeatures=NULL, filter.val=3, cores=5, adjust.method="BH", method.cor="spearman", prettyNumbers=T, use.fisher=T, use.wilcox=T, fisher.alternative="two.sided", wilcox.alternative="two.sided"){ |
|
|
372 |
|
|
|
373 |
#****************************** Test within regulon ****************************** |
|
|
374 |
r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.regulon,l.regulon.gene, fm, filter.val=filter.val,method.cor=method.cor, mc.cores=cores) |
|
|
375 |
results=do.call(rbind, r) |
|
|
376 |
results=results[!is.na(results$p),,drop=F] |
|
|
377 |
|
|
|
378 |
if(!is.null(results)){ |
|
|
379 |
results$test.method=paste(method.cor, "correlation") |
|
|
380 |
results$test.group="gene.features" |
|
|
381 |
|
|
|
382 |
# if any pairs are binary-binary, add fisher test p-value |
|
|
383 |
fa=substr(results[,1], 0, 1)=="B" |
|
|
384 |
fb=substr(results[,2], 0, 1)=="B" |
|
|
385 |
find=which(fa&fb) |
|
|
386 |
findl=fa&fb |
|
|
387 |
|
|
|
388 |
# if any pairs are binary-numeric, add wilcoxon test p-value |
|
|
389 |
fc=substr(results[,1], 0, 1)=="N" |
|
|
390 |
fd=substr(results[,2], 0, 1)=="B" |
|
|
391 |
fe=substr(results[,1], 0, 1)=="B" |
|
|
392 |
ff=substr(results[,2], 0, 1)=="N" |
|
|
393 |
|
|
|
394 |
findw=fc&fd|fe&ff |
|
|
395 |
|
|
|
396 |
# compute p-value depending on test type: |
|
|
397 |
if(use.fisher&any(findl)){ |
|
|
398 |
# update B vs B tests: |
|
|
399 |
results=fisher.test.pwpw(results, find, fm, fisher.alternative, cores) |
|
|
400 |
} |
|
|
401 |
|
|
|
402 |
# compute p-value depending on test type: |
|
|
403 |
if(use.wilcox&any(findw)) { |
|
|
404 |
# update B vs N tests: |
|
|
405 |
results=test.feats.wilcox(features = results, rows = findw, data = fm, wilcox.alternative = wilcox.alternative, cores = cores) |
|
|
406 |
} |
|
|
407 |
|
|
|
408 |
# go through each test and adjust p-value per data type: |
|
|
409 |
results=do.call(rbind, lapply(unique(results$test.method), function(type){ |
|
|
410 |
p.adjust.datatype(results[results$test.method%in%type,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
411 |
})) |
|
|
412 |
} |
|
|
413 |
#********************************************************************************* |
|
|
414 |
|
|
|
415 |
# Test against extrafeatures if they exist: |
|
|
416 |
if(!is.null(extrafeatures)){ |
|
|
417 |
r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.extrafeatures, l.regulon.gene, fm, extrafeatures=extrafeatures, filter.val=filter.val,method.cor=method.cor, mc.cores=cores) |
|
|
418 |
results.e=do.call(rbind, r) |
|
|
419 |
results.e=results.e[!is.na(results.e$p),] |
|
|
420 |
results.e$test.method=paste(method.cor, "correlation") |
|
|
421 |
results.e$test.group="extra.features" |
|
|
422 |
|
|
|
423 |
# if any pairs are binary-binary, add fisher test p-value |
|
|
424 |
fa=substr(results.e[,1], 0, 1)=="B" |
|
|
425 |
fb=substr(results.e[,2], 0, 1)=="B" |
|
|
426 |
find=which(fa&fb) |
|
|
427 |
findl=fa&fb |
|
|
428 |
|
|
|
429 |
# if any pairs are binary-numeric, add wilcoxon test p-value |
|
|
430 |
fc=substr(results.e[,1], 0, 1)=="N" |
|
|
431 |
fd=substr(results.e[,2], 0, 1)=="B" |
|
|
432 |
fe=substr(results.e[,1], 0, 1)=="B" |
|
|
433 |
ff=substr(results.e[,2], 0, 1)=="N" |
|
|
434 |
findw=fc&fd|fe&ff |
|
|
435 |
|
|
|
436 |
# compute p-value depending on test type: |
|
|
437 |
if(use.fisher&any(findl)){ |
|
|
438 |
# update B vs B tests: |
|
|
439 |
results.e=fisher.test.pwpw(results.e, find, fm, fisher.alternative, cores) |
|
|
440 |
} |
|
|
441 |
|
|
|
442 |
# compute p-value depending on test type: |
|
|
443 |
if(use.wilcox&any(findw)) { |
|
|
444 |
# update B vs N tests: |
|
|
445 |
results.e=test.feats.wilcox(features = results.e, rows = findw, data = fm, wilcox.alternative = wilcox.alternative, cores = cores) |
|
|
446 |
} |
|
|
447 |
|
|
|
448 |
# go through each test and adjust p-value per data type |
|
|
449 |
results.e=do.call(rbind, lapply(unique(results.e$test.method), function(type){ |
|
|
450 |
p.adjust.datatype(results.e[results.e$test.method%in%type,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers) |
|
|
451 |
})) |
|
|
452 |
|
|
|
453 |
results=rbind(results, results.e) |
|
|
454 |
} |
|
|
455 |
|
|
|
456 |
if(adjust.method=="BH")adj="FDR" |
|
|
457 |
if(adjust.method=="bonferroni")adj="FWER" |
|
|
458 |
colnames(results)[7]=adj |
|
|
459 |
results[grepl("Fisher", results$test.method),3]="" |
|
|
460 |
|
|
|
461 |
# order columnts: |
|
|
462 |
cols=c("featureA", "featureB", "cor", "p", adj, "signifCode", "test.method", "test.group", "datapairs") |
|
|
463 |
results=results[,cols] |
|
|
464 |
return(results) |
|
|
465 |
} |
|
|
466 |
|
|
|
467 |
filter.pairwise.res=function(results, filter.table=NULL, FDR=0.1){ |
|
|
468 |
if(is.null(filter.table)){ |
|
|
469 |
filter.table=data.frame("pair"=unique(results$datapairs), "FDR"=rep(FDR, length(unique(results$datapairs))), stringsAsFactors = F) |
|
|
470 |
type1=gsub(":.*.", "", filter.table[,1]) |
|
|
471 |
type2=gsub(".*.:", "", filter.table[,1]) |
|
|
472 |
|
|
|
473 |
# usually fewer features |
|
|
474 |
filter.table$FDR[type1=="GNAB"|type2=="GNAB"]=0.25 # mutations can be interesting, high P-value kept |
|
|
475 |
filter.table$FDR[type1=="SAMP"&type2=="SAMP"]=0.1 # these can be very different |
|
|
476 |
filter.table$FDR[type1=="CLIN"&type2=="CLIN"]=0.1 # these can be very different |
|
|
477 |
|
|
|
478 |
# exclude, not usually meaningful |
|
|
479 |
filter.table$FDR[type1=="CNVR"&type2=="CNVR"]=0 # remove, highly correlated |
|
|
480 |
filter.table$FDR[type1=="METH"&type2=="METH"]=0 # remove, highly correlated |
|
|
481 |
|
|
|
482 |
# SAMP special rules: |
|
|
483 |
filter.table$FDR[type1=="SAMP"&type2=="GEXP"]=1e-30 |
|
|
484 |
filter.table$FDR[type1=="SAMP"&type2=="MIRN"]=1e-30 |
|
|
485 |
filter.table$FDR[type1=="SAMP"&type2=="METH"]=1e-8 |
|
|
486 |
|
|
|
487 |
print("default table used: ") |
|
|
488 |
print(filter.table) |
|
|
489 |
} |
|
|
490 |
|
|
|
491 |
pairs=results$datapairs |
|
|
492 |
p=unique(pairs) |
|
|
493 |
|
|
|
494 |
# make sure all pairs are found in filter.table |
|
|
495 |
if(!all(p%in%filter.table[,1]))stop(paste("Not all pairs were found: ", p[!p%in%filter.table[,1],])) |
|
|
496 |
|
|
|
497 |
filter.table=filter.table[match(p, filter.table[,1]),] |
|
|
498 |
|
|
|
499 |
# p-value per pairs: |
|
|
500 |
vals=do.call(rbind, lapply(seq(p), function(i){ |
|
|
501 |
|
|
|
502 |
r=results[pairs%in%p[i],] |
|
|
503 |
|
|
|
504 |
filt=as.numeric(r[,5])<as.numeric(filter.table[i,2])&!is.na(r[,5]) |
|
|
505 |
|
|
|
506 |
r=r[filt,] |
|
|
507 |
})) |
|
|
508 |
|
|
|
509 |
return(vals) |
|
|
510 |
} |
|
|
511 |
|
|
|
512 |
test.feats.wilcox <- function(features, rows=NA, data, wilcox.alternative="two.sided", cores=1) { |
|
|
513 |
if(length(rows)>=1) { |
|
|
514 |
pairs <- features[rows,] |
|
|
515 |
} else pairs <- features |
|
|
516 |
|
|
|
517 |
p=unlist(parallel::mclapply(1:nrow(pairs), function(i){ |
|
|
518 |
binary=substr(pairs[i,2], 0, 1)=="B" |
|
|
519 |
|
|
|
520 |
if(binary) { |
|
|
521 |
|
|
|
522 |
x <- as.numeric(data[pairs[i,1],]) |
|
|
523 |
y <- as.logical(as.numeric(data[pairs[i,2],])) |
|
|
524 |
|
|
|
525 |
}else { |
|
|
526 |
|
|
|
527 |
x <- as.numeric(data[pairs[i,2],]) |
|
|
528 |
y <- as.logical(as.numeric(data[pairs[i,1],])) |
|
|
529 |
|
|
|
530 |
} |
|
|
531 |
|
|
|
532 |
a <- vector() |
|
|
533 |
b <- vector() |
|
|
534 |
|
|
|
535 |
for(j in 1:length(y)) { |
|
|
536 |
|
|
|
537 |
if(is.na(y[j])) next |
|
|
538 |
if(y[j]) a <- c(a, x[j]) |
|
|
539 |
if(!y[j]) b <- c(b, x[j]) |
|
|
540 |
|
|
|
541 |
} |
|
|
542 |
|
|
|
543 |
if((is.numeric(a) && is.numeric(b))&&(length(a)>0&&length(b>0))&&sum(is.na(a))!=length(a)&&sum(is.na(b))!=length(b)) { |
|
|
544 |
p <- wilcox.test(a,b, alternative = wilcox.alternative)$p.value |
|
|
545 |
} else { |
|
|
546 |
p=NA # not enough data |
|
|
547 |
} |
|
|
548 |
return(p) |
|
|
549 |
}, mc.cores=cores)) |
|
|
550 |
|
|
|
551 |
pairs$test.method=paste0("Wilcoxon test (", wilcox.alternative, ")") |
|
|
552 |
pairs$p<- as.numeric(as.character(p)) |
|
|
553 |
|
|
|
554 |
# return original table but with updated p and test.method |
|
|
555 |
features[rows,]=pairs |
|
|
556 |
|
|
|
557 |
return(features) |
|
|
558 |
} |