a b/common_scripts/featurematrix/compute.pairwise.R
1
regulon.feats=function(fm, genelist, cnv_annot=NULL, filter.val=3, filtertypes=NULL){
2
  fmname=rownames(fm)
3
  
4
  if(!is.null(filtertypes)){
5
    fmname=fmname[!substr(fmname, 1,6)%in%filtertypes]
6
  }
7
  feature.gene=suppressWarnings(do.call(rbind, strsplit(fmname, ":"))[,3])
8
  
9
  if(any(grepl(":", genelist)))genelist[grepl(":", genelist)]=do.call(rbind, strsplit(genelist[grepl(":", genelist)], ":"))[,3]
10
  
11
  if(!is.null(cnv_annot)){
12
    l.genes=strsplit(cnv_annot[,2], ",")
13
    names(l.genes)=cnv_annot[,1]
14
    st=stack(l.genes)
15
    st=st[st[,1]%in%genelist,]
16
    st[,2]=as.character(st[,2])
17
  }
18
  
19
  nested.features=function(gene, fmname, feature.gene){
20
    direct=fmname[feature.gene%in%gene]
21
    if(!is.null(cnv_annot))direct=c(direct, st[st[,1]%in%gene,2])
22
    
23
    # order this to numeric, then to gexp-cnv-meth-gnab
24
    n=unique(direct)
25
    types=substr(n, 1, 6)
26
    ord=c("N:GEXP","B:GEXP","B:GNAB", "N:CNVR", "B:CNVR","N:METH", "N:MIRN", "N:SAMP", "B:SAMP", "N:CLIN", "B:CLIN", "N:DRUG")
27
    ord=ord[ord%in%types]
28
    
29
    r=unlist(lapply(ord, function(o) n[types%in%o]))
30
    
31
    return(r)
32
  }
33
  feats=lapply(genelist, nested.features, fmname, feature.gene)
34
  
35
  # Filter if not enough data:
36
  m=fm[rownames(fm)%in%unlist(feats),]
37
  s=is.na(m)
38
  m=m[!rowSums(s)==dim(m)[1]&rowSums(!s)>=filter.val, !colSums(s)==dim(m)[2]]
39
  
40
  feats=lapply(feats, function(n)n[n%in%rownames(m)])
41
  names(feats)=genelist
42
  
43
  feats=feats[!unlist(lapply(feats, length))<1]
44
  
45
  return(feats)
46
}
47
48
get.feat.within.region=function(INPUT, FEAT){
49
  
50
  temp=tempfile()
51
  temp2=tempfile()
52
  
53
  # write out bed format temp file:
54
  write.table(INPUT, temp, quote = F, row.names = F, col.names = F, sep="\t")
55
  
56
  # intersect with features
57
  system(paste0("bedtools intersect -a ",FEAT, " -b ",temp, " -wa -wb > ", temp2))  # genes in region
58
  
59
  # read in R
60
  d = read.delim(temp2, header=F, stringsAsFactors = F)
61
  
62
  d[,4]=gsub(":.*.", "", d[,4])
63
  
64
  d2=d[,c(10, 4)]
65
  
66
  unlink(temp)
67
  unlink(temp2)
68
  
69
  return(d2)
70
}
71
72
fun.correlation.regulon=function(i, l.regulon.gene, fm, filter.val=3, method.cor="spearman"){
73
  feats=l.regulon.gene[[i]]
74
  genename=names(l.regulon.gene)[i]
75
  x=data.matrix(t(as.matrix(fm[match(feats,rownames(fm)),,drop=F])))
76
  class(x) <- "numeric"
77
  
78
  if(dim(x)[2]<2)return(NULL) # this one did not have pairs to test
79
  
80
  # filter if binary has less than n samples
81
  if(any(grepl("^B:", colnames(x)))){
82
    check=colSums(x[,grepl("^B:", colnames(x)),drop=F], na.rm = T)
83
    x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F]
84
  }
85
  
86
  # filter if numeric has less than n non-zero samples
87
  if(any(grepl("^N:", colnames(x)))){
88
    check=colSums(x[,grepl("^N:", colnames(x)),drop=F]!=0, na.rm = T)
89
    x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F]
90
  }
91
  
92
  if(dim(x)[2]<2)return(NULL) # this one did not have pairs to test
93
  
94
  cor_rho=cor(x,use = "pairwise.complete.obs", method = method.cor)
95
  cor_pval=cor.test.p.m(x, method = method.cor, use = "pairwise.complete.obs")
96
  results=flat_cor_mat(cor_rho, cor_pval)
97
  
98
  # respect original order:
99
  results=do.call(rbind, lapply(colnames(x), function(n)results[results[,1]%in%n,]))
100
  
101
  rownames(results)=NULL
102
  
103
  # filter pairwise result:
104
  results=results[!duplicated(t(apply(results[,1:2], 1, sort))),]
105
  
106
  # add gene to cnvr:
107
  # results[grepl(":CNVR:", results[,1]),1]=gsub(":CNVR:", paste0(":CNVR:",genename, "@"), results[grepl(":CNVR:", results[,1]),1])
108
  # results[grepl(":CNVR:", results[,2]),2]=gsub(":CNVR:", paste0(":CNVR:",genename, "@"), results[grepl(":CNVR:", results[,2]),2])
109
  
110
  return(results)
111
}
112
113
fun.correlation.extrafeatures=function(i, l.regulon.gene, fm, extrafeatures, filter.val=3, method.cor="spearman"){
114
  feats=l.regulon.gene[[i]]
115
  genename=names(l.regulon.gene)[i]
116
  x=data.matrix(t(as.matrix(fm[match(feats,rownames(fm)),,drop=F])))
117
  class(x) <- "numeric"
118
  
119
  # filter if binary has less than n samples
120
  if(any(grepl("^B:", colnames(x)))){
121
    check=colSums(x[,grepl("^B:", colnames(x)),drop=F], na.rm = T)
122
    x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F]
123
  }
124
  
125
  # filter if numeric has less than n non-zero samples
126
  if(any(grepl("^N:", colnames(x)))){
127
    check=colSums(x[,grepl("^N:", colnames(x)),drop=F]!=0, na.rm = T)
128
    x=x[,!colnames(x)%in%names(check[check<filter.val]),drop=F]
129
  }
130
  
131
  # remove if in both lists
132
  extrafeatures=extrafeatures[!extrafeatures%in%feats]
133
  
134
  # Filter if not enough data (less than 3 not NA):
135
  m2=fm[rownames(fm)%in%extrafeatures,]
136
  s=is.na(m2)
137
  m2=m2[!rowSums(s)==dim(m2)[2]&rowSums(!s)>=filter.val, ]
138
  y=data.matrix(t(as.matrix(m2)))
139
  class(y) <- "numeric"
140
  
141
  y=cbind(x,y)
142
  
143
  # filter if binary has less than n 3 samples
144
  if(any(grepl("^B:", colnames(y)))){
145
    check=colSums(y[,grepl("^B:", colnames(y)),drop=F], na.rm = T)
146
    y=y[,!colnames(y)%in%names(check[check<filter.val])]
147
  }
148
  
149
  # filter if numeric has less than n non-zero samples
150
  if(any(grepl("^N:", colnames(y)))){
151
    check=colSums(y[,grepl("^N:", colnames(y)),drop=F]!=0, na.rm = T)
152
    y=y[,!colnames(y)%in%names(check[check<filter.val]),drop=F]
153
  }
154
  
155
  if(dim(x)[2]==0|dim(y)[2]==0)return(NULL) # no pairs to test
156
  
157
  cor_rho=cor(x,y,use = "pairwise.complete.obs", method = method.cor)
158
  
159
  # compute cor pval
160
  cor_pval=t(apply(x, 2, function(v1){
161
    apply(y, 2, function(v2){
162
      if(sum(complete.cases(cbind(v1, v2)))<filter.val)return(NA)
163
      cor.test.p(v1, v2, method = method.cor, use = "pairwise.complete.obs")
164
    })
165
  }))
166
  
167
  results=flat_cor_mat(data.matrix(cor_rho), data.matrix(cor_pval))
168
  
169
  results=do.call(rbind, lapply(colnames(x), function(n)results[results[,1]%in%n,]))
170
  
171
  rownames(results)=NULL
172
  
173
  # remove regulonfeatures, they are coming from another test:
174
  results=results[!(results[,1]%in%colnames(x)&results[,2]%in%colnames(x)),]
175
  
176
  # filter pairwise result:
177
  results=results[!duplicated(t(apply(results[,1:2], 1, sort))),]
178
  
179
  # add gene to cnvr: BUG for wilcox
180
  # results[grepl(":CNVR:", results[,1]),1]=gsub(":CNVR:", paste0(":CNVR:",genename, "@"), results[grepl(":CNVR:", results[,1]),1])
181
  
182
  return(results)
183
}
184
185
186
p.adjust.datatype=function(results, adjust.method="BH", log10=F, prettyNumbers=T, orderdata=T){
187
  
188
  # number of features tested:
189
  datatypes=suppressWarnings(do.call(rbind, strsplit(results[,2], ":"))[,2]) # sometimes more than 3 columns
190
  datatypes2=suppressWarnings(do.call(rbind, strsplit(results[,1], ":"))[,2])
191
  datatypes3=apply(cbind(datatypes2,datatypes),1,paste, collapse=":")
192
  datatypes=datatypes3
193
  
194
  results=results[order(datatypes),]
195
  datatypes=datatypes[order(datatypes)]
196
  
197
  vals=seq(datatypes)
198
  
199
  vals=unlist(lapply(unique(datatypes), function(d)p.adjust(results$p[datatypes%in%d], method = adjust.method)))
200
  
201
  if(log10){
202
    vals=abs(log10(vals))
203
  }
204
  if(prettyNumbers){
205
    vals=prettyNum(signif(vals,2))
206
    results[,3]=prettyNum(signif(results[,3],2))
207
    results[,4]=prettyNum(signif(results[,4],2))
208
  }else{
209
    vals=as.numeric(vals)
210
    results[,3]=as.numeric(results[,3])
211
    results[,4]=as.numeric(results[,4])
212
  }
213
  
214
  results$adj.p=vals
215
  results$signifCode=""
216
  results$signifCode[as.numeric(vals)<0.1]="."
217
  results$signifCode[as.numeric(vals)<0.05]="*"
218
  results$signifCode[as.numeric(vals)<0.01]="**"
219
  results$signifCode[as.numeric(vals)<0.001]="***"
220
  results$signifCode[as.numeric(vals)<0.0001]="****"
221
  
222
  results$datapairs=datatypes
223
  
224
  if(orderdata)results=results[order(results[,1], datatypes, -as.numeric(results$cor)>0, as.numeric(results$adj.p), decreasing = F),]
225
  
226
  return(results)
227
}
228
229
230
ft.alt <- function(a, b, c, d, alternative="greater") {
231
  as.numeric(fisher.test(matrix(c(a, b, c, d), 2), alternative = alternative)$p.value)
232
}
233
234
fisher.test.pwpw=function(results, find, fm, fisher.alternative, cores){
235
  results.o=results
236
  # remove gene anno if CNV type, to match with FM
237
  results.o[,1]=gsub("CNVR:.*.@", "CNVR:", results.o[,1])
238
  results.o[,2]=gsub("CNVR:.*.@", "CNVR:", results.o[,2])
239
  
240
  featsA=as.list(as.data.frame(t(fm[results.o[find,1],])))
241
  featsB=as.list(as.data.frame(t(fm[results.o[find,2],])))
242
  
243
  grid=data.frame(do.call(rbind, mclapply(seq(find), function(i)c(table(featsA[[i]], featsB[[i]])), mc.cores=cores)))
244
  colnames(grid)=c("a", "b", "c", "d")
245
  
246
  if(fisher.alternative=="two.sided"){
247
    p.ft <- with(grid, HighSpeedStats::ultrafastfet(a, b, c, d))
248
    results$p[find]=p.ft
249
250
  }else{
251
    p.ft <- with(grid, mapply(ft.alt, a, b, c, d, fisher.alternative))
252
    results$p[find]=p.ft
253
  }
254
  results$test.method[find]=paste0("Fisher´s exact (", fisher.alternative, ")")
255
  return(results)
256
}
257
258
# pairwise.correlation=function(l.regulon.gene, fm, extrafeatures=NULL, filter.val=3, cores=5, adjust.method="BH", method.cor="spearman", prettyNumbers=T, use.fisher=T, fisher.alternative="two.sided"){
259
#   
260
#   #****************************** Test within regulon ******************************
261
#   r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.regulon,l.regulon.gene, fm, filter.val=filter.val,method.cor=method.cor, mc.cores=cores)
262
#   results=do.call(rbind, r)
263
#   results=results[!is.na(results$p),,drop=F]
264
#   
265
#   if(!is.null(results)){
266
#     results$test.method=paste(method.cor, "correlation")
267
#     results$test.group="gene.features"
268
#     
269
#     # if any pairs are binary-binary, add fisher test p-value
270
#     fa=substr(results[,1], 0, 1)=="B"
271
#     fb=substr(results[,2], 0, 1)=="B"
272
#     find=which(fa&fb)
273
#     findl=fa&fb
274
#     
275
#     if(use.fisher&any(findl)){
276
#       results=fisher.test.pwpw(results, find, fm, fisher.alternative, cores)
277
#       results1=p.adjust.datatype(results[findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
278
#       results2=p.adjust.datatype(results[!findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
279
#       results=rbind(results1, results2)
280
#     }else{
281
#       results=p.adjust.datatype(results, orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
282
#     }
283
#   }
284
#   #*********************************************************************************
285
#   
286
#   # Test against extrafeatures if they exist:
287
#   if(!is.null(extrafeatures)){
288
#     r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.extrafeatures, l.regulon.gene, fm, extrafeatures=extrafeatures, filter.val=filter.val,method.cor=method.cor, mc.cores=cores)
289
#     results.e=do.call(rbind, r)
290
#     results.e=results.e[!is.na(results.e$p),]
291
#     results.e$test.method=paste(method.cor, "correlation")
292
#     results.e$test.group="extra.features"
293
#     
294
#     # if any pairs are binary-binary, add fisher test p-value
295
#     fa=substr(results.e[,1], 0, 1)=="B"
296
#     fb=substr(results.e[,2], 0, 1)=="B"
297
#     find=which(fa&fb)
298
#     findl=fa&fb
299
#     
300
#     if(use.fisher&any(findl)){
301
#       results.e=fisher.test.pwpw(results.e, find,fm, fisher.alternative, cores)
302
#       results1=p.adjust.datatype(results.e[findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
303
#       if(sum(!findl)>0){
304
#         results2=p.adjust.datatype(results.e[!findl,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
305
#         results.e=rbind(results1, results2)
306
#       }else{
307
#         results.e=results1
308
#       }
309
#     }else{
310
#       results.e=p.adjust.datatype(results.e, orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
311
#     }
312
#     results=rbind(results, results.e)
313
#   }
314
#   
315
#   if(adjust.method=="BH")adj="FDR"
316
#   if(adjust.method=="bonferroni")adj="FWER"
317
#   colnames(results)[7]=adj
318
#   results[grepl("Fisher", results$test.method),3]=""
319
#   
320
#   # order columnts:
321
#   cols=c("featureA", "featureB", "cor", "p", adj, "signifCode", "test.method", "test.group", "datapairs")
322
#   results=results[,cols]
323
#   return(results)
324
# }
325
326
filter.pairwise.res=function(results, filter.table=NULL, FDR=0.1){
327
  if(is.null(filter.table)){
328
    filter.table=data.frame("pair"=unique(results$datapairs), "FDR"=rep(FDR, length(unique(results$datapairs))), stringsAsFactors = F)
329
    type1=gsub(":.*.", "", filter.table[,1])
330
    type2=gsub(".*.:", "", filter.table[,1])
331
    
332
    # usually fewer features
333
    filter.table$FDR[type1=="GNAB"|type2=="GNAB"]=0.25 # mutations can be interesting, high P-value kept
334
    filter.table$FDR[type1=="SAMP"&type2=="SAMP"]=0.1 # these can be very different
335
    filter.table$FDR[type1=="CLIN"&type2=="CLIN"]=0.1 # these can be very different
336
    
337
    # exclude, not usually meaningful
338
    filter.table$FDR[type1=="CNVR"&type2=="CNVR"]=0 # remove, highly correlated
339
    filter.table$FDR[type1=="METH"&type2=="METH"]=0 # remove, highly correlated
340
    
341
    # SAMP special rules:
342
    filter.table$FDR[type1=="SAMP"&type2=="GEXP"]=1e-30 
343
    filter.table$FDR[type1=="SAMP"&type2=="MIRN"]=1e-30 
344
    filter.table$FDR[type1=="SAMP"&type2=="METH"]=1e-8 
345
    
346
    print("default table used: ")
347
    print(filter.table)
348
  }
349
  
350
  pairs=results$datapairs
351
  p=unique(pairs)
352
  
353
  # make sure all pairs are found in filter.table
354
  if(!all(p%in%filter.table[,1]))stop(paste("Not all pairs were found: ", p[!p%in%filter.table[,1],]))
355
  
356
  filter.table=filter.table[match(p, filter.table[,1]),]
357
  
358
  # p-value per pairs:
359
  vals=do.call(rbind, lapply(seq(p), function(i){
360
    
361
    r=results[pairs%in%p[i],]
362
    
363
    filt=as.numeric(r[,5])<as.numeric(filter.table[i,2])
364
    
365
    r=r[filt,]
366
  }))
367
  
368
  return(vals)
369
}
370
371
pairwise.correlation=function(l.regulon.gene, fm, extrafeatures=NULL, filter.val=3, cores=5, adjust.method="BH", method.cor="spearman", prettyNumbers=T, use.fisher=T, use.wilcox=T, fisher.alternative="two.sided", wilcox.alternative="two.sided"){
372
  
373
  #****************************** Test within regulon ******************************
374
  r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.regulon,l.regulon.gene, fm, filter.val=filter.val,method.cor=method.cor, mc.cores=cores)
375
  results=do.call(rbind, r)
376
  results=results[!is.na(results$p),,drop=F]
377
  
378
  if(!is.null(results)){
379
    results$test.method=paste(method.cor, "correlation")
380
    results$test.group="gene.features"
381
    
382
    # if any pairs are binary-binary, add fisher test p-value
383
    fa=substr(results[,1], 0, 1)=="B"
384
    fb=substr(results[,2], 0, 1)=="B"
385
    find=which(fa&fb)
386
    findl=fa&fb
387
    
388
    # if any pairs are binary-numeric, add wilcoxon test p-value
389
    fc=substr(results[,1], 0, 1)=="N"
390
    fd=substr(results[,2], 0, 1)=="B"
391
    fe=substr(results[,1], 0, 1)=="B"
392
    ff=substr(results[,2], 0, 1)=="N"
393
    
394
    findw=fc&fd|fe&ff
395
    
396
    # compute p-value depending on test type:
397
    if(use.fisher&any(findl)){
398
      # update B vs B tests:
399
      results=fisher.test.pwpw(results, find, fm, fisher.alternative, cores)
400
    }
401
    
402
    # compute p-value depending on test type:
403
    if(use.wilcox&any(findw)) {
404
      # update B vs N tests:
405
      results=test.feats.wilcox(features = results, rows = findw, data = fm, wilcox.alternative = wilcox.alternative, cores = cores)
406
    }
407
    
408
    # go through each test and adjust p-value per data type:
409
    results=do.call(rbind, lapply(unique(results$test.method), function(type){
410
      p.adjust.datatype(results[results$test.method%in%type,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
411
    }))
412
  }
413
  #*********************************************************************************
414
  
415
  # Test against extrafeatures if they exist:
416
  if(!is.null(extrafeatures)){
417
    r=parallel::mclapply(seq(l.regulon.gene),fun.correlation.extrafeatures, l.regulon.gene, fm, extrafeatures=extrafeatures, filter.val=filter.val,method.cor=method.cor, mc.cores=cores)
418
    results.e=do.call(rbind, r)
419
    results.e=results.e[!is.na(results.e$p),]
420
    results.e$test.method=paste(method.cor, "correlation")
421
    results.e$test.group="extra.features"
422
    
423
    # if any pairs are binary-binary, add fisher test p-value
424
    fa=substr(results.e[,1], 0, 1)=="B"
425
    fb=substr(results.e[,2], 0, 1)=="B"
426
    find=which(fa&fb)
427
    findl=fa&fb
428
    
429
    # if any pairs are binary-numeric, add wilcoxon test p-value
430
    fc=substr(results.e[,1], 0, 1)=="N"
431
    fd=substr(results.e[,2], 0, 1)=="B"
432
    fe=substr(results.e[,1], 0, 1)=="B"
433
    ff=substr(results.e[,2], 0, 1)=="N"
434
    findw=fc&fd|fe&ff
435
  
436
    # compute p-value depending on test type:
437
    if(use.fisher&any(findl)){
438
    # update B vs B tests:
439
    results.e=fisher.test.pwpw(results.e, find, fm, fisher.alternative, cores)
440
    }
441
    
442
    # compute p-value depending on test type:
443
    if(use.wilcox&any(findw)) {
444
    # update B vs N tests:
445
    results.e=test.feats.wilcox(features = results.e, rows = findw, data = fm, wilcox.alternative = wilcox.alternative, cores = cores)
446
    }
447
    
448
    # go through each test and adjust p-value per data type
449
    results.e=do.call(rbind, lapply(unique(results.e$test.method), function(type){
450
      p.adjust.datatype(results.e[results.e$test.method%in%type,], orderdata=T, adjust.method=adjust.method, prettyNumbers=prettyNumbers)
451
    }))
452
    
453
    results=rbind(results, results.e)
454
  }
455
  
456
  if(adjust.method=="BH")adj="FDR"
457
  if(adjust.method=="bonferroni")adj="FWER"
458
  colnames(results)[7]=adj
459
  results[grepl("Fisher", results$test.method),3]=""
460
  
461
  # order columnts:
462
  cols=c("featureA", "featureB", "cor", "p", adj, "signifCode", "test.method", "test.group", "datapairs")
463
  results=results[,cols]
464
  return(results)
465
}
466
467
filter.pairwise.res=function(results, filter.table=NULL, FDR=0.1){
468
  if(is.null(filter.table)){
469
    filter.table=data.frame("pair"=unique(results$datapairs), "FDR"=rep(FDR, length(unique(results$datapairs))), stringsAsFactors = F)
470
    type1=gsub(":.*.", "", filter.table[,1])
471
    type2=gsub(".*.:", "", filter.table[,1])
472
    
473
    # usually fewer features
474
    filter.table$FDR[type1=="GNAB"|type2=="GNAB"]=0.25 # mutations can be interesting, high P-value kept
475
    filter.table$FDR[type1=="SAMP"&type2=="SAMP"]=0.1 # these can be very different
476
    filter.table$FDR[type1=="CLIN"&type2=="CLIN"]=0.1 # these can be very different
477
    
478
    # exclude, not usually meaningful
479
    filter.table$FDR[type1=="CNVR"&type2=="CNVR"]=0 # remove, highly correlated
480
    filter.table$FDR[type1=="METH"&type2=="METH"]=0 # remove, highly correlated
481
    
482
    # SAMP special rules:
483
    filter.table$FDR[type1=="SAMP"&type2=="GEXP"]=1e-30
484
    filter.table$FDR[type1=="SAMP"&type2=="MIRN"]=1e-30
485
    filter.table$FDR[type1=="SAMP"&type2=="METH"]=1e-8
486
    
487
    print("default table used: ")
488
    print(filter.table)
489
  }
490
  
491
  pairs=results$datapairs
492
  p=unique(pairs)
493
  
494
  # make sure all pairs are found in filter.table
495
  if(!all(p%in%filter.table[,1]))stop(paste("Not all pairs were found: ", p[!p%in%filter.table[,1],]))
496
  
497
  filter.table=filter.table[match(p, filter.table[,1]),]
498
  
499
  # p-value per pairs:
500
  vals=do.call(rbind, lapply(seq(p), function(i){
501
    
502
    r=results[pairs%in%p[i],]
503
    
504
    filt=as.numeric(r[,5])<as.numeric(filter.table[i,2])&!is.na(r[,5])
505
    
506
    r=r[filt,]
507
  }))
508
  
509
  return(vals)
510
}
511
512
test.feats.wilcox <- function(features, rows=NA, data, wilcox.alternative="two.sided", cores=1) {
513
  if(length(rows)>=1) {
514
    pairs <- features[rows,]
515
  } else pairs <- features
516
  
517
  p=unlist(parallel::mclapply(1:nrow(pairs), function(i){
518
    binary=substr(pairs[i,2], 0, 1)=="B"
519
    
520
    if(binary) {
521
      
522
      x <- as.numeric(data[pairs[i,1],])
523
      y <- as.logical(as.numeric(data[pairs[i,2],]))
524
      
525
    }else {
526
      
527
      x <- as.numeric(data[pairs[i,2],])
528
      y <- as.logical(as.numeric(data[pairs[i,1],]))
529
      
530
    }
531
    
532
    a <- vector()
533
    b <- vector()
534
    
535
    for(j in 1:length(y)) {
536
      
537
      if(is.na(y[j])) next
538
      if(y[j]) a <- c(a, x[j])
539
      if(!y[j]) b <- c(b, x[j])
540
      
541
    }
542
    
543
    if((is.numeric(a) && is.numeric(b))&&(length(a)>0&&length(b>0))&&sum(is.na(a))!=length(a)&&sum(is.na(b))!=length(b)) {
544
      p <- wilcox.test(a,b, alternative = wilcox.alternative)$p.value
545
    } else {
546
      p=NA # not enough data
547
    }
548
    return(p)
549
  }, mc.cores=cores))
550
  
551
  pairs$test.method=paste0("Wilcoxon test (", wilcox.alternative, ")")
552
  pairs$p<- as.numeric(as.character(p))
553
  
554
  # return original table but with updated p and test.method
555
  features[rows,]=pairs
556
  
557
  return(features)
558
}