--- a
+++ b/common_scripts/statistics/functions_statistics.R
@@ -0,0 +1,639 @@
+fisher.2x2=function(lv1, lv2, alternative="two.sided", usefast=T){
+  cont=table(lv1, lv2)
+  cont[is.na(cont)]=0
+  
+  # fast path: HighSpeedStats::ultrafastfet returns a two-sided p-value only,
+  # so fall back to fisher.test() when a one-sided alternative is requested
+  if(usefast && alternative=="two.sided"){
+    p=HighSpeedStats::ultrafastfet(cont[1,1], cont[1,2], cont[2,1], cont[2,2])
+    odds=NA
+  }else{
+    ft=fisher.test(cont, alternative=alternative)
+    p=ft$p.value
+    odds=log(ft$estimate) # log of the conditional MLE odds ratio, to match the "log.odds" column name
+  }
+  
+  data.frame("p"=p, "log.odds"=odds, stringsAsFactors = F)
+}
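+
+# Usage sketch (toy data, illustrative only). With usefast=F only base R is needed;
+# usefast=T additionally requires the HighSpeedStats package.
+# mut     = rep(c(TRUE, FALSE), times=c(20, 80))
+# treated = rep(c(TRUE, FALSE), 50)
+# fisher.2x2(mut, treated, alternative="two.sided", usefast=F)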
+
+# Wilcoxon rank-sum test of one gene (a row of datam) between sample groups given as named logical vectors;
+# fold changes (FC) assume log2-scale expression values
+TestGeneWilcox=function(gene, datam, logicalVectors, logicalVector_normals=NULL, ALTERNATIVE="greater", prettynum=T){
+  if(!gene%in%rownames(datam)) return(NULL)
+  D = as.numeric(datam[rownames(datam)%in%gene,])
+  
+  d_list=lapply(logicalVectors, function(v){
+    D[v][!is.na(D[v])]
+  })
+  
+  # if no reference groups are given, compare each group against the rest of the samples
+  if(is.null(logicalVector_normals)){
+    logicalVector_normals=lapply(logicalVectors, '!') # negate
+    names(logicalVector_normals)=rep("Rest", length(logicalVector_normals))
+    
+    d_list2=lapply(logicalVector_normals, function(v){
+      D[v][!is.na(D[v])]
+    })
+    
+    stats=do.call(rbind, lapply(seq(d_list), function(i, d_list, d_list2){
+      l=d_list[[i]]
+      name1=names(d_list)[i]
+      name2=names(d_list2)[i]
+      
+      l2=d_list2[[i]]
+      P.value=signif(wilcox.test(l, l2, alternative=ALTERNATIVE)$p.value, 2)
+      
+      FC=2^(mean(l)-mean(l2))
+      
+      df=data.frame("Gene"=gene, "Group1"=name1, "Group2"=name2, FC, "p"=P.value, "alternative"=ALTERNATIVE, stringsAsFactors=F)
+      rownames(df)=NULL
+      df=df[!df$Group1==df$Group2,]
+      
+      if(prettynum){
+        df[,4]=prettyNum(signif(df[,4], 2))
+        df[,5]=prettyNum(signif(df[,5], 2))
+      }
+      
+      return(df)
+    },d_list, d_list2))
+    return(stats)
+  }
+  
+  d_list2=lapply(logicalVector_normals, function(v){
+    D[v][!is.na(D[v])]
+  })
+  
+  stats=do.call(rbind, lapply(seq(d_list), function(i, d_list, d_list2){
+    l=d_list[[i]]
+    name1=names(d_list)[i]
+    name2=names(d_list2)
+    
+    P.value=sapply(d_list2, function(l2){
+      signif(wilcox.test(l, l2, alternative=ALTERNATIVE)$p.value, 2)
+    })
+    
+    FC=sapply(d_list2, function(l2){
+      2^(mean(l)-mean(l2))
+    })
+    
+    df=data.frame("Gene"=gene, "Group1"=name1, "Group2"=name2, FC, "p"=P.value, "alternative"=ALTERNATIVE, stringsAsFactors=F)
+    rownames(df)=NULL
+    df=df[!df$Group1==df$Group2,]
+    
+    if(prettynum){
+      df[,4]=prettyNum(signif(df[,4], 2))
+      df[,5]=prettyNum(signif(df[,5], 2))
+    }
+    
+    return(df)
+  },d_list, d_list2))
+  
+  return(stats)
+}
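+
+# Usage sketch (toy data, illustrative only; expression values assumed to be log2-scale):
+# set.seed(1)
+# expr   = matrix(rnorm(300, mean=5), nrow=3, dimnames=list(c("GeneA", "GeneB", "GeneC"), paste0("sample", 1:100)))
+# groups = list(subtype1 = rep(c(TRUE, FALSE), times=c(40, 60)),
+#               subtype2 = rep(c(FALSE, TRUE), times=c(60, 40)))
+# TestGeneWilcox("GeneA", expr, groups)                                                        # each subtype vs. the rest
+# TestGeneWilcox("GeneA", expr, groups["subtype1"], logicalVector_normals=groups["subtype2"])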
+
+# hypergeometric enrichment of each binary feature (columns of profile) within the samples selected by logicalVector;
+# requires the parallel package; CORES must be defined in the calling environment
+hgt.enrichment=function(genes, logicalVector, profile, adjust.method="bonferroni") {
+  
+  # Initialize stuff
+  genes = genes[genes%in%colnames(profile)]
+  P = profile[,colnames(profile)%in%genes]
+
+  R = as.data.frame(matrix(NA, nrow = length(genes), ncol = 3))
+  colnames(R) = c("gene", "pvalue", "adj.pvalue")
+  R[,1] = genes
+  
+  
+  #************ Hyperg test, disease association ***************
+  FUN=function(j){
+    r = genes[j]
+    k = sum(logicalVector)
+    x = P[,colnames(P)==r]
+    m = sum(x)
+    n = length(x) - m
+    q = sum(x[logicalVector])
+    phyper(q-1,m,n,k, lower.tail = F)
+  }
+  
+  R[,2]=unlist(mclapply(1:nrow(R), FUN, mc.cores=CORES))
+  
+  R[,3] = p.adjust(R[,2], adjust.method)
+  
+  # order by adjusted p-value and report -log10-transformed p-values
+  R = R[order(R$adj.pvalue),]
+  R$pvalue = round(abs(log10(R$pvalue)),1)
+  R$adj.pvalue = round(abs(log10(R$adj.pvalue)),1)
+  return(R)
+}
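+
+# Usage sketch (toy data, illustrative only; needs the parallel package attached and CORES defined in the calling environment):
+# library(parallel); CORES=1
+# set.seed(1)
+# prof = matrix(rbinom(500, 1, 0.2), nrow=50, dimnames=list(paste0("sample", 1:50), paste0("gene", 1:10)))
+# lv   = rep(c(TRUE, FALSE), times=c(15, 35))   # samples of interest
+# hgt.enrichment(genes=colnames(prof), logicalVector=lv, profile=prof)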
+
+# Fisher's exact test of each row of a binary data matrix against each logical vector; returns a matrix of p-values
+# (-log10-transformed when to.log10=T); requires the parallel package, and two-sided mode additionally requires
+# HighSpeedStats; note: adjust.method is currently unused
+fisher.matrix.lv=function(logicalVector.list, data, features=NULL, fisher.alternative="greater", adjust.method=NULL, to.log10=T, cores=5){
+  if(!is.null(features))data=data[rownames(data)%in%features,]
+  
+  m=do.call(cbind, mclapply(logicalVector.list, function(lv){
+    
+    featsA=lv
+    featsB=as.list(as.data.frame(t(data)))
+    
+    grid=data.frame(do.call(rbind, mclapply(seq(featsB), function(i)c(table(featsA, featsB[[i]])), mc.cores=1)))
+    colnames(grid)=c("a", "b", "c", "d")
+    
+    if(fisher.alternative=="two.sided"){
+      p.ft <- with(grid, HighSpeedStats::ultrafastfet(a, b, c, d))
+    }else{
+      p.ft <- with(grid, mapply(ft.alt, a, b, c, d, fisher.alternative))
+    }
+    p.ft
+  }, mc.cores=cores))
+  
+  rownames(m)=rownames(data)
+  colnames(m)=names(logicalVector.list)
+  
+  # order data;
+  m=m[do.call(order, as.data.frame(m)),]
+  
+  if(to.log10)m=round(abs(log10(m)),1)
+  
+  return(m)
+}
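+
+# Usage sketch (toy data, illustrative only): fisher.alternative="greater" uses ft.alt below and needs only base R
+# plus the parallel package; fisher.alternative="two.sided" additionally requires HighSpeedStats.
+# library(parallel)
+# set.seed(1)
+# bin = matrix(rbinom(300, 1, 0.3), nrow=6, dimnames=list(paste0("feature", 1:6), paste0("sample", 1:50)))
+# lvs = list(groupA = rep(c(TRUE, FALSE), times=c(20, 30)), groupB = rep(c(FALSE, TRUE), times=c(25, 25)))
+# fisher.matrix.lv(lvs, bin, fisher.alternative="greater", cores=1)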
+
+# hypergeometric enrichment (on the binary profile) followed by Mann-Whitney and fold-change comparisons (on data)
+# against each normal group; requires the parallel package; note: the TEST_FAILS argument is currently unused
+FUN_HGT_MW_TEST=function(genes, logicalVector,logicalVector_normals, name, TEST_FAILS, ALTERNATIVE="greater", data, profile, CORES=3, HG_PVAL=0.001, MW_PVAL=0.001, P.ADJUST.METH="bonferroni") {
+  
+  # transpose so that samples are rows and genes are columns (orientation detected via TP53 in the rownames)
+  if(any(grepl("TP53", rownames(data))))data=t(data)
+  if(any(grepl("TP53", rownames(profile))))profile=t(profile)
+  
+  # Check some prerequisites
+  if(nrow(data)!=nrow(profile)) stop("data and profile dimension mismatch")
+  if(length(logicalVector)!=nrow(data)) stop("logicalVector dimension mismatch.")
+  
+  # Initialize stuff
+  genes = genes[genes%in%colnames(data)]
+  P = profile[,colnames(profile)%in%genes]
+  D = data[,colnames(data)%in%genes]
+  
+  R = as.data.frame(matrix(NA, nrow = length(genes), ncol = 3))
+  colnames(R) = c("gene", "pvalue", "adj.pvalue")
+  R[,1] = genes
+  
+  
+  #************ Hyperg test, disease association ***************
+  FUN=function(j){
+    r = genes[j]
+    k = sum(logicalVector)
+    x = P[,colnames(P)==r]
+    m = sum(x)
+    n = length(x) - m
+    q = sum(x[logicalVector])
+    phyper(q-1,m,n,k, lower.tail = F)
+  }
+  
+  R[,2]=unlist(mclapply(1:nrow(R), FUN, mc.cores=CORES))
+  
+  R[,3] = p.adjust(R[,2], P.ADJUST.METH)
+  
+  # log10 transformation and filtering
+  R = R[order(R$adj.pvalue),]
+  R = R[R$adj.pvalue < HG_PVAL,] # FILTER
+  R$pvalue = round(abs(log10(R$pvalue)),1)
+  R$adj.pvalue = round(abs(log10(R$adj.pvalue)),1)
+  
+  #******************************************************************
+  
+  if(dim(R)[1]>0){
+    
+    # Mann-Whitney: test whether expression in the disease samples is greater (ALTERNATIVE) than in each normal group
+    MW = do.call(rbind, mclapply(1:dim(R)[1],function(i) {
+      x=R[i,]
+      gene = as.character(x[1])
+      S = as.numeric(D[,colnames(D)==gene])
+      logicalVector2=as.logical(P[,colnames(P)==gene])
+      
+      # choose: restrict to disease samples that also carry the aberration, or (current choice) use the whole disease group
+      # S_D = S[logicalVector&logicalVector2]
+      S_D = S[logicalVector]
+      
+      ttp=sapply(logicalVector_normals, function(lv){
+        S_normal = D[lv,colnames(D)==gene]
+        tt = wilcox.test(S_D, S_normal,alternative=ALTERNATIVE)
+        return(tt$p.value)
+      })
+      
+      ttp = p.adjust(ttp, P.ADJUST.METH)
+    }, mc.cores=CORES))
+    
+    # a comparison "fails" when the adjusted Mann-Whitney p-value exceeds MW_PVAL
+    testsig=MW>MW_PVAL
+    
+    name_normals=names(logicalVector_normals)
+    
+    res=do.call(cbind, lapply(seq(name_normals), function(i){
+      ifelse(testsig[,i], name_normals[i], "")
+    }))
+    
+    fails=apply(res, 1, function(v)paste(v[!v==""], collapse=","))
+    
+    # also compute fold change against all normals
+    options("scipen"=100, "digits"=3)
+    FC = do.call(rbind, mclapply(1:dim(R)[1],function(i) {
+      x=R[i,]
+      gene = as.character(x[1])
+      S = as.numeric(D[,colnames(D)==gene])
+      logicalVector2=as.logical(P[,colnames(P)==gene])
+      
+      # choose: restrict to disease samples that also carry the aberration, or (current choice) use the whole disease group
+      # S_D = S[logicalVector&logicalVector2]
+      S_D = S[logicalVector]
+      
+      # difference of means: mean(normal) - mean(disease), computed on log2-scale values
+      fc=sapply(logicalVector_normals, function(lv){
+        S_normal = D[lv,colnames(D)==gene]
+        mean(S_normal)-mean(S_D)
+      })
+    }, mc.cores=CORES))
+    
+    # FC holds mean(normal) - mean(disease); negate so that positive values mean higher expression in disease
+    FoldChange=round(rowMeans(FC)*-1,1)
+    
+    FoldChange_min=round(apply(FC, 1, max)*-1,1) # smallest disease-vs-normal difference
+    FoldChange_max=round(apply(FC, 1, min)*-1,1) # largest disease-vs-normal difference
+    
+    # report all of these
+    df=data.frame(R, FoldChange, fails, FoldChange_min, FoldChange_max)
+    
+  }else{
+    return(NULL)
+  }
+  
+  # output
+  if(dim(df)[1]>0)  return(cbind(df, name, stringsAsFactors=F))
+}
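+
+# Usage sketch (toy data, illustrative only; the relaxed cutoffs HG_PVAL=1.1 and MW_PVAL=1 keep all genes so the toy run returns rows):
+# library(parallel); set.seed(1)
+# expr = matrix(rnorm(500), nrow=50, dimnames=list(paste0("sample", 1:50), paste0("gene", 1:10)))
+# prof = (expr > 0) * 1                          # binary aberration profile, same dimensions as expr
+# lv   = rep(c(TRUE, FALSE), times=c(15, 35))    # disease samples
+# FUN_HGT_MW_TEST(genes=colnames(expr), logicalVector=lv, logicalVector_normals=list(NormalA=!lv),
+#                 name="toy", TEST_FAILS=NULL, data=expr, profile=prof,
+#                 CORES=1, HG_PVAL=1.1, MW_PVAL=1)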
+
+# Kaplan-Meier plotting helper built on survival::survfit and survminer::ggsurvplot;
+# requires the survival, survminer and ggplot2 packages (some CONTINUOUS_SUMMARY options also use dplyr, and >2-group palettes use RColorBrewer)
+fun.kapplanMeier=function(TIME, STATUS, GROUPS=NULL, CONTINUOUS=NULL, CONTINUOUS_SUMMARY=c("maxrank", "5quantiles","4quantiles","3quantiles","85th_15th_percentile", "80th_20th_percentile", "75th_25th_percentile", "median"), NAME="", MONTHS=F, PVAL=1, INDIVIDUAL_GROUPS=T, LWD=3, labels=NULL, COLORV=NULL, conf.int=F,font.size=8, xlab="Time in months"){
+  
+  CONTINUOUS_SUMMARY <- match.arg(CONTINUOUS_SUMMARY)
+  
+  # cutpoints https://github.com/kassambara/survminer/issues/41
+  if(MONTHS){
+    TIME=as.numeric(TIME)*0.0328767 # convert days to months (12/365)
+  }
+  
+  if(is.null(GROUPS)&!is.null(CONTINUOUS)){
+    df=data.frame("time"=TIME, "status"=STATUS, "continuous"=CONTINUOUS, stringsAsFactors = F)
+    
+    NAME=paste(NAME, CONTINUOUS_SUMMARY)
+    
+    if(CONTINUOUS_SUMMARY=="maxrank"){
+      library(survminer)
+      cutoff <- surv_cutpoint(
+        df,
+        time = "time",
+        event = "status",
+        variables = c("continuous")
+      )
+      # print(plot(cutoff))
+      GROUPS=rep(paste0("Low (<=", signif(cutoff$cutpoint$cutpoint,1), ")"), length(CONTINUOUS))
+      GROUPS[CONTINUOUS>cutoff$cutpoint$cutpoint]=paste0("High (>", signif(cutoff$cutpoint$cutpoint,1), ")")
+    }
+    
+    if(CONTINUOUS_SUMMARY=="4quantiles"){
+      
+      library(dplyr)
+      GROUPS <- paste0("Q",ntile(CONTINUOUS, 4))
+      
+    }
+    
+    if(CONTINUOUS_SUMMARY=="3quantiles"){
+      
+      library(dplyr)
+      GROUPS <- paste0("Q",ntile(CONTINUOUS, 3))
+      
+    }
+    
+    if(CONTINUOUS_SUMMARY=="5quantiles"){
+      
+      library(dplyr)
+      GROUPS <- paste0("Q",ntile(CONTINUOUS, 5))
+      
+    }
+    
+    
+    if(CONTINUOUS_SUMMARY=="85th_15th_percentile"){
+      
+      q=quantile(CONTINUOUS, probs = c(0.15, 0.85)) # 15th and 85th percentiles
+      GROUPS=rep("Q2", length(CONTINUOUS))
+      GROUPS[CONTINUOUS>q[2]]="Q3"
+      GROUPS[CONTINUOUS<q[1]]="Q1"
+      
+    }
+    
+    if(CONTINUOUS_SUMMARY=="80th_20th_percentile"){
+      
+      q=quantile(CONTINUOUS, probs = c(0.2, 0.8)) # 20th and 80th percentiles
+      GROUPS=rep("Q2", length(CONTINUOUS))
+      GROUPS[CONTINUOUS>q[2]]="Q3"
+      GROUPS[CONTINUOUS<q[1]]="Q1"
+      
+    }
+    
+    if(CONTINUOUS_SUMMARY=="75th_25th_percentile"){
+      
+      q=quantile(CONTINUOUS, probs = c(0.25, 0.75)) # 25th and 75th percentiles
+      GROUPS=rep("Q2", length(CONTINUOUS))
+      GROUPS[CONTINUOUS>q[2]]="Q3"
+      GROUPS[CONTINUOUS<q[1]]="Q1"
+      
+    }
+    
+    if(CONTINUOUS_SUMMARY=="median"){
+      
+      library(dplyr)
+      GROUPS <- paste0("Q",ntile(CONTINUOUS, 2))
+      
+    }
+  }
+  NAME=gsub("_", " ", NAME)
+  classes=as.character(GROUPS)
+  size=c(length(unique(classes)))
+  
+  if(!is.null(COLORV)){
+    col1 <- COLORV
+  }else{
+    size=c(length(unique(classes)))
+    col1=c("red", "gray75")
+    if(size>2){
+      col1 <- colorRampPalette(c(brewer.pal(min(size, 9), "Set1"), brewer.pal(min(size, 8), "Set2"), brewer.pal(min(size, 9), "Set3")))(size)
+      col1=col1[order(unique(classes))]
+    }
+    if(all(unique(classes)%in%c("Q1", "Q2", "Q3"))){
+      col1=data.frame(c("Q1", "Q2", "Q3"), c("darkblue", "grey75", "red"), stringsAsFactors = F)
+      col1=col1[match(unique(classes), col1[,1]),2]
+    }
+  }
+  
+  if(!INDIVIDUAL_GROUPS){
+    DF.surv2=data.frame("time"=as.numeric(TIME), "status"=as.character(STATUS)=="DECEASED"|as.character(STATUS)=="1"|as.character(STATUS)=="Dead", "x"=factor(GROUPS, levels=unique(GROUPS)), stringsAsFactors=T)
+    d.surv2 <- survfit(Surv(time,status) ~ x, data = DF.surv2, type="kaplan-meier", conf.type="log")
+    
+    
+    # OLD
+    # plot(d.surv2, col=col1, lwd=rep(as.numeric(LWD), length(col1)), mark.time=c(as.numeric(TIME)), mark=3, cex=LWD/2, fun="log", conf.int=F, ylim=c(0, 1), ylab="Percentage Surviving", xlab="Months")
+    # legend("topright", paste(names(d.surv2$strata)," n=",d.surv2$n, sep=""), lwd=rep(LWD, length(col1)), col=col1)
+    # title(paste("Kaplan-Meier Curves\n", NAME))
+    
+    theme0 <- function(...) theme( 
+      # legend.position = "none",
+      panel.background = element_blank(),
+      panel.grid.major = element_blank(),
+      panel.grid.minor = element_blank(),
+      axis.line = element_line(),
+      plot.margin=unit(c(1,1,1,1), "mm"),
+      axis.text.y = element_text(colour="grey20",size=font.size,face="plain", family="Helvetica"),  
+      axis.text.x = element_text(colour="grey20",size=font.size,face="plain", family="Helvetica"),
+      text =   element_text(colour="grey20",size=font.size,face="plain", family="Helvetica"),
+      plot.title = element_text(size=font.size, face = "plain", family="Helvetica")
+      # panel.spacing = unit(0,"null"),
+      # axis.ticks = element_blank(),
+      # axis.text.x = element_blank(),
+      # axis.text.y = element_blank(),
+      # axis.title.x = element_blank(),
+      # axis.title.y = element_blank(),
+      # axis.ticks.length = unit(0,"null"),
+      # panel.border=element_rect(color=NA)
+    )
+    
+    ggsurv <- survminer::ggsurvplot(
+      d.surv2,                     # survfit object with calculated statistics.
+      data = DF.surv2,             # data used to fit survival curves.
+      risk.table = F,       # show risk table.
+      pval = TRUE,             # show p-value of log-rank test.
+      conf.int = conf.int,         # show confidence intervals for 
+      # point estimates of survival curves.
+      palette = col1,
+      pval.size=3,
+      pval.method = T,
+      size = LWD,
+      censor.size = LWD*1.5,
+      censor.shape="|",
+      title=NAME,
+      # xlim = c(0,500),         # present narrower X axis, but not affect
+      # survival estimates.
+      xlab = xlab,
+      # break.time.by = 100,     # break X axis in time intervals by 500.
+      ggtheme = theme0(), # customize plot and risk table with a theme.
+      risk.table.y.text.col = F,# colour risk table text annotations.
+      risk.table.height = 0.25, # the height of the risk table
+      risk.table.y.text = FALSE,# show bars instead of names in text annotations
+      # in legend of risk table.
+      ncensor.plot = F,      # plot the number of censored subjects at time t
+      ncensor.plot.height = 0.25,
+      conf.int.style = "step",  # customize style of confidence intervals
+      # surv.median.line = "hv",  # add the median survival pointer.
+      legend.labs = paste0(unique(DF.surv2$x)," n=", d.surv2$n) # change legend labels.
+    )
+    return(ggsurv)
+    
+  }else{
+    
+    unfeat=unique(as.character(GROUPS))
+    
+    plots=lapply(unfeat, function(a){
+      group=ifelse(GROUPS%in%a, a, "Rest")
+      
+      
+      DF.surv2=data.frame("time"=as.numeric(TIME), "status"=as.character(STATUS)=="DECEASED"|as.character(STATUS)=="1"|as.character(STATUS)=="Dead", "x"=as.character(group), stringsAsFactors=F)
+      d.surv2 <- survfit(Surv(time,status) ~ x, data = DF.surv2, type="kaplan-meier", conf.type="log")
+      
+      classes=as.character(GROUPS)
+      size=c(length(unique(classes)))
+      
+      if(!is.null(COLORV)){
+        col1 <- COLORV
+      }else{
+        size=c(length(unique(classes)))
+        col1=c("red", "gray75")
+      }
+      
+      # likelihood-ratio p-value of the Cox model; used below to decide whether this group is plotted (PVAL cutoff)
+      # optional guard (currently disabled): require at least 2 observations per status/group cell before fitting
+      # if(all(table(DF.surv2[,2], DF.surv2[,3])>=2)){
+      Hazard.t=coxph(Surv(time,status) ~ x, data = DF.surv2)
+      add=signif(summary(Hazard.t)$logtest["pvalue"],2)[1]
+      # }else{
+      #   add=NA
+      # }
+      
+      if(!is.na(add)&add<=PVAL){
+        
+        # plot(d.surv2, col=col1, lwd=rep(as.numeric(LWD), length(col1)), mark.time=c(as.numeric(TIME)), mark=3,cex=LWD/2, fun="log", conf.int=F, ylim=c(0, 1), xlab="Months", ylab="Percentage Surviving")
+        # legend("topright", paste(gsub("x=", "", names(d.surv2$strata))," n=",d.surv2$n, " ", c("", paste0("p.value=", add)), sep=""), lwd=rep(LWD, length(col1)), col=col1)
+        # title(paste("Kaplan-Meier Curves\n", NAME))
+        theme0 <- function(...) theme( 
+          # legend.position = "none",
+          panel.background = element_blank(),
+          panel.grid.major = element_blank(),
+          panel.grid.minor = element_blank(),
+          axis.line = element_line(),
+          plot.margin=unit(c(1,1,1,1), "mm"),
+          axis.text.y = element_text(colour="grey20",size=font.size,face="plain", family="Helvetica"),  
+          axis.text.x = element_text(colour="grey20",size=font.size,face="plain", family="Helvetica"),
+          text =   element_text(colour="grey20",size=font.size,face="plain", family="Helvetica"),
+          plot.title = element_text(size=font.size, face = "plain", family="Helvetica")
+          # panel.spacing = unit(0,"null"),
+          # axis.ticks = element_blank(),
+          # axis.text.x = element_blank(),
+          # axis.text.y = element_blank(),
+          # axis.title.x = element_blank(),
+          # axis.title.y = element_blank(),
+          # axis.ticks.length = unit(0,"null"),
+          # panel.border=element_rect(color=NA)
+        )
+        
+        
+        ggsurv <- survminer::ggsurvplot(
+          d.surv2,                     # survfit object with calculated statistics.
+          data = DF.surv2,             # data used to fit survival curves.
+          risk.table = F,       # show risk table.
+          pval = TRUE,             # show p-value of log-rank test.
+          conf.int = conf.int,         # show confidence intervals for 
+          # point estimates of survival curves.
+          palette = col1,
+          pval.size=3,
+          pval.method = T,
+          title=NAME,
+          size = LWD,
+          censor.size = LWD*1.5,
+          censor.shape="|",
+          # xlim = c(0,500),         # present narrower X axis, but not affect
+          # survival estimates.
+          xlab = xlab,
+          # break.time.by = 100,     # break X axis in time intervals by 500.
+          ggtheme = theme0(), # customize plot and risk table with a theme.
+          risk.table.y.text.col = F,# colour risk table text annotations.
+          risk.table.height = 0.25, # the height of the risk table
+          risk.table.y.text = FALSE,# show bars instead of names in text annotations
+          # in legend of risk table.
+          ncensor.plot = F,      # plot the number of censored subjects at time t
+          ncensor.plot.height = 0.25,
+          conf.int.style = "step",  # customize style of confidence intervals
+          # surv.median.line = "hv",  # add the median survival pointer.
+          legend.labs = paste0(gsub("^x=", "", names(d.surv2$strata))," n=", d.surv2$n) # legend labels, kept in strata order
+        )
+        # print(ggsurv)
+        return(ggsurv)
+      }
+    })
+    # drop groups skipped by the PVAL cutoff (NULL entries) before arranging
+    plots=plots[!sapply(plots, is.null)]
+    plots.together=arrange_ggsurvplots(plots, print = TRUE,
+                                       ncol = 2, nrow = ceiling(length(plots)/2))
+    return(plots.together)
+  }
+}
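+
+# Usage sketch (toy data, illustrative only):
+# library(survival); library(survminer); library(dplyr)  # survminer also attaches ggplot2
+# set.seed(1)
+# os_days   = rexp(100, rate=1/700)                       # follow-up time in days
+# os_status = sample(c("Dead", "Alive"), 100, replace=TRUE)
+# marker    = rnorm(100)                                  # hypothetical continuous score
+# fun.kapplanMeier(TIME=os_days, STATUS=os_status, CONTINUOUS=marker, CONTINUOUS_SUMMARY="median",
+#                  MONTHS=TRUE, INDIVIDUAL_GROUPS=FALSE, NAME="toy_marker")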
+
+ft.alt <- function(a, b, c, d, alternative="greater") {
+  as.numeric(fisher.test(matrix(c(a, b, c, d), 2), alternative = alternative)$p.value)
+}
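+
+# Example (illustrative): one-sided p-value for the 2x2 table matrix(c(a, b, c, d), 2)
+# ft.alt(12, 3, 4, 81, alternative="greater")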
+
+# p-value of a single correlation test; note that cor.test() drops incomplete pairs itself and silently ignores the 'use' argument
+cor.test.p=function(x, y, method="spearman", use="pairwise.complete.obs") cor.test(x, y, method=method, use=use)[["p.value"]]
+
+cor.test.p.m <- function(x, method="spearman", use="pairwise.complete.obs"){
+  
+  z <- outer(
+    colnames(x),
+    colnames(x),
+    Vectorize(function(i,j){
+      # require at least 3 complete observation pairs for the test
+      too.few.pairs=sum(complete.cases(x[,i], x[,j]))<3
+      if(too.few.pairs)return(NA)
+      cor.test.p(x[,i], x[,j], method=method, use=use)
+    })
+  )
+  dimnames(z) <- list(colnames(x), colnames(x))
+  z
+}
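+
+# Usage sketch (illustrative, base R only):
+# p_mat = cor.test.p.m(mtcars[, 1:5], method="spearman")   # matrix of pairwise correlation-test p-values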
+
+flat_cor_mat <- function(cor_rho, cor_pval){
+  # cor_3 <- rcorr(as.matrix(mtcars[, 1:7]))
+  #This function provides a simple formatting of a correlation matrix
+  #into a table with 4 columns containing :
+  # Column 1 : row names (variable 1 for the correlation test)
+  # Column 2 : column names (variable 2 for the correlation test)
+  # Column 3 : the correlation coefficients
+  # Column 4 : the p-values of the correlations
+  library(tidyr)
+  library(dplyr)
+  library(tibble)
+  options(scipen=4)
+  cor_rho <- rownames_to_column(as.data.frame(cor_rho), var = "row")
+  cor_rho <- gather(cor_rho, column, cor, -1)
+  cor_pval <- rownames_to_column(as.data.frame(cor_pval), var = "row")
+  cor_pval <- gather(cor_pval, column, p, -1)
+  cor_pval_matrix <- left_join(cor_rho, cor_pval, by = c("row", "column"))
+  colnames(cor_pval_matrix)[1:2]=c("featureA", "featureB")
+  cor_pval_matrix=cor_pval_matrix[!cor_pval_matrix[,1]==cor_pval_matrix[,2],]
+  cor_pval_matrix=cor_pval_matrix[order(cor_pval_matrix[,1], cor_pval_matrix[,3]),]
+  rownames(cor_pval_matrix)=NULL
+  cor_pval_matrix
+}
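+
+# Usage sketch (continues the cor.test.p.m example above; needs tidyr, dplyr and tibble):
+# rho_mat = cor(mtcars[, 1:5], method="spearman", use="pairwise.complete.obs")
+# flat_cor_mat(rho_mat, p_mat)   # long-format table: featureA, featureB, cor, p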
+
+# adapted from PerformanceAnalytics::chart.Correlation (checkData comes from that package);
+# filterv, if given, restricts the plot to the matching columns
+chart.Correlation.custom=function (R,filterv=NULL, histogram = TRUE, method = c("pearson", "kendall","spearman"), ...){
+  x = checkData(R, method = "matrix")
+  if(!is.null(filterv)) x = x[, colnames(x) %in% filterv, drop = FALSE]
+  if (missing(method))
+    method = method[1]
+  panel.cor <- function(x, y, digits = 2, prefix = "", use = "pairwise.complete.obs",
+                        method, cex.cor, ...) {
+    usr <- par("usr")
+    on.exit(par(usr))
+    par(usr = c(0, 1, 0, 1))
+    r <- cor(x, y, use = use, method = method)
+    txt <- format(c(r, 0.123456789), digits = digits)[1]
+    txt <- paste(prefix, txt, sep = "")
+    if (missing(cex.cor))
+      cex <- 0.8/strwidth(txt)
+    test <- cor.test(x, y, method = method)
+    Signif <- symnum(test$p.value, corr = FALSE, na = FALSE,
+                     cutpoints = c(0, 0.001, 0.01, 0.05, 0.1, 1), symbols = c("***",
+                                                                              "**", "*", ".", " "))
+    text(0.5, 0.5, txt, cex = cex * (abs(r) + 0.3)/1.3)
+    text(0.8, 0.8, Signif, cex = cex, col = 2)
+  }
+  f <- function(t) {
+    dnorm(t, mean = mean(x), sd = sd.xts(x))
+  }
+  hist.panel = function(x, ...) {
+    par(new = TRUE)
+    hist(x, col = "light gray", probability = TRUE, axes = FALSE,
+         main = "", breaks = "FD")
+    lines(density(x, na.rm = TRUE), col = "red", lwd = 1)
+    rug(x)
+  }
+  # column filtering is handled via filterv above
+  if (histogram)
+    pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor,
+          diag.panel = hist.panel, method = method, ...)
+  else pairs(x, gap = 0, lower.panel = panel.smooth, upper.panel = panel.cor,
+             method = method, ...)
+}
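+
+# Usage sketch (illustrative; requires the PerformanceAnalytics package):
+# library(PerformanceAnalytics)
+# chart.Correlation.custom(mtcars[, 1:5], method="spearman")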
+
+# make sure the data is on a linear (non-log) scale
+# check zero values: if there are many values between 0 and 1 it is better to use add.one; for count data, remove also works well
+# zero-handling methods: https://arxiv.org/pdf/1806.06403.pdf
+gm_mean=function(x, na.rm=TRUE, zero.handle=c("remove", "add.one", "zero.propagate")){
+  zero.handle=match.arg(zero.handle)
+  if(any(x < 0, na.rm = TRUE)){
+    warning("Negative values present, returning NaN - is the data on a linear (non-log) scale?")
+    return(NaN)
+  }
+  
+  if(zero.handle=="remove"){
+    # zeros are dropped from the product but length(x) is kept in the denominator, so zeros still pull the mean down
+    return(exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)))
+  }
+  
+  if(zero.handle=="add.one"){
+    return(exp(mean(log(x+1), na.rm = na.rm))-1)
+  }
+  
+  if(zero.handle=="zero.propagate"){
+    if(any(x == 0, na.rm = TRUE)){
+      return(0)
+    }
+    return(exp(mean(log(x), na.rm = na.rm)))
+  }
+  
+}
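+
+# Usage sketch (illustrative):
+# counts = c(0, 2, 8, 32)
+# gm_mean(counts, zero.handle="remove")          # drops zeros from the product (denominator stays length(x))
+# gm_mean(counts, zero.handle="add.one")         # mean of log(x+1), back-transformed
+# gm_mean(counts, zero.handle="zero.propagate")  # returns 0 when any zero is present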
\ No newline at end of file