Switch to unified view

a b/Functions/analysis.diff.survival.TCGA.R
1
analysis.diff.survival.TCGA <- function(interest.gene, diff.gene.pro, exp.data.process, clin.data, group.by = "median", EnhancedVolcano.plot = T, Box.plot = T, main = NULL, meta.signature = F, single.signature = T){
2
  require(ggpubr)
3
4
  # require interest.gene to be data.frame
5
  if(EnhancedVolcano.plot){
6
    require(EnhancedVolcano)
7
    gene.overlap <- intersect(rownames(diff.gene.pro), rownames(interest.gene))
8
    diff.gene.pro.sig.gene <- diff.gene.pro[gene.overlap,]
9
    interest.gene.sig <- interest.gene[gene.overlap,]
10
11
    up.gene <- which(diff.gene.pro.sig.gene$log2FoldChange>0)
12
    if(length(up.gene)>0){
13
      down.gene <- setdiff(1:nrow(diff.gene.pro.sig.gene), up.gene)
14
      interest.gene.sig$label <- rep("down", nrow(diff.gene.pro.sig.gene))
15
      interest.gene.sig$label[up.gene] <- "up"
16
    }else{
17
      down.gene <- 1:nrow(diff.gene.pro.sig.gene)
18
      interest.gene.sig$label <- rep("down", nrow(diff.gene.pro.sig.gene))
19
    }
20
    
21
    interest.gene.sig$TCGA_log2FoldChange <- diff.gene.pro.sig.gene$log2FoldChange
22
    interest.gene.sig$TCGA_padj <- diff.gene.pro.sig.gene$padj
23
    
24
    p <- EnhancedVolcano(diff.gene.pro.sig.gene,
25
                  lab = rownames(diff.gene.pro.sig.gene),
26
                  selectLab = rownames(diff.gene.pro.sig.gene),
27
                  x = 'log2FoldChange',
28
                  y = 'padj', pCutoff = 0.05, drawConnectors = TRUE, FCcutoff = 1,
29
                  widthConnectors = 0.2,
30
                  title = "Tumor VS Normal", xlab = "", ylab = "-Log10(padj)")
31
    print(p)
32
33
    if(length(gene.overlap) < nrow(interest.gene)){
34
      a <- setdiff(rownames(interest.gene), gene.overlap)
35
      cat("gene ", paste(a, sep = ";"), " not in TCGA!\n")
36
    }else{
37
      cat("All gene in TCGA data!\n")
38
    }
39
  }else{ 
40
    #Require interest.gene to be a character vector
41
    gene.overlap <- intersect(interest.gene, rownames(diff.gene.pro))
42
    diff.gene.pro.sig.gene <- diff.gene.pro[gene.overlap,]
43
    interest.gene.sig <- data.frame(Gene = gene.overlap)
44
    if(length(gene.overlap) < length(interest.gene)){
45
      a <- setdiff(interest.gene, gene.overlap)
46
      cat("gene ", paste(a, sep = ";"), " not in TCGA!\n")
47
    }else{
48
      cat("All gene in TCGA data!\n")
49
    }
50
  }
51
52
    ##Draw the expression barplot of each gene
53
    exp.data.interest <- exp.data.process[gene.overlap,]
54
    sample.id <- colnames(exp.data.process)
55
    sample.id <- substr(sample.id, 1, 16) 
56
    sample.type <- substr(sample.id, 14, 15)
57
58
    normal.index <- which(sample.type == "11") #72
59
    cancer.index <- which(sample.type == "01") #533
60
    patient.type <- rep("Tumor", ncol(exp.data.process))
61
    patient.type[normal.index] <- "Normal"  
62
63
  if(Box.plot){
64
  cat("Normal sample: ", length(normal.index), "\n")
65
  cat("Tumor sample: ", length(cancer.index), "\n")
66
    box.res <- sapply(gene.overlap, function(x){
67
      interest.gene.matrix <- exp.data.process[x,]
68
      genes <- data.frame(type = patient.type, expressionValue = interest.gene.matrix)
69
      p <- ggboxplot(genes, x = "type", y = "expressionValue", ylab = "Normalized Count", color = "type", palette = "jco", add = "jitter", title = x) + stat_compare_means()
70
      print(p)
71
      })
72
  }     
73
74
    ##对应表达和生存数据
75
    sample.id <- substr(sample.id, 1, 15)
76
    patient.overlap <- intersect(clin.data$Sample, sample.id) # 533 tumor samples
77
    exp.data.process.pro <- exp.data.process[,match(patient.overlap, sample.id)]
78
    clin.info <- clin.data[match(patient.overlap, clin.data$Sample),]
79
    
80
  #all gene as the signature
81
  if(meta.signature){
82
    interest.matrix <- exp.data.process.pro[gene.overlap,]
83
    signature.score <- colSums(interest.matrix)/nrow(interest.matrix)
84
    median.score <- median(signature.score)
85
    high.group <- which(signature.score > median.score)
86
    sample.label <- rep("Low group", length(signature.score))
87
    sample.label[high.group] <- "High group"
88
    OS.data <- data.frame(Patient_ID = patient.overlap, event = clin.info$OS, time = clin.info$OS_time, sample.label = sample.label)
89
    DFS.data <- data.frame(Patient_ID = patient.overlap, event = clin.info$DFS, time = clin.info$DFS_time, sample.label = sample.label)
90
    p1 <- plot.surv(OS.data, risk.table = T, HR = T, ylab = "Overall Survival", main = main, surv.median.line = "hv", xlab = "Time (Month)")
91
    print(p1)
92
    p2 <- plot.surv(DFS.data, risk.table = T, HR = T, ylab = "Disease-Free Survival", main = main, surv.median.line = "hv", xlab = "Time (Month)")
93
    print(p2)
94
  }
95
96
    ## single gene model
97
  if(single.signature){
98
    suv.res <- sapply(gene.overlap, function(x){
99
      gene.expression <- as.numeric(exp.data.process.pro[x,])   
100
101
      if(group.by == "top33"){
102
        quantile.value <- quantile(gene.expression, seq(0,1,0.33))
103
        top33 <- as.numeric(quantile.value[3])
104
        bottom33 <- as.numeric(quantile.value[2])
105
        index1 <- which(gene.expression >= top33)
106
        index2 <- which(gene.expression <= bottom33)
107
        sample.label <- c(rep("Low group", length(index1)), rep("High group", length(index2)))
108
        clin.info <- clin.info[c(index2,index1),]
109
        OS.data <- data.frame(Patient_ID = patient.overlap[c(index2,index1)], event = clin.info$OS, time = clin.info$OS_time, sample.label = sample.label)
110
        DFS.data <- data.frame(Patient_ID = patient.overlap[c(index2,index1)], event = clin.info$DFS, time = clin.info$DFS_time, sample.label = sample.label)
111
      }else{
112
        # median as threshold
113
        med.exp <- median(gene.expression)
114
        high.group <- which(gene.expression>med.exp)
115
        sample.label <- rep("Low group", length(gene.expression))
116
        sample.label[high.group] <- "High group"
117
        OS.data <- data.frame(Patient_ID = patient.overlap, event = clin.info$OS, time = clin.info$OS_time, sample.label = sample.label)
118
        DFS.data <- data.frame(Patient_ID = patient.overlap, event = clin.info$DFS, time = clin.info$DFS_time, sample.label = sample.label)
119
      }
120
      p1 <- plot.surv(OS.data, risk.table = T, HR = T, ylab = "Overall Survival", main = x, surv.median.line = "hv", xlab = "Time (Month)")
121
      print(p1)
122
      p2 <- plot.surv(DFS.data, risk.table = T, HR = T, ylab = "Disease-Free Survival", main = x, surv.median.line = "hv", xlab = "Time (Month)")
123
      print(p2)
124
      
125
      OS.obj <- coxph(Surv(time, event)~sample.label, data=OS.data)
126
      DFS.obj <- coxph(Surv(time, event)~sample.label, data=DFS.data)
127
      return(c(summary(OS.obj)$logtest[3], summary(DFS.obj)$logtest[3]))
128
    })
129
    interest.gene.sig$OS_logrank_p <- suv.res[1,]
130
    interest.gene.sig$DFS_logrank_p <- suv.res[2,] 
131
  }
132
133
  return(interest.gene.sig)
134
}
135
136
plot.surv <- function(clinical.data, upper.time = NULL, xscale = 1, xlab = "Time", median.time = TRUE, 
137
                      surv.median.line = "none", HR = FALSE, risk.table = TRUE, pval = TRUE, 
138
                      conf.int = FALSE, main = NULL, ylab = "Survival probability", colors = c("#D95319", "#3B6793","#EA4335","#4285F4","#34A853","#000000")) {
139
  
140
  #load R package
141
  require(survival)
142
  require(survminer)
143
  require(RColorBrewer)
144
  require(gridExtra)
145
  
146
  # Determine the type of event and the unit of time
147
  # survival.event <- survival.event[1];
148
  # unit.xlabel <- unit.xlabel[1];
149
  
150
  # If upper.time is set, the samples whose survival time exceeds upper.time will be removed
151
  if (!is.null(upper.time)) clinical.data <- clinical.data[clinical.data$time <= upper.time,]
152
  
153
  # #date format
154
  # xSL <- data.frame(xScale=c(1,7,30,365.25),xLab=c("Days","Weeks","Months","Years"), stringsAsFactors=FALSE)
155
  # switch(unit.xlabel, year={xScale <- 365.25;}, month={xScale <- 30;}, week={xScale <- 7;}, day={xScale <- 1})
156
  # xLab <- xSL[which(xSL[,1]==xScale),2];
157
  
158
  # color
159
  if (!is.factor(clinical.data$sample.label)) 
160
    clinical.data$sample.label <- as.factor(clinical.data$sample.label)
161
  
162
  t.name <- levels(clinical.data$sample.label)
163
  
164
  if (length(t.name) > 6) stop("Sample grouping >6, exceeding the function acceptance range!")
165
  t.col <- colors[1:length(t.name)]
166
  
167
  # 构造生存对象
168
  km.curves <- survfit(Surv(time, event)~sample.label, data=clinical.data)
169
  
170
  # HR and 95%CI
171
  if (length(t.name) == 2) {
172
    if (HR) {
173
      cox.obj <- coxph(Surv(time, event)~sample.label, data=clinical.data)
174
      tmp <- summary(cox.obj)
175
      HRs <- round(tmp$coefficients[ ,2], digits = 2)
176
      HR.confint.lower <- round(tmp$conf.int[,"lower .95"], 2)
177
      HR.confint.upper <- round(tmp$conf.int[,"upper .95"], 2)
178
      HRs <- paste0(HRs, " (", HR.confint.lower, "-", HR.confint.upper, ")")      
179
    }
180
  }
181
  
182
  # Construct the legend display text in the survival image
183
  legend.content <- substr(names(km.curves$strata), start = 14, stop = 1000)
184
  
185
  # x-axis scale unit conversion
186
  if (is.numeric(xscale) | (xscale %in% c("d_m", "d_y", "m_d", "m_y", "y_d", "y_m"))) {
187
    xscale = xscale
188
  } else {
189
    stop('xscale should be numeric or one of c("d_m", "d_y", "m_d", "m_y", "y_d", "y_m").')
190
  }
191
  
192
  # conversion of survival time units
193
  .format_xticklabels <- function(labels, xscale){
194
    # 1 year = 365.25 days
195
    # 1 month = 365.25/12 = 30.4375 days
196
    if (is.numeric(xscale)) xtrans <- 1/xscale
197
    else
198
      xtrans <- switch(xscale,
199
                       d_m = 12/365.25,
200
                       d_y = 1/365.25,
201
                       m_d = 365.25/12,
202
                       m_y = 1/12,
203
                       y_d = 365.25,
204
                       y_m = 12,
205
                       1
206
      )
207
    round(labels*xtrans,2)
208
  }
209
  
210
  # Add the median survival time and its 95% CI in the figure and place it in the subtitle position
211
  subtitle <- NULL
212
  if (median.time) {
213
    if (is.numeric(xscale)) {
214
      median.km.obj = km.curves
215
    } else if (xscale %in% c("d_m", "d_y", "m_d", "m_y", "y_d", "y_m")) {
216
      clinical.data$time <- .format_xticklabels(labels = clinical.data$time, xscale = xscale)
217
      median.km.obj <- survfit(Surv(time, event)~sample.label, data=clinical.data)
218
    }
219
    survival.time.info <- NULL
220
    survival.time.info <- rbind(survival.time.info, summary(median.km.obj)$table)
221
    median.survival <- round(survival.time.info[!duplicated(survival.time.info[,7:9]),7:9], digits = 2) # 注意:这里取得的置信区间上界可能为NA
222
    if (length(levels(clinical.data$sample.label)) == 1) {
223
      tmp1 <- levels(clinical.data$sample.label)
224
    } else {
225
      tmp1 <- do.call(rbind,strsplit(rownames(summary(median.km.obj)$table), split = "="))[,2]
226
    }
227
    tmp2 <- paste(median.survival[,1], "(", median.survival[,2], "-", median.survival[,3], ")")
228
    subtitle <- paste(tmp1, tmp2, sep = ":", collapse = "\n")
229
  }
230
  
231
  # ggsurvplot
232
  ggsurv <- ggsurvplot(km.curves,               # survfit object with calculated statistics.
233
                       data = clinical.data,             # data used to fit survival curves.
234
                       palette = t.col,
235
                       
236
                       risk.table = risk.table,       # show risk table.
237
                       pval = pval,             # show p-value of log-rank test.
238
                       surv.median.line = surv.median.line,  # add the median survival pointer.
239
                       title = main,     #main title
240
                       subtitle = subtitle, #sub title
241
                       font.main = 15,                 
242
                       xlab = xlab,   # customize X axis label.
243
                       ylab = ylab,   # customize Y axis label
244
                       xscale = xscale,
245
                       
246
                       
247
                       #legend
248
                       legend.title = "", 
249
                       legend.labs = legend.content, 
250
                       legend = c(0.8,0.9), 
251
                       font.legend = 9,     
252
                       
253
                       #risk table
254
                       tables.theme = theme_cleantable(),
255
                       risk.table.title = "No. at risk:",
256
                       risk.table.y.text.col = T, 
257
                       risk.table.y.text = FALSE, 
258
                       tables.height = 0.15,      
259
                       risk.table.fontsize = 3  
260
  )
261
  if (length(t.name) == 2) {
262
    if (HR) 
263
      ggsurv$plot <- ggsurv$plot + ggplot2::annotate("text", x = max(km.curves$time)/12, 
264
                                                     y = 0.15, size = 5, label = paste("HR=", HRs))
265
  } 
266
267
  ggsurv$plot <- ggsurv$plot + theme(plot.title = element_text(hjust = 0.5), plot.subtitle = element_text(size = 10), 
268
                                     plot.margin = unit(c(5.5, 5.5, 5.5, 50), "points"))
269
270
  ggsurv$table <- ggsurv$table + theme(plot.title = element_text(hjust = -0.04),
271
                                       plot.margin = unit(c(5.5, 5.5, 5.5, 50), "points"))
272
  
273
274
  if(length(t.name) > 2) {
275
    # pairwise: log rank P value
276
    res <- pairwise_survdiff(Surv(time, event)~sample.label, data=clinical.data);
277
    pairwise.pvalue <- round(res$p.value, digits = 4);
278
    pairwise.pvalue[which(pairwise.pvalue < 0.0001)] <- "<0.0001";
279
    pairwise.pvalue[is.na(pairwise.pvalue)] <- "-"
280
    
281
    # add table
282
    tt <- ttheme_minimal(core = list(fg_params = list(col = "black"),bg_params = list(fill = NA, col = "black")),
283
                         colhead = list(fg_params = list(col = NA),bg_params = list(fill = t.col, col = "black")),
284
                         rowhead = list(fg_params = list(col = NA, hjust = 1),bg_params = list(fill = c("white",t.col[-1]), col = "black"))
285
    )
286
    pairwise.table <- tableGrob(pairwise.pvalue, theme = tt)
287
    ggsurv <- ggarrange(ggarrange(ggsurv$plot, ggsurv$table, nrow=2, heights=c(2,0.5)),
288
                        pairwise.table, nrow=2, heights = c(2,0.5),
289
                        labels = c("","p from pairwise comparisons"),
290
                        hjust = 0, font.label = list(size = 15, face = "plain"))
291
  }
292
  
293
  ggsurv    
294
}