[28aa3b]: / R / cleanMetadata.GSE34171.R

Download this file

131 lines (124 with data), 4.4 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
#' @rdname cleanMetadata
#' @details
#' GSE34171:\cr
#' Cleaning GSE34171 (MDFCI) adds a \code{Batch} factor with one level per
#' array platform (HG-U133 plus 2, HG-U133A, HG-U133B), together with
#' \code{CEL} and \code{GSM} identifier columns.
#' @export
cleanMetadata.GSE34171 <- function(meta_data) {
  message("Cleaning GSE34171 (MDFCI)!")

  # Run the generic data.frame cleanup first, silencing any messages it emits.
  meta_data <- suppressMessages(cleanMetadata.data.frame(meta_data))

  # One batch per platform; a platform_id outside these levels becomes NA.
  platform_levels <- c("GPL570", "GPL96", "GPL97")
  meta_data$Batch <- factor(meta_data$platform_id, levels = platform_levels)

  # The CEL identifier is the "GSM<digits>" stem of the supplementary file
  # path. Entries that do not match the pattern are left unchanged by gsub().
  cel_pattern <- "^.+/(GSM[0-9]+)\\..+$"
  meta_data$CEL <- gsub(cel_pattern, "\\1", meta_data$supplementary_file)

  # Plain-character copy of the GEO accession.
  meta_data$GSM <- as.character(meta_data$geo_accession)

  # Index the rows by CEL identifier.
  rownames(meta_data) <- meta_data$CEL
  meta_data
}
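# NOTE(review): everything from here to the end of the file is commented-out
# legacy code and is never executed. It appears to be an earlier, manual
# assembly of the GSE34171 metadata from local files (clinical info, outcome
# data, sample sheet); kept for reference only.
#
# meta_data_Clinical <-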
# read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
# "GSE34171_clinical_info.txt"),
# header = TRUE, stringsAsFactors = FALSE)
#
# meta_data_Outcome <-
# read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
# "GSE34171_outcome_data.txt"),
# skip = 2, stringsAsFactors = FALSE)
# colnames(meta_data_Outcome) <- c("Title", "Class", "os", "Followup")
#
# meta_data_Sample <-
# read.csv(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
# "sample.csv"),
# stringsAsFactors = FALSE)
#
# meta_data <- merge(merge(meta_data_Sample,
# meta_data_Outcome, all.x=TRUE, all.y=TRUE),
# meta_data_Clinical, all.x=TRUE)
#
# table(meta_data$Title, meta_data$Platform)
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
# meta_data_Sample$Platform == "GPL570" , c(1, 2)]
#
# colnames(xx)[1] <- "HGU133Plus2"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
# meta_data_Sample$Platform == "GPL6801" , c(1, 2)]
#
# colnames(xx)[1] <- "GenomeWideSNP6"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
# meta_data_Sample$Platform == "GPL96" , c(1, 2)]
#
# colnames(xx)[1] <- "HGU133A"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
# meta_data_Sample$Platform == "GPL97" , c(1, 2)]
#
# colnames(xx)[1] <- "HGU133B"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
#
#
# meta_data <- meta_data[!duplicated(meta_data$Title),]
#
# meta_data$IPI <- as.factor(as.numeric(meta_data$IPI))
#
# meta_data$ipi.hl <- ifelse(is.na(meta_data$IPI), NA, "0-1")
# meta_data$ipi.hl[meta_data$IPI %in% c(2, 3)] <- "2-3"
# meta_data$ipi.hl[meta_data$IPI %in% c(4, 5)] <- "4-5"
#
# table(meta_data[, c("IPI", "ipi.hl")])
#
# Monti.Suppl5 <-
# read.delim("../Litterature/Monti_Suppl5_NIHMS398769-supplement-06(1).txt",
# stringsAsFactors = FALSE)
#
# rownames(Monti.Suppl5) <- Monti.Suppl5$CaseID
#
# colnames(Monti.Suppl5)[5] <- "WrightClass"
#
#
#
# meta_data <- merge(meta_data, Monti.Suppl5,
# by.x = "Title", all.x=TRUE,
# by.y = "CaseID")
#
#
# meta_data$TP53.mut[meta_data$TP53.mut == "na"] <- NA
#
#
#
#
#
# ABCGCBclass <- read.delim(file.path(MDFCI.ext.dir,
# "../ABCGCBclassification",
# "ABCGCBclass.txt"))
# meta_data$WrightClass_own <-
# ABCGCBclass[meta_data$Title,2]
#
# ABCGCBclass[paste(meta_data$GPL570, ".CEL", sep = ""),2]
#
#
# meta_data$OS <- Surv(meta_data$os/365.25, meta_data$Followup)
#
# os5 <- ifelse(meta_data$os/365.25 > 5, 5, meta_data$os/365.25)
# ios5 <- pmin(ifelse(meta_data$os/365.25 > 5, 0, 1), meta_data$Followup)
#
# meta_data$OS5 <- Surv(as.numeric(os5), ios5)
#
#
# meta_data$HGU133Plus2[meta_data$HGU133Plus2 == "GSM844275"] <- NA
#
# meta_data <- meta_data[, c(
# "Title", "Class", "os", "Followup", "Type", "Entity", "Primary",
# "IPI", "HGU133Plus2", "GenomeWideSNP6", "HGU133A", "HGU133B", "ipi.hl",
# "SNP.ScanID", "GEP.SampleID", "CCC..Best.10.13.", "WrightClass",
# "WrightClass_own", "TP53.mut")]