#' @rdname cleanMetadata
#' @details
#' GSE34171:\cr
#' The cleanup of GSE34171 (MDFCI) runs the generic data.frame cleanup and then
#' adds a \code{Batch} factor with three levels, one per expression platform
#' (GPL570 / HG-U133 plus 2, GPL96 / HG-U133A, GPL97 / HG-U133B). It also adds
#' a \code{CEL} column extracted from \code{supplementary_file}, a character
#' \code{GSM} column copied from \code{geo_accession}, and uses \code{CEL} as
#' the row names.
#' @export
cleanMetadata.GSE34171 <- function(meta_data) {
  message("Cleaning GSE34171 (MDFCI)!")

  # Apply the generic cleanup first, silencing any messages it emits.
  meta_data <- suppressMessages(cleanMetadata.data.frame(meta_data))

  # Batch: one level per platform id; ids outside this set become NA.
  platform_levels <- c("GPL570", "GPL96", "GPL97")
  meta_data$Batch <- factor(meta_data$platform_id, levels = platform_levels)

  # CEL: the "GSM<digits>" token that follows the last "/" of the
  # supplementary file path and precedes the first "." after it. Entries that
  # do not match the pattern are left unchanged by gsub().
  cel_pattern <- "^.+/(GSM[0-9]+)\\..+$"
  meta_data$CEL <- gsub(cel_pattern, "\\1", meta_data$supplementary_file)

  # GSM: the GEO accession as a plain character vector.
  meta_data$GSM <- as.character(meta_data$geo_accession)

  # Index rows by the extracted CEL identifier.
  rownames(meta_data) <- meta_data$CEL

  meta_data
}


# ---------------------------------------------------------------------------
# Inactive code kept for reference (everything below is commented out).
# NOTE(review): this appears to be an earlier script that assembled the
# metadata from local files; confirm whether it is still needed.
# ---------------------------------------------------------------------------
#
# meta_data_Clinical <-
#   read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                             "GSE34171_clinical_info.txt"),
#              header = TRUE, stringsAsFactors = FALSE)
#
# meta_data_Outcome <-
#   read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                             "GSE34171_outcome_data.txt"),
#              skip = 2, stringsAsFactors = FALSE)
# colnames(meta_data_Outcome) <- c("Title", "Class", "os", "Followup")
#
# meta_data_Sample <-
#   read.csv(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                           "sample.csv"),
#            stringsAsFactors = FALSE)
#
# meta_data <- merge(merge(meta_data_Sample,
#                          meta_data_Outcome, all.x=TRUE, all.y=TRUE),
#                    meta_data_Clinical, all.x=TRUE)
#
# table(meta_data$Title, meta_data$Platform)
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                          meta_data_Sample$Platform == "GPL570" , c(1, 2)]
#
# colnames(xx)[1] <- "HGU133Plus2"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                          meta_data_Sample$Platform == "GPL6801" , c(1, 2)]
#
# colnames(xx)[1] <- "GenomeWideSNP6"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                          meta_data_Sample$Platform == "GPL96" , c(1, 2)]
#
# colnames(xx)[1] <- "HGU133A"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
# xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                          meta_data_Sample$Platform == "GPL97" , c(1, 2)]
#
# colnames(xx)[1] <- "HGU133B"
# meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
# meta_data <- meta_data[!duplicated(meta_data$Title),]
#
# meta_data$IPI <- as.factor(as.numeric(meta_data$IPI))
#
# meta_data$ipi.hl <- ifelse(is.na(meta_data$IPI), NA, "0-1")
# meta_data$ipi.hl[meta_data$IPI %in% c(2, 3)] <- "2-3"
# meta_data$ipi.hl[meta_data$IPI %in% c(4, 5)] <- "4-5"
#
# table(meta_data[, c("IPI", "ipi.hl")])
#
# Monti.Suppl5 <-
#   read.delim("../Litterature/Monti_Suppl5_NIHMS398769-supplement-06(1).txt",
#              stringsAsFactors = FALSE)
#
# rownames(Monti.Suppl5) <- Monti.Suppl5$CaseID
#
# colnames(Monti.Suppl5)[5] <- "WrightClass"
#
#
# meta_data <- merge(meta_data, Monti.Suppl5,
#                    by.x = "Title", all.x=TRUE,
#                    by.y = "CaseID")
#
#
# meta_data$TP53.mut[meta_data$TP53.mut == "na"] <- NA
#
#
# ABCGCBclass <- read.delim(file.path(MDFCI.ext.dir,
#                                     "../ABCGCBclassification",
#                                     "ABCGCBclass.txt"))
# meta_data$WrightClass_own <-
#   ABCGCBclass[meta_data$Title,2]
#
# ABCGCBclass[paste(meta_data$GPL570, ".CEL", sep = ""),2]
#
#
# meta_data$OS <- Surv(meta_data$os/365.25, meta_data$Followup)
#
# os5 <- ifelse(meta_data$os/365.25 > 5, 5, meta_data$os/365.25)
# ios5 <- pmin(ifelse(meta_data$os/365.25 > 5, 0, 1), meta_data$Followup)
#
# meta_data$OS5 <- Surv(as.numeric(os5), ios5)
#
#
# meta_data$HGU133Plus2[meta_data$HGU133Plus2 == "GSM844275"] <- NA
#
# meta_data <- meta_data[, c(
#   "Title", "Class", "os", "Followup", "Type", "Entity", "Primary",
#   "IPI", "HGU133Plus2", "GenomeWideSNP6", "HGU133A", "HGU133B", "ipi.hl",
#   "SNP.ScanID", "GEP.SampleID", "CCC..Best.10.13.", "WrightClass",
#   "WrightClass_own", "TP53.mut")]