# R/cleanMetadata.GSE34171.R
#' @rdname cleanMetadata
#' @details
#'    GSE34171:\cr
#'    The cleanup of GSE34171 (MDFCI) adds a \code{Batch} factor with three
#'    levels, one per expression platform (GPL570 = HG-U133 plus 2,
#'    GPL96 = HG-U133A, GPL97 = HG-U133B). It also adds a \code{CEL} column
#'    holding the GSM identifier parsed from \code{supplementary_file} and a
#'    character \code{GSM} column copied from \code{geo_accession}. The rows
#'    of the returned data frame are named by \code{CEL}.
#' @export
cleanMetadata.GSE34171 <- function(meta_data) {
  message("Cleaning GSE34171 (MDFCI)!")

  # Generic clean; messages emitted by the generic cleaner are silenced so
  # that only the message above is shown.
  meta_data <- suppressMessages(cleanMetadata.data.frame(meta_data))

  # Batch: one level per expression platform. Any platform_id that is not
  # one of the three listed levels becomes NA.
  meta_data$Batch <- factor(meta_data$platform_id,
                            levels = c("GPL570", "GPL96", "GPL97"))

  # CEL: keep only the "GSM<digits>" part of the supplementary file path,
  # i.e. what sits between the last "/" and the first following ".".
  # Entries that do not match the pattern are left unchanged.
  meta_data$CEL <-
    gsub("^.+/(GSM[0-9]+)\\..+$", "\\1", meta_data$supplementary_file)

  # GSM: the GEO accession as a plain character vector.
  meta_data$GSM <- as.character(meta_data$geo_accession)

  # Row names must be unique and non-missing, so this errors if two rows
  # share a CEL value or if a CEL value is NA.
  rownames(meta_data) <- meta_data$CEL

  return(meta_data)
}

# NOTE: everything below is commented-out legacy code and is never executed.
#   meta_data_Clinical <-
#     read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                               "GSE34171_clinical_info.txt"),
#                header = TRUE, stringsAsFactors = FALSE)
#
#   meta_data_Outcome <-
#     read.table(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                               "GSE34171_outcome_data.txt"),
#                skip = 2, stringsAsFactors = FALSE)
#   colnames(meta_data_Outcome) <- c("Title", "Class", "os", "Followup")
#
#   meta_data_Sample <-
#     read.csv(file=file.path(MDFCI.ext.dir, "../RawData", "Metadata",
#                             "sample.csv"),
#              stringsAsFactors = FALSE)
#
#   meta_data <- merge(merge(meta_data_Sample,
#                                meta_data_Outcome, all.x=TRUE, all.y=TRUE),
#                          meta_data_Clinical, all.x=TRUE)
#
#   table(meta_data$Title, meta_data$Platform)
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL570" , c(1, 2)]
#
#   colnames(xx)[1] <- "HGU133Plus2"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL6801" , c(1, 2)]
#
#   colnames(xx)[1] <- "GenomeWideSNP6"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL96" , c(1, 2)]
#
#   colnames(xx)[1] <- "HGU133A"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#   xx <- meta_data_Sample[meta_data_Sample$Title %in% meta_data$Title &
#                                meta_data_Sample$Platform == "GPL97" , c(1, 2)]
#
#   colnames(xx)[1] <- "HGU133B"
#   meta_data <- merge(meta_data, xx, all.x = TRUE)
#
#
#
#
#   meta_data <- meta_data[!duplicated(meta_data$Title),]
#
#   meta_data$IPI <- as.factor(as.numeric(meta_data$IPI))
#
#   meta_data$ipi.hl <- ifelse(is.na(meta_data$IPI), NA, "0-1")
#   meta_data$ipi.hl[meta_data$IPI %in% c(2, 3)] <- "2-3"
#   meta_data$ipi.hl[meta_data$IPI %in% c(4, 5)] <- "4-5"
#
#   table(meta_data[, c("IPI", "ipi.hl")])
#
#   Monti.Suppl5 <-
#     read.delim("../Litterature/Monti_Suppl5_NIHMS398769-supplement-06(1).txt",
#                stringsAsFactors = FALSE)
#
#   rownames(Monti.Suppl5) <- Monti.Suppl5$CaseID
#
#   colnames(Monti.Suppl5)[5] <- "WrightClass"
#
#
#
#   meta_data <- merge(meta_data, Monti.Suppl5,
#                          by.x = "Title", all.x=TRUE,
#                          by.y = "CaseID")
#
#
#   meta_data$TP53.mut[meta_data$TP53.mut == "na"] <- NA
#
#
#
#
#
#   ABCGCBclass <- read.delim(file.path(MDFCI.ext.dir,
#                                       "../ABCGCBclassification",
#                                       "ABCGCBclass.txt"))
#   meta_data$WrightClass_own <-
#     ABCGCBclass[meta_data$Title,2]
#
#   ABCGCBclass[paste(meta_data$GPL570, ".CEL", sep = ""),2]
#
#
#   meta_data$OS <- Surv(meta_data$os/365.25, meta_data$Followup)
#
#   os5  <- ifelse(meta_data$os/365.25 > 5, 5, meta_data$os/365.25)
#   ios5 <- pmin(ifelse(meta_data$os/365.25 > 5, 0, 1), meta_data$Followup)
#
#   meta_data$OS5 <- Surv(as.numeric(os5), ios5)
#
#
#   meta_data$HGU133Plus2[meta_data$HGU133Plus2 == "GSM844275"] <- NA
#
#   meta_data <- meta_data[, c(
#     "Title", "Class", "os", "Followup", "Type", "Entity", "Primary",
#     "IPI", "HGU133Plus2", "GenomeWideSNP6", "HGU133A", "HGU133B", "ipi.hl",
#     "SNP.ScanID", "GEP.SampleID", "CCC..Best.10.13.", "WrightClass",
#     "WrightClass_own", "TP53.mut")]