|
a |
|
b/R/simplify.R |
|
|
1 |
# simplify TCGA data download workflow |
|
|
2 |
## ------------------------------------ |
|
|
3 |
# Typical Cohorts Structure |
|
|
4 |
|
|
|
5 |
# Given GBM as an example: <https://xenabrowser.net/datapages/?cohort=TCGA%20Glioblastoma%20(GBM)> |
|
|
6 |
# Cohorts |
|
|
7 |
# Copy Number |
|
|
8 |
# gistic2 |
|
|
9 |
# gistic2 thresholded |
|
|
10 |
# Copy Number Segments |
|
|
11 |
# After remove germline cnv |
|
|
12 |
# Before remove germline cnv |
|
|
13 |
# DNA Methylation |
|
|
14 |
# Methylation27k |
|
|
15 |
# Methylation450k |
|
|
16 |
# Exon Expression RNASeq |
|
|
17 |
# IlluminaHiSeq |
|
|
18 |
# Gene Expression Array |
|
|
19 |
# AffyU133a (always change) |
|
|
20 |
# Gene Expression RNASeq |
|
|
21 |
# IlluminaHiSeq |
|
|
22 |
# IlluminaHiSeq pancan normalized |
|
|
23 |
# IlluminaHiSeq percentile |
|
|
24 |
# miRNA Mature Strand Expression RNASeq |
|
|
25 |
# IlluminaHiseq |
|
|
26 |
# PARADIGM Pathway Activity |
|
|
27 |
# expression |
|
|
28 |
# expression (array) + CNV |
|
|
29 |
# expression + CNV |
|
|
30 |
# expression (array) |
|
|
31 |
# Phenotype |
|
|
32 |
# Phenotypes |
|
|
33 |
# Protein Expression RPPA |
|
|
34 |
# RPPA |
|
|
35 |
# RPPA (replicate-base normalization) |
|
|
36 |
# Somatic Mutation (SNPs and small INDELs) |
|
|
37 |
# broad |
|
|
38 |
# ucsc automated |
|
|
39 |
# Somatic non-silent mutation (gene-level) |
|
|
40 |
# broad |
|
|
41 |
# PANCAN AWG |
|
|
42 |
# ucsc automated |
|
|
43 |
# Transcription factor regulatory impact |
|
|
44 |
# Agilent, by RABIT |
|
|
45 |
# HiSeqV2, by RABIT |
|
|
46 |
# U133A, by RABIT |
|
|
47 |
|
|
|
48 |
# compiler::setCompilerOptions(suppressAll = TRUE) |
|
|
49 |
# suppress Binding Notes |
|
|
50 |
# suppressBindingNotes <- function(variablesMentionedInNotes) { |
|
|
51 |
# for(variable in variablesMentionedInNotes) { |
|
|
52 |
# assign(variable, NULL, envir = .GlobalEnv) |
|
|
53 |
# } |
|
|
54 |
# } |
|
|
55 |
|
|
|
56 |
# suppressBindingNotes(c("XenaHostNames","XenaCohorts", "ProjectID", "DataType", "FileType")) |
|
|
57 |
|
|
|
58 |
|
|
|
59 |
##' @title Get TCGA Common Data Sets by Project ID and Property
##' @description This is the most useful function for user to download common
##' TCGA datasets, it is similar to `getFirehoseData` function in `RTCGAToolbox`
##' package.
##' @details TCGA Common Data Sets are frequently used for biological analysis.
##' To make it easier to obtain these data, this function provides simple
##' options to choose datasets and behavior. All available information about
##' datasets of TCGA can be accessed via `availTCGA()` and checked with `showTCGA()`.
##' @author Shixiang Wang <w_shixiang@163.com>
##' @inheritParams downloadTCGA
##' @param clinical logical. if `TRUE`, download clinical information. Default is `TRUE`.
##' @param download logical. if `TRUE`, download data, otherwise return a result list include data
##' information. Default is `FALSE`. You can set this to `FALSE` if you want to check what you will download or
##' use other function provided by `UCSCXenaTools` to filter result datasets you want to download.
##' @param forceDownload logical. if `TRUE`, force to download files no matter if exist. Default is `FALSE`.
##' @param mRNASeq logical. if `TRUE`, download mRNASeq data. Default is `FALSE`.
##' @param mRNAArray logical. if `TRUE`, download mRNA microarray data. Default is `FALSE`.
##' @param mRNASeqType character vector. Can be one, two or three
##' in `c("normalized", "pancan normalized", "percentile")`.
##' @param miRNASeq logical. if `TRUE`, download miRNASeq data. Default is `FALSE`.
##' @param exonRNASeq logical. if `TRUE`, download exon RNASeq data. Default is `FALSE`.
##' @param RPPAArray logical. if `TRUE`, download RPPA data. Default is `FALSE`.
##' @param ReplicateBaseNormalization logical. if `TRUE`, download RPPA data by Replicate Base
##' Normalization (RBN). Default is `FALSE`.
##' @param Methylation logical. if `TRUE`, download DNA Methylation data. Default is `FALSE`.
##' @param MethylationType character vector. Can be one or two in `c("27K", "450K")`.
##' @param GeneMutation logical. if `TRUE`, download gene mutation data. Default is `FALSE`.
##' @param SomaticMutation logical. if `TRUE`, download somatic mutation data. Default is `FALSE`.
##' @param GisticCopyNumber logical. if `TRUE`, download Gistic2 Copy Number data. Default is `FALSE`.
##' @param Gistic2Threshold logical. if `TRUE`, download Threshold Gistic2 data. Default is `TRUE`.
##' @param CopyNumberSegment logical. if `TRUE`, download Copy Number Segment data. Default is `FALSE`.
##' @param RemoveGermlineCNV logical. if `TRUE`, download Copy Number Segment data which has removed
##' germline copy number variation. Default is `TRUE`.
##' @return if `download=TRUE`, return `data.frame` from `XenaDownload`,
##' otherwise return a list including `XenaHub` object and datasets information
##' @export
##' @examples
##' ###### get data, but not download
##'
##' # 1 choose project and data types you wanna download
##' getTCGAdata(project = "LUAD", mRNASeq = TRUE, mRNAArray = TRUE,
##'             mRNASeqType = "normalized", miRNASeq = TRUE, exonRNASeq = TRUE,
##'             RPPAArray = TRUE, Methylation = TRUE, MethylationType = "450K",
##'             GeneMutation = TRUE, SomaticMutation = TRUE)
##'
##' # 2 only choose 'LUAD' and its clinical data
##' getTCGAdata(project = "LUAD")
##' \dontrun{
##' ###### download datasets
##'
##' # 3 download clinical datasets of LUAD and LUSC
##' getTCGAdata(project = c("LUAD", "LUSC"), clinical = TRUE, download = TRUE)
##'
##' # 4 download clinical, RPPA and gene mutation datasets of LUAD and LUSC
##' # getTCGAdata(project = c("LUAD", "LUSC"), clinical = TRUE, RPPAArray = TRUE, GeneMutation = TRUE)
##' }
getTCGAdata <- function(project = NULL,
                        clinical = TRUE,
                        download = FALSE,
                        forceDownload = FALSE,
                        destdir = tempdir(),
                        mRNASeq = FALSE,
                        mRNAArray = FALSE,
                        mRNASeqType = "normalized",
                        miRNASeq = FALSE,
                        exonRNASeq = FALSE,
                        RPPAArray = FALSE,
                        ReplicateBaseNormalization = FALSE,
                        Methylation = FALSE,
                        MethylationType = c("27K", "450K"),
                        GeneMutation = FALSE,
                        SomaticMutation = FALSE,
                        GisticCopyNumber = FALSE,
                        Gistic2Threshold = TRUE,
                        CopyNumberSegment = FALSE,
                        RemoveGermlineCNV = TRUE,
                        ...) {
  #----- check data type of input
  stopifnot(!is.null(project))

  # Every switch-like argument is validated individually so that the error
  # names the offending argument. `exonRNASeq` is included here; it is used
  # in an `if ()` below just like the other flags.
  flags <- list(
    clinical = clinical,
    mRNASeq = mRNASeq,
    mRNAArray = mRNAArray,
    miRNASeq = miRNASeq,
    exonRNASeq = exonRNASeq,
    RPPAArray = RPPAArray,
    ReplicateBaseNormalization = ReplicateBaseNormalization,
    Methylation = Methylation,
    GeneMutation = GeneMutation,
    SomaticMutation = SomaticMutation,
    GisticCopyNumber = GisticCopyNumber,
    Gistic2Threshold = Gistic2Threshold,
    CopyNumberSegment = CopyNumberSegment,
    RemoveGermlineCNV = RemoveGermlineCNV,
    download = download,
    forceDownload = forceDownload
  )
  not_logical <- names(flags)[!vapply(flags, is.logical, logical(1))]
  if (length(not_logical) > 0) {
    stop(
      "The following arguments must be logical: ",
      paste(not_logical, collapse = ", "),
      call. = FALSE
    )
  }

  projects <- c(
    "LAML", "ACC", "CHOL", "BLCA", "BRCA", "CESC", "COADREAD", "COAD",
    "UCEC", "ESCA", "FPPP", "GBM", "HNSC", "KICH", "KIRC", "KIRP",
    "DLBC", "LIHC", "LGG", "GBMLGG", "LUAD", "LUNG", "LUSC", "SKCM",
    "MESO", "UVM", "OV", "PANCAN", "PAAD", "PCPG", "PRAD", "READ",
    "SARC", "STAD", "TGCT", "THYM", "THCA", "UCS"
  )

  if (!all(project %in% projects)) {
    # Show the subset of the requested IDs that is recognised, then abort.
    message("Only following Project valid:")
    print(project[project %in% projects])
    stop("Invaild Input!")
  }

  # suppress binding notes (same idiom as downloadTCGA() and showTCGA());
  # inside subset()/filter() the data columns still take precedence.
  ProjectID <- DataType <- FileType <- NULL

  tcga_all <- .decodeDataType(Target = "tcgaHub")

  # Keep the requested projects and drop data types this helper never offers.
  res <- subset(tcga_all, ProjectID %in% project)
  res <- res %>%
    filter(
      DataType != "Transcription Factor Regulatory Impact",
      DataType != "Signatures",
      DataType != "PARADIGM Pathway Activity",
      DataType != "iCluster"
    )

  # Each option contributes one quosure: either a real row condition or a
  # constant FALSE. They are OR-ed together at the end.
  if (clinical) {
    quo_cli <- dplyr::quo((FileType == "Clinical Information"))
  } else {
    quo_cli <- dplyr::quo((FALSE))
  }

  if (mRNASeq) {
    if (!all(mRNASeqType %in% c("normalized", "pancan normalized", "percentile"))) {
      message("Available mRNASeqType values are:")
      print(c("normalized", "pancan normalized", "percentile"))
      stop("Not Vaild Input!")
    }

    # Map the user-facing type names to the decoded FileType labels.
    RNA <- c(
      "IlluminaHiSeq RNASeqV2",
      "IlluminaHiSeq RNASeqV2 pancan normalized",
      "IlluminaHiSeq RNASeqV2 in percentile rank"
    )
    names(RNA) <- c("normalized", "pancan normalized", "percentile")
    RNA_select <- c(RNA[mRNASeqType], "Batch effects normalized")

    quo_RNA <- dplyr::quo((
      DataType == "Gene Expression RNASeq" & FileType %in% RNA_select
    ))
  } else {
    quo_RNA <- dplyr::quo((FALSE))
  }

  if (mRNAArray) {
    quo_RNAa <- dplyr::quo((DataType == "Gene Expression Array"))
  } else {
    quo_RNAa <- dplyr::quo((FALSE))
  }

  if (miRNASeq) {
    miRNA_select <- c("IlluminaHiSeq RNASeq", "Batch effects normalized")
    quo_miRNA <- dplyr::quo((
      DataType == "miRNA Mature Strand Expression RNASeq" &
        FileType %in% miRNA_select
    ))
  } else {
    quo_miRNA <- dplyr::quo((FALSE))
  }

  if (exonRNASeq) {
    quo_exon <- dplyr::quo((
      DataType == "Exon Expression RNASeq" &
        FileType == "IlluminaHiSeq RNASeqV2"
    ))
  } else {
    quo_exon <- dplyr::quo((FALSE))
  }

  # Have no miRNA Array? Need Check

  if (RPPAArray) {
    if (ReplicateBaseNormalization) {
      RPPA_select <- "RPPA normalized by RBN"
    } else {
      RPPA_select <- "RPPA"
    }
    quo_RPPA <- dplyr::quo((
      DataType == "Protein Expression RPPA" &
        FileType %in% c(RPPA_select, "RPPA pancan normalized")
    ))
  } else {
    quo_RPPA <- dplyr::quo((FALSE))
  }

  if (Methylation) {
    if (!all(MethylationType %in% c("27K", "450K"))) {
      message("Available MethylationType values are:")
      print(c("27K", "450K"))
      stop("Not Vaild Input!")
    }

    Methy <- c("Methylation27K", "Methylation450K")
    names(Methy) <- c("27K", "450K")
    Methy_select <- Methy[MethylationType]

    quo_Methy <- dplyr::quo((
      DataType == "DNA Methylation" & FileType %in% Methy_select
    ))
  } else {
    quo_Methy <- dplyr::quo((FALSE))
  }

  if (GeneMutation) {
    quo_genMutation <- dplyr::quo((
      DataType == "Gene Somatic Non-silent Mutation" &
        FileType %in% c("broad automated", "MC3 Public Version")
    ))
  } else {
    quo_genMutation <- dplyr::quo((FALSE))
  }

  if (SomaticMutation) {
    quo_somaticMutation <- dplyr::quo((
      DataType == "Somatic Mutation" &
        FileType %in% c("broad automated", "MC3 Public Version")
    ))
  } else {
    quo_somaticMutation <- dplyr::quo((FALSE))
  }

  if (GisticCopyNumber) {
    if (Gistic2Threshold) {
      gistic_select <- "Gistic2 thresholded"
    } else {
      gistic_select <- "Gistic2"
    }
    quo_gistic <- dplyr::quo((
      DataType == "Gene Level Copy Number" & FileType == gistic_select
    ))
  } else {
    quo_gistic <- dplyr::quo((FALSE))
  }

  if (CopyNumberSegment) {
    if (RemoveGermlineCNV) {
      cns_select <- "After remove germline cnv"
    } else {
      cns_select <- "Before remove germline cnv"
    }
    quo_cns <- dplyr::quo((
      DataType == "Copy Number Segments" & FileType == cns_select
    ))
  } else {
    quo_cns <- dplyr::quo((FALSE))
  }

  # A row is kept when it satisfies at least one of the enabled conditions.
  cond_select <- dplyr::quo(
    !!quo_cli |
      !!quo_RNA |
      !!quo_RNAa |
      !!quo_miRNA |
      !!quo_exon |
      !!quo_RPPA |
      !!quo_Methy |
      !!quo_genMutation |
      !!quo_somaticMutation | !!quo_gistic | !!quo_cns
  )
  res <- filter(res, !!cond_select)

  if (download) {
    res %>%
      XenaGenerate() %>%
      XenaQuery() %>%
      XenaDownload(destdir = destdir, force = forceDownload, ...)
  } else {
    xe <- res %>% XenaGenerate()
    list(Xena = xe, DataInfo = res)
  }
}
|
|
377 |
|
|
|
378 |
|
|
|
379 |
|
|
|
380 |
|
|
|
381 |
##' @title Easily Download TCGA Data by Several Options
##' @description TCGA is a very useful database and here we provide this function to
##' download TCGA (include TCGA Pancan) datasets in human-friendly way. Users who are not
##' familiar with R operation will benefit from this.
##' @details All available information about datasets of TCGA can be accessed via `availTCGA()` and
##' checked with `showTCGA()`.
##' @author Shixiang Wang <w_shixiang@163.com>
##' @param project default is `NULL`. Should be one or more of TCGA project id (character vector) provided by Xena.
##' See all available project id, please use `availTCGA("ProjectID")`.
##' @param data_type default is `NULL`. Should be a character vector specify data type.
##' See all available data types by `availTCGA("DataType")`.
##' @param file_type default is `NULL`. Should be a character vector specify file type.
##' See all available file types by `availTCGA("FileType")`.
##' @inheritParams XenaDownload
##' @return same as `XenaDownload()` function result. `NULL` is returned invisibly
##' when a project id is not valid or when no dataset matches the input.
##' @export
##' @examples
##' \dontrun{
##' # download RNASeq data (use UVM as example)
##' downloadTCGA(project = "UVM",
##'              data_type = "Gene Expression RNASeq",
##'              file_type = "IlluminaHiSeq RNASeqV2")
##' }
##' @seealso [UCSCXenaTools::XenaQuery()],
##' [UCSCXenaTools::XenaFilter()],
##' [UCSCXenaTools::XenaDownload()],
##' [UCSCXenaTools::XenaPrepare()],
##' [UCSCXenaTools::availTCGA()],
##' [UCSCXenaTools::showTCGA()]
downloadTCGA <- function(project = NULL,
                         data_type = NULL,
                         file_type = NULL,
                         destdir = tempdir(),
                         force = FALSE,
                         ...) {
  stopifnot(
    !is.null(project),
    !is.null(data_type),
    !is.null(file_type)
  )
  tcga_all <- .decodeDataType(Target = "tcgaHub")
  tcga_projects <- unique(tcga_all$ProjectID)

  # suppress binding notes
  ProjectID <- DataType <- FileType <- NULL

  if (!all(project %in% tcga_projects)) {
    # message() pastes its arguments without a separator, so the IDs are
    # joined explicitly to keep several project IDs readable.
    message(
      paste(project, collapse = ", "),
      " are not (all) valid, please select one or more of following valid project ID:"
    )
    print(tcga_projects, quote = FALSE)
    return(invisible(NULL))
  }

  res <- tcga_all %>%
    filter(
      ProjectID %in% project,
      DataType %in% data_type,
      FileType %in% file_type
    )

  if (nrow(res) == 0) { # nocov start
    message("Find nothing about your input, please check it.")
    message("availTCGA and showTCGA function may help you.")
    return(invisible(NULL))
  } # nocov end

  res %>%
    XenaGenerate() %>%
    XenaQuery() %>%
    XenaDownload(destdir = destdir, force = force, ...)
}
|
|
455 |
|
|
|
456 |
##' @title Get or Check TCGA Available ProjectID, DataType and FileType
##' @param which a character of `c("all", "ProjectID", "DataType", "FileType")`
##' @return for `"all"`, a list with elements `ProjectID`, `DataType` and `FileType`;
##' otherwise the unique values of the selected column.
##' @author Shixiang Wang <w_shixiang@163.com>
##' @export
##' @examples
##' \donttest{
##' availTCGA("all")
##' }
availTCGA <- function(which = c("all", "ProjectID", "DataType", "FileType")) {
  which <- match.arg(which)
  tcga_all <- .decodeDataType(Target = "tcgaHub")

  # Unique values of the three decoded columns, keyed by the name that
  # `which` may take.
  available <- list(
    ProjectID = unique(tcga_all$ProjectID),
    DataType = unique(tcga_all$DataType),
    FileType = unique(tcga_all$FileType)
  )

  # A single column was requested: hand back just that vector.
  if (which != "all") {
    return(available[[which]])
  }

  message(
    "Note not all projects have listed data types and file types, you can use showTCGA function to check if exist"
  )
  available
}
|
|
494 |
|
|
|
495 |
##' @title Show TCGA data structure by Project ID or ALL
##' @description This can be used to check by hand if a data type or file type exists in one or more projects.
##' @param project a character vector. Can be "all" or one or more of TCGA Project IDs.
##' If "all" is among the values, every project is shown.
##' @return a `data.frame` including project data structure information,
##' or `NULL` (with a message) when nothing matches the input.
##' @author Shixiang Wang <w_shixiang@163.com>
##' @export
##' @examples
##' \donttest{
##' showTCGA("all")
##' }
##' @seealso [UCSCXenaTools::availTCGA()]
showTCGA <- function(project = "all") {
  # suppress binding notes
  ProjectID <- DataType <- FileType <- NULL

  tcga_all <- .decodeDataType(Target = "tcgaHub")

  # `project` may hold several IDs, so membership is tested with %in%;
  # `project == "all"` would give a length > 1 condition, which `if ()`
  # rejects on R >= 4.2.
  if ("all" %in% project) {
    res <- tcga_all %>% select(ProjectID, DataType, FileType)
  } else {
    res <- tcga_all %>%
      filter(ProjectID %in% project) %>%
      select(ProjectID, DataType, FileType)
  }

  if (nrow(res) == 0) { # nocov start
    message("Something is wrong in your input, NULL will be returned, please check.")
    return(NULL)
  } # nocov end
  return(res)
}
|
|
529 |
|
|
|
530 |
|
|
|
531 |
|
|
|
532 |
|
|
|
533 |
# Only works for TCGA |
|
|
534 |
.decodeDataType <- function(XenaData = UCSCXenaTools::XenaData, |
|
|
535 |
Target = "tcgaHub") { |
|
|
536 |
# This TCGA include TCGA PANCAN dataset |
|
|
537 |
if ("tcgaHub" %in% Target) { |
|
|
538 |
Target <- c(Target, "pancanAtlasHub") |
|
|
539 |
} |
|
|
540 |
|
|
|
541 |
# supress binding notes |
|
|
542 |
XenaHostNames <- XenaCohorts <- NULL |
|
|
543 |
|
|
|
544 |
ob <- XenaData %>% filter(XenaHostNames %in% Target) |
|
|
545 |
|
|
|
546 |
if ("tcgaHub" %in% Target) { |
|
|
547 |
# decode project id |
|
|
548 |
ob %>% mutate(ProjectID = sub(".*\\((.*)\\)", "\\1", XenaCohorts)) -> ob |
|
|
549 |
# decode DataType |
|
|
550 |
ob %>% |
|
|
551 |
mutate( |
|
|
552 |
DataType = dplyr::case_when( |
|
|
553 |
grepl("Gistic2_CopyNumber_Gistic2", XenaDatasets) ~ "Gene Level Copy Number", |
|
|
554 |
grepl( |
|
|
555 |
"PANCAN_Genome_Wide_SNP_6_whitelisted.gene.xena", |
|
|
556 |
XenaDatasets |
|
|
557 |
) ~ "Gene Level Copy Number", |
|
|
558 |
# pancan |
|
|
559 |
grepl("SNP6", XenaDatasets) ~ "Copy Number Segments", |
|
|
560 |
grepl( |
|
|
561 |
"PANCAN_Genome_Wide_SNP_6_whitelisted.xena", |
|
|
562 |
XenaDatasets |
|
|
563 |
) ~ "Copy Number Segments", |
|
|
564 |
# pancan |
|
|
565 |
grepl("HumanMethylation", XenaDatasets) ~ "DNA Methylation", |
|
|
566 |
grepl("MethylMix", XenaDatasets) ~ "DNA Methylation", |
|
|
567 |
grepl("HiSeq.*_exon", XenaDatasets) ~ "Exon Expression RNASeq", |
|
|
568 |
grepl("GA_exon", XenaDatasets) ~ "Exon Expression RNASeq", |
|
|
569 |
grepl("GAV2_exon", XenaDatasets) ~ "Exon Expression RNASeq", |
|
|
570 |
grepl("AgilentG", XenaDatasets) ~ "Gene Expression Array", |
|
|
571 |
grepl("HT_HG-U133A", XenaDatasets) ~ "Gene Expression Array", |
|
|
572 |
grepl("GA$", XenaDatasets) & |
|
|
573 |
!grepl("RABIT", XenaDatasets) ~ "Gene Expression RNASeq", |
|
|
574 |
grepl("GAV2$", XenaDatasets) & |
|
|
575 |
!grepl("RABIT", XenaDatasets) ~ "Gene Expression RNASeq", |
|
|
576 |
grepl("HiSeq$", XenaDatasets) & |
|
|
577 |
!grepl("RABIT", XenaDatasets) ~ "Gene Expression RNASeq", |
|
|
578 |
grepl("HiSeqV2$", XenaDatasets) & |
|
|
579 |
!grepl("RABIT", XenaDatasets) ~ "Gene Expression RNASeq", |
|
|
580 |
grepl("HiSeqV2_PANCAN$", XenaDatasets) ~ "Gene Expression RNASeq", |
|
|
581 |
grepl("HiSeqV2_percentile$", XenaDatasets) ~ "Gene Expression RNASeq", |
|
|
582 |
grepl( |
|
|
583 |
"EB\\+\\+AdjustPANCAN_IlluminaHiSeq_RNASeqV2.geneExp.xena", |
|
|
584 |
XenaDatasets |
|
|
585 |
) ~ "Gene Expression RNASeq", |
|
|
586 |
# pancan |
|
|
587 |
grepl("miRNA", XenaDatasets) ~ "miRNA Mature Strand Expression RNASeq", |
|
|
588 |
grepl( |
|
|
589 |
"pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithUnCorrectMiRs", |
|
|
590 |
XenaDatasets |
|
|
591 |
) ~ "miRNA Mature Strand Expression RNASeq", |
|
|
592 |
# pancan |
|
|
593 |
grepl("Pathway_Paradigm", XenaDatasets) ~ "PARADIGM Pathway Activity", |
|
|
594 |
grepl("erge_merged_reals", XenaDatasets) ~ "PARADIGM Pathway Activity", |
|
|
595 |
# pancan |
|
|
596 |
grepl("clinicalMatrix", XenaDatasets) ~ "Phenotype", |
|
|
597 |
grepl( |
|
|
598 |
"Survival_SupplementalTable_S1_20171025_xena_sp", |
|
|
599 |
XenaDatasets |
|
|
600 |
) ~ "Phenotype", |
|
|
601 |
# pancan |
|
|
602 |
grepl("Subtype_Immune_Model_Based.txt", XenaDatasets) ~ "Phenotype", |
|
|
603 |
# pancan |
|
|
604 |
grepl("TCGASubtype.20170308.tsv", XenaDatasets) ~ "Phenotype", |
|
|
605 |
# pancan |
|
|
606 |
grepl( |
|
|
607 |
"TCGA_phenotype_denseDataOnlyDownload.tsv", |
|
|
608 |
XenaDatasets |
|
|
609 |
) ~ "Phenotype", |
|
|
610 |
# pancan |
|
|
611 |
grepl("gene_expression_subtype", XenaDatasets) ~ "Phenotype", |
|
|
612 |
# OV |
|
|
613 |
grepl("RPPA", XenaDatasets) ~ "Protein Expression RPPA", |
|
|
614 |
grepl("mutation_", XenaDatasets) & |
|
|
615 |
!endsWith(XenaDatasets, "gene") ~ "Somatic Mutation", |
|
|
616 |
grepl("mc3.v0.2.8.PUBLIC.xena", XenaDatasets) ~ "Somatic Mutation", |
|
|
617 |
# pancan |
|
|
618 |
grepl("mutation($|(.*_gene$))", XenaDatasets) ~ "Gene Somatic Non-silent Mutation", |
|
|
619 |
grepl( |
|
|
620 |
"mc3.v0.2.8.PUBLIC.nonsilentGene.xena", |
|
|
621 |
XenaDatasets |
|
|
622 |
) ~ "Gene Somatic Non-silent Mutation", |
|
|
623 |
# pancan |
|
|
624 |
grepl("RABIT", XenaDatasets) ~ "Transcription Factor Regulatory Impact", |
|
|
625 |
grepl("iCluster", XenaDatasets) ~ "iCluster", |
|
|
626 |
grepl( |
|
|
627 |
"Pancan12_GenePrograms_drugTargetCanon_in_Pancan33.tsv", |
|
|
628 |
XenaDatasets |
|
|
629 |
) ~ "Signatures", |
|
|
630 |
# pancan |
|
|
631 |
grepl("TCGA.HRD_withSampleID.txt", XenaDatasets) ~ "Signatures", |
|
|
632 |
# pancan |
|
|
633 |
grepl( |
|
|
634 |
"TCGA_pancancer_10852whitelistsamples_68ImmuneSigs.xena", |
|
|
635 |
XenaDatasets |
|
|
636 |
) ~ "Signatures", |
|
|
637 |
# pancan |
|
|
638 |
grepl("StemnessScores_DNAmeth_20170210.tsv", XenaDatasets) ~ "Signatures", |
|
|
639 |
# pancan |
|
|
640 |
grepl( |
|
|
641 |
"StemnessScores_RNAexp_20170127.2.tsv", |
|
|
642 |
XenaDatasets |
|
|
643 |
) ~ "Signatures" # pancan |
|
|
644 |
) |
|
|
645 |
) -> ob |
|
|
646 |
|
|
|
647 |
# decode file type |
|
|
648 |
ob %>% |
|
|
649 |
mutate( |
|
|
650 |
FileType = dplyr::case_when( |
|
|
651 |
DataType == "Gene Level Copy Number" & |
|
|
652 |
grepl("Gistic2_all_data_by_genes", XenaDatasets) ~ "Gistic2", |
|
|
653 |
DataType == "Gene Level Copy Number" & |
|
|
654 |
grepl("Gistic2_all_thresholded.by_genes", XenaDatasets) ~ "Gistic2 thresholded", |
|
|
655 |
DataType == "Gene Level Copy Number" & |
|
|
656 |
grepl( |
|
|
657 |
"PANCAN_Genome_Wide_SNP_6_whitelisted.gene.xena", |
|
|
658 |
XenaDatasets |
|
|
659 |
) ~ "Tumor copy number", |
|
|
660 |
|
|
|
661 |
|
|
|
662 |
DataType == "Copy Number Segments" & |
|
|
663 |
grepl("SNP6_genomicSegment", XenaDatasets) ~ "Before remove germline cnv", |
|
|
664 |
DataType == "Copy Number Segments" & |
|
|
665 |
grepl("SNP6_nocnv_genomicSegment", XenaDatasets) ~ "After remove germline cnv", |
|
|
666 |
DataType == "Copy Number Segments" & |
|
|
667 |
grepl( |
|
|
668 |
"PANCAN_Genome_Wide_SNP_6_whitelisted.xena", |
|
|
669 |
XenaDatasets |
|
|
670 |
) ~ "After remove germline cnv", |
|
|
671 |
|
|
|
672 |
|
|
|
673 |
DataType == "DNA Methylation" & |
|
|
674 |
grepl("HumanMethylation27", XenaDatasets) ~ "Methylation27K", |
|
|
675 |
DataType == "DNA Methylation" & |
|
|
676 |
grepl("HumanMethylation450", XenaDatasets) ~ "Methylation450K", |
|
|
677 |
DataType == "DNA Methylation" & |
|
|
678 |
grepl("oneoff_TCGA_LGG_MethylMix", XenaDatasets) ~ "MethylMix", |
|
|
679 |
|
|
|
680 |
|
|
|
681 |
DataType == "Exon Expression RNASeq" & |
|
|
682 |
grepl("GA_exon", XenaDatasets) ~ "IlluminaGA RNASeq", |
|
|
683 |
DataType == "Exon Expression RNASeq" & |
|
|
684 |
grepl("GAV2_exon", XenaDatasets) ~ "IlluminaGA RNASeqV2", |
|
|
685 |
DataType == "Exon Expression RNASeq" & |
|
|
686 |
grepl("HiSeq_exon", XenaDatasets) ~ "IlluminaHiSeq RNASeq", |
|
|
687 |
DataType == "Exon Expression RNASeq" & |
|
|
688 |
grepl("HiSeqV2_exon", XenaDatasets) ~ "IlluminaHiSeq RNASeqV2", |
|
|
689 |
|
|
|
690 |
|
|
|
691 |
DataType == "Gene Expression Array" & |
|
|
692 |
grepl("AgilentG4502A", XenaDatasets) ~ "Agilent 244K Microarray", |
|
|
693 |
DataType == "Gene Expression Array" & |
|
|
694 |
grepl("HT_HG-U133A", XenaDatasets) ~ "Affymetrix U133A Microarray", |
|
|
695 |
|
|
|
696 |
DataType == "Gene Expression RNASeq" & |
|
|
697 |
endsWith(XenaDatasets, "GA") ~ "IlluminaGA RNASeq", |
|
|
698 |
DataType == "Gene Expression RNASeq" & |
|
|
699 |
endsWith(XenaDatasets, "GAV2") ~ "IlluminaGA RNASeqV2", |
|
|
700 |
DataType == "Gene Expression RNASeq" & |
|
|
701 |
endsWith(XenaDatasets, "HiSeq") ~ "IlluminaHiSeq RNASeq", |
|
|
702 |
DataType == "Gene Expression RNASeq" & |
|
|
703 |
endsWith(XenaDatasets, "HiSeqV2") ~ "IlluminaHiSeq RNASeqV2", |
|
|
704 |
DataType == "Gene Expression RNASeq" & |
|
|
705 |
endsWith(XenaDatasets, "HiSeqV2_PANCAN") ~ "IlluminaHiSeq RNASeqV2 pancan normalized", |
|
|
706 |
DataType == "Gene Expression RNASeq" & |
|
|
707 |
endsWith(XenaDatasets, "HiSeqV2_percentile") ~ "IlluminaHiSeq RNASeqV2 in percentile rank", |
|
|
708 |
DataType == "Gene Expression RNASeq" & |
|
|
709 |
grepl("AdjustPANCAN_IlluminaHiSeq_RNASeqV2", XenaDatasets) ~ "Batch effects normalized", |
|
|
710 |
|
|
|
711 |
DataType == "miRNA Mature Strand Expression RNASeq" & |
|
|
712 |
endsWith(XenaDatasets, "miRNA_GA_gene") ~ "IlluminaGA RNASeq", |
|
|
713 |
DataType == "miRNA Mature Strand Expression RNASeq" & |
|
|
714 |
endsWith(XenaDatasets, "miRNA_HiSeq_gene") ~ "IlluminaHiSeq RNASeq", |
|
|
715 |
DataType == "miRNA Mature Strand Expression RNASeq" & |
|
|
716 |
grepl( |
|
|
717 |
"pancanMiRs_EBadjOnProtocolPlatformWithoutRepsWithU", |
|
|
718 |
XenaDatasets |
|
|
719 |
) ~ "Batch effects normalized", |
|
|
720 |
|
|
|
721 |
|
|
|
722 |
DataType == "PARADIGM Pathway Activity" & |
|
|
723 |
grepl("merge_merged_reals", XenaDatasets) ~ "Platform-corrected PANCAN12 dataset", |
|
|
724 |
DataType == "PARADIGM Pathway Activity" & |
|
|
725 |
endsWith(XenaDatasets, "Pathway_Paradigm_mRNA") ~ "Use only Microarray", |
|
|
726 |
DataType == "PARADIGM Pathway Activity" & |
|
|
727 |
endsWith( |
|
|
728 |
XenaDatasets, |
|
|
729 |
"Pathway_Paradigm_mRNA_And_Copy_Number" |
|
|
730 |
) ~ "Use Microarray plus Copy Number", |
|
|
731 |
DataType == "PARADIGM Pathway Activity" & |
|
|
732 |
endsWith(XenaDatasets, "Pathway_Paradigm_RNASeq") ~ "Use only RNASeq", |
|
|
733 |
DataType == "PARADIGM Pathway Activity" & |
|
|
734 |
endsWith( |
|
|
735 |
XenaDatasets, |
|
|
736 |
"Pathway_Paradigm_RNASeq_And_Copy_Number" |
|
|
737 |
) ~ "Use RNASeq plus Copy Number", |
|
|
738 |
|
|
|
739 |
|
|
|
740 |
DataType == "Phenotype" & |
|
|
741 |
endsWith(XenaDatasets, "clinicalMatrix") ~ "Clinical Information", |
|
|
742 |
DataType == "Phenotype" & |
|
|
743 |
grepl( |
|
|
744 |
"Survival_SupplementalTable_S1_20171025_xena_sp", |
|
|
745 |
XenaDatasets |
|
|
746 |
) ~ "Clinical Information", |
|
|
747 |
DataType == "Phenotype" & |
|
|
748 |
grepl("gene_expression_subtype", XenaDatasets) ~ "Gene Expression Subtype", |
|
|
749 |
DataType == "Phenotype" & |
|
|
750 |
grepl("Subtype_Immune_Model_Based", XenaDatasets) ~ "Immune Model Based Subtype", |
|
|
751 |
DataType == "Phenotype" & |
|
|
752 |
grepl("TCGASubtype", XenaDatasets) ~ "TCGA Molecular Subtype", |
|
|
753 |
DataType == "Phenotype" & |
|
|
754 |
grepl( |
|
|
755 |
"TCGA_phenotype_denseDataOnlyDownload", |
|
|
756 |
XenaDatasets |
|
|
757 |
) ~ "TCGA Sample Type and Primary Disease", |
|
|
758 |
|
|
|
759 |
DataType == "Protein Expression RPPA" & |
|
|
760 |
endsWith(XenaDatasets, "RPPA") ~ "RPPA", |
|
|
761 |
DataType == "Protein Expression RPPA" & |
|
|
762 |
endsWith(XenaDatasets, "RPPA_RBN") ~ "RPPA normalized by RBN", |
|
|
763 |
DataType == "Protein Expression RPPA" & |
|
|
764 |
grepl("TCGA-RPPA-pancan-clean", XenaDatasets) ~ "RPPA pancan normalized", |
|
|
765 |
|
|
|
766 |
|
|
|
767 |
DataType == "Somatic Mutation" & |
|
|
768 |
grepl("mc3.v0.2.8.PUBLIC.xena", XenaDatasets) ~ "MC3 Public Version", |
|
|
769 |
DataType == "Somatic Mutation" & |
|
|
770 |
endsWith(XenaDatasets, "mutation_bcgsc") ~ "bcgsc automated", |
|
|
771 |
DataType == "Somatic Mutation" & |
|
|
772 |
endsWith(XenaDatasets, "mutation_bcm") ~ "bcm automated", |
|
|
773 |
DataType == "Somatic Mutation" & |
|
|
774 |
endsWith(XenaDatasets, "mutation_bcm_solid") ~ "bcm SOLiD", |
|
|
775 |
DataType == "Somatic Mutation" & |
|
|
776 |
endsWith(XenaDatasets, "mutation_broad") ~ "broad automated", |
|
|
777 |
DataType == "Somatic Mutation" & |
|
|
778 |
endsWith(XenaDatasets, "mutation_curated_bcm") ~ "bcm curated", |
|
|
779 |
DataType == "Somatic Mutation" & |
|
|
780 |
endsWith(XenaDatasets, "mutation_curated_bcm_solid") ~ "bcm SOLiD curated", |
|
|
781 |
DataType == "Somatic Mutation" & |
|
|
782 |
endsWith(XenaDatasets, "mutation_curated_broad") ~ "broad curated", |
|
|
783 |
DataType == "Somatic Mutation" & |
|
|
784 |
endsWith(XenaDatasets, "mutation_curated_wustl") ~ "wustl curated", |
|
|
785 |
DataType == "Somatic Mutation" & |
|
|
786 |
endsWith(XenaDatasets, "mutation_ucsc_maf") ~ "ucsc automated", |
|
|
787 |
DataType == "Somatic Mutation" & |
|
|
788 |
endsWith(XenaDatasets, "mutation_wustl") ~ "wustl automated", |
|
|
789 |
DataType == "Somatic Mutation" & |
|
|
790 |
endsWith(XenaDatasets, "mutation_wustl_hiseq") ~ "wustl hiseq automated", |
|
|
791 |
|
|
|
792 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
793 |
grepl( |
|
|
794 |
"mc3.v0.2.8.PUBLIC.nonsilentGene.xena", |
|
|
795 |
XenaDatasets |
|
|
796 |
) ~ "MC3 Public Version", |
|
|
797 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
798 |
endsWith(XenaDatasets, "mutation") ~ "PANCAN AWG analyzed", |
|
|
799 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
800 |
endsWith(XenaDatasets, "mutation_bcgsc_gene") ~ "bsgsc automated", |
|
|
801 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
802 |
endsWith(XenaDatasets, "mutation_bcm_gene") ~ "bcm automated", |
|
|
803 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
804 |
endsWith(XenaDatasets, "mutation_bcm_solid_gene") ~ "bcm SOLiD", |
|
|
805 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
806 |
endsWith(XenaDatasets, "mutation_broad_gene") ~ "broad automated", |
|
|
807 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
808 |
endsWith(XenaDatasets, "mutation_curated_bcm_gene") ~ "bcm curated", |
|
|
809 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
810 |
endsWith(XenaDatasets, "mutation_curated_bcm_solid_gene") ~ "bcm SOLiD curated", |
|
|
811 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
812 |
endsWith(XenaDatasets, "mutation_curated_broad_gene") ~ "broad curated", |
|
|
813 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
814 |
endsWith(XenaDatasets, "mutation_curated_wustl_gene") ~ "wustl curated", |
|
|
815 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
816 |
endsWith(XenaDatasets, "mutation_ucsc_maf_gene") ~ "ucsc automated", |
|
|
817 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
818 |
endsWith(XenaDatasets, "mutation_wustl_gene") ~ "wustl automated", |
|
|
819 |
DataType == "Gene Somatic Non-silent Mutation" & |
|
|
820 |
endsWith(XenaDatasets, "mutation_wustl_hiseq_gene") ~ "wustl hiseq automated", |
|
|
821 |
|
|
|
822 |
|
|
|
823 |
DataType == "Transcription Factor Regulatory Impact" & |
|
|
824 |
grepl("HiSeq.V2$", XenaDatasets) ~ "RABIT Use IlluminaHiSeq RNASeqV2", |
|
|
825 |
DataType == "Transcription Factor Regulatory Impact" & |
|
|
826 |
grepl("HiSeq$", XenaDatasets) ~ "RABIT Use IlluminaHiSeq RNASeq", |
|
|
827 |
DataType == "Transcription Factor Regulatory Impact" & |
|
|
828 |
grepl("GA.V2$", XenaDatasets) ~ "RABIT Use IlluminaGA RNASeqV2", |
|
|
829 |
DataType == "Transcription Factor Regulatory Impact" & |
|
|
830 |
grepl("GA$", XenaDatasets) ~ "RABIT Use IlluminaGA RNASeq", |
|
|
831 |
DataType == "Transcription Factor Regulatory Impact" & |
|
|
832 |
grepl("Agilent$", XenaDatasets) ~ "RABIT Use Agilent 244K Microarray", |
|
|
833 |
DataType == "Transcription Factor Regulatory Impact" & |
|
|
834 |
grepl("U133A$", XenaDatasets) ~ "RABIT Use Affymetrix U133A Microarray", |
|
|
835 |
|
|
|
836 |
DataType == "iCluster" & |
|
|
837 |
grepl("TCGA_PanCan33_iCluster_k28_tumor", XenaDatasets) ~ "iCluster cluster assignments", |
|
|
838 |
DataType == "iCluster" & |
|
|
839 |
grepl("lat.vars.iCluster.redo.tumor", XenaDatasets) ~ "iCluster latent variables", |
|
|
840 |
|
|
|
841 |
DataType == "Signatures" & |
|
|
842 |
grepl( |
|
|
843 |
"Pancan12_GenePrograms_drugTargetCanon", |
|
|
844 |
XenaDatasets |
|
|
845 |
) ~ "Pancan Gene Programs", |
|
|
846 |
DataType == "Signatures" & |
|
|
847 |
grepl("StemnessScores_DNAmeth_", XenaDatasets) ~ "DNA methylation based StemnessScore", |
|
|
848 |
DataType == "Signatures" & |
|
|
849 |
grepl("StemnessScores_RNAexp", XenaDatasets) ~ "RNA based StemnessScore", |
|
|
850 |
DataType == "Signatures" & |
|
|
851 |
grepl( |
|
|
852 |
"TCGA_pancancer_10852whitelistsamples_68ImmuneSigs", |
|
|
853 |
XenaDatasets |
|
|
854 |
) ~ "Immune Signature Scores", |
|
|
855 |
DataType == "Signatures" & |
|
|
856 |
grepl("TCGA.HRD_withSampleID.txt", XenaDatasets) ~ "Genome-wide DNA Damage Footprint HRD Score" |
|
|
857 |
) |
|
|
858 |
) -> ob_tcga |
|
|
859 |
} |
|
|
860 |
ob_tcga |
|
|
861 |
} |
|
|
862 |
|
|
|
863 |
|
|
|
864 |
|
|
|
865 |
# grep unique pattern |
|
|
866 |
# ob1 = sub("TCGA.*/(.*)", "\\1", ob$XenaDatasets) %>% table() %>% names() -> uniqueDatasets |
|
|
867 |
# ob1 = tibble(XenaDatasets = uniqueDatasets) |
|
|
868 |
# grep("gene_expression_subtype", ob$XenaDatasets, value = TRUE) |
|
|
869 |
|
|
|
870 |
|
|
|
871 |
# Register names that should be regarded as defined globally when the check
# tool is applied to this package, so bare column names do not trigger
# "no visible binding for global variable" notes.
# `DataType` is used as a bare name in the case_when() conditions above;
# `FileType` and `ProjectID` are presumably used the same way elsewhere in
# this file -- TODO confirm.
# NOTE(review): `XenaDatasets` is also referenced as a bare name in those
# case_when() conditions but is not listed here -- confirm it is registered
# elsewhere in the package.
utils::globalVariables(c("DataType", "FileType", "ProjectID")) |