Diff of /R/Dataset_Exploration.R [000000] .. [c3b4f8]

Switch to unified view

a b/R/Dataset_Exploration.R
1
# Dataset_exploration.R
2
3
# Install the Bioconductor tooling and PharmacoGx only when they are missing,
# so re-running this script does not trigger a reinstall every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("PharmacoGx", quietly = TRUE)) {
  # The Bioconductor release is pinned to 3.8, as in the original call.
  BiocManager::install("PharmacoGx", version = "3.8")
}
6
7
8
library(PharmacoGx)
9
library(data.table)
10
# Search the installed help pages for "PharmacoSet".
help.search("PharmacoSet")

# Show which PharmacoSets can be downloaded, then fetch the three used below.
availablePSets()
ccle <- PharmacoGx::downloadPSet("CCLE_2013")
gdsc <- PharmacoGx::downloadPSet("GDSC_2013")
gcsi <- PharmacoGx::downloadPSet("gCSI")
16
17
# Strict intersection of all three datasets on both drugs and cell lines;
# kept in `common` for the lookups further down.
common <- intersectPSet(
  pSets = list(ccle, gdsc, gcsi),
  intersectOn = c("drugs", "cell.lines"),
  strictIntersect = TRUE,
  nthread = 2
)

# Non-strict, cell-line-only intersection of CCLE and GDSC.
# The result is only displayed, not stored.
intersectPSet(
  pSets = list(ccle, gdsc),
  intersectOn = "cell.lines",
  strictIntersect = FALSE,
  nthread = 2
)
21
# Feature names of the expression data type stored in each dataset.
fNames(ccle, "rna")
fNames(gdsc, "rna")
fNames(gcsi, "rnaseq")

# Sensitivity measures available in CCLE, followed by a look at the raw
# contents of the sensitivity slot (info without its first two columns,
# raw data, and profiles).
sensitivityMeasures(ccle)
unique(ccle@sensitivity$info[, -c(1, 2)])
ccle@sensitivity$raw
ccle@sensitivity$profiles

# Summarise the published AUC for lapatinib only and peek at the first five
# values.
ccle_sum <- summarizeSensitivityProfiles(
  pSet = ccle,
  sensitivity.measure = "auc_published",
  drugs = "lapatinib"
)
ccle_sum[seq_len(5)]
33
# common = intersectPSet(pSets = list(ccle, gdsc),
34
#                        intersectOn = c("drugs", "cell.lines"), strictIntersect = T, nthread = 2)
35
36
# common_drugs = intersectPSet(pSets = list(ccle, gdsc, gcsi),
37
#                        intersectOn = c("drugs", "cell.lines"),
38
#                        drugs = c("Erlotinib", "Lapatinib", "Paclitaxel"), nthread = 2)
39
# common_drugs$CCLE@drug
40
# common_drugs$GDSC@drug
41
# common_drugs$GDSC@cell$tissueid
42
# Drugs retained in the intersected CCLE set.
common$CCLE@drug$drug.name

# Breast cell lines retained in the intersected CCLE set.
# which() drops NA tissue labels instead of producing NA cell ids.
common_breast_cells <-
  common$CCLE@cell$cellid[which(common$CCLE@cell$tissueid == "breast")]
common_breast_cells

# Drugs with at least one sensitivity experiment on a breast cell line.
# The previous expression subset the drug table's names with a logical mask
# built from the cell table; the two tables have different lengths, so the
# result was NA-padded and unrelated to tissue. sensNumber() is used instead:
# elsewhere in this script its rows are matched against cell ids and its
# columns are addressed by drug, so the column names with a positive count
# over the breast rows are the drugs tested on breast lines.
# NOTE(review): this reports sensNumber column names, which may be drug ids
# rather than the `drug.name` values shown above -- confirm.
common_sens_n <- sensNumber(common$CCLE)
is_breast_row <- rownames(common_sens_n) %in% common_breast_cells
colnames(common_sens_n)[
  colSums(common_sens_n[is_breast_row, , drop = FALSE]) > 0
]
46
47
# Sensitivity measures available in GDSC.
sensitivityMeasures(pSet = gdsc)

# Median-summarised AUC per dataset, with missing combinations filled in
# (fill.missing = TRUE). GDSC and CCLE use the published AUC; gCSI uses the
# recomputed AUC.
gdsc_auc <- summarizeSensitivityProfiles(
  pSet = gdsc,
  sensitivity.measure = "auc_published",
  summary.stat = "median",
  fill.missing = TRUE
)
ccle_auc <- summarizeSensitivityProfiles(
  pSet = ccle,
  sensitivity.measure = "auc_published",
  summary.stat = "median",
  fill.missing = TRUE
)
gcsi_auc <- summarizeSensitivityProfiles(
  pSet = gcsi,
  sensitivity.measure = "auc_recomputed",
  summary.stat = "median",
  fill.missing = TRUE
)
69
70
# ==== Breast cell lines with Lapatinib tested on them (CCLE) ====
# Logical mask over the CCLE drug table marking the entry named "Lapatinib".
ccle@drug$drug.name == "Lapatinib"

# Experiment counts from sensNumber(), converted to a data.table with the
# row names kept in column `rn` (matched against cell ids below).
cell_drug <- as.data.table(sensNumber(ccle), keep.rownames = TRUE)

# which() drops NA tissue labels instead of producing NA cell ids.
breast_cells <- ccle@cell$cellid[which(ccle@cell$tissueid == "breast")]

# All the cell lines in CCLE that have a breast origin and test Lapatinib.
# `> 0` rather than `== 1`: a line screened more than once is still a line
# on which the drug was tested.
cell_drug[rn %in% breast_cells & lapatinib > 0, rn]

# Dose-response curve for HARA, tried with both spellings of the drug name.
# NOTE(review): presumably only one spelling matches the drug identifiers in
# the PharmacoSet -- confirm and drop the other.
drugDoseResponseCurve(drug = "Lapatinib", cellline = "HARA", pSets = ccle)
drugDoseResponseCurve(drug = "lapatinib", cellline = "HARA", pSets = ccle)
82
83
84
85
# ==== Read Achilles data ====
# The gene dependency scores get their own variable. Previously they were
# read into `ach` and overwritten by the RNA-seq table on the very next
# statement, before anything could use them.
ach_dep <- fread("Data/Achilles/D2_combined_gene_dep_scores.csv")

# `ach` still ends up holding the RNA-seq table, exactly as before.
ach <- fread("Data/Achilles/RNAseq_lRPKM_data.csv")
dim(ach)
ach[1:5, 1:5]
89
# ==== Read DepMap data ====
ccle_rna <- fread("Data/DepMap/CCLE_depMap_19Q1_TPM.csv")
ccle_transcripts <- fread("Data/DepMap/CCLE_depMap_19Q1_TPM_transcripts.csv")
dim(ccle_transcripts)
ccle_drug_data <- fread("Data/DepMap/CCLE_NP24.2009_Drug_data_2015.02.24.csv")
ccle_line_info <- fread("Data/DepMap/DepMap-2019q1-celllines_v2.csv")
dim(ccle_rna)

ccle_line_info

depmap_mutation <- fread("Data/DepMap/depmap_19Q1_mutation_calls.csv")

# Load the GDSC AUC table BEFORE its column names are used below. It used to
# be read only after the first coverage calculation, which therefore ran
# against whatever `gdsc_auc` held earlier in the session (the PharmacoGx
# summary) instead of this file.
# NOTE: this rebinds `gdsc_auc`, replacing any earlier object of that name.
gdsc_auc <- fread("Data/DepMap/GDSC_AUC.csv")
dim(gdsc_auc)
gdsc_auc$V1

# Fraction (0-1) of the ids in the first column of the CCLE expression table
# that also appear in the DepMap mutation calls.
mean(ccle_rna$V1 %in% depmap_mutation$DepMap_ID)

# Fraction (0-1) of GDSC cell lines present in the DepMap mutation calls.
# The first column (`V1`) is skipped; presumably it holds row labels and the
# remaining column names are DepMap ids -- TODO confirm.
mean(colnames(gdsc_auc)[-1] %in% depmap_mutation$DepMap_ID)

ccle_rna[1:5, 1:5]
length(unique(ccle_line_info$DepMap_ID))
111
112
# ==== Cell line coverage across CTRPv2, GDSC and DepMap/CCLE ====
# `ctrp_cell_info` was used below without ever being created in this script.
# Load it when it is not already in the session.
# NOTE(review): assumes it is the CTRPv2 per-cell-line metadata file (the
# only CTRP cell line table this script reads) and that the file provides
# the `ccl_name` column used below -- TODO confirm.
if (!exists("ctrp_cell_info")) {
  ctrp_cell_info <- fread(
    "Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_cell_line.txt"
  )
}

# CCLE names with everything from the first underscore onwards removed,
# restricted to lines whose DepMap id appears in the expression table.
ccle_linenames <- gsub(
  pattern = "_.*",
  replacement = "",
  x = ccle_line_info[DepMap_ID %in% ccle_rna$V1]$CCLE_Name
)

# DepMap id <-> trimmed name lookup for every line in the DepMap metadata.
line_name_id <- data.table(
  DepMap_ID = ccle_line_info$DepMap_ID,
  Name = gsub(pattern = "_.*", replacement = "", x = ccle_line_info$CCLE_Name)
)

# Fraction (0-1) of CTRPv2 cell lines with CCLE RNA expression data
mean(ctrp_cell_info$ccl_name %in% ccle_linenames)

# Fraction (0-1) of CTRPv2 cell lines with DepMap mutation data
ctrp_depmap_id <- line_name_id[Name %in% ctrp_cell_info$ccl_name]$DepMap_ID
mean(ctrp_depmap_id %in% depmap_mutation$DepMap_ID)

# Fraction (0-1) of GDSC cell lines in DepMap CCLE expression data
mean(colnames(gdsc_auc)[-1] %in% ccle_rna$V1)

# Number of DepMap metadata lines that have expression data.
sum(ccle_line_info$DepMap_ID %in% ccle_rna$V1)

# Fraction (0-1) of GDSC cell lines listed in the DepMap metadata, then the
# number of distinct column names minus one, and the number of distinct
# DepMap ids.
mean(colnames(gdsc_auc)[-1] %in% ccle_line_info$DepMap_ID)
length(unique(colnames(gdsc_auc))) - 1
length(unique(ccle_line_info$DepMap_ID))
132
133
# ==== CTRPv2 column dictionary ====
# Read the column dictionary, then show the distinct descriptions recorded
# for a handful of column headers used in the CTRP tables.
ctrp_columns <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20._COLUMNS.txt")

unique(ctrp_columns[COLUMN_HEADER == "master_cpd_id", .(COLUMN_DESCRIPTION)])
unique(ctrp_columns[COLUMN_HEADER == "experiment_id", .(COLUMN_DESCRIPTION)])
unique(ctrp_columns[COLUMN_HEADER == "cpd_pred_pv", .(COLUMN_DESCRIPTION)])
unique(ctrp_columns[COLUMN_HEADER == "cpd_avg_pv", .(COLUMN_DESCRIPTION)])
unique(ctrp_columns[COLUMN_HEADER == "master_ccl_id", .(COLUMN_DESCRIPTION)])
unique(ctrp_columns[COLUMN_HEADER == "baseline_signal", .(COLUMN_DESCRIPTION)])
140
141
142
# ==== Read CTRPv2 tables ====
# Per-assay-plate metadata and the distinct plate barcodes it lists.
ctrp_plate <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_assay_plate.txt")
unique(ctrp_plate$assay_plate_barcode)

# Post-QC per-compound data, per-experiment metadata, per-compound averages
# and per-cell-line metadata.
ctrp_data <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.data.per_cpd_post_qc.txt")
ctrp_experiment <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_experiment.txt")
ctrp_drug_data <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.data.per_cpd_avg.txt")
ctrp_line_info <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_cell_line.txt")

# Spot-check one experiment/compound pair in the post-QC data.
ctrp_data[experiment_id == 1 & master_cpd_id == 1788]

# Distinct plate barcodes in the averaged data, the rows of experiment 1,
# and the experiment ids listed in the metadata.
length(unique(ctrp_drug_data$assay_plate_barcode))
ctrp_drug_data[experiment_id == 1]
ctrp_experiment$experiment_id

# NOTE(review): both tables are joined on `experiment_id` alone. If an
# experiment has several rows in each table (e.g. one per compound), this is
# a many-to-many join; joining on the compound id as well may be what is
# intended -- confirm against the table schemas.
ctrp_master <- merge(ctrp_data, ctrp_drug_data, by = "experiment_id")

# Per-compound metadata.
ctrp_drug_info <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_compound.txt")
157
158