|
a |
|
b/R/Dataset_Exploration.R |
|
|
1 |
# Dataset_exploration.R |
|
|
2 |
|
|
|
3 |
# Install the Bioconductor tooling and PharmacoGx only when they are missing,
# so re-running the script does not trigger a reinstall every time.
if (!requireNamespace("BiocManager", quietly = TRUE)) {
  install.packages("BiocManager")
}
if (!requireNamespace("PharmacoGx", quietly = TRUE)) {
  # Same Bioconductor release pin as the original call.
  BiocManager::install("PharmacoGx", version = "3.8")
}
|
|
6 |
|
|
|
7 |
|
|
|
8 |
library(PharmacoGx) |
|
|
9 |
library(data.table) |
|
|
10 |
# ==== Explore the PharmacoSets available through PharmacoGx ====
# Search the installed help pages for PharmacoSet documentation.
??PharmacoSet

# List the downloadable PharmacoSets, then fetch the three used below.
availablePSets()
ccle <- PharmacoGx::downloadPSet("CCLE_2013")
gdsc <- PharmacoGx::downloadPSet("GDSC_2013")
gcsi <- PharmacoGx::downloadPSet("gCSI")

# Intersect all three sets on both drugs and cell lines. TRUE/FALSE are
# spelled out because T and F are ordinary, reassignable variables.
common <- intersectPSet(
  pSets = list(ccle, gdsc, gcsi),
  intersectOn = c("drugs", "cell.lines"),
  strictIntersect = TRUE,
  nthread = 2
)

# Cell-line-only intersection of CCLE and GDSC; the result is not stored.
intersectPSet(
  pSets = list(ccle, gdsc),
  intersectOn = "cell.lines",
  strictIntersect = FALSE,
  nthread = 2
)

# Feature names of the expression data held by each set.
fNames(ccle, "rna")
fNames(gdsc, "rna")
fNames(gcsi, "rnaseq")
|
|
24 |
|
|
|
25 |
# ==== Inspect the CCLE sensitivity data ====
# Sensitivity measures available in the CCLE set.
sensitivityMeasures(ccle)

# Distinct rows of the experiment info once its first two columns are dropped,
# followed by the raw and profile components of the sensitivity slot.
unique(slot(ccle, "sensitivity")$info[, -seq_len(2)])
slot(ccle, "sensitivity")$raw
slot(ccle, "sensitivity")$profiles

# Summarise the published AUC for lapatinib and show its first five elements.
ccle_sum <- summarizeSensitivityProfiles(
  pSet = ccle,
  drugs = "lapatinib",
  sensitivity.measure = "auc_published"
)
ccle_sum[1:5]
|
|
33 |
# common = intersectPSet(pSets = list(ccle, gdsc), |
|
|
34 |
# intersectOn = c("drugs", "cell.lines"), strictIntersect = T, nthread = 2) |
|
|
35 |
|
|
|
36 |
# common_drugs = intersectPSet(pSets = list(ccle, gdsc, gcsi), |
|
|
37 |
# intersectOn = c("drugs", "cell.lines"), |
|
|
38 |
# drugs = c("Erlotinib", "Lapatinib", "Paclitaxel"), nthread = 2) |
|
|
39 |
# common_drugs$CCLE@drug |
|
|
40 |
# common_drugs$GDSC@drug |
|
|
41 |
# common_drugs$GDSC@cell$tissueid |
|
|
42 |
# ==== Drugs and breast cell lines in the intersected CCLE set ====
common_ccle <- common$CCLE
common_ccle@drug$drug.name

# %in% instead of == so cell lines whose tissue annotation is NA are dropped
# rather than appearing as NA entries in the result.
breast_common <- common_ccle@cell$cellid[common_ccle@cell$tissueid %in% "breast"]
breast_common

# The original subset the drug names with the cell-line tissue mask; the two
# vectors describe different entities, so that subset was meaningless.
# NOTE(review): assumed intent is "drugs screened on at least one breast cell
# line" -- confirm. sensNumber() is used as elsewhere in this script, with
# cell lines as rows and drug identifiers as columns; the values printed are
# those column identifiers.
n_experiments <- sensNumber(common_ccle)
breast_rows <- rownames(n_experiments) %in% breast_common
colnames(n_experiments)[
  colSums(n_experiments[breast_rows, , drop = FALSE], na.rm = TRUE) > 0
]
|
|
46 |
|
|
|
47 |
# ==== Median AUC summaries for the three PharmacoSets ====
sensitivityMeasures(pSet = gdsc)

# Summarise one sensitivity measure of a PharmacoSet with the median, using
# fill.missing = TRUE as in the original calls (TRUE spelled out because T is
# a reassignable variable).
#   pset:    a PharmacoSet object
#   measure: name of the sensitivity measure to summarise
summarize_median_auc <- function(pset, measure) {
  summarizeSensitivityProfiles(
    pSet = pset,
    sensitivity.measure = measure,
    summary.stat = "median",
    fill.missing = TRUE
  )
}

gdsc_auc <- summarize_median_auc(gdsc, "auc_published")
ccle_auc <- summarize_median_auc(ccle, "auc_published")
# gCSI is summarised on the recomputed AUC, unlike the two sets above.
gcsi_auc <- summarize_median_auc(gcsi, "auc_recomputed")
|
|
69 |
|
|
|
70 |
# ==== Find breast tissue cell lines with Lapatinib tested on them ====
# Logical mask over the CCLE drug table marking the "Lapatinib" display name.
ccle@drug$drug.name == "Lapatinib"

# Experiment-count table per cell line (rows) and drug (columns); the row
# names are moved into the `rn` column by keep.rownames.
cell_drug <- as.data.table(sensNumber(ccle), keep.rownames = TRUE)

# %in% keeps cell lines with a missing tissue annotation out of the result.
breast_cells <- ccle@cell$cellid[ccle@cell$tissueid %in% "breast"]

# All the cell lines in CCLE that have a breast origin and test Lapatinib.
# `> 0` rather than `== 1`: the table holds counts, so a cell line screened
# more than once must still be reported.
cell_drug[rn %in% breast_cells & lapatinib > 0]$rn

# The drug is addressed by its lowercase identifier, matching the `lapatinib`
# column above; the duplicate call with the capitalised name was dropped.
drugDoseResponseCurve(drug = "lapatinib", cellline = "HARA", pSets = ccle)
|
|
82 |
|
|
|
83 |
|
|
|
84 |
|
|
|
85 |
# ==== Read Achilles data ====
# The two tables get distinct names; previously the first read was assigned to
# `ach` and immediately overwritten, so it was loaded for nothing.
# `ach` still ends up holding the RNAseq table, as before.
ach_dep <- fread("Data/Achilles/D2_combined_gene_dep_scores.csv")
ach <- fread("Data/Achilles/RNAseq_lRPKM_data.csv")
dim(ach)
ach[1:5, 1:5]
|
|
89 |
# ==== Read DepMap data ==== |
|
|
90 |
ccle_rna <- fread("Data/DepMap/CCLE_depMap_19Q1_TPM.csv")
ccle_transcripts <- fread("Data/DepMap/CCLE_depMap_19Q1_TPM_transcripts.csv")
dim(ccle_transcripts)
ccle_drug_data <- fread("Data/DepMap/CCLE_NP24.2009_Drug_data_2015.02.24.csv")
ccle_line_info <- fread("Data/DepMap/DepMap-2019q1-celllines_v2.csv")
dim(ccle_rna)

ccle_line_info

depmap_mutation <- fread("Data/DepMap/depmap_19Q1_mutation_calls.csv")

# Load the GDSC AUC table BEFORE it is compared against DepMap IDs. The
# original ran the comparison first, i.e. against the PharmacoGx summary that
# `gdsc_auc` held at that point, and only then read this file.
# Presumably the columns after the first (V1) are named by DepMap ID -- the
# comparisons below and later in the script rely on that; verify.
gdsc_auc <- fread("Data/DepMap/GDSC_AUC.csv")
dim(gdsc_auc)
gdsc_auc$V1

# Fraction (0-1) of CCLE expression cell lines that have DepMap mutation data
mean(ccle_rna$V1 %in% depmap_mutation$DepMap_ID)
# Fraction (0-1) of GDSC cell lines that have DepMap mutation data
gdsc_lines <- colnames(gdsc_auc)[-1]
mean(gdsc_lines %in% depmap_mutation$DepMap_ID)
|
|
108 |
|
|
|
109 |
ccle_rna[1:5, 1:5]
length(unique(ccle_line_info$DepMap_ID))

# Short cell line name: CCLE_Name with everything from the first underscore
# onwards removed.
short_line_name <- function(ccle_name) {
  gsub(pattern = "_.*", replacement = "", x = ccle_name)
}
ccle_linenames <- short_line_name(
  ccle_line_info[DepMap_ID %in% ccle_rna$V1]$CCLE_Name
)
line_name_id <- data.table(
  DepMap_ID = ccle_line_info$DepMap_ID,
  Name = short_line_name(ccle_line_info$CCLE_Name)
)

# `ctrp_cell_info` was used here without being defined anywhere in view.
# NOTE(review): assumed to be the CTRPv2 per-cell-line metadata (it must have
# a `ccl_name` column) -- confirm. An existing definition is left untouched.
if (!exists("ctrp_cell_info")) {
  ctrp_cell_info <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_cell_line.txt")
}

# Fraction (0-1) of CTRPv2 cell lines with CCLE RNA expression data
mean(ctrp_cell_info$ccl_name %in% ccle_linenames)
# Fraction (0-1) of CTRPv2 cell lines with DepMap mutation data
ctrp_depmap_id <- line_name_id[Name %in% ctrp_cell_info$ccl_name]$DepMap_ID
mean(ctrp_depmap_id %in% depmap_mutation$DepMap_ID)

# Fraction (0-1) of GDSC cell lines in DepMap CCLE expression data; the first
# column of gdsc_auc is skipped, as in the original expressions.
gdsc_lines <- colnames(gdsc_auc)[-1]
mean(gdsc_lines %in% ccle_rna$V1)

# Number of annotated DepMap cell lines that have expression data
sum(ccle_line_info$DepMap_ID %in% ccle_rna$V1)

# Fraction (0-1) of GDSC cell lines present in the DepMap cell line annotation
mean(gdsc_lines %in% ccle_line_info$DepMap_ID)
length(unique(colnames(gdsc_auc))) - 1
length(unique(ccle_line_info$DepMap_ID))
|
|
132 |
|
|
|
133 |
# ==== CTRPv2 column dictionary ====
ctrp_columns <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20._COLUMNS.txt")

# Distinct COLUMN_DESCRIPTION entries recorded for one COLUMN_HEADER value.
describe_ctrp_column <- function(wanted_header) {
  unique(ctrp_columns[COLUMN_HEADER == wanted_header, "COLUMN_DESCRIPTION"])
}

describe_ctrp_column("master_cpd_id")
describe_ctrp_column("experiment_id")
describe_ctrp_column("cpd_pred_pv")
describe_ctrp_column("cpd_avg_pv")
describe_ctrp_column("master_ccl_id")
describe_ctrp_column("baseline_signal")
|
|
140 |
|
|
|
141 |
|
|
|
142 |
# ==== Read the CTRPv2 tables ====
ctrp_plate <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_assay_plate.txt")
unique(ctrp_plate$assay_plate_barcode)
ctrp_data <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.data.per_cpd_post_qc.txt")
ctrp_experiment <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_experiment.txt")
ctrp_drug_data <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.data.per_cpd_avg.txt")
ctrp_line_info <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_cell_line.txt")

# Rows for a single experiment / compound pair.
ctrp_data[experiment_id == 1 & master_cpd_id == 1788]

length(unique(ctrp_drug_data$assay_plate_barcode))
ctrp_drug_data[experiment_id == 1]
ctrp_experiment$experiment_id

# Both tables are filtered on experiment_id above and ctrp_data also on
# master_cpd_id, so experiment_id alone does not look like a unique key.
# Merging on it alone pairs every row of an experiment in one table with every
# row of that experiment in the other. Join on every identifying column the
# two tables share instead.
# NOTE(review): "cpd_conc_umol" is assumed to be the concentration column name
# -- TODO confirm. Keys missing from either table are skipped, so the join
# degrades to the original `by = "experiment_id"` in the worst case.
merge_keys <- intersect(
  c("experiment_id", "master_cpd_id", "cpd_conc_umol"),
  intersect(names(ctrp_data), names(ctrp_drug_data))
)
ctrp_master <- merge(x = ctrp_data, y = ctrp_drug_data, by = merge_keys)
ctrp_drug_info <- fread("Data/DepMap/CTRPv2.0_2015_ctd2_ExpandedDataset/v20.meta.per_compound.txt")
|
|
157 |
|
|
|
158 |
|