# R/Data_Source_Comparison.R
# Find the percentage of cell lines with proteomics data that also have drug
# response data in DepMap

# library() stops immediately if data.table is missing; require() would only
# return FALSE and let the script fail later at the first fread() call.
library(data.table)

# Input tables (absolute, machine-specific paths).
# d:       primary-screen replicate-collapsed log fold change file
# protein: CCLE normalized protein quantification file
# cells:   DepMap cell line info file
d <- fread("/Users/ftaj/Downloads/primary-screen-replicate-collapsed-logfold-change.csv")
protein <- fread("/Users/ftaj/OneDrive - University of Toronto/Drug_Response/Data/DepMap/20Q2/CCLE_protein_quant_current_normalized.csv")
cells <- fread("/Users/ftaj/OneDrive - University of Toronto/Drug_Response/Data/DepMap/20Q2/DepMap_Cell_Line_info-3.csv")

dim(d)

# Keep only the two identifier columns needed to map CCLE names to DepMap IDs.
# The original subset an undefined object `c` (which resolves to base::c and
# fails); the cell line table loaded above is `cells`.
cell_ids <- cells[, c("DepMap_ID", "CCLE_Name")]

# Attach DepMap IDs to the proteomics table by CCLE name.
# The original merged an undefined object `p`; `protein` is the only
# proteomics table loaded in this script.
# NOTE(review): this requires a "CCLE Code" column in `protein` -- confirm the
# loaded file has it; if it lives in a separate sample-information table, load
# that table and merge against it instead.
cp <- merge(cell_ids, protein, by.x = "CCLE_Name", by.y = "CCLE Code")

# Inspect the proteomics table
protein

# Fraction of merged cell lines whose DepMap ID appears in the first column of
# the drug screen table (fread names an unnamed column V1).
# NOTE(review): 375 is hard-coded in the original; presumably the number of
# cell lines with proteomics data -- confirm, or derive it from the data.
n_proteomics_cell_lines <- 375
sum(cp$DepMap_ID %in% d$V1) / n_proteomics_cell_lines

colnames(d)

# ==== Compare to GDSC ====
# Counts the distinct cell lines in the GDSC1 and GDSC2 fitted dose-response
# files, then measures how much of the GDSC cell line list is also present in
# the DepMap cell line table (`cells`, loaded earlier in this file).

# library() stops immediately if data.table is missing; require() would only
# return FALSE and let the script fail later at the first fread() call.
library(data.table)

gdsc1 <- fread("/Users/ftaj/OneDrive - University of Toronto/Drug_Response/Data/GDSC/GDSC1_Fitted_Dose_Response.csv")
length(unique(gdsc1$CELL_LINE_NAME)) # 987 cell lines in GDSC1

gdsc2 <- fread("/Users/ftaj/OneDrive - University of Toronto/Drug_Response/Data/GDSC/GDSC2_Fitted_Dose_Response.csv")
length(unique(gdsc2$CELL_LINE_NAME)) # 809 cell lines in GDSC2

gdsc_cells <- fread("/Users/ftaj/OneDrive - University of Toronto/Drug_Response/Data/GDSC/GDSC_Cell_Lines_Details.csv")

# Find overlap with DepMap cell lines
# Each value is the fraction of rows in gdsc_cells with a match in `cells`;
# the mean of a logical vector equals sum(matches) / number of rows.
# Matching is done on upper-cased values so that case differences are ignored.
# NOTE(review): %in% treats NA as matching NA, so rows with a missing
# identifier on both sides would count as overlap -- verify this is intended.

# Overlap by cell line name
mean(toupper(gdsc_cells$`Sample Name`) %in% toupper(cells$stripped_cell_line_name))

# Overlap by COSMIC identifier
mean(toupper(gdsc_cells$`COSMIC identifier`) %in% toupper(cells$COSMICID))
# COSMIC IDs have ~98% overlap
# This implies that different names are being used