[c3b4f8]: / R / 04_Repurposable_Drugs_and_Their_Interpretation.R

Download this file

1424 lines (1142 with data), 69.9 kB

   1
   2
   3
   4
   5
   6
   7
   8
   9
  10
  11
  12
  13
  14
  15
  16
  17
  18
  19
  20
  21
  22
  23
  24
  25
  26
  27
  28
  29
  30
  31
  32
  33
  34
  35
  36
  37
  38
  39
  40
  41
  42
  43
  44
  45
  46
  47
  48
  49
  50
  51
  52
  53
  54
  55
  56
  57
  58
  59
  60
  61
  62
  63
  64
  65
  66
  67
  68
  69
  70
  71
  72
  73
  74
  75
  76
  77
  78
  79
  80
  81
  82
  83
  84
  85
  86
  87
  88
  89
  90
  91
  92
  93
  94
  95
  96
  97
  98
  99
 100
 101
 102
 103
 104
 105
 106
 107
 108
 109
 110
 111
 112
 113
 114
 115
 116
 117
 118
 119
 120
 121
 122
 123
 124
 125
 126
 127
 128
 129
 130
 131
 132
 133
 134
 135
 136
 137
 138
 139
 140
 141
 142
 143
 144
 145
 146
 147
 148
 149
 150
 151
 152
 153
 154
 155
 156
 157
 158
 159
 160
 161
 162
 163
 164
 165
 166
 167
 168
 169
 170
 171
 172
 173
 174
 175
 176
 177
 178
 179
 180
 181
 182
 183
 184
 185
 186
 187
 188
 189
 190
 191
 192
 193
 194
 195
 196
 197
 198
 199
 200
 201
 202
 203
 204
 205
 206
 207
 208
 209
 210
 211
 212
 213
 214
 215
 216
 217
 218
 219
 220
 221
 222
 223
 224
 225
 226
 227
 228
 229
 230
 231
 232
 233
 234
 235
 236
 237
 238
 239
 240
 241
 242
 243
 244
 245
 246
 247
 248
 249
 250
 251
 252
 253
 254
 255
 256
 257
 258
 259
 260
 261
 262
 263
 264
 265
 266
 267
 268
 269
 270
 271
 272
 273
 274
 275
 276
 277
 278
 279
 280
 281
 282
 283
 284
 285
 286
 287
 288
 289
 290
 291
 292
 293
 294
 295
 296
 297
 298
 299
 300
 301
 302
 303
 304
 305
 306
 307
 308
 309
 310
 311
 312
 313
 314
 315
 316
 317
 318
 319
 320
 321
 322
 323
 324
 325
 326
 327
 328
 329
 330
 331
 332
 333
 334
 335
 336
 337
 338
 339
 340
 341
 342
 343
 344
 345
 346
 347
 348
 349
 350
 351
 352
 353
 354
 355
 356
 357
 358
 359
 360
 361
 362
 363
 364
 365
 366
 367
 368
 369
 370
 371
 372
 373
 374
 375
 376
 377
 378
 379
 380
 381
 382
 383
 384
 385
 386
 387
 388
 389
 390
 391
 392
 393
 394
 395
 396
 397
 398
 399
 400
 401
 402
 403
 404
 405
 406
 407
 408
 409
 410
 411
 412
 413
 414
 415
 416
 417
 418
 419
 420
 421
 422
 423
 424
 425
 426
 427
 428
 429
 430
 431
 432
 433
 434
 435
 436
 437
 438
 439
 440
 441
 442
 443
 444
 445
 446
 447
 448
 449
 450
 451
 452
 453
 454
 455
 456
 457
 458
 459
 460
 461
 462
 463
 464
 465
 466
 467
 468
 469
 470
 471
 472
 473
 474
 475
 476
 477
 478
 479
 480
 481
 482
 483
 484
 485
 486
 487
 488
 489
 490
 491
 492
 493
 494
 495
 496
 497
 498
 499
 500
 501
 502
 503
 504
 505
 506
 507
 508
 509
 510
 511
 512
 513
 514
 515
 516
 517
 518
 519
 520
 521
 522
 523
 524
 525
 526
 527
 528
 529
 530
 531
 532
 533
 534
 535
 536
 537
 538
 539
 540
 541
 542
 543
 544
 545
 546
 547
 548
 549
 550
 551
 552
 553
 554
 555
 556
 557
 558
 559
 560
 561
 562
 563
 564
 565
 566
 567
 568
 569
 570
 571
 572
 573
 574
 575
 576
 577
 578
 579
 580
 581
 582
 583
 584
 585
 586
 587
 588
 589
 590
 591
 592
 593
 594
 595
 596
 597
 598
 599
 600
 601
 602
 603
 604
 605
 606
 607
 608
 609
 610
 611
 612
 613
 614
 615
 616
 617
 618
 619
 620
 621
 622
 623
 624
 625
 626
 627
 628
 629
 630
 631
 632
 633
 634
 635
 636
 637
 638
 639
 640
 641
 642
 643
 644
 645
 646
 647
 648
 649
 650
 651
 652
 653
 654
 655
 656
 657
 658
 659
 660
 661
 662
 663
 664
 665
 666
 667
 668
 669
 670
 671
 672
 673
 674
 675
 676
 677
 678
 679
 680
 681
 682
 683
 684
 685
 686
 687
 688
 689
 690
 691
 692
 693
 694
 695
 696
 697
 698
 699
 700
 701
 702
 703
 704
 705
 706
 707
 708
 709
 710
 711
 712
 713
 714
 715
 716
 717
 718
 719
 720
 721
 722
 723
 724
 725
 726
 727
 728
 729
 730
 731
 732
 733
 734
 735
 736
 737
 738
 739
 740
 741
 742
 743
 744
 745
 746
 747
 748
 749
 750
 751
 752
 753
 754
 755
 756
 757
 758
 759
 760
 761
 762
 763
 764
 765
 766
 767
 768
 769
 770
 771
 772
 773
 774
 775
 776
 777
 778
 779
 780
 781
 782
 783
 784
 785
 786
 787
 788
 789
 790
 791
 792
 793
 794
 795
 796
 797
 798
 799
 800
 801
 802
 803
 804
 805
 806
 807
 808
 809
 810
 811
 812
 813
 814
 815
 816
 817
 818
 819
 820
 821
 822
 823
 824
 825
 826
 827
 828
 829
 830
 831
 832
 833
 834
 835
 836
 837
 838
 839
 840
 841
 842
 843
 844
 845
 846
 847
 848
 849
 850
 851
 852
 853
 854
 855
 856
 857
 858
 859
 860
 861
 862
 863
 864
 865
 866
 867
 868
 869
 870
 871
 872
 873
 874
 875
 876
 877
 878
 879
 880
 881
 882
 883
 884
 885
 886
 887
 888
 889
 890
 891
 892
 893
 894
 895
 896
 897
 898
 899
 900
 901
 902
 903
 904
 905
 906
 907
 908
 909
 910
 911
 912
 913
 914
 915
 916
 917
 918
 919
 920
 921
 922
 923
 924
 925
 926
 927
 928
 929
 930
 931
 932
 933
 934
 935
 936
 937
 938
 939
 940
 941
 942
 943
 944
 945
 946
 947
 948
 949
 950
 951
 952
 953
 954
 955
 956
 957
 958
 959
 960
 961
 962
 963
 964
 965
 966
 967
 968
 969
 970
 971
 972
 973
 974
 975
 976
 977
 978
 979
 980
 981
 982
 983
 984
 985
 986
 987
 988
 989
 990
 991
 992
 993
 994
 995
 996
 997
 998
 999
1000
1001
1002
1003
1004
1005
1006
1007
1008
1009
1010
1011
1012
1013
1014
1015
1016
1017
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028
1029
1030
1031
1032
1033
1034
1035
1036
1037
1038
1039
1040
1041
1042
1043
1044
1045
1046
1047
1048
1049
1050
1051
1052
1053
1054
1055
1056
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067
1068
1069
1070
1071
1072
1073
1074
1075
1076
1077
1078
1079
1080
1081
1082
1083
1084
1085
1086
1087
1088
1089
1090
1091
1092
1093
1094
1095
1096
1097
1098
1099
1100
1101
1102
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115
1116
1117
1118
1119
1120
1121
1122
1123
1124
1125
1126
1127
1128
1129
1130
1131
1132
1133
1134
1135
1136
1137
1138
1139
1140
1141
1142
1143
1144
1145
1146
1147
1148
1149
1150
1151
1152
1153
1154
1155
1156
1157
1158
1159
1160
1161
1162
1163
1164
1165
1166
1167
1168
1169
1170
1171
1172
1173
1174
1175
1176
1177
1178
1179
1180
1181
1182
1183
1184
1185
1186
1187
1188
1189
1190
1191
1192
1193
1194
1195
1196
1197
1198
1199
1200
1201
1202
1203
1204
1205
1206
1207
1208
1209
1210
1211
1212
1213
1214
1215
1216
1217
1218
1219
1220
1221
1222
1223
1224
1225
1226
1227
1228
1229
1230
1231
1232
1233
1234
1235
1236
1237
1238
1239
1240
1241
1242
1243
1244
1245
1246
1247
1248
1249
1250
1251
1252
1253
1254
1255
1256
1257
1258
1259
1260
1261
1262
1263
1264
1265
1266
1267
1268
1269
1270
1271
1272
1273
1274
1275
1276
1277
1278
1279
1280
1281
1282
1283
1284
1285
1286
1287
1288
1289
1290
1291
1292
1293
1294
1295
1296
1297
1298
1299
1300
1301
1302
1303
1304
1305
1306
1307
1308
1309
1310
1311
1312
1313
1314
1315
1316
1317
1318
1319
1320
1321
1322
1323
1324
1325
1326
1327
1328
1329
1330
1331
1332
1333
1334
1335
1336
1337
1338
1339
1340
1341
1342
1343
1344
1345
1346
1347
1348
1349
1350
1351
1352
1353
1354
1355
1356
1357
1358
1359
1360
1361
1362
1363
1364
1365
1366
1367
1368
1369
1370
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384
1385
1386
1387
1388
1389
1390
1391
1392
1393
1394
1395
1396
1397
1398
1399
1400
1401
1402
1403
1404
1405
1406
1407
1408
1409
1410
1411
1412
1413
1414
1415
1416
1417
1418
1419
1420
1421
1422
# repurposable_drugs_interpretation.R
# library() stops immediately when data.table is not installed; require()
# would only return FALSE and let the script fail later at the first fread().
library(data.table)
# CTRP drug-response table. The plotting code further down reads its
# cpd_name, ccl_name and area_above_curve columns.
ctrp <- fread("Data/DRP_Training_Data/CTRP_AAC_SMILES.txt")
# gdsc2 <- fread("Data/DRP_Training_Data/GDSC2_AAC_SMILES.txt")
#' Load and stack every "final_interpretation" file of one CV experiment.
#'
#' The results directory is derived from `data_types` and `split_type`; every
#' file in it whose name contains "final_interpretation" is read with fread()
#' and the tables are concatenated row-wise.
#'
#' @param data_types Tag embedded (twice) in the results directory name,
#'   e.g. "gnndrug_mut_prot".
#' @param split_type Split tag embedded in the directory name,
#'   e.g. "CELL_LINE".
#' @return A data.table holding the rows of all matching files.
#' @details Stops with an explicit message when no file matches. The previous
#'   `1:length(x)` loop iterated over c(1, 0) in that case and failed inside
#'   fread() with an unrelated-looking error.
get_all_interpret <- function(data_types, split_type) {
  cv_path <- paste0("Data/CV_Results/HyperOpt_DRP_ResponseOnly_", data_types, "_HyperOpt_DRP_CTRP_ResponseOnly_EncoderTrain_Split_", split_type, "_NoBottleNeck_NoTCGAPretrain_MergeByLMF_WeightedRMSELoss_GNNDrugs_", data_types, "/")
  interpret_paths <- list.files(path = cv_path, pattern = ".*final_interpretation.*", full.names = TRUE)
  if (length(interpret_paths) == 0L) {
    stop("No interpretation files found in: ", cv_path, call. = FALSE)
  }
  rbindlist(lapply(interpret_paths, fread))
}
#' Rank the feature attributions of one (compound, cell line) pair.
#'
#' Keeps the rows of `integ_data` matching the requested compound and cell
#' line, drops the `V1` column, reshapes the remaining feature columns to long
#' format (columns `variable` and `value`) and sorts by descending `value`.
#'
#' @param integ_data data.table of attributions, e.g. from get_all_interpret().
#' @param compound_name Value to match against the `cpd_name` column.
#' @param cell_line_name Value to match against the `cell_name` column.
#' @return Long-format data.table, largest attribution first.
get_top_attrs <- function(integ_data, compound_name, cell_line_name) {
  # Columns that describe the sample/prediction rather than an input feature.
  meta_cols <- c("cpd_name", "cell_name", "target", "predicted", "RMSE_loss", "interpret_delta")
  pair_rows <- integ_data[cpd_name == compound_name & cell_name == cell_line_name]
  # V1 is presumably a row-index column written by the exporter - TODO confirm.
  pair_rows$V1 <- NULL
  long_attrs <- melt(pair_rows, id.vars = meta_cols)
  setorder(long_attrs, -value)
  long_attrs
}
# Lookup table: omic data-type code -> path of the per-cell-line matrix used
# by get_expression_in_cell_line().
# Built as a named list literal: pre-allocating vector("list", 8) and then
# assigning by name appended the entries after 8 unnamed NULL slots, giving a
# 16-element list.
path_dict <- list(
  MUT = "Data/DRP_Training_Data/DepMap_21Q2_Mutations_by_Cell.csv",
  CNV = "Data/DRP_Training_Data/DepMap_21Q2_CopyNumber.csv",
  EXP = "Data/DRP_Training_Data/DepMap_21Q2_Expression.csv",
  PROT = "Data/DRP_Training_Data/DepMap_20Q2_No_NA_ProteinQuant.csv",
  MIRNA = "Data/DRP_Training_Data/DepMap_2019_miRNA.csv",
  METAB = "Data/DRP_Training_Data/DepMap_2019_Metabolomics.csv",
  HIST = "Data/DRP_Training_Data/DepMap_2019_ChromatinProfiling.csv",
  RPPA = "Data/DRP_Training_Data/DepMap_2019_RPPA.csv"
)
#' Print one feature's value in a cell line and put it in context.
#'
#' Reads the matrix registered for `data_type` in `path_dict`, prints the
#' first two columns plus `variable_name` for the requested cell line, then:
#' * for every type except "MUT": prints the empirical percentile (ecdf) of
#'   the cell line's value among all cell lines and among cell lines sharing
#'   its `primary_disease`;
#' * for "MUT": prints the feature column for the cell lines sharing its
#'   `primary_disease`.
#'
#' @param data_type Key of `path_dict` ("MUT", "CNV", "EXP", ...).
#' @param cell_line_name Value to match in `stripped_cell_line_name`.
#' @param variable_name Exact column name of the feature to inspect.
#' @return Invisibly: the same-disease percentile (non-MUT) or the
#'   same-disease feature column as a data.table (MUT). Invisibly NULL, with a
#'   warning, when the cell line or the feature is absent from the matrix.
#' @details The file is re-read on every call, which is slow for the large
#'   matrices.
get_expression_in_cell_line <- function(data_type, cell_line_name, variable_name) {
  cur_path <- path_dict[[data_type]]
  if (is.null(cur_path)) {
    known_types <- names(path_dict)[nzchar(names(path_dict))]
    stop("Unknown data_type '", data_type, "'; expected one of: ",
         paste(known_types, collapse = ", "), call. = FALSE)
  }
  cur_depmap <- fread(cur_path)

  cell_rows <- cur_depmap[stripped_cell_line_name == cell_line_name]
  if (nrow(cell_rows) == 0L) {
    warning("Cell line '", cell_line_name, "' not found in ", data_type,
            " data", call. = FALSE)
    return(invisible(NULL))
  }

  cur_col_idx <- which(colnames(cur_depmap) == variable_name)
  if (length(cur_col_idx) == 0L) {
    # Without this guard ecdf() fails later with "'x' must have 1 or more
    # non-missing values", which hides the real cause (a misspelt feature).
    warning("Feature '", variable_name, "' not found in ", data_type,
            " data", call. = FALSE)
    return(invisible(NULL))
  }

  # Primary disease of the given cell line
  cur_cancer_type <- unique(cell_rows$primary_disease)
  print(cell_rows[, c(1:2, cur_col_idx), with = FALSE])

  # %in% rather than == so several annotated diseases do not recycle.
  same_type_values <- cur_depmap[primary_disease %in% cur_cancer_type, cur_col_idx, with = FALSE]

  if (data_type == "MUT") {
    print(same_type_values)
    return(invisible(same_type_values))
  }

  cur_value <- unlist(cell_rows[, cur_col_idx, with = FALSE])
  pct_all <- ecdf(unlist(cur_depmap[, cur_col_idx, with = FALSE]))(cur_value)
  print("Expression Percentile in All Cell Lines")
  print(pct_all)
  pct_same_type <- ecdf(unlist(same_type_values))(cur_value)
  print("Expression Percentile in Cell Lines from the Same Cancer Type")
  print(pct_same_type)
  invisible(pct_same_type)
}
# require(clusterProfiler)
# require(pathview)
# organism = "org.Hs.eg.db"
# # BiocManager::install(organism, character.only = TRUE)
# library(organism, character.only = TRUE)
# keytypes(get(organism))
# org.Hs.eg.db
# ggsave("Plots/Interpretation/IntegratedGradients//gnndrug_prot_697_Paclitaxel_GSE_bottom_5.pdf", p_bottom_prot,
# width = 20, units = "in")
# Find the best re-purposable drugs, perform IntegratedGradients Interpretation
# final_data <- fread("Data/repurposable_drugs_table.csv")
# Table of candidate re-purposable drugs; each section below filters it by cpd_name.
final_data <- fread("Data/repurposable_drugs_table.csv")
# Regorafenib ----
# Regorafenib (BAY 73-4506, Stivarga®) is an oral diphenylurea multi-kinase inhibitor
# that targets angiogenic (VEGFR1-3, TIE2), stromal (PDGFR-β, FGFR), and oncogenic
# receptor tyrosine kinases (KIT, RET, and RAF).
final_data[cpd_name == "Regorafenib"]
# Assigned for Colorectal Cancer, Good for Leukemia (AML)
# Currently in Phase I clinical trials (Study of Regorafenib in Patients With Advanced Myeloid Malignancies)
# https://clinicaltrials.gov/ct2/show/NCT03042689
# EOL1 cell line AAC: 0.992
# MV411 cell line AAC: 0.524
# Best data type for EOL1 is MUT_PROT
# Load all attribution tables of the MUT+PROT model (cell-line split).
# NOTE(review): the same table is loaded again under "### AAC vs EXP/CNV".
mut_prot_integ <- get_all_interpret(data_types = "gnndrug_mut_prot", split_type = "CELL_LINE")
# Quick sanity checks: table size, range of the target column, worst RMSE_loss.
dim(mut_prot_integ)
min(mut_prot_integ$target)
max(mut_prot_integ$target)
max(mut_prot_integ$RMSE_loss)
# mirna_metab_integ <- fread("Data/CV_Results/HyperOpt_DRP_ResponseOnly_gnndrug_mirna_metab_HyperOpt_DRP_CTRP_ResponseOnly_EncoderTrain_Split_CELL_LINE_NoBottleNeck_NoTCGAPretrain_MergeByLMF_WeightedRMSELoss_GNNDrugs_gnndrug_mirna_metab/inter")
# First ten column names, and the cell lines present in the table.
colnames(mut_prot_integ)[1:10]
unique(mut_prot_integ$cell_name)
## EOL1 ----
### AAC vs EXP/CNV ====
# Attributions of the MUT+PROT and CNV+EXP models for Regorafenib in EOL1.
mut_prot_integ <- get_all_interpret(data_types = "gnndrug_mut_prot", split_type = "CELL_LINE")
cnv_exp_integ <- get_all_interpret(data_types = "gnndrug_cnv_exp", split_type = "CELL_LINE")
regorafenib_eol1_attrs <- get_top_attrs(mut_prot_integ, compound_name = "Regorafenib",
                                        cell_line_name = "EOL1")
regorafenib_eol1_attrs_cnv_exp <- get_top_attrs(cnv_exp_integ, compound_name = "Regorafenib",
                                                cell_line_name = "EOL1")
# Attributions of features whose name contains a known Regorafenib target.
regorafenib_eol1_attrs[variable %like% "VEGF"] # very small value...
regorafenib_eol1_attrs_cnv_exp[variable %like% "VEGF"] # very small value...
# TIE2 is encoded by the gene TEK; the protein alias "TIE2" cannot match a
# gene-symbol feature name.
regorafenib_eol1_attrs[variable %like% "TEK"]
# Was "PDFGFR", a typo that could never match; PDGFR also covers PDGFRA/PDGFRB.
regorafenib_eol1_attrs[variable %like% "PDGFR"]
regorafenib_eol1_attrs[variable %like% "FGFR1"] # very small value...
regorafenib_eol1_attrs[variable %like% "FGFR2"] # very small value...
regorafenib_eol1_attrs[variable %like% "FGFR3"] # very small value...
regorafenib_eol1_attrs[variable %like% "FGFR4"] # very small value...
regorafenib_eol1_attrs[variable %like% "FGFRL1"] # very small value...
regorafenib_eol1_attrs[variable %like% "KIT"] # very small value...
regorafenib_eol1_attrs[variable %like% "RET"] # very small value...
regorafenib_eol1_attrs[variable %like% "RAF"] # very small value...
# NONE OF THE KINASES ARE ACTUALLY IN THE PROTEIN QUANTIFICATION DATA
# Same targets looked up by UniProt accession.
regorafenib_eol1_attrs[variable %like% "P17948"] # VEGFR1
regorafenib_eol1_attrs[variable %like% "P35968"] # VEGFR2
regorafenib_eol1_attrs[variable %like% "P35916"] # VEGFR3
regorafenib_eol1_attrs[variable %like% "Q02763"] # TIE2
regorafenib_eol1_attrs[variable %like% "P16234"] # PDGFRA
regorafenib_eol1_attrs[variable %like% "P10721"] # KIT
regorafenib_eol1_attrs[variable %like% "P07949"] # RET
# Full matrices; EXP and CNV are used by the AAC scatter plots further down.
MUT <- fread("Data/DRP_Training_Data/DepMap_21Q2_Mutations_by_Cell.csv")
CNV <- fread("Data/DRP_Training_Data/DepMap_21Q2_CopyNumber.csv")
EXP <- fread("Data/DRP_Training_Data/DepMap_21Q2_Expression.csv")
# Check EXP data for expression
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "VEGFA")
# 96th percentile in all, 96 in leukemia
# TEK is the gene symbol of TIE2 (the other EXP lookups use gene symbols).
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "TEK")
# PDGFRB encodes PDGFR-beta, the Regorafenib target; "PDFGFR" was a typo and
# is not a gene symbol.
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "PDGFRB")
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "FGFR1")
# 37th percentile in all, 54 in leukemia
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "FGFR2")
# 23rd percentile in all, 57 in leukemia
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "FGFR3")
# 67th percentile in all, 85 in leukemia
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "FGFR4")
# 7th percentile in all, 18 in leukemia
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "FGFRL1")
# 49th percentile in all, 78 in leukemia
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "KIT")
# 68th percentile in all, 54 in leukemia
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "RET")
# 92nd percentile in all, 86th in leukemia
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "RAF1")
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "BRAF")
# Check PROT data for expression
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "P17948") # VEGFR1
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "P35968") # VEGFR2
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "P35916") # VEGFR3
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "Q02763") # TIE2
# P16234 is PDGFRA. NOTE(review): PDGFR-beta, the stated target, is P09619.
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "P16234") # PDGFRA
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "P10721") # KIT
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "P07949") # RET
# Check MUT data for mutations
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "VEGFA")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "TEK")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "PDGFRB")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "FGFR1")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "FGFR2")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "FGFR3")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "FGFR4")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "FGFRL1")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "KIT")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "RET")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "RAF1")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "BRAF")
# Expression of BTK is in the 46th percentile (among all cell lines)
# NOTE(review): the call below queries the CNV matrix, not expression - confirm
# which data type the percentile above refers to.
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "EFM192A", variable_name = "BTK")

# Scatter plots of one gene's expression / copy number against drug response
# (AAC), with one cell line highlighted.
# NOTE(review): this block looks carried over from an ibrutinib/BTK analysis;
# drug_name is "Regorafenib" while the gene and output file name are still BTK.
library(ggplot2)
library(patchwork)
drug_name <- "Regorafenib"
gene_name <- "BTK"
cell_line_name <- "EFM192A"

# AAC of every cell line the drug was tested in
cur_cell_aac <- unique(ctrp[cpd_name == drug_name][, c("ccl_name", "area_above_curve")])

# Expression of the gene in those cell lines, joined with AAC
cur_exp_subset <- EXP[stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)][, c("stripped_cell_line_name",
                                                                                     gene_name), with = FALSE]
exp_aac_subset <- merge(cur_exp_subset, cur_cell_aac, by.x = "stripped_cell_line_name", by.y = "ccl_name")
colnames(exp_aac_subset)[2] <- "value"

# Copy number of the gene in those cell lines, joined with AAC
cur_cnv_subset <- CNV[stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)][, c("stripped_cell_line_name",
                                                                                     gene_name), with = FALSE]
cnv_aac_subset <- merge(cur_cnv_subset, cur_cell_aac, by.x = "stripped_cell_line_name", by.y = "ccl_name")
colnames(cnv_aac_subset)[2] <- "value"

# Rows of the highlighted cell line (looked up once per plot).
exp_highlight <- exp_aac_subset[stripped_cell_line_name == cell_line_name]
cnv_highlight <- cnv_aac_subset[stripped_cell_line_name == cell_line_name]

# The text/curve anchor positions (0.6, 2 / 2.3 / 1.35 / 1.4) are hand-tuned
# for this particular figure.
p_exp_aac <- ggplot(data = exp_aac_subset) +
  geom_point(aes(x = area_above_curve, y = value)) +
  xlab("Area Above Curve") +
  ylab("Gene Expression") +
  annotate(geom = "point",
           x = exp_highlight$area_above_curve,
           y = exp_highlight$value,
           colour = "orange", size = 1) +
  annotate(
    geom = "curve",
    x = 0.6, y = 2,
    xend = exp_highlight$area_above_curve - 0.005,
    yend = exp_highlight$value + 0.2,
    curvature = -.3, arrow = arrow(length = unit(2, "mm"))
  ) +
  # Label follows cell_line_name instead of a hard-coded string; the trailing
  # empty argument after `size = 6` was removed (an empty argument in `...`
  # is an error when the arguments are collected).
  annotate(geom = "text", x = 0.6, y = 2.3, label = cell_line_name, size = 6) +
  theme(text = element_text(size = 14, face = "bold"))

p_cnv_aac <- ggplot(data = cnv_aac_subset) +
  geom_point(aes(x = area_above_curve, y = value)) +
  xlab("Area Above Curve") +
  ylab("Copy Number") +
  annotate(geom = "point",
           x = cnv_highlight$area_above_curve,
           y = cnv_highlight$value,
           colour = "orange", size = 1) +
  annotate(
    geom = "curve",
    x = 0.6, y = 1.35,
    xend = cnv_highlight$area_above_curve - 0.005,
    yend = cnv_highlight$value + 0.05,
    curvature = -.3, arrow = arrow(length = unit(2, "mm"))
  ) +
  annotate(geom = "text", x = 0.6, y = 1.4, label = cell_line_name, size = 6) +
  theme(text = element_text(size = 14, face = "bold"))

p_combined <- p_exp_aac + p_cnv_aac
p_combined
# Pass the plot explicitly: relying on last_plot() saves whatever was drawn
# last, which is not guaranteed to be this figure when the script is sourced.
dir.create("Plots/Interpretation", recursive = TRUE, showWarnings = FALSE)
ggsave("Plots/Interpretation/BTK_EXP_CNV_vs_AAC.pdf", plot = p_combined)
### Initial analysis ====
regorafenib_eol1_attrs <- get_top_attrs(mut_prot_integ, compound_name = "Regorafenib",
                                        cell_line_name = "EOL1")
regorafenib_eol1_attrs[variable %like% "VEGF"] # very small value...
# TIE2 is encoded by TEK; "PDFGFR" was a typo for PDGFR. Neither original
# pattern could match a feature name.
regorafenib_eol1_attrs[variable %like% "TEK"]
regorafenib_eol1_attrs[variable %like% "PDGFR"]
regorafenib_eol1_attrs[variable %like% "FGFR"] # very small value...
regorafenib_eol1_attrs[variable %like% "KIT"] # very small value...
regorafenib_eol1_attrs[variable %like% "RET"] # very small value...
regorafenib_eol1_attrs[variable %like% "RAF"] # very small value...
# Top positive attributed variables:
head(regorafenib_eol1_attrs)
# Top 3 positive attributions:
# (all in mutational data)
depmap_mut <- fread("Data/DepMap/21Q2/CCLE_mutations.csv")
# n_TAS2R20
# Paper: The Role of Bitter Taste Receptors in Cancer: A Systematic Review (2021)
# the agonist-related activation and overexpression of TAS2Rs per se induced various anti-cancer effects, leading to the
# hypothesis that TAS2Rs impact carcinogenesis and could serve as a target in cancer therapy by interfering with typical
# capabilities of cancerous cells, known as the hallmarks of cancer
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "TAS2R20")
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "TAS2R20")
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "EOL1", variable_name = "TAS2R20")
# `temp` was previously read here before ever being assigned; fetch the
# TAS2R20 mutation column explicitly (same pattern as for OSBP2 below).
temp <- get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "TAS2R20")
sum(temp$TAS2R20) # 1 only
# ACH-000198 cell line is EOL1
depmap_mut[DepMap_ID == "ACH-000198" & Hugo_Symbol %like% "TAS2R20"]
# n_OSBP2
# Paper: hHLM/OSBP2 is expressed in chronic myeloid leukemia (2003)
# Oxysterols are oxygenated derivatives of cholesterol that have been shown to influence a wide variety of cellular
# processes including sterol metabolism, lipid trafficking, apoptosis and more recently, cell differentiation.
# The oxysterol binding proteins (OSBPs) comprise a large conserved family of proteins in eukaryotes with high affinity
# for oxysterols, but their precise function has not been defined yet. One member of this family in humans, HLM/OSBP2
# protein, has recently been reported as a potential marker for solid tumor dissemination and worse prognosis in these cases.
# In this study we focused on the evaluation of HLM/OSBP2 expression in malignant cell lines from different origins
# (blood and solid tumors) and we also evaluated its expression in chronic myeloid leukemia patients, correlating the
# molecular findings with clinical outcome. Our results showed that HLM/OSBP2 was expressed in 80% of the analysed CML
# patients, suggesting that this protein could constitute a helpful tool for disease monitoring and reinforces recent
# findings that HLM/OSBP2 protein could be involved in the maintenance of the undifferentiated state necessary for
# leukemogenesis
temp <- get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "OSBP2")
sum(temp$OSBP2) # only 1
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "OSBP2")
get_expression_in_cell_line(data_type = "PROT", cell_line_name = "EOL1", variable_name = "OSBP2")
depmap_mut[DepMap_ID == "ACH-000198" & Hugo_Symbol %like% "OSBP2"]
# n_SLC1A6
# Papers: Investigating the microRNA-mRNA regulatory network in acute myeloid leukemia (2017)
# SLC1A3 contributes to L-asparaginase resistance in solid tumors (2019)
# https://www.genecards.org/cgi-bin/carddisp.pl?gene=SLC1A6
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "SLC1A6")
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EOL1", variable_name = "SLC1A3")
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "SLC1A6")
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EOL1", variable_name = "SLC1A3")
# Top 2 negative attributions:
# `cur_integ_melt` only exists inside get_top_attrs(); the table sorted by
# descending attribution at script level is regorafenib_eol1_attrs.
tail(regorafenib_eol1_attrs)
## Regorafenib FlexTable ====
# Builds a Word table of the Regorafenib results on leukemia cell lines.
library(flextable)
library(magrittr)
library(scales)
library(officer)

regorafenib_final_data <- final_data[cpd_name == "Regorafenib" &
                                       primary_disease == "Leukemia"]
# Keep the first row of each cell line (depends on the row order of final_data).
regorafenib_final_data <- regorafenib_final_data[, head(.SD, 1), by = "cell_name"]

# Select the reported columns explicitly instead of NULL-ing every other
# column one by one: the rename below needs exactly these six columns, and
# any extra column in final_data would otherwise break it.
table_cols <- c(
  "cell_name", "lineage_subtype",
  "data_types", "target", "pred", "RMSE")
regorafenib_final_data <- regorafenib_final_data[, table_cols, with = FALSE]
regorafenib_final_data[, data_types := gsub("_", "+", data_types)]
# ibrutinib_final_data$lineage_subtype <- NULL
# "Data Type(s)", "Split Method", "True AAC", "Prediction", "RMSE Loss"
# NOTE(review): the RMSE column is labelled "MAE Loss" - confirm which metric
# the column actually holds.
setnames(regorafenib_final_data, table_cols, c(
  "Cell Line", "Lineage Subtype",
  "Data Type(s)", "True AAC", "Prediction", "MAE Loss"))
regorafenib_final_data <- unique(regorafenib_final_data)
# regorafenib_final_data$Lineage <- tools::toTitleCase(gsub("_", " ", regorafenib_final_data$Lineage))
regorafenib_final_data$`Lineage Subtype` <- tools::toTitleCase(gsub("_", " ", regorafenib_final_data$`Lineage Subtype`))

ft <- flextable(regorafenib_final_data)
final_ft <- ft %>%
  # merge_v(j = c("Cancer", "Prescribed Drug(s)", "Cell Line Primary Disease")) %>%
  merge_v() %>%
  border_inner(border = fp_border(color = "gray", width = 1)) %>%
  border_outer(part = "all", border = fp_border(color = "gray", width = 2)) %>%
  align(align = "center", part = "all")
final_ft <- autofit(final_ft)

# recursive: also create "Plots/" if missing; showWarnings: stay quiet when
# the directory already exists on a re-run.
dir.create("Plots/Drug_Tables/", recursive = TRUE, showWarnings = FALSE)
read_docx() %>%
  body_add_flextable(value = final_ft) %>%
  print(target = "Plots/Drug_Tables/Regorafenib_Table.docx")
# Axitinib ----
# Candidate rows for Axitinib in the re-purposable drugs table.
final_data[cpd_name == "Axitinib"]
# Assigned for Kidney Cancer, Good for Blood (AML and CML) and Lung (NSCLC)
# "Axitinib effectively inhibits BCR-ABL1(T315I) with a distinct binding conformation" (March 2015) for Leukemia
# "Axitinib for the treatment of advanced non-small-cell lung cancer" (June 2013) for NSCLC Lung Cancer
# ==== Axitinib and EOL1 (AML)
# Attributions of the PROT+MIRNA model, restricted to Axitinib in EOL1.
prot_mirna_integ <- get_all_interpret(data_types = "gnndrug_prot_mirna", split_type = "CELL_LINE")
axitinib_eol1_attrs <- get_top_attrs(prot_mirna_integ, compound_name = "Axitinib", cell_line_name = "EOL1")
# Top positive attributed variables (the table is sorted by descending attribution):
head(axitinib_eol1_attrs)
# hsv2-miR-H2 (viral miRNA)
get_expression_in_cell_line(data_type = "MIRNA", cell_line_name = "EOL1", variable_name = "hsv2-miR-H2")
# Expression of hsv2-miR-H2 is in the ~99th percentile, high compared to other cell lines
# This cell line may be infected with some virus (e.g. herpes), or this miRNA may have oncogenic properties in AML (EOL1)
# It has been shown to have oncogenic activity in prostate cancer
# Paper: Increased Expression of Herpes Virus-Encoded hsv1-miR-H18 and hsv2-miR-H9-5p in Cancer-Containing Prostate Tissue
# Compared to That in Benign Prostate Hyperplasia Tissue (2016)
# https://www.cancer.org/cancer/cancer-causes/infectious-agents/infections-that-can-lead-to-cancer/viruses.html
# miR-1247
# Paper: Epigenetically altered miR-1247 functions as a tumor suppressor in pancreatic cancer
# Top negative attributed variables (end of the sorted table)
tail(axitinib_eol1_attrs)
# hsa-miR-124
get_expression_in_cell_line(data_type = "MIRNA", cell_line_name = "EOL1", variable_name = "hsa-miR-124")
# 99th percentile, is a tumor suppressor!
# Paper: Methylation-mediated silencing and tumour suppressive function of hsa-miR-124 in cervical cancer (2010)
# Belinostat ----
# Candidate rows for Belinostat in the re-purposable drugs table.
final_data[cpd_name == "Belinostat"]
# Assigned for Lymphoma, Good for Neuroblastoma (pediatric) and Leukemia (AML)
# Selective Inhibition of HDAC Class I Sensitizes Leukemia and Neuroblastoma Cells to Anticancer Drugs (2021)
# Attributions of the EXP+RPPA model, restricted to Belinostat in IMR32.
exp_rppa_integ <- get_all_interpret(data_types = "gnndrug_exp_rppa", split_type = "CELL_LINE")
belinostat_imr32_attrs <- get_top_attrs(exp_rppa_integ, compound_name = "Belinostat", cell_line_name = "IMR32")
# Top positive attributed variables (the table is sorted by descending attribution):
head(belinostat_imr32_attrs)
# YAP_pS127, YAP Protein (only when phosphorylated at serine 127)
# Potential target for cancer treatment: https://www.genecards.org/cgi-bin/carddisp.pl?gene=YAP1
# Paper: Histone Acetylation-Mediated Regulation of the Hippo Pathway (2013)
# YAP is involved in the Hippo pathway, which is related to angiogenesis
# Paper: YAP and the Hippo pathway in pediatric cancer (2017)
# IMR32 was sampled from a pediatric tumor sample
# HER3 / ERBB
# Paper: Signaling of ERBB Receptor Tyrosine Kinases Promotes Neuroblastoma Growth in vitro and in vivo
# PRKCD (PKC delta)
# Interacts with ERBB2
# Top negative attributed variables (end of the sorted table)
tail(belinostat_imr32_attrs)
# Raptor (RPTOR), involved in the mTOR pathway
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "IMR32", variable_name = "Raptor")
# 100th percentile, highly expressed, so is it a tumor suppressor?
# Paper: mTOR Interacts with Raptor to Form a Nutrient-Sensitive Complex that Signals to the Cell Growth Machinery (2002)
# "...The association of raptor with mTOR also negatively regulates the mTOR kinase activity..."
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "IMR32", variable_name = "GATA3")
# 96th percentile, highly expressed, is it a tumor suppressor?
# Paper: GATA-3 expression in breast cancer has a strong association with estrogen receptor but lacks independent prognostic value (2008)
# ...In univariate analysis, the presence of GATA-3 is a marker of good prognosis and predicted for superior breast
# cancer-specific survival, relapse-free survival, and overall survival...
# Paper: GATA3 is a reliable marker for neuroblastoma in limited samples, including FNA Cell Blocks, core biopsies,
# and touch imprints (2017)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "IMR32", variable_name = "Bak_Caution")
# 94th percentile, highly expressed
# https://www.genecards.org/cgi-bin/carddisp.pl?gene=BAK1
# Is pro-apoptotic, and also interacts with p53
# Bosutinib ----
# Candidate rows for Bosutinib in the re-purposable drugs table.
final_data[cpd_name == "Bosutinib"]
# Assigned for Leukemia, Good for Lung cancer (NSCLC)
# Bosutinib inhibits migration and invasion via ACK1 in KRAS mutant non-small cell lung cancer (2014)
# MUT_CNV has lowest RMSE
# Attributions of the MUT+CNV model, restricted to Bosutinib in PC14.
mut_cnv_integ <- get_all_interpret(data_types = "gnndrug_mut_cnv", split_type = "CELL_LINE")
bosutinib_pc14_attrs <- get_top_attrs(mut_cnv_integ, compound_name = "Bosutinib", cell_line_name = "PC14")
# Top positive attributed variables (the table is sorted by descending attribution):
head(bosutinib_pc14_attrs)
# OR2T34
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "PC14", variable_name = "OR2T34")
# ~ 100th percentile, is it oncogenic?
# Genetic Features of Lung Adenocarcinoma with Ground-Glass Opacity: What Causes the Invasiveness of Lung Adenocarcinoma? (2020)
# ... Among the mutant genes commonly expressed in GGO and non-GGO LUAD, the top 10 most significant genes were OR2T34...
# ...Among them, OR2T34 was the most frequently-appearing gene...
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "PC14", variable_name = "FAM25E")
# ~ 94th percentile, is it oncogenic?
# Top negative attributed variables (end of the sorted table)
tail(bosutinib_pc14_attrs)
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "PC14", variable_name = "KDM5A")
# ~ 100th percentile, is it a tumor suppressor?
# https://www.genecards.org/cgi-bin/carddisp.pl?gene=KDM5A
# Implicated in the transcriptional regulation of Hox genes and cytokines, may play a role in tumor progression
# "Seems to act as a transcriptional corepressor for some genes such as MT1F and to favor the proliferation of cancer cells"
# Cabozantinib ----
# Candidate rows for Cabozantinib in the re-purposable drugs table.
final_data[cpd_name == "Cabozantinib"]
# Assigned for Thyroid Cancer, Good for Leukemia (AML)
# Cabozantinib is selectively cytotoxic in acute myeloid leukemia cells with FLT3-internal tandem duplication (FLT3-ITD) (2016)
# Attributions of the CNV+RPPA model, restricted to Cabozantinib in MOLM13.
cnv_rppa_integ <- get_all_interpret(data_types = "gnndrug_cnv_rppa", split_type = "CELL_LINE")
cabozantinib_molm13_attrs <- get_top_attrs(cnv_rppa_integ, compound_name = "Cabozantinib", cell_line_name = "MOLM13")
# Top positive attributed variables (the table is sorted by descending attribution):
head(cabozantinib_molm13_attrs)
# n_4E-BP1_pT37_T46 antibody -> EIF4EBP1
# Paper: 4E-BP1, a multifactor regulated multifunctional protein (2016)
# ...It is likely that ERK acts directly on 4E-BP1 and indirectly via TSC2/mTOR following ionizing radiation (IR)
# and stimulates protein synthesis via ATM-dependent ERK phosphorylation...
# Paper: Eukaryotic initiation factor 4E-binding protein 1 (4E-BP1): a master regulator of mRNA translation involved in tumorigenesis (2016)
# Paper: Bcr-Abl Kinase Modulates the Translation Regulators Ribosomal Protein S6 and 4E-BP1 in Chronic Myelogenous Leukemia Cells via the Mammalian Target
# of Rapamycin
# BCL-2, eIF4E, FoxM1
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "4E-BP1_pT37_T46")
# 99th percentile, is it an oncogene?
# Top negative attributed variables (end of the sorted table)
tail(cabozantinib_molm13_attrs)
# B-Raf
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# 2nd percentile, underexpressed...
# Perhaps this underexpression is strange to the model...
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "eEF2K")
# 6th percentile, underexpressed...
# Perhaps this underexpression is strange to the model...
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "PCNA_Caution")
# 99th percentile, overexpressed...
# Crizotinib ----
# Inspect every prediction row recorded for Crizotinib
final_data[cpd_name == "Crizotinib"]
# Assigned for Lung Cancer, Good for Leukemia (CML)
# Paper: Crizotinib acts as ABL1 inhibitor combining ATP-binding with allosteric inhibition and is active against
# native BCR-ABL1 and its resistance and compound mutants BCR-ABL1T315I and BCR-ABL1T315I-E255K (2021)
# Load the CNV+EXP model's interpretation results (cell-line split) and pull the attributions
# for the Crizotinib + JK1 pair.
cnv_exp_integ <- get_all_interpret(data_types = "gnndrug_cnv_exp", split_type = "CELL_LINE")
crizotinib_jk1_attrs <- get_top_attrs(cnv_exp_integ, compound_name = "Crizotinib", cell_line_name = "JK1")
# Top positive attributed variables:
head(crizotinib_jk1_attrs)
# ACSBG1: compare its expression and its copy number in JK1
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "JK1", variable_name = "ACSBG1")
# ~ 100th percentile, overexpressed
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "JK1", variable_name = "ACSBG1")
# ~ 37th percentile
# Weak link: Paper: Global Identification of EVI1 Target Genes in Acute Myeloid Leukemia
# https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3694976/
# Top negative attributed variables:
tail(crizotinib_jk1_attrs)
# HBM: compare its expression and its copy number in JK1
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "JK1", variable_name = "HBM")
# 100th percentile, overexpressed, the model doesn't expect this for CML?
# https://genevisible.com/cancers/HS/Gene%20Symbol/HBM
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "JK1", variable_name = "HBM")
# ~ 47th percentile
# OR51V1
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "JK1", variable_name = "OR51V1")
# 100th percentile
# Olfactory receptor
# Paper: Unique Polymorphisms at BCL11A, HBS1L-MYB and HBB Loci Associated with HbF in Kuwaiti Patients with Sickle Cell Disease (2021)
# Dabrafenib ----
# Inspect every prediction row recorded for Dabrafenib
final_data[cpd_name == "Dabrafenib"]
# Check whether the related BRAF inhibitor vemurafenib is present in either drug-response dataset
ctrp[cpd_name %like% "vemurafenib"] # not in the dataset
gdsc2[cpd_name %like% "vemurafenib"] # not in the dataset
# Assigned for Lung Cancer, Good for breast cancer (BRAF V600E) and Skin Cancer (melanoma)
# Dabrafenib binds and inhibits mutated BRAF
# It has been approved for melanoma since 2013, but use in breast cancer is novel
# The model predicts the effect on DU4475 well, but doesn't consider BRAF in its decisions...
# DU4475 cell line page: https://web.expasy.org/cellosaurus/CVCL_1183
# Paper: Five-Year Outcomes with Dabrafenib plus Trametinib in Metastatic Melanoma (2019)
# Load the MUT+EXP model's interpretation results (cell-line split) and pull the attributions
# for the Dabrafenib + DU4475 pair.
mut_exp_integ <- get_all_interpret(data_types = "gnndrug_mut_exp", split_type = "CELL_LINE")
dabrafenib_du4475_attrs <- get_top_attrs(mut_exp_integ, compound_name = "Dabrafenib", cell_line_name = "DU4475")
# Top positive attributed variables:
head(dabrafenib_du4475_attrs)
# OR4F21
# Paper: Olfactory Receptors as Biomarkers in Human Breast Carcinoma Tissues (2018)
# ...Furthermore, we observed the expression of ORs in brain tissue and brain tumor tissue (Figure 5C).
# It was striking that there were only three ORs (OR4F21, OR1F1, and OR2B6) expressed in cancerous tissues and cell lines,
# whereas several ORs were expressed in three different healthy brain tissues...
# Paper: Genome-wide copy number analysis of circulating tumor cells in breast cancer patients with liver metastasis (2020)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "DU4475", variable_name = "OR4F21")
# ~ 99th percentile, overexpressed
# DNAJC5G
# Paper: Identification of novel methylation markers in HPV-associated oropharyngeal cancer: genome-wide discovery,
# tissue verification and validation testing in ctDNA (2020)
# ...Further study on ctDNA using Q-MSP in HPV-associated OPC showed that three genes (CALML5, DNAJC5G, and LY6D) had a
# high predictive ability as emerging biomarkers for a validation set, each capable of discriminating between the plasma
# of the patients from healthy individuals...
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "DU4475", variable_name = "DNAJC5G")
# ~100th percentile, overexpressed
# BRAF is the drug's target: check its mutation status and how much attribution the model gives it.
# %like% is a pattern match, so any variable whose name contains "BRAF" is returned.
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "DU4475", variable_name = "BRAF")
dabrafenib_du4475_attrs[variable %like% "BRAF"] # Very low value: -2.79308e-05
# Top negative attributed variables:
tail(dabrafenib_du4475_attrs)
# C16orf82 (TNT protein)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "DU4475", variable_name = "C16orf82")
# 100th percentile, highly expressed, does it act as a tumor suppressor?
# MRGPRG
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "DU4475", variable_name = "MRGPRG")
# 100th percentile, highly expressed, does it act as a tumor suppressor?
# ADAD2
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "DU4475", variable_name = "ADAD2")
# 100th percentile, highly expressed, does it act as a tumor suppressor?
# CA6
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "DU4475", variable_name = "CA6")
# 100th percentile, highly expressed, does it act as a tumor suppressor?
# Ibrutinib ----
# Inspect every prediction row recorded for Ibrutinib
final_data[cpd_name == "Ibrutinib"]
# Assigned for Leukemia, Good for Breast Cancer, Lung Cancer (NSCLC), Colon/Colorectal Cancer, Gastric Cancer,
# and Lymphoma
## TE617T (Sarcoma, soft tissue, rhabdomyosarcoma) ====
# Paper: Ibrutinib inhibition of ERBB4 reduces cell growth in a WNT5A-dependent manner (2018), mentions Ewing's sarcoma and
# triple negative breast cancer
# Load the CNV+EXP model's interpretation results (cell-line split) and pull the attributions
# for the Ibrutinib + TE617T pair.
cnv_exp_integ <- get_all_interpret(data_types = "gnndrug_cnv_exp", split_type = "CELL_LINE")
ibrutinib_te617t_attrs <- get_top_attrs(cnv_exp_integ, compound_name = "Ibrutinib", cell_line_name = "TE617T")
# BTK is Ibrutinib's primary target; %like% matches any variable whose name contains "BTK"
ibrutinib_te617t_attrs[variable %like% "BTK"] # very small value...
## EFM192A, AU565, SKBR3, ZR7530 and HCC1419 Breast Cancer cell lines ----
# Paper: Ibrutinib treatment inhibits breast cancer progression and metastasis by inducing conversion of myeloid-derived
# suppressor cells to dendritic cells (2020)
# Same CNV+EXP interpretation results as above (reloaded so this subsection can be run on its own)
cnv_exp_integ <- get_all_interpret(data_types = "gnndrug_cnv_exp", split_type = "CELL_LINE")
ibrutinib_EFM192A_attrs <- get_top_attrs(cnv_exp_integ, compound_name = "Ibrutinib", cell_line_name = "EFM192A")
# Attribution given to Ibrutinib's known targets (pattern match on the variable name)
ibrutinib_EFM192A_attrs[variable %like% "BTK"] # very small value...
ibrutinib_EFM192A_attrs[variable %like% "ITK"] # very small value...
ibrutinib_EFM192A_attrs[variable %like% "EGFR"] # very small value...
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "BTK")
# Expression of BTK is in the 46th percentile among all cell lines, and 47th in breast cancer
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "EFM192A", variable_name = "BTK")
# CNV of BTK is in the 93rd percentile (among all cell lines)
# Top positive attributed variables:
head(ibrutinib_EFM192A_attrs, n = 10)
# Distribution of the attribution values (min, quartiles, max)
quantile(ibrutinib_EFM192A_attrs$value)
# MAGEA8, CRLF3, FAM91A1, DERL1, THAP3, PSMA1, TOB1-AS1, FAM90A26, CDC27, DNTTIP1
# Total positive vs. total negative attribution mass
sum(ibrutinib_EFM192A_attrs[value >= 0]$value)
sum(ibrutinib_EFM192A_attrs[value < 0]$value)
# MAGEA8
# Paper: Prognostic roles of MAGE family members in breast cancer based on KM-Plotter Data (2019)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "MAGEA8")
# 98th percentile in all cell lines
# 97th percentile in breast cancer cell lines
# CRLF3
# Paper: Comprehensive molecular biomarker identification in breast cancer brain metastases (2017)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "CRLF3")
# 2nd percentile in all cell lines
# 18th percentile in breast cancer cell lines
# Seems to be downregulated
# DERL1
# Paper: Derlin-1 functions as a growth promoter in breast cancer (2020)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "DERL1")
# 100th percentile in all cell lines
# FAM91A1
# Paper: Chromatin interactome mapping at 139 independent breast cancer risk signals (2020)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "FAM91A1")
# 99.9th percentile in all cell lines
# 100th percentile in breast cancer
# Top negative attributed variables:
tail(ibrutinib_EFM192A_attrs)
# ODF4
# Paper: Upregulation of RHOXF2 and ODF4 Expression in Breast Cancer Tissues (2015)
# ... The expression of both genes was correlated with HER2/neu overexpression...
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "ODF4")
# 100th percentile in all cell lines
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "EFM192A", variable_name = "ODF4")
# 16th percentile
# ACTA1
# Paper: The remodelling of actin composition as a hallmark of cancer (2021)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "ACTA1")
# 100th percentile
# LYZL2
# Paper: LYZL2 expression associates with survival in triple negative breast cancer (preprint)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "LYZL2")
# 100th percentile
### EFM192A FlexTable ----
# Build a Word table of the Ibrutinib predictions on breast cancer cell lines
# (one row per cell line) and write it to Plots/Drug_Tables/.
# library() is used so that a missing package stops the script here instead of
# surfacing later as an unrelated "could not find function" error.
library(flextable)
library(magrittr)
library(scales)
library(officer)

ibrutinib_final_data <- final_data[cpd_name == "Ibrutinib" &
                                     primary_disease == "Breast Cancer"]
cnv_exp_sub <- ibrutinib_final_data[data_types == "CNV_EXP"]

# Keep only the first row of every cell line.
# NOTE(review): this assumes final_data is already ordered so that the first
# row per cell line is the one worth reporting (e.g. lowest loss) -- confirm.
ibrutinib_final_data <- ibrutinib_final_data[, head(.SD, 1), by = "cell_name"]

# Select the reported columns by name (rather than deleting the others one by
# one) so that the display names below can never be misaligned.
table_cols <- c("cell_name", "lineage_subtype", "data_types", "target", "pred", "RMSE")
ibrutinib_final_data <- ibrutinib_final_data[, table_cols, with = FALSE]
ibrutinib_final_data[, data_types := gsub("_", "+", data_types)]

# "Data Type(s)", "Split Method", "True AAC", "Prediction", "RMSE Loss"
setnames(
  ibrutinib_final_data,
  old = table_cols,
  new = c("Cell Line", "Lineage Subtype", "Data Type(s)", "True AAC", "Prediction", "MAE Loss")
)
ibrutinib_final_data <- unique(ibrutinib_final_data)
ibrutinib_final_data$`Lineage Subtype` <-
  tools::toTitleCase(gsub("_", " ", ibrutinib_final_data$`Lineage Subtype`))

ft <- flextable(ibrutinib_final_data)
final_ft <- ft %>%
  # merge_v(j = c("Cancer", "Prescribed Drug(s)", "Cell Line Primary Disease")) %>%
  merge_v() %>%
  border_inner(border = fp_border(color = "gray", width = 1)) %>%
  border_outer(part = "all", border = fp_border(color = "gray", width = 2)) %>%
  align(align = "center", part = "all")
final_ft <- autofit(final_ft)

# recursive: also create "Plots/" when it is missing;
# showWarnings = FALSE: re-running the script must not warn about an existing directory
dir.create("Plots/Drug_Tables/", recursive = TRUE, showWarnings = FALSE)
read_docx() %>%
  body_add_flextable(value = final_ft) %>%
  print(target = "Plots/Drug_Tables/Ibrutinib_Table.docx")
### EFM192A GSEA ----
# Load the enrichment packages and the human annotation database.
# library() (not require()) so that a missing package is a hard error here.
library(clusterProfiler)
library(pathview)
# Name of the annotation package; kept as a string so it can be passed to
# library(character.only = TRUE) and to get() when an OrgDb object is needed.
organism <- "org.Hs.eg.db"
# BiocManager::install(organism, character.only = TRUE)
library(organism, character.only = TRUE)
# Show the identifier types the database supports (e.g. for keyType arguments)
keytypes(get(organism))
org.Hs.eg.db
# Get the most extreme attributions of one omic data type, separately for the
# positive and the negative side (top 5% / bottom 5% by default).
#
# Arguments:
#   all_attrs   data.table with a `variable` column (feature names) and a
#               numeric `value` column (attribution of that feature).
#   omic_grep   pattern passed to %like% to pick the features of one omic
#               data type, e.g. "exp_.+".
#   upper_prob  quantile of the non-negative attributions that a feature must
#               exceed to be returned as "top" (default 0.95).
#   lower_prob  quantile of the negative attributions that a feature must fall
#               below to be returned as "bottom" (default 0.05).
#
# Returns a list with two data.tables:
#   attr_top_5  rows whose value is strictly above the `upper_prob` quantile of
#               the non-negative attributions;
#   attr_bot_5  rows whose value is strictly below the `lower_prob` quantile of
#               the negative attributions.
# The element names keep their "_5" suffix for existing callers even when
# other cutoffs are requested.
get_top_and_bottom_attrs <- function(all_attrs, omic_grep,
                                     upper_prob = 0.95, lower_prob = 0.05) {
  stopifnot(
    is.numeric(upper_prob), length(upper_prob) == 1L, upper_prob >= 0, upper_prob <= 1,
    is.numeric(lower_prob), length(lower_prob) == 1L, lower_prob >= 0, lower_prob <= 1
  )

  # Get attributes from specified omic data type
  cur_attrs <- all_attrs[variable %like% omic_grep]

  # Quantiles are computed within each sign, so "top 5%" means the top 5% of
  # the positive attributions, not of all attributions.
  cur_attr_pos <- cur_attrs[value >= 0]
  cur_attr_neg <- cur_attrs[value < 0]

  attr_top_5 <- cur_attr_pos[value > quantile(cur_attr_pos$value, upper_prob)]
  attr_bot_5 <- cur_attr_neg[value < quantile(cur_attr_neg$value, lower_prob)]

  list(attr_top_5 = attr_top_5, attr_bot_5 = attr_bot_5)
}
# Top-5% positive EXP attributions as a named vector: gene symbol -> attribution
exp_top_bot_attrs <- get_top_and_bottom_attrs(
  all_attrs = ibrutinib_EFM192A_attrs,
  omic_grep = "exp_.+"
)
top_exp_variables <- gsub(pattern = "exp_", "", exp_top_bot_attrs[["attr_top_5"]]$variable)
top_exp_variables <- setNames(exp_top_bot_attrs[["attr_top_5"]]$value, top_exp_variables)

# Same for the top-5% positive CNV attributions
cnv_top_bot_attrs <- get_top_and_bottom_attrs(
  all_attrs = ibrutinib_EFM192A_attrs,
  omic_grep = "cnv_.+"
)
top_cnv_variables <- gsub(pattern = "cnv_", "", cnv_top_bot_attrs[["attr_top_5"]]$variable)
top_cnv_variables <- setNames(cnv_top_bot_attrs[["attr_top_5"]]$value, top_cnv_variables)

# Pool both omics and rank by attribution; gseGO() is given the list in
# decreasing order
cnv_exp_top_variables <- sort(c(top_exp_variables, top_cnv_variables), decreasing = TRUE)

# GO gene-set enrichment on the pooled ranking. All scores are non-negative
# here (only the positive side was kept), hence scoreType = "pos".
top_gse_cnv_exp <- gseGO(
  geneList = cnv_exp_top_variables,
  OrgDb = get(organism),
  keyType = "SYMBOL",
  ont = "ALL",
  # nPerm = 10000,
  minGSSize = 3,
  maxGSSize = 800,
  pvalueCutoff = 0.05,
  pAdjustMethod = "BH",
  scoreType = "pos",
  verbose = TRUE
)

p_top_cnv_exp <- ridgeplot(top_gse_cnv_exp) +
  labs(x = "enrichment distribution") +
  ggtitle(
    "Top 5% EXP Attributions GSE",
    subtitle = "Cell-line EFM192A (Breast Adenocarcinoma) + Ibrutinib\nTarget: 0.66, Predicted: 0.51"
  )
# Gene-set enrichment over *all* EXP attributions (positive and negative) of
# the EFM192A + Ibrutinib prediction.
exp_attrs <- ibrutinib_EFM192A_attrs[variable %like% "exp_.+"]
all_exp_variables <- gsub(pattern = "exp_", "", exp_attrs$variable)
all_exp_variables <- setNames(exp_attrs$value, all_exp_variables)

# gseGO() needs the scores ranked in decreasing order; a separate object is
# used so all_exp_variables keeps the row order of exp_attrs.
ranked_exp_variables <- sort(all_exp_variables, decreasing = TRUE)

# A ranked GSEA (gseGO) is run rather than an over-representation test such as
# enrichDAVID(): the input is a scored gene list, and ridgeplot() below only
# accepts GSEA results. The default scoreType is kept because the ranking
# contains both positive and negative attributions.
all_gse_exp <- gseGO(
  geneList = ranked_exp_variables,
  ont = "ALL",
  keyType = "SYMBOL",
  minGSSize = 3,
  maxGSSize = 800,
  pvalueCutoff = 0.05,
  verbose = TRUE,
  OrgDb = get(organism),
  pAdjustMethod = "BH"
)

p_all_exp <- ridgeplot(all_gse_exp) +
  labs(x = "enrichment distribution") +
  ggtitle(
    "All EXP Attributions GSE",
    subtitle = "Cell-line EFM192A (Breast Adenocarcinoma) + Ibrutinib\nTarget: 0.66, Predicted: 0.51"
  )
### BTK in EFM192A AAC vs EXP/CNV ====
# Plot BTK expression and BTK copy number against the Ibrutinib AAC of every
# CTRP cell line the drug was tested in, and point out EFM192A in both panels.
MUT <- fread("Data/DRP_Training_Data/DepMap_21Q2_Mutations_by_Cell.csv")
CNV <- fread("Data/DRP_Training_Data/DepMap_21Q2_CopyNumber.csv")
EXP <- fread("Data/DRP_Training_Data/DepMap_21Q2_Expression.csv")
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "BTK")
# Expression of BTK is in the 46th percentile (among all cell lines)
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "EFM192A", variable_name = "BTK")
# CNV of BTK is in the 93rd percentile (among all cell lines)
get_expression_in_cell_line(data_type = "MUT", cell_line_name = "EFM192A", variable_name = "BTK")

drug_name <- "Ibrutinib"
gene_name <- "BTK"
cell_line_name <- "EFM192A"

# Find expression of BTK in all cell lines that ibrutinib was tested in, plot it against AAC
cur_cell_aac <- unique(ctrp[cpd_name == drug_name][, c("ccl_name", "area_above_curve")])
cur_exp_subset <- EXP[stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)][
  , c("stripped_cell_line_name", gene_name), with = FALSE]
exp_aac_subset <- merge(cur_exp_subset, cur_cell_aac, by.x = "stripped_cell_line_name", by.y = "ccl_name")
# After the merge the gene column is the second one; give it a generic name
colnames(exp_aac_subset)[2] <- "value"

# Find copy number of BTK in all cell lines that ibrutinib was tested in, plot it against AAC
cur_cnv_subset <- CNV[stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)][
  , c("stripped_cell_line_name", gene_name), with = FALSE]
cnv_aac_subset <- merge(cur_cnv_subset, cur_cell_aac, by.x = "stripped_cell_line_name", by.y = "ccl_name")
colnames(cnv_aac_subset)[2] <- "value"

library(ggplot2)
library(patchwork)

# Row(s) of the highlighted cell line, looked up once per panel
exp_highlight <- exp_aac_subset[stripped_cell_line_name == cell_line_name]
cnv_highlight <- cnv_aac_subset[stripped_cell_line_name == cell_line_name]

p_exp_aac <- ggplot(data = exp_aac_subset) +
  geom_point(aes(x = area_above_curve, y = value)) +
  xlab("Area Above Curve") +
  ylab("Gene Expression") +
  annotate(
    geom = "point",
    x = exp_highlight$area_above_curve,
    y = exp_highlight$value,
    colour = "orange", size = 1
  ) +
  # Arrow from the label position to just beside the highlighted point
  annotate(
    geom = "curve",
    x = 0.6, y = 2,
    xend = exp_highlight$area_above_curve - 0.005,
    yend = exp_highlight$value + 0.2,
    curvature = -.3, arrow = arrow(length = unit(2, "mm"))
  ) +
  annotate(geom = "text", x = 0.6, y = 2.3, label = "EFM192A", size = 6) +
  theme(text = element_text(size = 14, face = "bold"))

p_cnv_aac <- ggplot(data = cnv_aac_subset) +
  geom_point(aes(x = area_above_curve, y = value)) +
  xlab("Area Above Curve") +
  ylab("Copy Number") +
  annotate(
    geom = "point",
    x = cnv_highlight$area_above_curve,
    y = cnv_highlight$value,
    colour = "orange", size = 1
  ) +
  annotate(
    geom = "curve",
    x = 0.6, y = 1.35,
    xend = cnv_highlight$area_above_curve - 0.005,
    yend = cnv_highlight$value + 0.05,
    curvature = -.3, arrow = arrow(length = unit(2, "mm"))
  ) +
  annotate(geom = "text", x = 0.6, y = 1.4, label = "EFM192A", size = 6) +
  theme(text = element_text(size = 14, face = "bold"))

# Keep the combined figure in a variable and hand it to ggsave() explicitly, so
# the saved file does not depend on which plot ggplot2 recorded last.
p_btk_exp_cnv_aac <- p_exp_aac + p_cnv_aac
p_btk_exp_cnv_aac
ggsave("Plots/Interpretation/BTK_EXP_CNV_vs_AAC.pdf", plot = p_btk_exp_cnv_aac)
# TE617T + Ibrutinib: inspect the strongest attributions (ibrutinib_te617t_attrs is created in the
# TE617T subsection of the Ibrutinib section).
# Top positive attributed variables:
head(ibrutinib_te617t_attrs)
# FGF10: compare its expression and its copy number in TE617T
# Paper: FGF10/FGFR2 signal induces cell migration and invasion in pancreatic cancer (2008)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "FGF10")
# ~ 99th percentile
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "TE617T", variable_name = "FGF10")
# 24th percentile
# UTS2B, potent vasoconstrictor...
# Paper: Protein expression of urotensin II, urotensin-related peptide and their receptor in the lungs of patients
# with lymphangioleiomyomatosis (2010)
# ...Urotensin II (UII) and urotensin-related peptide (URP) are vasoactive neuropeptides with wide ranges of action in
# the normal mammalian lung, including the control of smooth muscle cell proliferation...
# Overexpressed in a number of cancers: https://www.proteinatlas.org/ENSG00000188958-UTS2B/pathology
# Paper: Integrated Genomic Analysis of Hürthle Cell Cancer Reveals Oncogenic Drivers, Recurrent Mitochondrial Mutations,
# and Unique Chromosomal Landscapes (2018) ...overexpressed in thyroid cancer...
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "UTS2B")
# ~ 99th percentile
# BMP3
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "BMP3")
# ~ 100th percentile
# Paper: Bone morphogenic protein 3 inactivation is an early and frequent event in colorectal cancer development (2008)
# Overexpressed in few cancer types: https://www.proteinatlas.org/ENSG00000152785-BMP3/pathology
# Top negative attributed variables:
tail(ibrutinib_te617t_attrs)
# SYPL2
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "SYPL2")
# 100th percentile, overexpressed
### MAGEA8, DERL1, FAM91A1 in EFM192A AAC vs EXP/CNV ====
# For each top-attributed gene, pair its expression with the Ibrutinib AAC (and
# primary disease) of every cell line the drug was tested in.
# MUT <- fread("Data/DRP_Training_Data/DepMap_21Q2_Mutations_by_Cell.csv")
# CNV <- fread("Data/DRP_Training_Data/DepMap_21Q2_CopyNumber.csv")
EXP <- fread("Data/DRP_Training_Data/DepMap_21Q2_Expression.csv")
cnv_exp_integ <- get_all_interpret(data_types = "gnndrug_cnv_exp", split_type = "CELL_LINE")
ibrutinib_EFM192A_attrs <- get_top_attrs(
  cnv_exp_integ,
  compound_name = "Ibrutinib",
  cell_line_name = "EFM192A"
)
head(ibrutinib_EFM192A_attrs, n = 10)
# MAGEA8, CRLF3, FAM91A1, DERL1, THAP3, PSMA1, TOB1-AS1 (CNV), FAM90A26, CDC27, DNTTIP1
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "MAGEA8")
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "DERL1")
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = "FAM91A1")

# Find expression of each gene in all cell lines that ibrutinib was tested in, plot it against AAC
drug_name <- "Ibrutinib"
cell_line_name <- "EFM192A"
cur_cell_aac <- unique(
  ctrp[cpd_name == drug_name][, c("ccl_name", "area_above_curve", "primary_disease")]
)

# MAGEA8
magea8_exp_subset <- EXP[
  stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)
][, c("stripped_cell_line_name", "MAGEA8"), with = FALSE]
magea8_exp_aac_subset <- merge(
  magea8_exp_subset, cur_cell_aac,
  by.x = "stripped_cell_line_name", by.y = "ccl_name"
)
# The gene column is second after the merge; give it a generic name
colnames(magea8_exp_aac_subset)[2] <- "value"

# CRLF3
crlf3_exp_subset <- EXP[
  stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)
][, c("stripped_cell_line_name", "CRLF3"), with = FALSE]
crlf3_exp_aac_subset <- merge(
  crlf3_exp_subset, cur_cell_aac,
  by.x = "stripped_cell_line_name", by.y = "ccl_name"
)
colnames(crlf3_exp_aac_subset)[2] <- "value"

# DERL1
derl1_exp_subset <- EXP[
  stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)
][, c("stripped_cell_line_name", "DERL1"), with = FALSE]
derl1_exp_aac_subset <- merge(
  derl1_exp_subset, cur_cell_aac,
  by.x = "stripped_cell_line_name", by.y = "ccl_name"
)
colnames(derl1_exp_aac_subset)[2] <- "value"

# FAM91A1
fam91a1_exp_subset <- EXP[
  stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)
][, c("stripped_cell_line_name", "FAM91A1"), with = FALSE]
fam91a1_exp_aac_subset <- merge(
  fam91a1_exp_subset, cur_cell_aac,
  by.x = "stripped_cell_line_name", by.y = "ccl_name"
)
colnames(fam91a1_exp_aac_subset)[2] <- "value"
# # Find copy number of BTK in all cell lines that ibrutinib was tested in, plot it against AAC
# cur_cnv_subset <- CNV[stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)][, c("stripped_cell_line_name",
# gene_name), with = F]
# cnv_aac_subset <- merge(cur_cnv_subset, cur_cell_aac, by.x = "stripped_cell_line_name", by.y = "ccl_name")
# colnames(cnv_aac_subset)[2] <- "value"
# Scatter plot of a feature value against drug response (AAC), coloured by
# primary disease, with one cell line highlighted and labelled and a linear
# trend line drawn for a single disease.
#
# Arguments:
#   data            data.table with columns `stripped_cell_line_name`,
#                   `area_above_curve`, `value` and `primary_disease`.
#   xlab, ylab      axis titles.
#   annotate_label  text drawn next to the highlighted point; when not
#                   supplied, `cell_line_name` is used.
#   cell_line_name  value of `stripped_cell_line_name` to highlight.
#   curvature       curvature of the arrow pointing at the highlighted point.
#   curve_x_plus, curve_y_plus        offsets of the arrow start from the point.
#   curve_xend_plus, curve_yend_plus  offsets of the arrow tip from the point.
#   text_x_plus, text_y_plus          offsets of the label from the point.
#   xlim_min, xlim_max  range of the x axis breaks (steps of 0.1); the axis
#                       limits themselves are not forced.
#   smooth_disease  value of `primary_disease` whose points get the lm trend
#                   line (default "Breast Cancer").
#
# Returns the ggplot object. The y axis is fixed to [0, 10].
plot_relative_expression <- function(data, xlab, ylab, annotate_label, cell_line_name,
                                     curvature = -0.3,
                                     curve_x_plus = 0.1, curve_y_plus = 0.1,
                                     curve_xend_plus = 0.01, curve_yend_plus = 0,
                                     text_x_plus = 0.1, text_y_plus = 0.1,
                                     xlim_min = 0, xlim_max = 1,
                                     smooth_disease = "Breast Cancer") {
  # Existing callers only pass cell_line_name, so fall back to it
  if (missing(annotate_label)) {
    annotate_label <- cell_line_name
  }

  # Coordinates of the highlighted cell line, looked up once
  highlighted <- data[stripped_cell_line_name == cell_line_name]
  if (nrow(highlighted) == 0L) {
    stop("Cell line '", cell_line_name, "' is not present in `data`", call. = FALSE)
  }
  point_x <- highlighted$area_above_curve
  point_y <- highlighted$value

  ggplot(data = data, aes(x = area_above_curve, y = value)) +
    geom_point(aes(x = area_above_curve, y = value, color = primary_disease)) +
    xlab(xlab) +
    ylab(ylab) +
    annotate(
      geom = "point",
      x = point_x, y = point_y,
      colour = "orange", size = 1
    ) +
    # Arrow from near the label to the highlighted point
    annotate(
      geom = "curve",
      x = point_x + curve_x_plus,
      y = point_y + curve_y_plus,
      xend = point_x + curve_xend_plus,
      yend = point_y + curve_yend_plus,
      curvature = curvature, arrow = arrow(length = unit(2, "mm"))
    ) +
    annotate(
      geom = "text",
      x = point_x + text_x_plus,
      y = point_y + text_y_plus,
      label = annotate_label, size = 4
    ) +
    theme(text = element_text(size = 14, face = "bold"), legend.position = "top") +
    scale_color_discrete(name = "Primary Disease") +
    scale_x_continuous(breaks = seq(xlim_min, xlim_max, by = 0.1)) +
    ylim(0, 10) +
    # xlim(xlim_min, xlim_max) +
    # Linear fit for one disease only, coloured like that disease's points
    geom_smooth(
      aes(color = primary_disease),
      data = data[primary_disease == smooth_disease],
      method = "lm"
    )
}
# library() rather than require(): stop immediately if a plotting package is missing
library(ggplot2)
library(patchwork)
# MAGEA8 expression vs Ibrutinib AAC, restricted to the three diseases of
# interest (all other diseases, and rows without a disease, are dropped).
magea8_exp_aac_subset <- magea8_exp_aac_subset[
  primary_disease %in% c("Breast Cancer", "Leukemia", "Lymphoma")
]
p_magea8_exp_aac <- plot_relative_expression(
  data = magea8_exp_aac_subset,
  xlab = "Area Above Curve", ylab = "MAGEA8 Expression",
  cell_line_name = "EFM192A"
)
# p_magea8_exp_aac <- ggplot(data = magea8_exp_aac_subset, aes(x= area_above_curve, y = value)) +
# geom_point(aes(x = area_above_curve, y = value, color = primary_disease)) +
# xlab("Area Above Curve") +
# ylab("MAGEA8 Expression") +
# annotate(geom = "point",
# x = magea8_exp_aac_subset[stripped_cell_line_name == cell_line_name]$area_above_curve,
# y = magea8_exp_aac_subset[stripped_cell_line_name == cell_line_name]$value,
# colour = "orange", size = 1) +
# annotate(
# geom = "curve",
# x = 0.6, y = 4.5,
# xend = magea8_exp_aac_subset[stripped_cell_line_name == cell_line_name]$area_above_curve - 0.01,
# yend = magea8_exp_aac_subset[stripped_cell_line_name == cell_line_name]$value,
# curvature = -.3, arrow = arrow(length = unit(2, "mm"))
# ) +
# annotate(geom = "text", x = 0.6, y = 4.4, label = "EFM192A", size = 4,
# ) +
# theme(text = element_text(size = 14, face = "bold"), legend.position = "top") +
# scale_color_discrete(name = "Primary Disease") +
# ylim(0, 10) +
# xlim(0, 1) +
# geom_smooth(aes(color = primary_disease),
# data = subset(magea8_exp_aac_subset, primary_disease == "Breast Cancer"),
# method='lm')
# DERL1 and FAM91A1 expression vs Ibrutinib AAC in breast cancer, leukemia and
# lymphoma cell lines, plus per-disease linear fits of expression on AAC.
diseases_of_interest <- c("Breast Cancer", "Leukemia", "Lymphoma")

# DERL1: keep only the diseases of interest, then plot
derl1_exp_aac_subset <- derl1_exp_aac_subset[primary_disease %in% diseases_of_interest]
p_derl1_exp_aac <- plot_relative_expression(
  data = derl1_exp_aac_subset,
  xlab = "Area Above Curve", ylab = "DERL1 Expression",
  cell_line_name = "EFM192A",
  curvature = -0.3,
  curve_x_plus = -0.02, curve_y_plus = 0.5,
  curve_xend_plus = -0.001, curve_yend_plus = 0.1,
  text_x_plus = -0.075, text_y_plus = 0.5,
  xlim_max = 0.75
)

library(dplyr)
# One lm(value ~ AAC) per disease; groups come out in alphabetical order, so
# [[1]] is Breast Cancer, [[2]] Leukemia and [[3]] Lymphoma.
fitted_models <- derl1_exp_aac_subset %>%
  group_by(primary_disease) %>%
  do(model = lm(value ~ area_above_curve, data = .))
fitted_models$model
summary(fitted_models$model[[1]]) # 0.3505 Adjusted R-squared, breast cancer
summary(fitted_models$model[[2]]) # 0.0124 leukemia
summary(fitted_models$model[[3]]) # -0.0267 lymphoma

# FAM91A1: same filtering and plot
fam91a1_exp_aac_subset <- fam91a1_exp_aac_subset[primary_disease %in% diseases_of_interest]
p_fam91a1_exp_aac <- plot_relative_expression(
  data = fam91a1_exp_aac_subset,
  xlab = "Area Above Curve", ylab = "FAM91A1 Expression",
  cell_line_name = "EFM192A",
  curvature = 0.3,
  curve_x_plus = -0.05, curve_y_plus = 0.2,
  curve_xend_plus = -0.005, curve_yend_plus = 0.01,
  text_x_plus = -0.05, text_y_plus = 0.5,
  xlim_max = 0.75
)

# The composed figures are stored and passed to ggsave() explicitly, so the
# saved file is the full composition regardless of which plot ggplot2 recorded
# last (the `&` operator rebuilds every sub-plot).
p_derl1_fam91a1 <- (p_derl1_exp_aac + p_fam91a1_exp_aac +
  plot_layout(guides = "collect")) & theme(legend.position = "top")
p_derl1_fam91a1
ggsave("Plots/Interpretation/DERL1_FAM91A1_EXP_vs_AAC.pdf", plot = p_derl1_fam91a1)

fitted_models <- fam91a1_exp_aac_subset %>%
  group_by(primary_disease) %>%
  do(model = lm(value ~ area_above_curve, data = .))
fitted_models$model
summary(fitted_models$model[[1]]) # 0.1336 Adjusted R-squared
summary(fitted_models$model[[2]]) # -0.02229
summary(fitted_models$model[[3]]) # 0.005535

p_magea8_derl1_fam91a1 <- (p_magea8_exp_aac + p_derl1_exp_aac + p_fam91a1_exp_aac +
  plot_layout(guides = "collect")) & theme(legend.position = "top")
p_magea8_derl1_fam91a1
ggsave("Plots/Interpretation/MAGEA8_DERL1_FAM91A1_EXP_vs_AAC.pdf",
       plot = p_magea8_derl1_fam91a1,
       width = 16, height = 7)
# One expression-vs-AAC plot per top-attributed EXP gene, saved as a PDF each.
top_genes <- c("MAGEA8", "CRLF3", "FAM91A1", "DERL1", "THAP3", "PSMA1", "FAM90A26", "CDC27", "DNTTIP1")
top_cnv <- "TOB1-AS1"
all_plots <- vector(mode = "list", length = length(top_genes))
# MAGEA8, CRLF3, FAM91A1, DERL1, THAP3, PSMA1, TOB1-AS1 (CNV), FAM90A26, CDC27, DNTTIP1
# recursive + showWarnings = FALSE: works on a fresh checkout and on re-runs
dir.create("Plots/Interpretation/EFM192A/", recursive = TRUE, showWarnings = FALSE)

# The set of tested cell lines does not depend on the gene, so filter EXP once
exp_tested <- EXP[stripped_cell_line_name %in% unique(cur_cell_aac$ccl_name)]

# seq_along() so that an empty gene list runs zero iterations
for (i in seq_along(top_genes)) {
  cur_gene <- top_genes[i]
  exp_subset <- exp_tested[, c("stripped_cell_line_name", cur_gene), with = FALSE]
  cur_exp_aac_subset <- merge(exp_subset, cur_cell_aac,
                              by.x = "stripped_cell_line_name", by.y = "ccl_name")
  # The gene column is second after the merge; give it a generic name
  colnames(cur_exp_aac_subset)[2] <- "value"
  cur_exp_aac_subset <- cur_exp_aac_subset[primary_disease %in% c("Breast Cancer", "Leukemia", "Lymphoma")]
  # cur_exp_aac_subset <- cur_exp_aac_subset[primary_disease != "Other"]
  p_exp_aac <- plot_relative_expression(
    data = cur_exp_aac_subset,
    xlab = "Area Above Curve", ylab = paste(cur_gene, "Expression"),
    cell_line_name = "EFM192A"
  )
  all_plots[[i]] <- p_exp_aac
  ggsave(
    filename = paste0("Plots/Interpretation/EFM192A/", cur_gene, "_EXP_vs_AAC.pdf"),
    plot = p_exp_aac
  )
}
all_plots[[5]]
# SVM and Biomarker Proof ====
# Use top genes as features for an SVM, classifying responsive (AAC >= 0.5) and non-responsive
# (AAC < 0.5) cell lines to ibrutinib
library(data.table)
EXP <- fread("Data/DRP_Training_Data/DepMap_21Q2_Expression.csv")
ctrp <- fread("Data/DRP_Training_Data/CTRP_AAC_SMILES.txt")
cnv_exp_integ <- get_all_interpret(data_types = "gnndrug_cnv_exp", split_type = "CELL_LINE")
ibrutinib_EFM192A_attrs <- get_top_attrs(cnv_exp_integ, compound_name = "Ibrutinib", cell_line_name = "EFM192A")

# Take top 100 exp features
head(ibrutinib_EFM192A_attrs, n = 100)
top_100 <- head(ibrutinib_EFM192A_attrs[variable %like% "exp_"], n = 100)$variable
top_100 <- gsub("exp_", "", top_100)
top_genes <- c("MAGEA8", "CRLF3", "FAM91A1", "DERL1", "THAP3", "PSMA1", "FAM90A26", "CDC27", "DNTTIP1")

# Show where EFM192A sits for each of the top genes
for (gene in top_genes) {
  print(get_expression_in_cell_line(data_type = "EXP", cell_line_name = "EFM192A", variable_name = gene))
}

# Subset gene expression data to only the 9 genes
exp_sub <- EXP[, c("stripped_cell_line_name", top_genes), with = FALSE]
# Add AAC information
ctrp_sub <- ctrp[cpd_name == "Ibrutinib", c("ccl_name", "primary_disease", "area_above_curve")]
cur_data <- merge(exp_sub, ctrp_sub, by.x = "stripped_cell_line_name", by.y = "ccl_name")
cur_data[, responsive := ifelse(area_above_curve >= 0.5, 1, 0)]

# require(devtools)
# install_version("RSofia", version = "1.1", repos = "http://cran.us.r-project.org")
# require("RSofia")
library(caTools)
library(e1071)

set.seed(42)
# Number of responsive cell lines (class balance check)
sum(cur_data$responsive)
cur_data$responsive <- as.factor(cur_data$responsive)

# 50/50 split on the class label (stratifies data too). The mask is not called
# `sample` so that base::sample() is not shadowed.
is_train <- sample.split(cur_data$responsive, SplitRatio = .5)
train <- cur_data[(is_train)]
test <- cur_data[(!is_train)]
# train = subset(cur_data, sample == TRUE)
# test = subset(cur_data, sample == FALSE)

colnames(train[, c(top_genes, "responsive"), with = FALSE])
classifier <- svm(
  formula = responsive ~ .,
  data = train[, c(top_genes, "responsive"), with = FALSE],
  type = "C-classification",
  kernel = "linear",
  scale = FALSE
)
summary(classifier)
print(classifier)

# Predict from exactly the feature columns the classifier was trained on
pred <- predict(classifier, test[, top_genes, with = FALSE])
table(pred, test$responsive) # terrible...
# Linear regression model
# Predict the continuous AAC of Ibrutinib from gene expression, first with the 9 selected genes
# and then with the top 100 attributed expression features.
cnv_exp_integ <- get_all_interpret(data_types = "gnndrug_cnv_exp", split_type = "CELL_LINE")
ibrutinib_EFM192A_attrs <- get_top_attrs(cnv_exp_integ, compound_name = "Ibrutinib", cell_line_name = "EFM192A")
# Take top 100 exp features
head(ibrutinib_EFM192A_attrs, n = 100)
top_100 <- head(ibrutinib_EFM192A_attrs[variable %like% "exp_"], n = 100)$variable
top_100 <- gsub("exp_", "", top_100)
top_genes <- c("MAGEA8", "CRLF3", "FAM91A1", "DERL1", "THAP3", "PSMA1", "FAM90A26", "CDC27", "DNTTIP1")
# Model 1: the 9 selected genes, fitted on the training half of the SVM split above
regressor <- lm(area_above_curve ~ ., data = train[, c(top_genes, "area_above_curve"), with = FALSE])
summary(regressor)
# Model 2: the top 100 attributed expression features.
# `train` was built from the 9-gene subset of EXP, so it does not contain the top-100 columns;
# fetch them from EXP for the same training cell lines before fitting.
# NOTE(review): assumes every name in `top_100` is a column of EXP - confirm.
train_top_100 <- merge(
  EXP[, c("stripped_cell_line_name", top_100), with = FALSE],
  train[, c("stripped_cell_line_name", "area_above_curve"), with = FALSE],
  by = "stripped_cell_line_name"
)
regressor <- lm(area_above_curve ~ ., data = train_top_100[, c(top_100, "area_above_curve"), with = FALSE])
summary(regressor)
# Linear regression of Ibrutinib AAC on the top attributed miRNA features
# (attributions taken from the "gnndrug_exp_mirna" model for EFM192A).
exp_mirna_integ <- get_all_interpret(data_types = "gnndrug_exp_mirna", split_type = "CELL_LINE")
ibrutinib_EFM192A_attrs_exp_mirna <- get_top_attrs(exp_mirna_integ, compound_name = "Ibrutinib", cell_line_name = "EFM192A")
# Take top 100 mirna features
head(ibrutinib_EFM192A_attrs_exp_mirna, n = 100)
top_100 <- head(ibrutinib_EFM192A_attrs_exp_mirna[variable %like% "-miR-"], n = 100)$variable
# NOTE(review): this removes every "n_" occurrence, not only a leading prefix - presumably it
# strips the feature-name prefix so the names match the MIRNA column names; verify.
top_100 <- gsub("n_", "", top_100)
# top_genes <- c("MAGEA8", "CRLF3", "FAM91A1", "DERL1", "THAP3", "PSMA1", "FAM90A26", "CDC27", "DNTTIP1")
# Subset miRNA data to only the selected miRNA features
MIRNA <- fread(path_dict[["MIRNA"]])
mirna_sub <- MIRNA[, c("stripped_cell_line_name", top_100), with = FALSE]
# Add AAC information
ctrp_sub <- ctrp[cpd_name == "Ibrutinib", c("ccl_name", "primary_disease", "area_above_curve")]
cur_data <- merge(mirna_sub, ctrp_sub, by.x = "stripped_cell_line_name", by.y = "ccl_name")
# Binary label: 1 = responsive (AAC >= 0.5), 0 = non-responsive (AAC < 0.5)
cur_data[, responsive := ifelse(area_above_curve >= 0.5, 1, 0)]
sample <- sample.split(cur_data$responsive, SplitRatio = .5) # stratifies data too
train <- cur_data[(sample)]
test <- cur_data[(!sample)]
# The regression is fitted on all of `cur_data`; the train/test split above is not used by it.
# (The stray trailing empty argument of the lm() call was removed.)
regressor <- lm(area_above_curve ~ .,
                data = cur_data[, c(top_100, "area_above_curve"), with = FALSE])
summary(regressor) # Adjusted R-squared: 0.0747
# Attributions of the "gnndrug_metab_rppa" model for Ibrutinib on EFM192A
metab_rppa_integ <- get_all_interpret(data_types = "gnndrug_metab_rppa", split_type = "CELL_LINE")
ibrutinib_EFM192A_attrs_metab_rppa <- get_top_attrs(metab_rppa_integ, compound_name = "Ibrutinib", cell_line_name = "EFM192A")
# Inspect the attributions that were just computed (this line previously re-displayed the
# exp_mirna attributions, leaving the metab_rppa result unused).
head(ibrutinib_EFM192A_attrs_metab_rppa, n = 100)
# NOTE(review): everything below is still the miRNA regression (features are selected from
# `ibrutinib_EFM192A_attrs_exp_mirna` and read from the MIRNA table). It looks like a copy of
# the miRNA analysis that was never adapted to metab/RPPA features; the matching feature-name
# pattern and data path are not visible here, so the logic is left unchanged - TODO confirm.
# Take top 100 mirna features
top_100 <- head(ibrutinib_EFM192A_attrs_exp_mirna[variable %like% "-miR-"], n = 100)$variable
top_100 <- gsub("n_", "", top_100)
# top_genes <- c("MAGEA8", "CRLF3", "FAM91A1", "DERL1", "THAP3", "PSMA1", "FAM90A26", "CDC27", "DNTTIP1")
# Subset miRNA data to only the selected miRNA features
MIRNA <- fread(path_dict[["MIRNA"]])
mirna_sub <- MIRNA[, c("stripped_cell_line_name", top_100), with = FALSE]
# Add AAC information
ctrp_sub <- ctrp[cpd_name == "Ibrutinib", c("ccl_name", "primary_disease", "area_above_curve")]
cur_data <- merge(mirna_sub, ctrp_sub, by.x = "stripped_cell_line_name", by.y = "ccl_name")
# Binary label: 1 = responsive (AAC >= 0.5), 0 = non-responsive (AAC < 0.5)
cur_data[, responsive := ifelse(area_above_curve >= 0.5, 1, 0)]
sample <- sample.split(cur_data$responsive, SplitRatio = .5) # stratifies data too
train <- cur_data[(sample)]
test <- cur_data[(!sample)]
# Fitted on all of `cur_data`; the train/test split above is not used by the regression.
# (The stray trailing empty argument of the lm() call was removed.)
regressor <- lm(area_above_curve ~ .,
                data = cur_data[, c(top_100, "area_above_curve"), with = FALSE])
summary(regressor)
# https://cran.r-project.org/src/contrib/Archive/RSofia/
# Manual inspection of the attributions for Ibrutinib on the TE617T cell line.
# NOTE(review): `ibrutinib_te617t_attrs` is not assigned in this part of the file; presumably it
# was created earlier with get_top_attrs() - verify before running this section on its own.
# The percentile remarks below are the author's recorded observations from the lookups.
# Top positive attributed variables:
head(ibrutinib_te617t_attrs)
# FGF10
# Paper: FGF10/FGFR2 signal induces cell migration and invasion in pancreatic cancer (2008)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "FGF10")
# ~ 99th percentile
get_expression_in_cell_line(data_type = "CNV", cell_line_name = "TE617T", variable_name = "FGF10")
# 24th percentile
# UTS2B, potent vasoconstrictor...
# Paper: Protein expression of urotensin II, urotensin-related peptide and their receptor in the lungs of patients
# with lymphangioleiomyomatosis (2010)
# ...Urotensin II (UII) and urotensin-related peptide (URP) are vasoactive neuropeptides with wide ranges of action in
# the normal mammalian lung, including the control of smooth muscle cell proliferation...
# Overexpressed in a bunch of cancers: https://www.proteinatlas.org/ENSG00000188958-UTS2B/pathology
# Paper: Integrated Genomic Analysis of Hürthle Cell Cancer Reveals Oncogenic Drivers, Recurrent Mitochondrial Mutations,
# and Unique Chromosomal Landscapes (2018) ...overexpressed in thyroid cancer...
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "UTS2B")
# ~ 99th percentile
# BMP3
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "BMP3")
# ~ 100th percentile
# Paper: Bone morphogenic protein 3 inactivation is an early and frequent event in colorectal cancer development (2008)
# Overexpressed in few cancer types: https://www.proteinatlas.org/ENSG00000152785-BMP3/pathology
# Top negative attributed variables
tail(ibrutinib_te617t_attrs)
get_expression_in_cell_line(data_type = "EXP", cell_line_name = "TE617T", variable_name = "SYPL2")
# 100th percentile, overexpressed
# Lapatinib ----
# Show the rows of `final_data` for Lapatinib.
final_data[cpd_name == "Lapatinib"]
# Assigned for Breast Cancer, Good for Gastric Cancer (gastric adenocarcinoma)
# Reached Phase III clinical trials in combination with chemotherapy
# CNV_RPPA had lowest RMSE
# NOTE(review): the calls below do not query Lapatinib. They load the "gnndrug_mut_cnv"
# attributions (not CNV_RPPA as noted above) for Bosutinib / PC14 and look up MOLM13;
# this looks like an unfinished template - TODO replace with the intended drug / cell line.
mut_cnv_integ <- get_all_interpret(data_types = "gnndrug_mut_cnv", split_type = "CELL_LINE")
bosutinib_pc14_attrs <- get_top_attrs(mut_cnv_integ, compound_name = "Bosutinib", cell_line_name = "PC14")
# Top positive attributed variables:
head(bosutinib_pc14_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# Top negative attributed variables
# NOTE(review): `cabozantinib_molm13_attrs` is not assigned in this section; presumably it was
# created earlier in the file - verify. `tail(bosutinib_pc14_attrs)` may have been intended.
tail(cabozantinib_molm13_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# ==== Sorafenib ====
# Show the rows of `final_data` for Sorafenib.
final_data[cpd_name == "Sorafenib"]
# Assigned for Kidney Cancer and Thyroid Cancer, Good for Leukemia (AML)
# Sorafenib or placebo in patients with newly diagnosed acute myeloid leukaemia: long-term follow-up of the
# randomized controlled SORAML trial
# PROT_* data types are good for DRP
# NOTE(review): the calls below do not query Sorafenib. They load the "gnndrug_mut_cnv"
# attributions for Bosutinib / PC14 and look up MOLM13; this looks like an unfinished
# template - TODO replace with the intended drug / cell line.
mut_cnv_integ <- get_all_interpret(data_types = "gnndrug_mut_cnv", split_type = "CELL_LINE")
bosutinib_pc14_attrs <- get_top_attrs(mut_cnv_integ, compound_name = "Bosutinib", cell_line_name = "PC14")
# Top positive attributed variables:
head(bosutinib_pc14_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# Top negative attributed variables
# NOTE(review): `cabozantinib_molm13_attrs` is not assigned in this section; presumably it was
# created earlier in the file - verify. `tail(bosutinib_pc14_attrs)` may have been intended.
tail(cabozantinib_molm13_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# ==== Sunitinib ====
# Show the rows of `final_data` for Sunitinib.
final_data[cpd_name == "Sunitinib"]
# Assigned for Pancreatic Cancer, Good for Leukemia (AML) and Malignant Rhabdoid Tumor (kidneys)
# A phase I/II study of sunitinib and intensive chemotherapy in patients over 60 years of age with
# acute myeloid leukaemia and activating FLT3 mutations (2015)
# PROT_* data types are good for DRP
# NOTE(review): the calls below do not query Sunitinib. They load the "gnndrug_mut_cnv"
# attributions for Bosutinib / PC14 and look up MOLM13; this looks like an unfinished
# template - TODO replace with the intended drug / cell line.
mut_cnv_integ <- get_all_interpret(data_types = "gnndrug_mut_cnv", split_type = "CELL_LINE")
bosutinib_pc14_attrs <- get_top_attrs(mut_cnv_integ, compound_name = "Bosutinib", cell_line_name = "PC14")
# Top positive attributed variables:
head(bosutinib_pc14_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# Top negative attributed variables
# NOTE(review): `cabozantinib_molm13_attrs` is not assigned in this section; presumably it was
# created earlier in the file - verify. `tail(bosutinib_pc14_attrs)` may have been intended.
tail(cabozantinib_molm13_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# ==== Temsirolimus ====
# Show the rows of `final_data` for Temsirolimus; the second call prints up to 500 rows
# instead of the truncated default view.
final_data[cpd_name == "Temsirolimus"]
print(final_data[cpd_name == "Temsirolimus"], nrows = 500)
# Assigned for Kidney Cancer, Good for Multiple Myeloma, Lymphoma (Non-Hodgkin), Leukemia (CLL),
# Breast Cancer (Breast Ductal), Ovarian Cancer (Adenocarcinoma),
# Lung Cancer (Mesothelioma), Endometrial/Uterine Cancer
# mTOR inhibitor, might explain efficacy in multiple cancers
# NOTE(review): the calls below do not query Temsirolimus. They load the "gnndrug_mut_cnv"
# attributions for Bosutinib / PC14 and look up MOLM13; this looks like an unfinished
# template - TODO replace with the intended drug / cell line.
mut_cnv_integ <- get_all_interpret(data_types = "gnndrug_mut_cnv", split_type = "CELL_LINE")
bosutinib_pc14_attrs <- get_top_attrs(mut_cnv_integ, compound_name = "Bosutinib", cell_line_name = "PC14")
# Top positive attributed variables:
head(bosutinib_pc14_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# Top negative attributed variables
# NOTE(review): `cabozantinib_molm13_attrs` is not assigned in this section; presumably it was
# created earlier in the file - verify. `tail(bosutinib_pc14_attrs)` may have been intended.
tail(cabozantinib_molm13_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# ==== Trametinib ====
# Show the rows of `final_data` for Trametinib.
final_data[cpd_name == "Trametinib"]
# Assigned for Skin Cancer, Good for Leukemia (AML)
# Used in patients with lymphatic edema that have a specific ARAF gene variation
# Trametinib inhibits RAS-mutant MLL-rearranged acute lymphoblastic leukemia at specific niche sites
# and reduces ERK phosphorylation in vivo (2018)
# NOTE(review): the calls below do not query Trametinib. They load the "gnndrug_mut_cnv"
# attributions for Bosutinib / PC14 and look up MOLM13; this looks like an unfinished
# template - TODO replace with the intended drug / cell line.
mut_cnv_integ <- get_all_interpret(data_types = "gnndrug_mut_cnv", split_type = "CELL_LINE")
bosutinib_pc14_attrs <- get_top_attrs(mut_cnv_integ, compound_name = "Bosutinib", cell_line_name = "PC14")
# Top positive attributed variables:
head(bosutinib_pc14_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# Top negative attributed variables
# NOTE(review): `cabozantinib_molm13_attrs` is not assigned in this section; presumably it was
# created earlier in the file - verify. `tail(bosutinib_pc14_attrs)` may have been intended.
tail(cabozantinib_molm13_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# ==== Vandetanib ====
# Show the rows of `final_data` for Vandetanib.
final_data[cpd_name == "Vandetanib"]
# Assigned for Thyroid Cancer, Good for Lung Cancer (NSCLC), Leukemia (AML), Head and Neck Cancer (Upper aerodigestive squamous)
# AstraZeneca tried Vandetanib for NSCLC but didn't see improved effect alongside chemotherapy
# NOTE(review): the calls below do not query Vandetanib. They load the "gnndrug_mut_cnv"
# attributions for Bosutinib / PC14 and look up MOLM13; this looks like an unfinished
# template - TODO replace with the intended drug / cell line.
mut_cnv_integ <- get_all_interpret(data_types = "gnndrug_mut_cnv", split_type = "CELL_LINE")
bosutinib_pc14_attrs <- get_top_attrs(mut_cnv_integ, compound_name = "Bosutinib", cell_line_name = "PC14")
# Top positive attributed variables:
head(bosutinib_pc14_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")
# Top negative attributed variables
# NOTE(review): `cabozantinib_molm13_attrs` is not assigned in this section; presumably it was
# created earlier in the file - verify. `tail(bosutinib_pc14_attrs)` may have been intended.
tail(cabozantinib_molm13_attrs)
get_expression_in_cell_line(data_type = "RPPA", cell_line_name = "MOLM13", variable_name = "B-Raf_Caution")