This is an R Markdown
Notebook. When you execute code within the notebook, the results appear
beneath the code.
Try executing this chunk by clicking the Run button within
the chunk or by placing your cursor inside it and pressing
Cmd+Shift+Enter.
library(reticulate)
library('caret')
library(spatialLIBD)
library(MLmetrics)
py_install("pandas")
py_install("numpy")
py_install("scikit-learn")
py_install("matplotlib")
sk <- import("sklearn")
pd <- import("pandas")
np <- import("numpy")
plt <- import("matplotlib")
sk_model_selection <- import("sklearn.model_selection")
sk_linear_model <- import("sklearn.linear_model")
sk_preprocessing <- import("sklearn.preprocessing")
pyplot <- import("matplotlib.pyplot")
Logistic Regression Classification from RGBG/HSL Image Spaces
sum(y[y > 0])
[1] 3590
split_data <- function(spe, g_name, logcounts, color_arr, isBinary) {
gene_id <- rowData(spe)[rowData(spe)$gene_name == g_name,]$gene_id
y <- logcounts[,gene_id]
if (isBinary) {
y[y > 0] <- 1
}
x <- color_arr
temp <- sk_model_selection$train_test_split(x, y, test_size=0.1)
x_train <- temp[1]
x_test <- temp[2]
y_train <- temp[3]
y_test <- temp[4]
list(x_train, x_test, y_train, y_test)
}
evaluate_model <- function(y_predict, y_true, g_name) {
precision <- Precision(y_true, y_predict)
f1 <- F1_Score(y_true, y_predict)
recall <- Recall(y_true, y_predict)
accuracy <- Accuracy(y_predict, y_true)
print(cat(paste(g_name, paste("Accuracy: ", accuracy), paste('Recall: ', recall), paste('Precision: ', precision), paste('F1: ', f1), sep="\n")))
}
train <- function(spe, g_name, logcounts, color_arr, isBinary) {
split_data <- split_data(spe, g_name, logcounts, color_arr, isBinary)
x_train <- split_data[1][[1]][[1]]
x_test <- split_data[2][[1]][[1]]
y_train <- split_data[3][[1]][[1]]
y_test <- np$ravel(split_data[4][[1]][[1]])
scaler <- sk_preprocessing$StandardScaler()
features_standardized <- scaler$fit_transform(x_train)
test_standardized <- scaler$transform(x_test)
# Logistic regression for binary classification
clf <- sk_linear_model$LogisticRegression(random_state=as.integer(100))
clf$fit(features_standardized, np$ravel(y_train))
y_predict <- clf$predict(test_standardized)
# Metrics for binary classification
evaluate_model(y_predict, y_test, g_name)
# Find the top 10 most entries most likely to be + gene expression value and observe their ground truth
y_predict_proba <- clf$predict_proba(test_standardized)
temp <- order(y_predict_proba[,2],decreasing=T)[1:10]
print(paste("Accuracy of top 10 barcodes likely to have positive gene expression: ", sum(y_test[temp] == 1) / 10))
cat('\n')
}
g_name <- "MOBP"
logcounts <- logcounts_151673_filtered
gene_id <- rowData(spe)[rowData(spe)$gene_name == g_name,]$gene_id
y <- logcounts[,gene_id]
#y[y > 0] <- 1
x <- data.frame(data[10], data[11])
scaler <- sk_preprocessing$StandardScaler()
features_standardized <- scaler$fit_transform(x)
test_standardized <- scaler$transform(x)
temp_df <- data.frame(test_standardized,y)
ggplot(temp_df, aes(x = temp_df[,'X1'], y=temp_df[,'X2'])) + geom_point(aes(color=temp_df[,'y'])) + theme(aspect.ratio = 1) + ggtitle(paste("Green and Saturation Features For ",g_name))

# Get list of genes with strongest correlations
top_genes <- rownames(top_corr)
for (g_name in c("MOBP", "CD74")) {
train(spe, g_name, logcounts_151673_filtered, data.frame(data[7], data[11]), TRUE)
}
MOBP
Accuracy: 0.67966573816156
Recall: 0.815789473684211
Precision: 0.659574468085106
F1: 0.729411764705882NULL
[1] "Accuracy of top 10 barcodes likely to have positive gene expression: 0.9"
CD74
Accuracy: 0.573816155988858
Recall: 0.485714285714286
Precision: 0.574324324324324
F1: 0.526315789473684NULL
[1] "Accuracy of top 10 barcodes likely to have positive gene expression: 0.6"
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KbGlicmFyeShyZXRpY3VsYXRlKQpsaWJyYXJ5KCdjYXJldCcpCmxpYnJhcnkoc3BhdGlhbExJQkQpCmxpYnJhcnkoTUxtZXRyaWNzKQoKcHlfaW5zdGFsbCgicGFuZGFzIikKcHlfaW5zdGFsbCgibnVtcHkiKQpweV9pbnN0YWxsKCJzY2lraXQtbGVhcm4iKQpweV9pbnN0YWxsKCJtYXRwbG90bGliIikKCnNrIDwtIGltcG9ydCgic2tsZWFybiIpCnBkIDwtIGltcG9ydCgicGFuZGFzIikKbnAgPC0gaW1wb3J0KCJudW1weSIpCnBsdCA8LSBpbXBvcnQoIm1hdHBsb3RsaWIiKQpza19tb2RlbF9zZWxlY3Rpb24gPC0gaW1wb3J0KCJza2xlYXJuLm1vZGVsX3NlbGVjdGlvbiIpCnNrX2xpbmVhcl9tb2RlbCA8LSBpbXBvcnQoInNrbGVhcm4ubGluZWFyX21vZGVsIikKc2tfcHJlcHJvY2Vzc2luZyA8LSBpbXBvcnQoInNrbGVhcm4ucHJlcHJvY2Vzc2luZyIpCnB5cGxvdCA8LSBpbXBvcnQoIm1hdHBsb3RsaWIucHlwbG90IikKYGBgCgojIyBMb2dpc3RpYyBSZWdyZXNzaW9uIENsYXNzaWZpY2F0aW9uIGZyb20gUkdCRy9IU0wgSW1hZ2UgU3BhY2VzCmBgYHtyfQp5IDwtIGxvZ2NvdW50c18xNTE2NzNfZmlsdGVyZWQKc3VtKHlbeSA+IDBdKQp5WyEoeSA+IDApXSA8LSAxCmBgYAoKCmBgYHtyfQpzcGxpdF9kYXRhIDwtIGZ1bmN0aW9uKHNwZSwgZ19uYW1lLCBsb2djb3VudHMsIGNvbG9yX2FyciwgaXNCaW5hcnkpIHsKICBnZW5lX2lkIDwtIHJvd0RhdGEoc3BlKVtyb3dEYXRhKHNwZSkkZ2VuZV9uYW1lID09IGdfbmFtZSxdJGdlbmVfaWQKICB5IDwtIGxvZ2NvdW50c1ssZ2VuZV9pZF0KICBpZiAoaXNCaW5hcnkpIHsKICAgIHlbeSA+IDBdIDwtIDEKICB9CiAgeCA8LSBjb2xvcl9hcnIKICAKICB0ZW1wIDwtIHNrX21vZGVsX3NlbGVjdGlvbiR0cmFpbl90ZXN0X3NwbGl0KHgsIHksIHRlc3Rfc2l6ZT0wLjEpCiAgeF90cmFpbiA8LSB0ZW1wWzFdCiAgeF90ZXN0IDwtIHRlbXBbMl0KICB5X3RyYWluIDwtIHRlbXBbM10KICB5X3Rlc3QgPC0gdGVtcFs0XQogIGxpc3QoeF90cmFpbiwgeF90ZXN0LCB5X3RyYWluLCB5X3Rlc3QpCn0KCmV2YWx1YXRlX21vZGVsIDwtIGZ1bmN0aW9uKHlfcHJlZGljdCwgeV90cnVlLCBnX25hbWUpIHsKICBwcmVjaXNpb24gPC0gUHJlY2lzaW9uKHlfdHJ1ZSwgeV9wcmVkaWN0KQogIGYxIDwtIEYxX1Njb3JlKHlfdHJ1ZSwgeV9wcmVkaWN0KQogIHJlY2FsbCA8LSBSZWNhbGwoeV90cnVlLCB5X3ByZWRpY3QpCiAgYWNjdXJhY3kgPC0gQWNjdXJhY3koeV9wcmVkaWN0LCB5X3RydWUpCiAgcHJpbnQoY2F0KHBhc3RlKGdfbmFtZSwgcGFzdGUoIkFjY3VyYWN5OiAiLCBhY2N1cmFjeSksIHBhc3RlKCdSZWNhbGw6ICcsIHJlY2FsbCksIHBhc3RlKCdQcmVjaXNpb246ICcsIHByZWNpc2lvbiksIHBhc3RlKCdGMTogJywgZjEpLCBzZXA9IlxuIikpKQoKfQoKdHJhaW4gPC0gZnVuY3Rpb24oc3BlLCBnX25hbWUsIGxvZ2NvdW50cywgY29sb3JfYXJyLCBpc0JpbmFyeSkgewogIHNwbGl0X2RhdGEgPC0gc3BsaXRfZGF0YShzcGUsIGdfbmFtZSwgbG9nY291bnRzLCBjb2xvcl9hcnIsIGlzQmluYXJ5KQoKICB4X3RyYWluIDwtIHNwbGl0X2RhdGFbMV1bWzFdXVtbMV1dCiAgeF90ZXN0IDwtIHNwbGl0X2RhdGFbMl1bWzFdXVtbMV1dCiAgeV90cmFpbiA8LSBzcGxpdF9kYXRhWzNdW1sxXV1bWzFdXQogIHlfdGVzdCA8LSBucCRyYXZlbChzcGxpdF9kYXRhWzRdW1sxXV1bWzFdXSkKICAKICBzY2FsZXIgPC0gc2tfcHJlcHJvY2Vzc2luZyRTdGFuZGFyZFNjYWxlcigpCiAgZmVhdHVyZXNfc3RhbmRhcmRpemVkIDwtIHNjYWxlciRmaXRfdHJhbnNmb3JtKHhfdHJhaW4pCiAgdGVzdF9zdGFuZGFyZGl6ZWQgPC0gc2NhbGVyJHRyYW5zZm9ybSh4X3Rlc3QpCiAgCiAgIyBMb2dpc3RpYyByZWdyZXNzaW9uIGZvciBiaW5hcnkgY2xhc3NpZmljYXRpb24KICBjbGYgPC0gc2tfbGluZWFyX21vZGVsJExvZ2lzdGljUmVncmVzc2lvbihyYW5kb21fc3RhdGU9YXMuaW50ZWdlcigxMDApKQogIGNsZiRmaXQoZmVhdHVyZXNfc3RhbmRhcmRpemVkLCBucCRyYXZlbCh5X3RyYWluKSkKICB5X3ByZWRpY3QgPC0gY2xmJHByZWRpY3QodGVzdF9zdGFuZGFyZGl6ZWQpCiAgCiAgIyBNZXRyaWNzIGZvciBiaW5hcnkgY2xhc3NpZmljYXRpb24KICBldmFsdWF0ZV9tb2RlbCh5X3ByZWRpY3QsIHlfdGVzdCwgZ19uYW1lKQogIAogICMgRmluZCB0aGUgdG9wIDEwIG1vc3QgZW50cmllcyBtb3N0IGxpa2VseSB0byBiZSArIGdlbmUgZXhwcmVzc2lvbiB2YWx1ZSBhbmQgb2JzZXJ2ZSB0aGVpciBncm91bmQgdHJ1dGgKICB5X3ByZWRpY3RfcHJvYmEgPC0gY2xmJHByZWRpY3RfcHJvYmEodGVzdF9zdGFuZGFyZGl6ZWQpCiAgdGVtcCA8LSBvcmRlcih5X3ByZWRpY3RfcHJvYmFbLDJdLGRlY3JlYXNpbmc9VClbMToxMF0KICBwcmludChwYXN0ZSgiQWNjdXJhY3kgb2YgdG9wIDEwIGJhcmNvZGVzIGxpa2VseSB0byBoYXZlIHBvc2l0aXZlIGdlbmUgZXhwcmVzc2lvbjogIiwgc3VtKHlfdGVzdFt0ZW1wXSA9PSAxKSAvIDEwKSkKICBjYXQoJ1xuJykKfQpgYGAKCmBgYHtyfQpnX25hbWUgPC0gIk1PQlAiCmxvZ2NvdW50cyA8LSBsb2djb3VudHNfMTUxNjczX2ZpbHRlcmVkCmdlbmVfaWQgPC0gcm93RGF0YShzcGUpW3Jvd0RhdGEoc3BlKSRnZW5lX25hbWUgPT0gZ19uYW1lLF0kZ2VuZV9pZAp5IDwtIGxvZ2NvdW50c1ssZ2VuZV9pZF0KI3lbeSA+IDBdIDwtIDEKeCA8LSBkYXRhLmZyYW1lKGRhdGFbMTBdLCBkYXRhWzExXSkKc2NhbGVyIDwtIHNrX3ByZXByb2Nlc3NpbmckU3RhbmRhcmRTY2FsZXIoKQpmZWF0dXJlc19zdGFuZGFyZGl6ZWQgPC0gc2NhbGVyJGZpdF90cmFuc2Zvcm0oeCkKdGVzdF9zdGFuZGFyZGl6ZWQgPC0gc2NhbGVyJHRyYW5zZm9ybSh4KQp0ZW1wX2RmIDwtIGRhdGEuZnJhbWUodGVzdF9zdGFuZGFyZGl6ZWQseSkKZ2dwbG90KHRlbXBfZGYsIGFlcyh4ID0gdGVtcF9kZlssJ1gxJ10sIHk9dGVtcF9kZlssJ1gyJ10pKSArIGdlb21fcG9pbnQoYWVzKGNvbG9yPXRlbXBfZGZbLCd5J10pKSArIHRoZW1lKGFzcGVjdC5yYXRpbyA9IDEpICsgZ2d0aXRsZShwYXN0ZSgiR3JlZW4gYW5kIFNhdHVyYXRpb24gRmVhdHVyZXMgRm9yICIsZ19uYW1lKSkKYGBgCgpgYGB7cn0KIyBHZXQgbGlzdCBvZiBnZW5lcyB3aXRoIHN0cm9uZ2VzdCBjb3JyZWxhdGlvbnMKdG9wX2dlbmVzIDwtIHJvd25hbWVzKHRvcF9jb3JyKQoKZm9yIChnX25hbWUgaW4gYygiTU9CUCIsICJDRDc0IikpIHsKICB0cmFpbihzcGUsIGdfbmFtZSwgbG9nY291bnRzXzE1MTY3M19maWx0ZXJlZCwgZGF0YS5mcmFtZShkYXRhWzddLCBkYXRhWzExXSksIFRSVUUpCn0KYGBgCmBgYHtyfQoKYGBgCgoK