This is an R Markdown Notebook. When you execute code within the notebook, the results appear beneath the code.

Try executing this chunk by clicking the Run button within the chunk or by placing your cursor inside it and pressing Cmd+Shift+Enter.

library(reticulate)
library('caret')
library(spatialLIBD)
library(MLmetrics)

py_install("pandas")
py_install("numpy")
py_install("scikit-learn")
py_install("matplotlib")

sk <- import("sklearn")
pd <- import("pandas")
np <- import("numpy")
plt <- import("matplotlib")
sk_model_selection <- import("sklearn.model_selection")
sk_linear_model <- import("sklearn.linear_model")
sk_preprocessing <- import("sklearn.preprocessing")
pyplot <- import("matplotlib.pyplot")

Logistic Regression Classification from RGBG/HSL Image Spaces

sum(y[y > 0])
[1] 3590
split_data <- function(spe, g_name, logcounts, color_arr, isBinary) {
  gene_id <- rowData(spe)[rowData(spe)$gene_name == g_name,]$gene_id
  y <- logcounts[,gene_id]
  if (isBinary) {
    y[y > 0] <- 1
  }
  x <- color_arr
  
  temp <- sk_model_selection$train_test_split(x, y, test_size=0.1)
  x_train <- temp[1]
  x_test <- temp[2]
  y_train <- temp[3]
  y_test <- temp[4]
  list(x_train, x_test, y_train, y_test)
}

evaluate_model <- function(y_predict, y_true, g_name) {
  precision <- Precision(y_true, y_predict)
  f1 <- F1_Score(y_true, y_predict)
  recall <- Recall(y_true, y_predict)
  accuracy <- Accuracy(y_predict, y_true)
  print(cat(paste(g_name, paste("Accuracy: ", accuracy), paste('Recall: ', recall), paste('Precision: ', precision), paste('F1: ', f1), sep="\n")))

}

train <- function(spe, g_name, logcounts, color_arr, isBinary) {
  split_data <- split_data(spe, g_name, logcounts, color_arr, isBinary)

  x_train <- split_data[1][[1]][[1]]
  x_test <- split_data[2][[1]][[1]]
  y_train <- split_data[3][[1]][[1]]
  y_test <- np$ravel(split_data[4][[1]][[1]])
  
  scaler <- sk_preprocessing$StandardScaler()
  features_standardized <- scaler$fit_transform(x_train)
  test_standardized <- scaler$transform(x_test)
  
  # Logistic regression for binary classification
  clf <- sk_linear_model$LogisticRegression(random_state=as.integer(100))
  clf$fit(features_standardized, np$ravel(y_train))
  y_predict <- clf$predict(test_standardized)
  
  # Metrics for binary classification
  evaluate_model(y_predict, y_test, g_name)
  
  # Find the top 10 most entries most likely to be + gene expression value and observe their ground truth
  y_predict_proba <- clf$predict_proba(test_standardized)
  temp <- order(y_predict_proba[,2],decreasing=T)[1:10]
  print(paste("Accuracy of top 10 barcodes likely to have positive gene expression: ", sum(y_test[temp] == 1) / 10))
  cat('\n')
}
g_name <- "MOBP"
logcounts <- logcounts_151673_filtered
gene_id <- rowData(spe)[rowData(spe)$gene_name == g_name,]$gene_id
y <- logcounts[,gene_id]
#y[y > 0] <- 1
x <- data.frame(data[10], data[11])
scaler <- sk_preprocessing$StandardScaler()
features_standardized <- scaler$fit_transform(x)
test_standardized <- scaler$transform(x)
temp_df <- data.frame(test_standardized,y)
ggplot(temp_df, aes(x = temp_df[,'X1'], y=temp_df[,'X2'])) + geom_point(aes(color=temp_df[,'y'])) + theme(aspect.ratio = 1) + ggtitle(paste("Green and Saturation Features For ",g_name))

# Get list of genes with strongest correlations
top_genes <- rownames(top_corr)

for (g_name in c("MOBP", "CD74")) {
  train(spe, g_name, logcounts_151673_filtered, data.frame(data[7], data[11]), TRUE)
}
MOBP
Accuracy:  0.67966573816156
Recall:  0.815789473684211
Precision:  0.659574468085106
F1:  0.729411764705882NULL
[1] "Accuracy of top 10 barcodes likely to have positive gene expression:  0.9"

CD74
Accuracy:  0.573816155988858
Recall:  0.485714285714286
Precision:  0.574324324324324
F1:  0.526315789473684NULL
[1] "Accuracy of top 10 barcodes likely to have positive gene expression:  0.6"
LS0tCnRpdGxlOiAiUiBOb3RlYm9vayIKb3V0cHV0OiBodG1sX25vdGVib29rCi0tLQoKVGhpcyBpcyBhbiBbUiBNYXJrZG93bl0oaHR0cDovL3JtYXJrZG93bi5yc3R1ZGlvLmNvbSkgTm90ZWJvb2suIFdoZW4geW91IGV4ZWN1dGUgY29kZSB3aXRoaW4gdGhlIG5vdGVib29rLCB0aGUgcmVzdWx0cyBhcHBlYXIgYmVuZWF0aCB0aGUgY29kZS4gCgpUcnkgZXhlY3V0aW5nIHRoaXMgY2h1bmsgYnkgY2xpY2tpbmcgdGhlICpSdW4qIGJ1dHRvbiB3aXRoaW4gdGhlIGNodW5rIG9yIGJ5IHBsYWNpbmcgeW91ciBjdXJzb3IgaW5zaWRlIGl0IGFuZCBwcmVzc2luZyAqQ21kK1NoaWZ0K0VudGVyKi4gCgpgYGB7cn0KbGlicmFyeShyZXRpY3VsYXRlKQpsaWJyYXJ5KCdjYXJldCcpCmxpYnJhcnkoc3BhdGlhbExJQkQpCmxpYnJhcnkoTUxtZXRyaWNzKQoKcHlfaW5zdGFsbCgicGFuZGFzIikKcHlfaW5zdGFsbCgibnVtcHkiKQpweV9pbnN0YWxsKCJzY2lraXQtbGVhcm4iKQpweV9pbnN0YWxsKCJtYXRwbG90bGliIikKCnNrIDwtIGltcG9ydCgic2tsZWFybiIpCnBkIDwtIGltcG9ydCgicGFuZGFzIikKbnAgPC0gaW1wb3J0KCJudW1weSIpCnBsdCA8LSBpbXBvcnQoIm1hdHBsb3RsaWIiKQpza19tb2RlbF9zZWxlY3Rpb24gPC0gaW1wb3J0KCJza2xlYXJuLm1vZGVsX3NlbGVjdGlvbiIpCnNrX2xpbmVhcl9tb2RlbCA8LSBpbXBvcnQoInNrbGVhcm4ubGluZWFyX21vZGVsIikKc2tfcHJlcHJvY2Vzc2luZyA8LSBpbXBvcnQoInNrbGVhcm4ucHJlcHJvY2Vzc2luZyIpCnB5cGxvdCA8LSBpbXBvcnQoIm1hdHBsb3RsaWIucHlwbG90IikKYGBgCgojIyBMb2dpc3RpYyBSZWdyZXNzaW9uIENsYXNzaWZpY2F0aW9uIGZyb20gUkdCRy9IU0wgSW1hZ2UgU3BhY2VzCmBgYHtyfQp5IDwtIGxvZ2NvdW50c18xNTE2NzNfZmlsdGVyZWQKc3VtKHlbeSA+IDBdKQp5WyEoeSA+IDApXSA8LSAxCmBgYAoKCmBgYHtyfQpzcGxpdF9kYXRhIDwtIGZ1bmN0aW9uKHNwZSwgZ19uYW1lLCBsb2djb3VudHMsIGNvbG9yX2FyciwgaXNCaW5hcnkpIHsKICBnZW5lX2lkIDwtIHJvd0RhdGEoc3BlKVtyb3dEYXRhKHNwZSkkZ2VuZV9uYW1lID09IGdfbmFtZSxdJGdlbmVfaWQKICB5IDwtIGxvZ2NvdW50c1ssZ2VuZV9pZF0KICBpZiAoaXNCaW5hcnkpIHsKICAgIHlbeSA+IDBdIDwtIDEKICB9CiAgeCA8LSBjb2xvcl9hcnIKICAKICB0ZW1wIDwtIHNrX21vZGVsX3NlbGVjdGlvbiR0cmFpbl90ZXN0X3NwbGl0KHgsIHksIHRlc3Rfc2l6ZT0wLjEpCiAgeF90cmFpbiA8LSB0ZW1wWzFdCiAgeF90ZXN0IDwtIHRlbXBbMl0KICB5X3RyYWluIDwtIHRlbXBbM10KICB5X3Rlc3QgPC0gdGVtcFs0XQogIGxpc3QoeF90cmFpbiwgeF90ZXN0LCB5X3RyYWluLCB5X3Rlc3QpCn0KCmV2YWx1YXRlX21vZGVsIDwtIGZ1bmN0aW9uKHlfcHJlZGljdCwgeV90cnVlLCBnX25hbWUpIHsKICBwcmVjaXNpb24gPC0gUHJlY2lzaW9uKHlfdHJ1ZSwgeV9wcmVkaWN0KQogIGYxIDwtIEYxX1Njb3JlKHlfdHJ1ZSwgeV9wcmVkaWN0KQogIHJlY2FsbCA8LSBSZWNhbGwoeV90cnVlLCB5X3ByZWRpY3QpCiAgYWNjdXJhY3kgPC0gQWNjdXJhY3koeV9wcmVkaWN0LCB5X3RydWUpCiAgcHJpbnQoY2F0KHBhc3RlKGdfbmFtZSwgcGFzdGUoIkFjY3VyYWN5OiAiLCBhY2N1cmFjeSksIHBhc3RlKCdSZWNhbGw6ICcsIHJlY2FsbCksIHBhc3RlKCdQcmVjaXNpb246ICcsIHByZWNpc2lvbiksIHBhc3RlKCdGMTogJywgZjEpLCBzZXA9IlxuIikpKQoKfQoKdHJhaW4gPC0gZnVuY3Rpb24oc3BlLCBnX25hbWUsIGxvZ2NvdW50cywgY29sb3JfYXJyLCBpc0JpbmFyeSkgewogIHNwbGl0X2RhdGEgPC0gc3BsaXRfZGF0YShzcGUsIGdfbmFtZSwgbG9nY291bnRzLCBjb2xvcl9hcnIsIGlzQmluYXJ5KQoKICB4X3RyYWluIDwtIHNwbGl0X2RhdGFbMV1bWzFdXVtbMV1dCiAgeF90ZXN0IDwtIHNwbGl0X2RhdGFbMl1bWzFdXVtbMV1dCiAgeV90cmFpbiA8LSBzcGxpdF9kYXRhWzNdW1sxXV1bWzFdXQogIHlfdGVzdCA8LSBucCRyYXZlbChzcGxpdF9kYXRhWzRdW1sxXV1bWzFdXSkKICAKICBzY2FsZXIgPC0gc2tfcHJlcHJvY2Vzc2luZyRTdGFuZGFyZFNjYWxlcigpCiAgZmVhdHVyZXNfc3RhbmRhcmRpemVkIDwtIHNjYWxlciRmaXRfdHJhbnNmb3JtKHhfdHJhaW4pCiAgdGVzdF9zdGFuZGFyZGl6ZWQgPC0gc2NhbGVyJHRyYW5zZm9ybSh4X3Rlc3QpCiAgCiAgIyBMb2dpc3RpYyByZWdyZXNzaW9uIGZvciBiaW5hcnkgY2xhc3NpZmljYXRpb24KICBjbGYgPC0gc2tfbGluZWFyX21vZGVsJExvZ2lzdGljUmVncmVzc2lvbihyYW5kb21fc3RhdGU9YXMuaW50ZWdlcigxMDApKQogIGNsZiRmaXQoZmVhdHVyZXNfc3RhbmRhcmRpemVkLCBucCRyYXZlbCh5X3RyYWluKSkKICB5X3ByZWRpY3QgPC0gY2xmJHByZWRpY3QodGVzdF9zdGFuZGFyZGl6ZWQpCiAgCiAgIyBNZXRyaWNzIGZvciBiaW5hcnkgY2xhc3NpZmljYXRpb24KICBldmFsdWF0ZV9tb2RlbCh5X3ByZWRpY3QsIHlfdGVzdCwgZ19uYW1lKQogIAogICMgRmluZCB0aGUgdG9wIDEwIG1vc3QgZW50cmllcyBtb3N0IGxpa2VseSB0byBiZSArIGdlbmUgZXhwcmVzc2lvbiB2YWx1ZSBhbmQgb2JzZXJ2ZSB0aGVpciBncm91bmQgdHJ1dGgKICB5X3ByZWRpY3RfcHJvYmEgPC0gY2xmJHByZWRpY3RfcHJvYmEodGVzdF9zdGFuZGFyZGl6ZWQpCiAgdGVtcCA8LSBvcmRlcih5X3ByZWRpY3RfcHJvYmFbLDJdLGRlY3JlYXNpbmc9VClbMToxMF0KICBwcmludChwYXN0ZSgiQWNjdXJhY3kgb2YgdG9wIDEwIGJhcmNvZGVzIGxpa2VseSB0byBoYXZlIHBvc2l0aXZlIGdlbmUgZXhwcmVzc2lvbjogIiwgc3VtKHlfdGVzdFt0ZW1wXSA9PSAxKSAvIDEwKSkKICBjYXQoJ1xuJykKfQpgYGAKCmBgYHtyfQpnX25hbWUgPC0gIk1PQlAiCmxvZ2NvdW50cyA8LSBsb2djb3VudHNfMTUxNjczX2ZpbHRlcmVkCmdlbmVfaWQgPC0gcm93RGF0YShzcGUpW3Jvd0RhdGEoc3BlKSRnZW5lX25hbWUgPT0gZ19uYW1lLF0kZ2VuZV9pZAp5IDwtIGxvZ2NvdW50c1ssZ2VuZV9pZF0KI3lbeSA+IDBdIDwtIDEKeCA8LSBkYXRhLmZyYW1lKGRhdGFbMTBdLCBkYXRhWzExXSkKc2NhbGVyIDwtIHNrX3ByZXByb2Nlc3NpbmckU3RhbmRhcmRTY2FsZXIoKQpmZWF0dXJlc19zdGFuZGFyZGl6ZWQgPC0gc2NhbGVyJGZpdF90cmFuc2Zvcm0oeCkKdGVzdF9zdGFuZGFyZGl6ZWQgPC0gc2NhbGVyJHRyYW5zZm9ybSh4KQp0ZW1wX2RmIDwtIGRhdGEuZnJhbWUodGVzdF9zdGFuZGFyZGl6ZWQseSkKZ2dwbG90KHRlbXBfZGYsIGFlcyh4ID0gdGVtcF9kZlssJ1gxJ10sIHk9dGVtcF9kZlssJ1gyJ10pKSArIGdlb21fcG9pbnQoYWVzKGNvbG9yPXRlbXBfZGZbLCd5J10pKSArIHRoZW1lKGFzcGVjdC5yYXRpbyA9IDEpICsgZ2d0aXRsZShwYXN0ZSgiR3JlZW4gYW5kIFNhdHVyYXRpb24gRmVhdHVyZXMgRm9yICIsZ19uYW1lKSkKYGBgCgpgYGB7cn0KIyBHZXQgbGlzdCBvZiBnZW5lcyB3aXRoIHN0cm9uZ2VzdCBjb3JyZWxhdGlvbnMKdG9wX2dlbmVzIDwtIHJvd25hbWVzKHRvcF9jb3JyKQoKZm9yIChnX25hbWUgaW4gYygiTU9CUCIsICJDRDc0IikpIHsKICB0cmFpbihzcGUsIGdfbmFtZSwgbG9nY291bnRzXzE1MTY3M19maWx0ZXJlZCwgZGF0YS5mcmFtZShkYXRhWzddLCBkYXRhWzExXSksIFRSVUUpCn0KYGBgCmBgYHtyfQoKYGBgCgoK