Diff of /Models.Rmd [000000] .. [c9fae8]

Switch to unified view

a b/Models.Rmd
1
---
2
title: "R Notebook"
3
output: html_notebook
4
---
5
6
This is an [R Markdown](http://rmarkdown.rstudio.com) Notebook. When you execute code within the notebook, the results appear beneath the code. 
7
8
Try executing this chunk by clicking the *Run* button within the chunk or by placing your cursor inside it and pressing *Cmd+Shift+Enter*. 
9
10
```{r}
11
library(reticulate)
12
library('caret')
13
library(spatialLIBD)
14
library(MLmetrics)
15
16
py_install("pandas")
17
py_install("numpy")
18
py_install("scikit-learn")
19
py_install("matplotlib")
20
21
sk <- import("sklearn")
22
pd <- import("pandas")
23
np <- import("numpy")
24
plt <- import("matplotlib")
25
sk_model_selection <- import("sklearn.model_selection")
26
sk_linear_model <- import("sklearn.linear_model")
27
sk_preprocessing <- import("sklearn.preprocessing")
28
pyplot <- import("matplotlib.pyplot")
29
```
30
31
## Logistic Regression Classification from RGBG/HSL Image Spaces
32
```{r}
33
y <- logcounts_151673_filtered
34
sum(y[y > 0])
35
y[!(y > 0)] <- 1
36
```
37
38
39
```{r}
40
split_data <- function(spe, g_name, logcounts, color_arr, isBinary) {
41
  gene_id <- rowData(spe)[rowData(spe)$gene_name == g_name,]$gene_id
42
  y <- logcounts[,gene_id]
43
  if (isBinary) {
44
    y[y > 0] <- 1
45
  }
46
  x <- color_arr
47
  
48
  temp <- sk_model_selection$train_test_split(x, y, test_size=0.1)
49
  x_train <- temp[1]
50
  x_test <- temp[2]
51
  y_train <- temp[3]
52
  y_test <- temp[4]
53
  list(x_train, x_test, y_train, y_test)
54
}
55
56
evaluate_model <- function(y_predict, y_true, g_name) {
57
  precision <- Precision(y_true, y_predict)
58
  f1 <- F1_Score(y_true, y_predict)
59
  recall <- Recall(y_true, y_predict)
60
  accuracy <- Accuracy(y_predict, y_true)
61
  print(cat(paste(g_name, paste("Accuracy: ", accuracy), paste('Recall: ', recall), paste('Precision: ', precision), paste('F1: ', f1), sep="\n")))
62
63
}
64
65
train <- function(spe, g_name, logcounts, color_arr, isBinary) {
66
  split_data <- split_data(spe, g_name, logcounts, color_arr, isBinary)
67
68
  x_train <- split_data[1][[1]][[1]]
69
  x_test <- split_data[2][[1]][[1]]
70
  y_train <- split_data[3][[1]][[1]]
71
  y_test <- np$ravel(split_data[4][[1]][[1]])
72
  
73
  scaler <- sk_preprocessing$StandardScaler()
74
  features_standardized <- scaler$fit_transform(x_train)
75
  test_standardized <- scaler$transform(x_test)
76
  
77
  # Logistic regression for binary classification
78
  clf <- sk_linear_model$LogisticRegression(random_state=as.integer(100))
79
  clf$fit(features_standardized, np$ravel(y_train))
80
  y_predict <- clf$predict(test_standardized)
81
  
82
  # Metrics for binary classification
83
  evaluate_model(y_predict, y_test, g_name)
84
  
85
  # Find the top 10 most entries most likely to be + gene expression value and observe their ground truth
86
  y_predict_proba <- clf$predict_proba(test_standardized)
87
  temp <- order(y_predict_proba[,2],decreasing=T)[1:10]
88
  print(paste("Accuracy of top 10 barcodes likely to have positive gene expression: ", sum(y_test[temp] == 1) / 10))
89
  cat('\n')
90
}
91
```
92
93
```{r}
94
g_name <- "MOBP"
95
logcounts <- logcounts_151673_filtered
96
gene_id <- rowData(spe)[rowData(spe)$gene_name == g_name,]$gene_id
97
y <- logcounts[,gene_id]
98
#y[y > 0] <- 1
99
x <- data.frame(data[10], data[11])
100
scaler <- sk_preprocessing$StandardScaler()
101
features_standardized <- scaler$fit_transform(x)
102
test_standardized <- scaler$transform(x)
103
temp_df <- data.frame(test_standardized,y)
104
ggplot(temp_df, aes(x = temp_df[,'X1'], y=temp_df[,'X2'])) + geom_point(aes(color=temp_df[,'y'])) + theme(aspect.ratio = 1) + ggtitle(paste("Green and Saturation Features For ",g_name))
105
```
106
107
```{r}
108
# Get list of genes with strongest correlations
109
top_genes <- rownames(top_corr)
110
111
for (g_name in c("MOBP", "CD74")) {
112
  train(spe, g_name, logcounts_151673_filtered, data.frame(data[7], data[11]), TRUE)
113
}
114
```
115
```{r}
116
117
```
118
119