[63133d]: / code_snapshots / R / LIBRA_code / Metrics_LIBRA.R

Download this file

199 lines (161 with data), 8.2 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
#Summarizations of metrics computed on LIBRA
################################
setwd("/your_output_path_from_LIBRA")
#Summarize JACARD distances into 1 value (PPJI)
latent_dim_val = c(5,10,20,40,60)
#latent_dim_val = c(10) for explicit case
for (m in latent_dim_val){
jacard_vector_rna = c()
jacard_vector_rna_test = c()
jacard_vector_atac = c()
for (n in 1:10){
load(paste0("RNA_and_ATAC_",m,"_nodes_",n,"interaction.RData"))
#RNA
jacard=PairWiseJaccardSetsHeatmap(pbmc_rna@active.ident, pbmc.atac_libra@active.ident, #pbmc.atac_libra and pbmc.rna_libra are the same
show_row_dend = F, show_column_dend = F,
cluster_row = F, cluster_column =F)
#PPJI summarization
jacard_distance = jacard@matrix
is.na(jacard_distance) <- jacard_distance==0
jacard_distance = jacard_distance[,colSums(is.na(jacard_distance))<nrow(jacard_distance)]
jacard_vector_rna = append(jacard_vector_rna, mean(apply(jacard_distance,1,sum, na.rm=TRUE)))
print(jacard_vector_rna)
#Validation split data
validation_split_cells = tail(names(pbmc.rna@active.ident),floor((length(names(pbmc.rna@active.ident))*20)/100)) #If validation_split = 0.2
#Compare to original clustering using JACARD ONLY ON TEST DATA
A = pbmc.rna@active.ident[names(pbmc.rna@active.ident) %in% validation_split_cells]
B = pbmc.rna_libra@active.ident[names(pbmc.rna_libra@active.ident) %in% validation_split_cells]
jacard=PairWiseJaccardSetsHeatmap(A, B,
show_row_dend = F, show_column_dend = F,
cluster_row = F, cluster_column =F)
#Mean for non 0 vales
jacard_distance = jacard@matrix
is.na(jacard_distance) <- jacard_distance==0
jacard_distance = jacard_distance[,colSums(is.na(jacard_distance))<nrow(jacard_distance)]
jacard_vector_rna_test = append(jacard_vector_rna_test, mean(apply(jacard_distance,1,sum, na.rm=TRUE)))
#ATAC
jacard=PairWiseJaccardSetsHeatmap(pbmc_atac@active.ident, pbmc.atac_libra@active.ident, #pbmc.atac_libra and pbmc.rna_libra are the same
show_row_dend = F, show_column_dend = F,
cluster_row = F, cluster_column =F)
#Meand for non 0 vales
jacard_distance = jacard@matrix
is.na(jacard_distance) <- jacard_distance==0
jacard_distance = jacard_distance[,colSums(is.na(jacard_distance))<nrow(jacard_distance)]
jacard_vector_atac = append(jacard_vector_atac, mean(apply(jacard_distance,1,sum, na.rm=TRUE)))
}
write.table(jacard_vector_rna, file=paste0("RNA_nodes_", m,"Jacard_RNA_summary.txt"), sep="\t")
write.table(jacard_vector_rna_test, file=paste0("RNA_test_nodes_", m,"Jacard_RNA_summary.txt"), sep="\t")
write.table(jacard_vector_atac, file=paste0("ATAC_nodes_", m,"Jacard_ATAC_summary.txt"), sep="\t")
}
#Highlight bimodal cells where are there? (test cells)
for (m in latent_dim_val){
percentage_bimodal_test_cells = c()
for (n in 1:10){
load(paste0("RNA_and_ATAC_",m,"_nodes_",n,"interaction.RData"))
#Get distances RNA to ATAC in shared space / OBTAIN THE LOCAL MINIMUN
minimun_local = distance_vector[which.min(abs(diff(distance_vector)))]
#Get the local minimun to the min distances
distance_vector[distance_vector < minimun_local] <- 0
#Get the local minimun to the max distances
distance_vector[distance_vector >= minimun_local] <- 1
pbmc.atac@meta.data$distance_vector = distance_vector
bimodal_state = rownames(pbmc.atac@meta.data)[pbmc.atac@meta.data$distance_vector == 1]
percentage_bimodal_test_cells = append(percentage_bimodal_test_cells,(sum(bimodal_state %in% validation_split_cells) * 100) / length(validation_split_cells))
}
percentage_bimodal_test_cells_mean = mean(percentage_bimodal_test_cells)
percentage_bimodal_test_cells_median = median(percentage_bimodal_test_cells)
percentage_bimodal_test_cells_sd = sd(percentage_bimodal_test_cells)
percentage_bimodal_test_cells = append(percentage_bimodal_test_cells, percentage_bimodal_test_cells_mean)
percentage_bimodal_test_cells = append(percentage_bimodal_test_cells, percentage_bimodal_test_cells_median)
percentage_bimodal_test_cells = append(percentage_bimodal_test_cells, percentage_bimodal_test_cells_sd)
write.table(percentage_bimodal_test_cells, file=paste0("BIMODAL_CELLS", m,"_explanation_test_cells.txt"), sep="\t")
percentage_bimodal_test_cells_mean
}
#Summarize distances of bimodal_1 and bimodal_2
latent_dim_val = c(5,10,20,40,60)
#latent_dim_val = c(10) for explicit case
for (m in latent_dim_val){
bimodal_1_all = c()
bimodal_2_all = c()
bimodal_1_mean_all = c()
bimodal_1_mean_all = c()
bimodal_1_mean_all = c()
bimodal_2_mean_all = c()
bimodal_2_mean_all = c()
bimodal_2_mean_all = c()
bimodal_1_mean = c()
bimodal_1_median = c()
bimodal_1_sd = c()
bimodal_2_mean = c()
bimodal_2_median = c()
bimodal_2_sd = c()
for (n in 1:10){
load(paste0("RNA_and_ATAC_",m,"_nodes_",n,"interaction.RData"))
#Get distances RNA to ATAC in shared space / OBTAIN THE LOCAL MINIMUN
d <- density(distance_vector)
minimun_local = optimize(approxfun(d$x,d$y),interval=c(((min(distance_vector)*125)/100),((max(distance_vector)*60)/100)))$minimum
#Min distance
min_distance = min(distance_vector)
modal_1 = distance_vector[distance_vector < minimun_local & distance_vector > min_distance]
#Max distance
max_distance = max(distance_vector)
modal_2 = distance_vector[distance_vector > minimun_local & distance_vector < max_distance]
mean_range = (abs(min_distance - minimun_local) / 4)
#modal_1
h = hist(modal_1, breaks=100000)
i = which.max(h$counts)
max_peak_modal_1 = h$mids[i]
peak_upper = max_peak_modal_1 + mean_range
peak_botton = max_peak_modal_1 - mean_range
if (peak_upper < max(modal_1)) {
peak_upper = peak_upper
} else {
peak_upper = max(modal_1)
}
if (peak_botton > min(modal_1)) {
peak_botton = peak_botton
} else {
peak_botton = min(modal_1)
}
modal_1_peak_zone = modal_1[(modal_1 > peak_botton) & (modal_1 < peak_upper)]
bimodal_1_mean = append(bimodal_1_mean, mean(modal_1_peak_zone))
bimodal_1_median = append(bimodal_1_median, median(modal_1_peak_zone))
bimodal_1_sd = append(bimodal_1_sd, sd(modal_1_peak_zone))
#modal_2
h = hist(modal_2, breaks=100000)
i = which.max(h$counts)
max_peak_modal_2 = h$mids[i]
peak_upper = max_peak_modal_2 + mean_range
peak_botton = max_peak_modal_2 - mean_range
if (peak_upper < max(modal_2)) {
peak_upper = peak_upper
} else {
peak_upper = max(modal_2)
}
if (peak_botton > min(modal_2)) {
peak_botton = peak_botton
} else {
peak_botton = min(modal_2)
}
modal_2_peak_zone = modal_2[(modal_2 > peak_botton) & (modal_2 < peak_upper)]
bimodal_2_mean = append(bimodal_2_mean, mean(modal_2_peak_zone))
bimodal_2_median = append(bimodal_2_median, median(modal_2_peak_zone))
bimodal_2_sd = append(bimodal_2_sd, sd(modal_2_peak_zone))
}
bimodal_1_mean_all = mean(bimodal_1_mean)
bimodal_1_median_all = mean(bimodal_1_median)
bimodal_1_sd_all = mean(bimodal_1_sd)
bimodal_2_mean_all = mean(bimodal_2_mean)
bimodal_2_median_all = mean(bimodal_2_median)
bimodal_2_sd_all = mean(bimodal_2_sd)
bimodal_1_all = append(bimodal_1_all, bimodal_1_mean_all)
bimodal_1_all = append(bimodal_1_all, bimodal_1_median_all)
bimodal_1_all = append(bimodal_1_all, bimodal_1_sd_all)
print(bimodal_1_all)
bimodal_2_all = append(bimodal_2_all, bimodal_2_mean_all)
bimodal_2_all = append(bimodal_2_all, bimodal_2_median_all)
bimodal_2_all = append(bimodal_2_all, bimodal_2_sd_all)
print(bimodal_2_all)
write.table(bimodal_1_all, file=paste0("BIMODAL_CELLS", m,"modal_1_peak_zone.txt"), sep="\t")
write.table(bimodal_2_all, file=paste0("BIMODAL_CELLS", m,"modal_2_peak_zone.txt"), sep="\t")
}