|
a |
|
b/paper/Benchmarking with real and synthetic datasets/run_other_methods.R |
|
|
1 |
library(dyno) |
|
|
2 |
library(tidyverse) |
|
|
3 |
library(Matrix) |
|
|
4 |
library(hdf5r) |
|
|
5 |
library(Seurat) |
|
|
6 |
|
|
|
7 |
method_names <- c('paga','slingshot','monocle') |
|
|
8 |
container_names <- c('jaydu/ti_paga_tree:v2.0.0','dynverse/ti_slingshot:v2.0.1','jaydu/ti_monocle3:v3.0.0') |
|
|
9 |
|
|
|
10 |
path <- "../../data/*.h5"#"/Users/dujinhong/Downloads/new dataset/data_h5/*.h5"# |
|
|
11 |
save_path <- "/Users/dujinhong/Downloads/result/"#"result/other methods/" |
|
|
12 |
|
|
|
13 |
for(load_file in Sys.glob(path)){ |
|
|
14 |
filename <- gsub(".h5", "", basename(load_file)) |
|
|
15 |
if(grepl('mouse_brain',filename)){ |
|
|
16 |
next |
|
|
17 |
} |
|
|
18 |
|
|
|
19 |
dir.create(file.path(save_path, filename)) |
|
|
20 |
|
|
|
21 |
file.h5 <- H5File$new(load_file, mode="r") |
|
|
22 |
counts <- t(as.matrix(file.h5[['count']][,])) |
|
|
23 |
grouping <- data.frame(cell_id = paste0('c-',as.character(c(1:length(file.h5[['grouping']][])))), |
|
|
24 |
group_id = as.character(as.vector(file.h5[['grouping']][]))) |
|
|
25 |
milestone_net <- data.frame(file.h5[['milestone_network']][]) |
|
|
26 |
root_milestone_id <- file.h5[['root_milestone_id']][] |
|
|
27 |
if('w' %in% colnames(milestone_net)){ |
|
|
28 |
cell_ids <- which(milestone_net$from==root_milestone_id) |
|
|
29 |
start_id <- paste0('c-',as.character(cell_ids[which.max(milestone_net[cell_ids,'w'])])) |
|
|
30 |
}else{ |
|
|
31 |
start_id <- paste0('c-',sample(which(grouping$group_id==root_milestone_id), 1)) |
|
|
32 |
} |
|
|
33 |
file.h5$close_all() |
|
|
34 |
rownames(counts) <- paste0('c-',as.character(c(1:dim(counts)[1]))) |
|
|
35 |
colnames(counts) <- paste0('g-',as.character(c(1:dim(counts)[2]))) |
|
|
36 |
|
|
|
37 |
data <- CreateSeuratObject(counts = t(counts), project = "miller") |
|
|
38 |
data <- NormalizeData(data, normalization.method = "LogNormalize", scale.factor = 10000) |
|
|
39 |
data <- FindVariableFeatures(data, selection.method = "vst", nfeatures = 2000) |
|
|
40 |
data <- ScaleData(data, features = rownames(data)) |
|
|
41 |
|
|
|
42 |
counts <- Matrix(counts[,VariableFeatures(data)], sparse = TRUE) |
|
|
43 |
expression <- Matrix(t(data[["RNA"]]@scale.data)[,VariableFeatures(data)], sparse = TRUE) |
|
|
44 |
|
|
|
45 |
|
|
|
46 |
|
|
|
47 |
dataset <- wrap_expression( |
|
|
48 |
expression = expression, |
|
|
49 |
counts = counts |
|
|
50 |
) |
|
|
51 |
|
|
|
52 |
for(i in c(1:3)){ |
|
|
53 |
method_name <- method_names[i] |
|
|
54 |
|
|
|
55 |
method <- create_ti_method_container(container_names[i]) |
|
|
56 |
|
|
|
57 |
if(i<3){ |
|
|
58 |
K <- length(unique(grouping$group_id)) |
|
|
59 |
if(i==1){ |
|
|
60 |
K <- K - 1 |
|
|
61 |
} |
|
|
62 |
else |
|
|
63 |
ndim <- 20 |
|
|
64 |
pca <- irlba::prcomp_irlba(expression, n = ndim) |
|
|
65 |
rd <- pca$x |
|
|
66 |
cluster_labels <- cluster::pam(rd, K)$clustering |
|
|
67 |
grouping <- data.frame(cell_id = grouping$cell_id, |
|
|
68 |
group_id = cluster_labels) |
|
|
69 |
} |
|
|
70 |
|
|
|
71 |
if(i==2){ |
|
|
72 |
dataset <- dataset %>% add_prior_information(start_id = c(start_id), groups_id = grouping) |
|
|
73 |
}else{ |
|
|
74 |
dataset <- dataset %>% add_prior_information(start_id = start_id, groups_id = grouping) |
|
|
75 |
} |
|
|
76 |
model <- infer_trajectory(dataset, method(), give_priors = c("start_id"), verbose = TRUE) |
|
|
77 |
|
|
|
78 |
|
|
|
79 |
ix <- wrapr::match_order(model$progressions$cell_id, grouping$cell_id) |
|
|
80 |
model$progressions <- model$progressions[ix,] |
|
|
81 |
rownames(model$progressions) <- NULL |
|
|
82 |
write.csv(x=model$progressions, file=file.path(save_path, filename, sprintf("%s_progressions.csv", method_name))) |
|
|
83 |
|
|
|
84 |
write.csv(x=model$milestone_network, file=file.path(save_path, filename, sprintf("%s_milestone_network.csv", method_name))) |
|
|
85 |
} |
|
|
86 |
} |