a b/paper/Benchmarking with real and synthetic datasets/run_other_methods.R
1
library(dyno)
2
library(tidyverse)
3
library(Matrix)
4
library(hdf5r)
5
library(Seurat)
6
7
method_names <- c('paga','slingshot','monocle')
8
container_names <- c('jaydu/ti_paga_tree:v2.0.0','dynverse/ti_slingshot:v2.0.1','jaydu/ti_monocle3:v3.0.0')
9
10
path <- "../../data/*.h5"#"/Users/dujinhong/Downloads/new dataset/data_h5/*.h5"#
11
save_path <- "/Users/dujinhong/Downloads/result/"#"result/other methods/"
12
13
for(load_file in Sys.glob(path)){  
14
    filename <- gsub(".h5", "", basename(load_file))
15
    if(grepl('mouse_brain',filename)){
16
        next
17
    }
18
19
    dir.create(file.path(save_path, filename))
20
    
21
    file.h5 <- H5File$new(load_file, mode="r")
22
    counts <- t(as.matrix(file.h5[['count']][,]))
23
    grouping <- data.frame(cell_id = paste0('c-',as.character(c(1:length(file.h5[['grouping']][])))), 
24
                           group_id = as.character(as.vector(file.h5[['grouping']][])))    
25
    milestone_net <- data.frame(file.h5[['milestone_network']][])
26
    root_milestone_id <- file.h5[['root_milestone_id']][]    
27
    if('w' %in% colnames(milestone_net)){
28
        cell_ids <- which(milestone_net$from==root_milestone_id)
29
        start_id <- paste0('c-',as.character(cell_ids[which.max(milestone_net[cell_ids,'w'])]))
30
    }else{
31
        start_id <- paste0('c-',sample(which(grouping$group_id==root_milestone_id), 1))
32
    }    
33
    file.h5$close_all()
34
    rownames(counts) <- paste0('c-',as.character(c(1:dim(counts)[1])))
35
    colnames(counts) <- paste0('g-',as.character(c(1:dim(counts)[2])))
36
    
37
    data <- CreateSeuratObject(counts = t(counts), project = "miller")
38
    data <- NormalizeData(data, normalization.method = "LogNormalize", scale.factor = 10000)
39
    data <- FindVariableFeatures(data, selection.method = "vst", nfeatures = 2000)
40
    data <- ScaleData(data, features = rownames(data))
41
  
42
    counts <- Matrix(counts[,VariableFeatures(data)], sparse = TRUE)
43
    expression <- Matrix(t(data[["RNA"]]@scale.data)[,VariableFeatures(data)], sparse = TRUE)
44
    
45
    
46
    
47
    dataset <- wrap_expression(
48
      expression = expression,
49
      counts = counts
50
    )
51
52
    for(i in c(1:3)){
53
        method_name <- method_names[i]                
54
55
        method <- create_ti_method_container(container_names[i])        
56
        
57
        if(i<3){
58
            K <- length(unique(grouping$group_id))
59
            if(i==1){
60
                K <- K - 1
61
            }
62
            else
63
                ndim <- 20
64
            pca <- irlba::prcomp_irlba(expression, n = ndim)
65
            rd <- pca$x
66
            cluster_labels <- cluster::pam(rd, K)$clustering
67
            grouping <- data.frame(cell_id = grouping$cell_id, 
68
                                   group_id = cluster_labels)   
69
        }
70
         
71
        if(i==2){
72
            dataset <- dataset %>% add_prior_information(start_id = c(start_id), groups_id = grouping)
73
        }else{
74
            dataset <- dataset %>% add_prior_information(start_id = start_id, groups_id = grouping)
75
        }
76
        model <- infer_trajectory(dataset, method(), give_priors = c("start_id"), verbose = TRUE)
77
        
78
        
79
        ix <- wrapr::match_order(model$progressions$cell_id, grouping$cell_id)
80
        model$progressions <- model$progressions[ix,]
81
        rownames(model$progressions) <- NULL
82
        write.csv(x=model$progressions, file=file.path(save_path, filename, sprintf("%s_progressions.csv", method_name)))
83
        
84
        write.csv(x=model$milestone_network, file=file.path(save_path, filename, sprintf("%s_milestone_network.csv", method_name)))
85
    }
86
}