Diff of /R/method_wrappers.R [000000] .. [9abfcf]

Switch to unified view

a b/R/method_wrappers.R
1
# Description: This file contains the wrapper functions for the methods that
2
# are used to compute the different layers of the multilayer network. The
3
# functions are called from the compute_*_network functions in layers.R
4
# For now, only the compute_atac_peak_network function has wrapper functions
5
# for the different methods. The other methods are still directly implemented
6
# in the compute_*_network functions in layers.R
7
8
#' @title Cicero wrapper function for the compute_atac_peak_network function
9
#'
10
#' @description This function is a wrapper for the compute_atac_peak_network
11
#' function in layers.R. It computes the peak network from scATAC-seq data
12
#' using Cicero. It returns a data frame with the peak network. The data frame
13
#' also contains the coaccess score for each edge. The coaccess score is the
14
#' probability that two peaks are accessible in the same cell. The coaccess 
15
#' score is computed by Cicero. Edges are filtered based on the coaccess score.
16
#' Only edges with a coaccess score > threshold are kept.
17
#'
18
#' @param hummus A hummus object
19
#' @param atac_assay The name of the assay containing the scATAC-seq data
20
#' @param genome The genome object
21
#' @param window The window size used by Cicero to compute the coaccess score
22
#' @param number_cells_per_clusters The number of cells per cluster used by
23
#' Cicero to compute the coaccess score
24
#' @param sample_num The number of samples used by Cicero to compute the
25
#' coaccess score
26
#' @param seed The seed used by Cicero to compute the coaccess score
27
#' @param verbose The verbosity level
28
#' @param threshold The threshold used to filter edges based on the coaccess
29
#' score
30
#' @param reduction_method The method used by monocle3 to reduce the dimension
31
#' of the scATAC-seq data before defining the pseudocells. The default is UMAP.
32
#'
33
#' @return A data frame containing the peak network
34
#' @export
35
#'
36
run_cicero_wrapper <- function(
37
    hummus,
38
    atac_assay,
39
    genome,
40
    window,
41
    number_cells_per_clusters,
42
    sample_num,
43
    seed,
44
    verbose,
45
    threshold,
46
    reduction_method = "UMAP"
47
    ) {
48
49
    # functions that need to be renamed
50
    int_elementMetadata <- SingleCellExperiment::int_elementMetadata
51
    counts <- SingleCellExperiment::counts
52
53
    # obtain chromosome sizes
54
    chromosome_sizes <- data.frame(V1 = genome@seqinfo@seqnames,
55
                                   V2 = genome@seqinfo@seqlengths)
56
57
    # Get scATAC-seq data
58
    scATAC <- as.matrix(hummus@assays[[atac_assay]]@counts)
59
    # Matrix to edgelist
60
    acc <- reshape2::melt(scATAC)
61
    colnames(acc) <- c("V1", "V2", "V3")
62
63
    # Prepare cicero input
64
    input_cds <- cicero::make_atac_cds(acc, binarize = TRUE) # Create CDS object
65
    set.seed(seed)
66
    # It is required that there is no empty cell
67
    if (length(which(colSums(as.matrix(monocle3::exprs(input_cds))) == 0)) == 0
68
    ) {
69
    # Calculating size factors using default method = mean-geometric-mean-total
70
      input_cds <- monocle3::estimate_size_factors(input_cds)
71
      # Preprocessing using LSI
72
      input_cds <- monocle3::preprocess_cds(input_cds, method = "LSI")
73
      # Dimensionality reduction using UMAP
74
      input_cds <- monocle3::reduce_dimension(
75
                                    input_cds,
76
                                    reduction_method = reduction_method,
77
                                    preprocess_method = "LSI")
78
    } else {
79
      print("Error: there is at least one cell with no signal.")
80
    }
81
    # Get reduced (UMAP) coordinates
82
    umap_coords <- SingleCellExperiment::reducedDims(input_cds)$UMAP
83
    # Compute pseudocells
84
    cicero_cds <- cicero::make_cicero_cds(
85
      input_cds,  # Create a Cicero CDS object
86
      reduced_coordinates = umap_coords,
87
      k = number_cells_per_clusters,  #number neighbors/ Default = 50
88
      summary_stats = NULL,         # Default
89
      size_factor_normalize = TRUE, # Default
90
      silent = FALSE)               # Default
91
92
    cicero <- cicero::run_cicero(
93
      cds = cicero_cds, # Infer peak-links
94
      genomic_coords = chromosome_sizes,
95
      window = window,             # Default = 5e+05
96
      silent = FALSE,             # Default = FALSE
97
      sample_num = sample_num) # Default = 100
98
99
    # Remove NAs, double edges, and edges with coaccess score <=0
100
    # Check for coaccess = NA
101
    if (length(which(is.na(cicero$coaccess))) > threshold) {
102
      cicero <- cicero[which(!is.na(cicero$coaccess)), ]  # Remove NAs
103
    }
104
    cicero$temp <- NA  # Helper column to check and remove double edges
105
    my_cols <- which(as.character(cicero$Peak1) <= as.character(cicero$Peak2))
106
    cicero$temp[my_cols] <- paste(cicero$Peak1[my_cols],
107
                                  cicero$Peak2[my_cols],
108
                                  sep = ";")
109
110
    my_cols <- which(as.character(cicero$Peak1) > as.character(cicero$Peak2))
111
    cicero$temp[my_cols] <- paste(cicero$Peak2[my_cols],
112
                                  cicero$Peak1[my_cols],
113
                                  sep = ";")
114
115
    # Sort table according to temp-column (each entry appears double)
116
    cicero <- cicero[with(cicero, order(temp, decreasing = TRUE)), ]
117
    rownames(cicero) <- c(1:dim(cicero)[1])
118
    A <- as.character(cicero$Peak1[seq(1, dim(cicero)[1], 2)])
119
    Anum <- round(cicero$coaccess[seq(1, dim(cicero)[1], 2)], 10)
120
    B <- as.character(cicero$Peak2[seq(2, dim(cicero)[1], 2)])
121
    Bnum <- round(cicero$coaccess[seq(2, dim(cicero)[1], 2)], 10)
122
    # length(which(A==B & Anum==Bnum)) 
123
    # Each edge appears twice with same coaccess score (rounded to 10 digits)
124
    cicero <- cicero[seq(1, dim(cicero)[1], 2), ] # Remove double edges
125
    cicero$temp <- NULL # Remove helper column
126
    cicero <- cicero[with(cicero, order(cicero$coaccess,
127
                                        decreasing = TRUE)), ]  # Sort
128
    rownames(cicero) <- c(1:dim(cicero)[1])
129
    cicero$Peak1 <- gsub("_", "-", cicero$Peak1)
130
    # Peak names 2x"-" to match bipartites
131
    cicero$Peak2 <- gsub("_", "-", cicero$Peak2)
132
    # Peak names 2x"-" to match bipartites ? 2x"-" or 2x"_"
133
134
    peak_network <- cicero[which(cicero$coaccess > threshold), ]
135
    # Remove edges with coaccess score <= threshold
136
137
    if (verbose > 0) {
138
      cat("\n", dim(peak_network)[1], "peak edges with a coaccess score >",
139
          threshold, "were found.\n")
140
    }
141
142
    # Return peak network including edges with positive coaccess score
143
    return(peak_network)
144
}
145
146
147
#' @title Omnipath wrapper function for the compute_tf_network function
148
#' @description This function is a wrapper for the compute_tf_network function
149
#' in layers.R. It computes the TF network from using Omnipath database.
150
#' It returns a data frame with the TF network. The data frame is not weighted
151
#' and does not contain scores for the edges.
152
#' @param hummus A hummus object
153
  # Get TF-TF interactions from Omnipath
154
run_omnipath_wrapper <- function(
155
  hummus = hummus,
156
  organism = organism,
157
  tfs = tfs,
158
  gene_assay = gene_assay,
159
  source_target = source_target,
160
  verbose = 1) {
161
162
  TF_PPI <- OmnipathR::import_post_translational_interactions(
163
    organism = organism, partners = tfs, source_target = source_target
164
  )
165
166
  if (verbose > 0) {
167
    cat("\tNumber of edges from Omnipath:", nrow(TF_PPI),
168
    "\nWill now be filtered to only those corresponding to specified tfs")
169
  }
170
171
  if (is.na(tfs)) {
172
    # Get tfs list
173
    tfs <- get_tfs(hummus = hummus,
174
              assay = gene_assay,
175
              store_tfs = FALSE,
176
              output_file = NULL,
177
              verbose = verbose)
178
  } else if (typeof(tfs) != "character") {
179
      stop("'tfs' argument needs to be a vector of characters
180
      (e.g.: c('MYC', 'JAK1')).")
181
  }
182
183
  # add filtering if element is not a TF expressed in the dataset
184
  if (source_target == "AND") {
185
    TF_PPI <- TF_PPI[which(TF_PPI$source_genesymbol %in% tfs &
186
                           TF_PPI$target_genesymbol %in% tfs), ]
187
  } else if (source_target == "OR") {
188
    TF_PPI <- TF_PPI[which(TF_PPI$source_genesymbol %in% tfs |
189
                           TF_PPI$target_genesymbol %in% tfs), ]
190
  }
191
  # Get only source and target columns
192
  tf_network <- TF_PPI[, c(3, 4)]
193
194
  # Convert to data.frame from tibble
195
  tf_network <- as.data.frame(tf_network)
196
197
  # Check if there is any TF-TF edges otherwise add a fake node
198
  # and connect all TFs to it (to allow HuMMuS to run without impacting result)
199
  if (nrow(tf_network) == 0) {
200
    cat("No TF-TF edges from Omnipath for the given parameters.
201
        You can try to change the source_target parameter to 'OR' to get
202
        TF-other protein interactions. Or try to import a network  
203
        computed externally. Right now, a network with all TFs connected
204
        to a fake node is created, for HuMMuS analysis.\n It has no biological
205
        meaning but will allow to run the pipeline as if no edges were present.
206
        \n")
207
    tf_network <- run_tf_null_wrapper(
208
      hummus = hummus,
209
      organism = organism,
210
      tfs = tfs,
211
      gene_assay = gene_assay,
212
      verbose = )
213
214
  }
215
  return(tf_network)
216
}
217
218
#' @title tf_null wrapper function for the tf_network function
219
#' @description This function is a wrapper for the tf_network function
220
#' 
221
#' @param hummus A hummus object
222
#' 
223
#' 
224
run_tf_null_wrapper <- function(
225
  hummus = hummus,
226
  organism = organism,
227
  tfs = tfs,
228
  gene_assay = gene_assay,
229
  verbose = 1) {
230
    
231
  if (verbose > 0) {
232
    cat("Creating a fake TF network with all TFs connected to a fake node.\n")
233
  }
234
235
  if (is.na(tfs)) {
236
    # Get tfs list
237
    tfs <- get_tfs(hummus = hummus,
238
              assay = gene_assay,
239
              store_tfs = FALSE,
240
              output_file = NULL,
241
              verbose = verbose)
242
  } else if (typeof(tfs) != "character") {
243
      stop("'tfs' argument needs to be a vector of characters
244
      (e.g.: c('MYC', 'JAK1')).")
245
  }
246
247
248
  FAKE_NODE <- "fake_node"
249
  tf_network <- data.frame(source = c(), target = c())
250
  for (tf in tfs) {
251
    tf_network <- rbind(tf_network, data.frame(source = tf, target = FAKE_NODE))
252
  }
253
  return(tf_network)
254
}