Diff of /R/fit_CPLM.R [000000] .. [727782]

Switch to unified view

a b/R/fit_CPLM.R
1
fit.CPLM <- function(features,
2
                     metadata,
3
                     link = "log",
4
                     formula = NULL,
5
                     random_effects_formula = NULL,
6
                     correction = 'BH',
7
                     cores = 4,
8
                     optimizer = 'nlminb',
9
                     na.action = na.exclude) {
10
  
11
  ######################################
12
  # Fit and summary functions for CPLM #
13
  ######################################
14
  
15
  if (is.null(random_effects_formula)) {
16
    
17
    ##########################
18
    # Fixed effects modeling #
19
    ##########################
20
    
21
    model_function <- function(formula,
22
                               data,
23
                               link,
24
                               optimizer,
25
                               na.action) {
26
      return(
27
        cplm::cpglm(
28
          formula = formula,
29
          data = data,
30
          link = link,
31
          optimizer = optimizer,
32
          na.action = na.action
33
        )
34
      )
35
      
36
    }
37
    
38
    summary_function <- function(fit) {
39
      cplm_out <-
40
        capture.output(cplm_summary <- cplm::summary(fit)$coefficients)
41
      para <- as.data.frame(cplm_summary)[-1,-3]
42
      para$base.model <- 'CPLM'
43
      para$tweedie.index <- round(fit$p, 3)
44
      para$name <- rownames(cplm_summary)[-1]
45
      return(para)
46
    }
47
    
48
  } else{
49
    
50
    ###########################
51
    # Random effects modeling #
52
    ###########################
53
    
54
    formula <-
55
      paste('. ~', paste(all.vars(formula)[-1], collapse = ' + '), '.', sep = ' + ')
56
    formula <- update(random_effects_formula, formula)
57
    
58
    model_function <- function(formula,
59
                               data,
60
                               link,
61
                               optimizer,
62
                               na.action) {
63
      return(
64
        glmmTMB::glmmTMB(
65
          formula = formula,
66
          data = data,
67
          family = glmmTMB::tweedie(link = link),
68
          ziformula = ~ 0,
69
          na.action = na.action
70
        )
71
      )
72
      
73
    }
74
    
75
    summary_function <- function(fit) {
76
      glmmTMB_summary <- coef(summary(fit))
77
      para <- as.data.frame(glmmTMB_summary$cond)[-1,-3]
78
      para$base.model <-
79
        ifelse(is.null(glmmTMB_summary$zi), 'CPLM', 'ZICP')
80
      para$tweedie.index <-
81
        round(unname(plogis(fit$fit$par["thetaf"]) + 1), 3)
82
      para$name <- rownames(glmmTMB_summary$cond)[-1]
83
      return(para)
84
    }
85
  }
86
  
87
  
88
  #######################################
89
  # Init cluster for parallel computing #
90
  #######################################
91
  
92
  cluster <- NULL
93
  if (cores > 1)
94
  {
95
    logging::loginfo("Creating cluster of %s R processes", cores)
96
    cluster <- parallel::makeCluster(cores)
97
    clusterExport(
98
      cluster,
99
      c(
100
        "features",
101
        "metadata",
102
        "formula",
103
        "link",
104
        "optimizer",
105
        "na.action",
106
        "model_function",
107
        "summary_function"
108
      ),
109
      envir = environment()
110
    )
111
  }
112
  
113
  ##############################
114
  # Apply per-feature modeling #
115
  ##############################
116
  
117
  outputs <-
118
    pbapply::pblapply(seq_len(ncol(features)), cl = cluster, function(x) {
119
      
120
      #################################
121
      # Create per-feature data frame #
122
      #################################
123
      
124
      featuresVector <- features[, x]
125
      logging::loginfo("Fitting model to feature number %d, %s",
126
                       x,
127
                       colnames(features)[x])
128
      dat_sub <-
129
        data.frame(expr = as.numeric(featuresVector), metadata)
130
      
131
      #############
132
      # Fit model #
133
      #############
134
      
135
      fit <- tryCatch({
136
        fit1 <-
137
          model_function(
138
            formula = formula,
139
            data = dat_sub,
140
            link = link,
141
            optimizer = optimizer,
142
            na.action = na.action
143
          )
144
      }, error = function(err) {
145
        fit1 <-
146
          try({
147
            model_function(
148
              formula = formula,
149
              data = dat_sub,
150
              link = link,
151
              optimizer = optimizer,
152
              na.action = na.action
153
            )
154
          })
155
        return(fit1)
156
      })
157
      
158
      #################
159
      # Gather Output #
160
      #################
161
      
162
      output <- list()
163
      if (all(!inherits(fit, "try-error"))) {
164
        output$para <- summary_function(fit)
165
      }
166
      else{
167
        logging::logwarn(paste("Fitting problem for feature", x, "returning NA"))
168
        output$para <-
169
          as.data.frame(matrix(NA,  nrow = ncol(metadata) - 1, ncol = 5)) # Everything except offset
170
        output$para$name <-
171
          colnames(metadata)[-ncol(metadata)] # Everything except offset
172
      }
173
      colnames(output$para) <-
174
        c('coef',
175
          'stderr' ,
176
          'pval',
177
          'base.model',
178
          'tweedie.index',
179
          'name')
180
      output$para$feature <- colnames(features)[x]
181
      return(output)
182
    })
183
  
184
  ####################
185
  # Stop the cluster #
186
  ####################
187
  
188
  if (!is.null(cluster))
189
    parallel::stopCluster(cluster)
190
  
191
  #####################################
192
  # Bind the results for each feature #
193
  #####################################
194
  
195
  paras <-
196
    do.call(rbind, lapply(outputs, function(x) {
197
      return(x$para)
198
    }))
199
  
200
  ################################
201
  # Apply correction to p-values #
202
  ################################
203
  
204
  paras$qval <-
205
    as.numeric(p.adjust(paras$pval, method = correction))
206
  
207
  #####################################################
208
  # Determine the metadata names from the model names #
209
  #####################################################
210
  
211
  metadata_names <- setdiff(colnames(metadata), "offset")
212
  # order the metadata names by decreasing length
213
  metadata_names_ordered <-
214
    metadata_names[order(nchar(metadata_names), decreasing = TRUE)]
215
  # find the metadata name based on the match
216
  # to the beginning of the string
217
  extract_metadata_name <- function(name) {
218
    return(metadata_names_ordered[mapply(startsWith,
219
                                         name,
220
                                         metadata_names_ordered)][1])
221
  }
222
  paras$metadata <-
223
    unlist(lapply(paras$name, extract_metadata_name))
224
  # compute the value as the model contrast minus metadata
225
  paras$value <-
226
    mapply(function(x, y) {
227
      if (x == y)
228
        x
229
      else
230
        gsub(x, "", y)
231
    }, paras$metadata, paras$name)
232
  
233
  ##############################
234
  # Sort by decreasing q-value #
235
  ##############################
236
  
237
  paras <- paras[order(paras$qval, decreasing = FALSE),]
238
  paras <-
239
    dplyr::select(paras,
240
                  c('feature', 'metadata', 'value'),
241
                  dplyr::everything())
242
  paras <- dplyr::select(paras,-name)
243
  rownames(paras) <- NULL
244
  return(list("results" = paras))
245
}