Diff of /R/fit_ZICP.R [000000] .. [727782]

Switch to unified view

a b/R/fit_ZICP.R
1
fit.ZICP <- function(features,
2
                     metadata,
3
                     link = "log",
4
                     formula = NULL,
5
                     random_effects_formula = NULL,
6
                     correction = 'BH',
7
                     cores = 4,
8
                     optimizer = 'nlminb',
9
                     na.action = na.exclude) {
10
  
11
  ######################################
12
  # Fit and summary functions for ZICP #
13
  ######################################
14
  
15
  if (is.null(random_effects_formula)) {
16
    
17
    ##########################
18
    # Fixed effects modeling #
19
    ##########################
20
    
21
    model_function <- function(formula,
22
                               data,
23
                               link,
24
                               optimizer,
25
                               na.action) {
26
      return(
27
        zcpglm(
28
          formula = formula,
29
          data = data,
30
          link = link,
31
          optimizer = optimizer,
32
          na.action = na.action
33
        )
34
      )
35
    }
36
    
37
    summary_function <- function(fit) {
38
      zicp_out <-
39
        capture.output(zicp_summary <-
40
                         cplm::summary(fit)$coefficients$tweedie)
41
      para <- as.data.frame(zicp_summary)[-1,-3]
42
      para$base.model <- 'ZICP'
43
      para$tweedie.index <- round(fit$p, 3)
44
      para$name <- rownames(zicp_summary)[-1]
45
      return(para)
46
    }
47
    
48
  } else{
49
    ###########################
50
    # Random effects modeling #
51
    ###########################
52
    
53
    formula <-
54
      paste('. ~', paste(all.vars(formula)[-1], collapse = ' + '), '.', sep = ' + ')
55
    formula <- update(random_effects_formula, formula)
56
    
57
    model_function <- function(formula,
58
                               data,
59
                               link,
60
                               optimizer,
61
                               na.action) {
62
      return(
63
        glmmTMB::glmmTMB(
64
          formula = formula,
65
          data = data,
66
          family = glmmTMB::tweedie(link = link),
67
          ziformula = ~ 1,
68
          na.action = na.action
69
        )
70
      )
71
      
72
    }
73
    
74
    summary_function <- function(fit) {
75
      glmmTMB_summary <- coef(summary(fit))
76
      para <- as.data.frame(glmmTMB_summary$cond)[-1,-3]
77
      para$base.model <-
78
        ifelse(is.null(glmmTMB_summary$zi), 'CPLM', 'ZICP')
79
      para$tweedie.index <-
80
        round(unname(plogis(fit$fit$par["thetaf"]) + 1), 3)
81
      para$name <- rownames(glmmTMB_summary$cond)[-1]
82
      return(para)
83
    }
84
  }
85
  
86
  
87
  #######################################
88
  # Init cluster for parallel computing #
89
  #######################################
90
  
91
  cluster <- NULL
92
  if (cores > 1)
93
  {
94
    logging::loginfo("Creating cluster of %s R processes", cores)
95
    cluster <- parallel::makeCluster(cores)
96
    clusterExport(
97
      cluster,
98
      c(
99
        "features",
100
        "metadata",
101
        "formula",
102
        "link",
103
        "optimizer",
104
        "na.action",
105
        "model_function",
106
        "summary_function"
107
      ),
108
      envir = environment()
109
    )
110
  }
111
  
112
  ##############################
113
  # Apply per-feature modeling #
114
  ##############################
115
  
116
  outputs <-
117
    pbapply::pblapply(seq_len(ncol(features)), cl = cluster, function(x) {
118
      #################################
119
      # Create per-feature data frame #
120
      #################################
121
      
122
      featuresVector <- features[, x]
123
      logging::loginfo("Fitting model to feature number %d, %s",
124
                       x,
125
                       colnames(features)[x])
126
      dat_sub <-
127
        data.frame(expr = as.numeric(featuresVector), metadata)
128
      
129
      #############
130
      # Fit model #
131
      #############
132
      
133
      fit <- tryCatch({
134
        fit1 <-
135
          model_function(
136
            formula = formula,
137
            data = dat_sub,
138
            link = link,
139
            optimizer = optimizer,
140
            na.action = na.action
141
          )
142
      }, error = function(err) {
143
        fit1 <-
144
          try({
145
            model_function(
146
              formula = formula,
147
              data = dat_sub,
148
              link = link,
149
              optimizer = optimizer,
150
              na.action = na.action
151
            )
152
          })
153
        return(fit1)
154
      })
155
      
156
      #################
157
      # Gather Output #
158
      #################
159
      
160
      output <- list()
161
      if (all(!inherits(fit, "try-error"))) {
162
        output$para <- summary_function(fit)
163
      }
164
      else{
165
        logging::logwarn(paste("Fitting problem for feature", x, "returning NA"))
166
        output$para <-
167
          as.data.frame(matrix(NA,  nrow = ncol(metadata) - 1, ncol = 5)) # Everything except offset
168
        output$para$name <-
169
          colnames(metadata)[-ncol(metadata)] # Everything except offset
170
      }
171
      colnames(output$para) <-
172
        c('coef',
173
          'stderr' ,
174
          'pval',
175
          'base.model',
176
          'tweedie.index',
177
          'name')
178
      output$para$feature <- colnames(features)[x]
179
      return(output)
180
    })
181
  
182
  ####################
183
  # Stop the cluster #
184
  ####################
185
  
186
  if (!is.null(cluster))
187
    parallel::stopCluster(cluster)
188
  
189
  #####################################
190
  # Bind the results for each feature #
191
  #####################################
192
  
193
  paras <-
194
    do.call(rbind, lapply(outputs, function(x) {
195
      return(x$para)
196
    }))
197
  
198
  ################################
199
  # Apply correction to p-values #
200
  ################################
201
  
202
  paras$qval <-
203
    as.numeric(p.adjust(paras$pval, method = correction))
204
  
205
  #####################################################
206
  # Determine the metadata names from the model names #
207
  #####################################################
208
  
209
  metadata_names <- setdiff(colnames(metadata), "offset")
210
  # order the metadata names by decreasing length
211
  metadata_names_ordered <-
212
    metadata_names[order(nchar(metadata_names), decreasing = TRUE)]
213
  # find the metadata name based on the match
214
  # to the beginning of the string
215
  extract_metadata_name <- function(name) {
216
    return(metadata_names_ordered[mapply(startsWith,
217
                                         name,
218
                                         metadata_names_ordered)][1])
219
  }
220
  paras$metadata <-
221
    unlist(lapply(paras$name, extract_metadata_name))
222
  # compute the value as the model contrast minus metadata
223
  paras$value <-
224
    mapply(function(x, y) {
225
      if (x == y)
226
        x
227
      else
228
        gsub(x, "", y)
229
    }, paras$metadata, paras$name)
230
  
231
  ##############################
232
  # Sort by decreasing q-value #
233
  ##############################
234
  
235
  paras <- paras[order(paras$qval, decreasing = FALSE),]
236
  paras <-
237
    dplyr::select(paras,
238
                  c('feature', 'metadata', 'value'),
239
                  dplyr::everything())
240
  paras <- dplyr::select(paras,-name)
241
  rownames(paras) <- NULL
242
  return(list("results" = paras))
243
}