|
a |
|
b/data-raw/lmmSpline-method.R |
|
|
1 |
# Jasmin Straube, Queensland Facility of Advanced Bioinformatics |
|
|
2 |
# Part of this script was borrowed from the lm function from the Stats package the lme function of the nlme package |
|
|
3 |
# and functions from the lmeSpline, reshape and gdata packages |
|
|
4 |
# |
|
|
5 |
# This program is free software; you can redistribute it and/or |
|
|
6 |
# modify it under the terms of the GNU Moleculesral Public License |
|
|
7 |
# as published by the Free Software Foundation; either version 2 |
|
|
8 |
# of the License, or (at your option) any later version. |
|
|
9 |
# |
|
|
10 |
# This program is distributed in the hope that it will be useful, |
|
|
11 |
# but WITHOUT ANY WARRANTY; without even the implied warranty of |
|
|
12 |
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the |
|
|
13 |
# GNU Moleculesral Public License for more details. |
|
|
14 |
# |
|
|
15 |
# You should have received a copy of the GNU Moleculesral Public License |
|
|
16 |
# along with this program; if not, write to the Free Software |
|
|
17 |
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. |
|
|
18 |
|
|
|
19 |
|
|
|
20 |
#' Data-driven linear mixed effect model spline modelling |
|
|
21 |
#' |
|
|
22 |
#' Function that models a linear or limear mixed model depending on the best fit. Alternatively, the function can return THE derivation information of the fitted models |
|
|
23 |
#' for the fixed (original) times points and a chosen \code{basis}. |
|
|
24 |
#' |
|
|
25 |
#' @import methods |
|
|
26 |
#' @importFrom nlme lme lmeControl pdIdent pdDiag |
|
|
27 |
#' @importFrom parallel parLapply detectCores makeCluster clusterExport stopCluster |
|
|
28 |
#' @importFrom gdata drop.levels |
|
|
29 |
#' @importFrom lmeSplines smspline approx.Z |
|
|
30 |
#' @importFrom reshape2 melt dcast |
|
|
31 |
#' @importFrom stats lm predict.lm predict anova quantile na.exclude |
|
|
32 |
#' @usage lmmSpline(data, time, sampleID, timePredict, deri, basis, knots, keepModels,numCores) |
|
|
33 |
#' @param data \code{data.frame} or \code{matrix} containing the samples as rows and features as columns |
|
|
34 |
#' @param time \code{numeric} vector containing the sample time point information. |
|
|
35 |
#' @param sampleID \code{character}, \code{numeric} or \code{factor} vector containing information about the unique identity of each sample |
|
|
36 |
#' @param timePredict \code{numeric} vector containing the time points to be predicted. By default set to the original time points observed in the experiment. |
|
|
37 |
#' @param deri \code{logical} value. If \code{TRUE} returns the predicted derivative information on the observed time points.By default set to \code{FALSE}. |
|
|
38 |
#' @param basis \code{character} string. What type of basis to use, matching one of \code{"cubic"}, \code{"p-spline"} or \code{"cubic p-spline"}. The \code{"cubic"} basis (\code{default}) is the cubic smoothing spline as defined by Verbyla \emph{et al.} 1999, the \code{"p-spline"} is the truncated p-spline basis as defined by Durban \emph{et al.} 2005. |
|
|
39 |
#' @param knots Alternatively an \code{integer}, the number of knots used for the \code{"p-spline"} or \code{"cubic p-spline"} basis calculation. Otherwise calculated as proposed by Ruppert 2002. Not used for the "cubic" smoothing spline basis as it used the inner design points. |
|
|
40 |
#' @param keepModels alternative \code{logical} value if you want to keep the model output. Default value is FALSE |
|
|
41 |
#' @param numCores Alternative \code{numeric} value indicating the number of CPU cores to be used. Default value is automatically estimated. |
|
|
42 |
#' @details |
|
|
43 |
#' The first model (\code{modelsUsed}=0) assumes the response is a straight line not affected by individual variation. |
|
|
44 |
#' |
|
|
45 |
#' Let \eqn{y_{ij}(t_{ij})} be the expression of a feature for individual (or biological replicate) \eqn{i} at time \eqn{t_{ij}}, where \eqn{i=1,2,...,n}, \eqn{j=1,2,...,m_i}, \eqn{n} is the sample size and \eqn{m_i} is the number of observations for individual \eqn{i} for the given feature. |
|
|
46 |
#' We fit a simple linear regression of expression \eqn{y_{ij}(t_{ij})} on time \eqn{t_{ij}}. |
|
|
47 |
#' The intercept \eqn{\beta_0} and slope \eqn{\beta_1} are estimated via ordinary least squares: |
|
|
48 |
#' \eqn{y_{ij}(t_{ij})= \beta_0 + \beta_1 t_{ij} + \epsilon_{ij}}, where \eqn{\epsilon_{ij} ~ N(0,\sigma^2_{\epsilon}).} |
|
|
49 |
#' The second model (\code{modelsUsed}=1) is nonlinear where the straight line in regression replaced with a curve modelled using here for example a spline truncated line basis (\code{basis}="p-spline") as proposed Durban \emph{et al.} 2005: |
|
|
50 |
#' |
|
|
51 |
#' \deqn{y_{ij}(t_{ij})= f(t_{ij}) +\epsilon_{ij},} |
|
|
52 |
#' |
|
|
53 |
#' where \eqn{\epsilon_{ij}~ N(0,\sigma_{\epsilon}^2).} |
|
|
54 |
#' |
|
|
55 |
#' The penalized spline is represented by \eqn{f}, which depends on a set of knot positions \eqn{\kappa_1,...,\kappa_K} in the range of \eqn{{t_{ij}}}, some unknown coefficients \eqn{u_k}, an intercept \eqn{\beta_0} and a slope \eqn{\beta_1}. The first term in the above equation can therefore be expanded as: |
|
|
56 |
#' \deqn{f(t_{ij})= \beta_0+ \beta_1t_{ij}+\sum\limits_{k=1}^{K}u_k(t_{ij}-\kappa_k)_+,} |
|
|
57 |
#' with \eqn{(t_{ij}-\kappa_k)_+=t_{ij}-\kappa_k}, if \eqn{t_{ij}-\kappa_k > 0, 0} otherwise. |
|
|
58 |
#' |
|
|
59 |
#' The choice of the number of knots \eqn{K} and their positions influences the flexibility of the curve. |
|
|
60 |
#' If the argument \code{knots}=missing, we use a method proposed by Ruppert 2002 to estimate the number of knots given the measured number of time points \eqn{T}, so that the knots \eqn{\kappa_1 \ldots \kappa_K} are placed at quantiles of the time interval of interest: |
|
|
61 |
#' |
|
|
62 |
#' \deqn{K= max(5,min(floor(\frac{T}{4}) , 40)).} |
|
|
63 |
#' |
|
|
64 |
#' In order to account for individual variation, our third model (\code{modelsUsed}=2) adds a subject-specific random effect \eqn{U_i} to the mean response \eqn{f(t_{ij})}. |
|
|
65 |
#' Assuming \eqn{f(t_{ij})} to be a fixed (yet unknown) population curve, \eqn{U_i} is treated as a random realization of an underlying Gaussian process with zero-mean and variance \eqn{\sigma_U^2} and is independent from the random error \eqn{\epsilon_{ij}}: |
|
|
66 |
#' |
|
|
67 |
#' \deqn{y_{ij}(t_{ij}) = f(t_{ij}) + U_i + \epsilon_{ij}} |
|
|
68 |
#' |
|
|
69 |
#' with \eqn{U_{i} ~ N(0,\sigma_U^2)} and \eqn{\epsilon_{ij} ~ N(0,\sigma_{\epsilon}^2)}. |
|
|
70 |
|
|
|
71 |
#' In the equation above, the individual curves are expected to be parallel to the mean curve as we assume the individual expression curves to be constant over time. |
|
|
72 |
#' A simple extension to this model is to assume individual deviations are straight lines. The fourth model (\code{modelsUsed}=3) therefore fits individual-specific random intercepts \eqn{a_{i0}} and slopes \eqn{a_{i1}}: |
|
|
73 |
#' |
|
|
74 |
#' \deqn{y_{ij}(t_{ij}) = f(t_{ij}) + a_{i0} + a_{i1}t_{ij} + \epsilon_{ij}} |
|
|
75 |
#' |
|
|
76 |
#' with \eqn{\epsilon_{ij} ~ N(0,\sigma_\epsilon^2)} and \eqn{(a_{i0},a_{i1})^T} ~ \eqn{ N(0,\Sigma).} |
|
|
77 |
#' We assume independence between the random intercept and slope. |
|
|
78 |
#' @return lmmSpline returns an object of class \code{lmmspline} containing the following components: |
|
|
79 |
#' \itemize{ |
|
|
80 |
#' \item{predSpline}{\code{data.frame} containing predicted values based on linear model object or linear mixed effect model object.} |
|
|
81 |
#' \item{modelsUsed}{\code{numeric} vector indicating the model used to fit the data. 0 = linear model, 1=linear mixed effect model spline (LMMS) with defined basis ('cubic' by default) 2 = LMMS taking subject-specific random intercept, 3 = LMMS with subject specific intercept and slope.} |
|
|
82 |
#' \item{model}{\code{list} of models used to model time profiles.} |
|
|
83 |
#' \item{derivative}{\code{logical} value indicating if the predicted values are the derivative information.} |
|
|
84 |
#' } |
|
|
85 |
#' @references Durban, M., Harezlak, J., Wand, M. P., & Carroll, R. J. (2005). \emph{Simple fitting of subject-specific curves for longitudinal data.} Stat. Med., 24(8), 1153-67. |
|
|
86 |
#' @references Ruppert, D. (2002). \emph{Selecting the number of knots for penalized splines.} J. Comp. Graph. Stat. 11, 735-757 |
|
|
87 |
#' @references Verbyla, A. P., Cullis, B. R., & Kenward, M. G. (1999). \emph{The analysis of designed experiments and longitudinal data by using smoothing splines.} Appl.Statist, 18(3), 269-311. |
|
|
88 |
#' @references Straube J., Gorse A.-D., Huang B.E., Le Cao K.-A. (2015). \emph{A linear mixed model spline framework for analyzing time course 'omics' data} PLOSONE, 10(8), e0134540. |
|
|
89 |
# @seealso \code{\link[lmms]{summary.lmmspline}}, \code{\link[lmms]{plot.lmmspline}}, \code{\link[lmms]{predict.lmmspline}}, \code{\link[lmms]{deriv.lmmspline}} |
|
|
90 |
#' @examples |
|
|
91 |
#' \dontrun{ |
|
|
92 |
#' data(kidneySimTimeGroup) |
|
|
93 |
#' # running for samples in group 1 |
|
|
94 |
#' G1 <- which(kidneySimTimeGroup$group=="G1") |
|
|
95 |
#' testLMMSpline<- lmmSpline(data=kidneySimTimeGroup$data[G1,],time=kidneySimTimeGroup$time[G1], |
|
|
96 |
#' sampleID=kidneySimTimeGroup$sampleID[G1]) |
|
|
97 |
#' summary(testLMMSpline) |
|
|
98 |
#' DerivTestLMMSplineTG<- lmmSpline(data=as.data.frame(kidneySimTimeGroup$data[G1,]), |
|
|
99 |
#' time=kidneySimTimeGroup$time[G1],sampleID=kidneySimTimeGroup$sampleID[G1], |
|
|
100 |
#' deri=TRUE,basis="p-spline") |
|
|
101 |
#' summary(DerivTestLMMSplineTG)} |
|
|
102 |
# setGeneric('lmmSpline',function(data,time,sampleID,timePredict,deri,basis,knots,keepModels,numCores){standardGeneric('lmmSpline')}) |
|
|
103 |
# setClassUnion("matrixOrFrame",c('matrix','data.frame')) |
|
|
104 |
# setClassUnion("missingOrnumeric", c("missing", "numeric")) |
|
|
105 |
# setClassUnion("missingOrcharacter", c("missing", "character")) |
|
|
106 |
# setClassUnion("missingOrlogical", c("missing", "logical")) |
|
|
107 |
# setClassUnion("factorOrcharacterOrnumeric", c("factor", "character","numeric")) |
|
|
108 |
# # @rdname lmmSpline-methods |
|
|
109 |
# # @aliases lmmSpline,matrixOrFrame,numeric,factorOrcharacterOrnumeric, |
|
|
110 |
# # missingOrlogical,missingOrcharacter,missingOrnumeric,missingOrlogical,missingOrnumeric-method |
|
|
111 |
# # @exportMethod lmmSpline |
|
|
112 |
# |
|
|
113 |
# setMethod('lmmSpline',c(data="matrixOrFrame",time="numeric",sampleID="factorOrcharacterOrnumeric",timePredict="missingOrnumeric", deri="missingOrlogical", basis="missingOrcharacter",knots="missingOrnumeric",keepModels="missingOrlogical",numCores="missingOrnumeric"), function(data,time,sampleID,timePredict,deri,basis,knots,keepModels,numCores){ |
|
|
114 |
# |
|
|
115 |
# lmmSplinePara(data=data,time=time,sampleID=sampleID,timePredict=timePredict,deri=deri,basis=basis, knots=knots,keepModels=keepModels,numCores=numCores) |
|
|
116 |
# }) |
|
|
117 |
# @name lmmSpline |
|
|
118 |
|
|
|
119 |
#' @docType methods |
|
|
120 |
#' @rdname lmmSpline-methods |
|
|
121 |
#' @importFrom parallel detectCores parLapply clusterExport |
|
|
122 |
#' @export |
|
|
123 |
lmmSpline <- function(data, time, sampleID, timePredict, deri, basis, knots,keepModels, numCores){ |
|
|
124 |
|
|
|
125 |
if(missing(keepModels)) |
|
|
126 |
keepModels <- F |
|
|
127 |
if(missing(timePredict)) |
|
|
128 |
timePredict <- sort(unique(time)) |
|
|
129 |
if(missing(basis)) |
|
|
130 |
basis <- "cubic" |
|
|
131 |
|
|
|
132 |
if(missing(deri)){ |
|
|
133 |
deri <- FALSE |
|
|
134 |
}else{ |
|
|
135 |
deri <- deri |
|
|
136 |
} |
|
|
137 |
|
|
|
138 |
basis.collection <- c("cubic","p-spline","cubic p-spline") |
|
|
139 |
if(!basis%in% basis.collection) |
|
|
140 |
stop(cat("Chosen basis is not available. Choose:", paste(basis.collection,collapse=', '))) |
|
|
141 |
if(diff(range(c(length(sampleID),length(time),nrow(data))))>0) |
|
|
142 |
stop("Size of the input vectors rep, time and nrow(data) are not equal") |
|
|
143 |
if(missing(knots)& (basis=="p-spline"|basis=='cubic p-spline')) |
|
|
144 |
warning("The number of knots is automatically estimated") |
|
|
145 |
if(deri & basis=='cubic') |
|
|
146 |
stop('To calculate the derivative choose either "p-spline" or "cubic p-spline" as basis') |
|
|
147 |
|
|
|
148 |
options(show.error.messages = TRUE) |
|
|
149 |
|
|
|
150 |
i <- NULL |
|
|
151 |
fits <- NULL |
|
|
152 |
error <- NULL |
|
|
153 |
|
|
|
154 |
if(missing(numCores)){ |
|
|
155 |
num.Cores <- detectCores() |
|
|
156 |
}else{ |
|
|
157 |
num.Cores <- detectCores() |
|
|
158 |
if(num.Cores<numCores){ |
|
|
159 |
warning(paste('The number of cores is bigger than the number of detected cores. Using the number of detected cores',num.Cores,'instead.')) |
|
|
160 |
}else{ |
|
|
161 |
num.Cores <- numCores |
|
|
162 |
} |
|
|
163 |
|
|
|
164 |
} |
|
|
165 |
Molecule <- '' |
|
|
166 |
|
|
|
167 |
derivLme <- function(fit){ |
|
|
168 |
#random slopes |
|
|
169 |
|
|
|
170 |
if(class(fit)=='lm'){ |
|
|
171 |
beta.hat <- rep(fit$coefficients[2],length(unique(fit$model$time))) |
|
|
172 |
return(beta.hat) |
|
|
173 |
|
|
|
174 |
}else if(class(fit)=='lme'){ |
|
|
175 |
u <- unlist(fit$coefficients$random$all) |
|
|
176 |
beta.hat <- fit$coefficients$fixed[2] |
|
|
177 |
Zt <- fit$data$Zt[!duplicated(fit$data$Zt),]>0 |
|
|
178 |
deriv.all <- beta.hat + rowSums(Zt%*%t(u)) |
|
|
179 |
return(deriv.all) |
|
|
180 |
} |
|
|
181 |
} |
|
|
182 |
|
|
|
183 |
#penalized cubic |
|
|
184 |
|
|
|
185 |
derivLmeCubic <- function(fit){ |
|
|
186 |
#random slopes |
|
|
187 |
if(class(fit)=='lm'){ |
|
|
188 |
beta.hat <- rep(fit$coefficients[2],length(unique(fit$model$time))) |
|
|
189 |
return(beta.hat) |
|
|
190 |
|
|
|
191 |
}else if(class(fit)=='lme'){ |
|
|
192 |
u <- unlist(fit$coefficients$random$all) |
|
|
193 |
beta.hat <- fit$coefficients$fixed[2] |
|
|
194 |
PZ <- fit$data$Zt[!duplicated(fit$data$Zt),] |
|
|
195 |
PZ <-PZ^(1/3) |
|
|
196 |
deriv.all <- beta.hat + rowSums((PZ*PZ)%*%(t(u)*3)) |
|
|
197 |
return(deriv.all) |
|
|
198 |
} |
|
|
199 |
|
|
|
200 |
} |
|
|
201 |
|
|
|
202 |
if(missing(knots)) |
|
|
203 |
knots <-NULL |
|
|
204 |
nMolecules <- NULL |
|
|
205 |
nMolecules <- ncol(data) |
|
|
206 |
|
|
|
207 |
|
|
|
208 |
lme <- nlme::lme |
|
|
209 |
cl <- makeCluster(num.Cores,"SOCK") |
|
|
210 |
clusterExport(cl, list('data','lm','try','class','unique','anova','drop.levels','pdDiag','pdIdent','time','sampleID','melt','dcast','predict','derivLme','knots','derivLmeCubic','lme','keepModels','basis','data','other.reshape'),envir=environment()) |
|
|
211 |
|
|
|
212 |
models <-list() |
|
|
213 |
|
|
|
214 |
|
|
|
215 |
new.data <- parLapply(cl,1:nMolecules,fun = function(i){ |
|
|
216 |
# new.data <- list() |
|
|
217 |
# for(i in 1:nMolecules){ |
|
|
218 |
|
|
|
219 |
expr <- data[,i] |
|
|
220 |
|
|
|
221 |
dataM <- as.data.frame(other.reshape(Rep=sampleID,Time=time,Data=unlist(expr))) |
|
|
222 |
dataM$all = rep(1, nrow(dataM)) |
|
|
223 |
dataM$time = as.numeric(as.character(dataM$Time)) |
|
|
224 |
dataM$Expr = as.numeric(as.character(dataM$Expr)) |
|
|
225 |
|
|
|
226 |
|
|
|
227 |
#### CUBIC SPLINE BASIS #### |
|
|
228 |
if(basis=="cubic"){ |
|
|
229 |
dataM$Zt <- lmeSplines::smspline(~ time, data=dataM) |
|
|
230 |
knots <- sort(unique(time))[2:(length(unique(time))-1)] |
|
|
231 |
} |
|
|
232 |
#### PENALIZED SPLINE BASIS##### |
|
|
233 |
if(basis%in%c("p-spline","cubic p-spline")){ |
|
|
234 |
|
|
|
235 |
if(is.null(knots)){ |
|
|
236 |
K <- max(6,min(floor(length(unique(dataM$time))/4),40)) |
|
|
237 |
}else{ |
|
|
238 |
K <- max(knots,6) |
|
|
239 |
} |
|
|
240 |
knots <- quantile(unique(dataM$time),seq(0,1,length=K+2))[-c(1,K+2)] |
|
|
241 |
if(min(knots)<=min(dataM$time) | max(knots)>=max(dataM$time)) |
|
|
242 |
stop(cat('Make sure the knots are within the time range',range(dataM$time)[1],'to',range(dataM$time)[2])) |
|
|
243 |
PZ <- outer(dataM$time,knots,"-") |
|
|
244 |
if(basis=="cubic p-spline") |
|
|
245 |
PZ <- PZ^3 |
|
|
246 |
PZ <- PZ *(PZ>0) |
|
|
247 |
dataM$Zt <- PZ |
|
|
248 |
|
|
|
249 |
} |
|
|
250 |
|
|
|
251 |
|
|
|
252 |
|
|
|
253 |
if(deri){ |
|
|
254 |
pred.spline = rep(NA,length(timePredict)) |
|
|
255 |
}else{ |
|
|
256 |
pred.spline =rep(NA,length(timePredict)) |
|
|
257 |
pred.df <- data.frame(all=rep(1,length(timePredict)), time=timePredict) |
|
|
258 |
pred.df$Zt = lmeSplines::approx.Z(dataM$Zt, dataM$time, timePredict) |
|
|
259 |
|
|
|
260 |
} |
|
|
261 |
|
|
|
262 |
|
|
|
263 |
|
|
|
264 |
#library(nlme) |
|
|
265 |
fit0 <- NULL |
|
|
266 |
fit0 <- try(lm(Expr ~ time, data=dataM )) |
|
|
267 |
if(class(fit0) == 'try-error') { |
|
|
268 |
models <- list() |
|
|
269 |
error <- i |
|
|
270 |
pred.spline <- rep(NA,length(timePredict)) |
|
|
271 |
fits <- NA |
|
|
272 |
}else{ |
|
|
273 |
fit1 <- NULL |
|
|
274 |
fit1 <- try(lme(Expr ~ time, data=dataM, random=list(all=pdIdent(~Zt - 1)), |
|
|
275 |
na.action=na.exclude, control=lmeControl(opt = "optim"))) |
|
|
276 |
pvalue <-1 |
|
|
277 |
if(class(fit1) != 'try-error') { |
|
|
278 |
|
|
|
279 |
pvalue <- anova(fit1, fit0)$'p-value'[2] |
|
|
280 |
|
|
|
281 |
} |
|
|
282 |
|
|
|
283 |
if(pvalue <= 0.05){ |
|
|
284 |
|
|
|
285 |
fit2 <- NULL |
|
|
286 |
fit2 <- try(lme(Expr ~ time, data=dataM, |
|
|
287 |
random=list(all=pdIdent(~Zt - 1), Rep=pdIdent(~1)), |
|
|
288 |
na.action=na.exclude, control=lmeControl(opt = "optim"))) |
|
|
289 |
|
|
|
290 |
if(class(fit2) != 'try-error') { # to prevent errors stopping the loop |
|
|
291 |
|
|
|
292 |
pvalue = anova(fit1, fit2)$'p-value'[2] |
|
|
293 |
}else{ |
|
|
294 |
pvalue=1 |
|
|
295 |
} |
|
|
296 |
|
|
|
297 |
if(pvalue <= 0.05){ |
|
|
298 |
fit3 <-NULL |
|
|
299 |
fit3 <- try(lme(Expr ~ time, data=dataM, |
|
|
300 |
random=list(all=pdIdent(~Zt - 1), Rep=pdDiag(~time)), |
|
|
301 |
na.action=na.exclude, control=lmeControl(opt = "optim"))) |
|
|
302 |
|
|
|
303 |
if(class(fit3) != 'try-error') { # to prevent errors stopping the loop |
|
|
304 |
pvalue = anova(fit2, fit3)$'p-value'[2] |
|
|
305 |
|
|
|
306 |
}else{ |
|
|
307 |
pvalue=1 |
|
|
308 |
} |
|
|
309 |
if(pvalue <= 0.05){ |
|
|
310 |
fits <- 3 |
|
|
311 |
models<- fit3 |
|
|
312 |
if(deri){ |
|
|
313 |
if(basis=='p-spline') |
|
|
314 |
pred.spline = derivLme(fit3) |
|
|
315 |
if(basis=='cubic p-spline') |
|
|
316 |
pred.spline = derivLmeCubic(fit3) |
|
|
317 |
|
|
|
318 |
}else{ |
|
|
319 |
pred.spline = predict(fit3, newdata=pred.df, level=1, na.action=na.exclude) |
|
|
320 |
} |
|
|
321 |
}else{ # choose simpler model: fit2 |
|
|
322 |
fits <- 2 |
|
|
323 |
models <- fit2 |
|
|
324 |
if(deri){ |
|
|
325 |
if(basis=='p-spline') |
|
|
326 |
pred.spline = derivLme(fit2) |
|
|
327 |
if(basis=='cubic p-spline') |
|
|
328 |
pred.spline = derivLmeCubic(fit2) |
|
|
329 |
}else{ |
|
|
330 |
pred.spline = predict(fit2, newdata=pred.df, level=1, na.action=na.exclude) |
|
|
331 |
} |
|
|
332 |
} |
|
|
333 |
|
|
|
334 |
}else{ |
|
|
335 |
models <- fit1 |
|
|
336 |
fits <- 1 |
|
|
337 |
if(deri){ |
|
|
338 |
if(basis=='p-spline') |
|
|
339 |
pred.spline = derivLme(fit1) |
|
|
340 |
if(basis=='cubic p-spline') |
|
|
341 |
pred.spline = derivLmeCubic(fit1) |
|
|
342 |
}else{ |
|
|
343 |
pred.spline = predict(fit1, newdata=pred.df, level=1, na.action=na.exclude) |
|
|
344 |
} |
|
|
345 |
} |
|
|
346 |
}else{ |
|
|
347 |
|
|
|
348 |
models <- fit0 |
|
|
349 |
fits <-0 |
|
|
350 |
if(deri){ |
|
|
351 |
pred.spline = rep(fit0$coefficients[2],length(unique(dataM$time))) |
|
|
352 |
}else{ |
|
|
353 |
|
|
|
354 |
pred.spline = predict(fit0, newdata=pred.df, level=1, na.action=na.exclude) |
|
|
355 |
} |
|
|
356 |
} |
|
|
357 |
} |
|
|
358 |
if(!keepModels) |
|
|
359 |
keepModels <- list() |
|
|
360 |
|
|
|
361 |
return(list(pred.spl=pred.spline,fit=fits,models=models,error=error,knots=knots)) |
|
|
362 |
#new.data[[i]] <- list(pred.spl=pred.spline,fit=fits,models=models,error=error,knots=knots) |
|
|
363 |
|
|
|
364 |
}) |
|
|
365 |
#} |
|
|
366 |
stopCluster(cl) |
|
|
367 |
knots <- sort(unique(as.vector((sapply(new.data,'[[','knots'))))) |
|
|
368 |
pred.spl <- matrix(sapply(new.data,'[[','pred.spl'),nrow=nMolecules,byrow=T) |
|
|
369 |
fits <- unlist(sapply(new.data,'[[','fit')) |
|
|
370 |
error <- unlist(sapply(new.data,'[[','error')) |
|
|
371 |
models <-list() |
|
|
372 |
if(keepModels){ |
|
|
373 |
models <- sapply(new.data,'[[','models') |
|
|
374 |
|
|
|
375 |
if(is.matrix(models)) |
|
|
376 |
models <- sapply(new.data,'[','models') |
|
|
377 |
} |
|
|
378 |
|
|
|
379 |
pred.spl = as.data.frame(pred.spl) |
|
|
380 |
MolNames <- as.character(unlist(colnames(data))) |
|
|
381 |
|
|
|
382 |
if(is.null(MolNames)| sum(is.na(MolNames))>0) |
|
|
383 |
MolNames <- 1:nrow(pred.spl) |
|
|
384 |
if(nrow(pred.spl)==length(MolNames)) |
|
|
385 |
rownames(pred.spl)<-MolNames |
|
|
386 |
if(ncol(pred.spl)==length(timePredict)) |
|
|
387 |
colnames(pred.spl) <- timePredict |
|
|
388 |
error2 <- "All features were modelled" |
|
|
389 |
if(length(error)>0){ |
|
|
390 |
warning('The following features could not be fitted ',paste(MolNames[error],' ',sep='\n')) |
|
|
391 |
error2 <- '' |
|
|
392 |
error2 <- MolNames[error] |
|
|
393 |
|
|
|
394 |
} |
|
|
395 |
|
|
|
396 |
l <-new('lmmspline',predSpline=pred.spl,modelsUsed=fits,basis=basis,knots=knots,errorMolecules=error2,models=models, derivative=deri) |
|
|
397 |
return(l) |
|
|
398 |
|
|
|
399 |
} |
|
|
400 |
|
|
|
401 |
|
|
|
402 |
other.reshape <- function(Rep, Time, Data){ |
|
|
403 |
lme.data<-NULL |
|
|
404 |
lme.data <- data.frame(Time=Time,Rep=Rep,as.matrix(Data)) |
|
|
405 |
lme.data$Time = factor(drop.levels(lme.data$Time)) |
|
|
406 |
lme.data$Rep = factor(drop.levels(lme.data$Rep)) |
|
|
407 |
melt.lme.data <-NULL |
|
|
408 |
melt.lme.data <- melt(lme.data) |
|
|
409 |
cast.lme.data <- NULL |
|
|
410 |
cast.lme.data <- dcast(melt.lme.data, variable+ Rep ~ Time) |
|
|
411 |
melt.lme.data2 <- NULL |
|
|
412 |
melt.lme.data2 <- melt(data.frame(cast.lme.data)) |
|
|
413 |
names(melt.lme.data2) <- c("Molecule", "Rep", "Time", "Expr") |
|
|
414 |
melt.lme.data2$Time <- factor(gsub("^X", "", as.character(melt.lme.data2$Time))) |
|
|
415 |
return(as.data.frame(melt.lme.data2)) |
|
|
416 |
} |
|
|
417 |
|
|
|
418 |
|
|
|
419 |
|
|
|
420 |
#' \code{lmms} class a S4 superclass to extend \code{lmmspline} and \code{lmmsde} class. |
|
|
421 |
#' |
|
|
422 |
#' \code{lmms} class is a superclass for classes \code{lmmspline} and \code{lmmsde}. These classes inherit common slots. |
|
|
423 |
#' |
|
|
424 |
#' @slot basis An object of class \code{character} describing the basis used for modelling. |
|
|
425 |
#' @slot knots An object of class \code{numeric}, describing the boundaries of the splines. If not defined or if basis='cubic' knots are automatically estimated using Ruppert 2002 or are the design points when using 'cubic'. |
|
|
426 |
#' @slot errorMolecules Vector of class \code{character}, describing the molecules that could not be modelled. |
|
|
427 |
#' |
|
|
428 |
#' @name lmms-class |
|
|
429 |
#' @rdname lmms-class |
|
|
430 |
#' @exportClass lmms |
|
|
431 |
|
|
|
432 |
|
|
|
433 |
setClass('lmms',slots=c(basis="character", knots="numeric",errorMolecules="character")) |
|
|
434 |
|
|
|
435 |
#' \code{lmmspline} class a S4 class that extends \code{lmms} class. |
|
|
436 |
#' |
|
|
437 |
#' \code{lmmspline} class inherits from class \code{lmms} and extends it with three further slots: \code{predSpline}, \code{modelsUsed}, \code{models}. The class is returned when applying \code{\link{lmmSpline}} method. |
|
|
438 |
#' |
|
|
439 |
#' @slot predSpline A \code{data.frame} returning the fitted values for the time points of interest. |
|
|
440 |
#' @slot models A \code{list} of class \code{\link{lm}} or \code{\link{lme}} containing the models for every molecule |
|
|
441 |
#' @slot modelsUsed A \code{list} of class \code{lm} or \code{lme}, containing the models used to model the particular feature of interest. |
|
|
442 |
#' @slot derivative A \code{logical} value indicating if the derivative was calculated. |
|
|
443 |
#' |
|
|
444 |
#' |
|
|
445 |
#' @name lmmspline-class |
|
|
446 |
#' @rdname lmmspline-class |
|
|
447 |
#' @exportClass lmmspline |
|
|
448 |
|
|
|
449 |
setClass("lmmspline",slots= c(predSpline="data.frame", modelsUsed="numeric",models="list",derivative='logical'),contains='lmms') |
|
|
450 |
|
|
|
451 |
|
|
|
452 |
#' Predicts fitted values of an \code{lmmspline} Object |
|
|
453 |
#' |
|
|
454 |
#' Predicts the fitted values of an \code{lmmspline} object for time points of interest. |
|
|
455 |
#' |
|
|
456 |
#' @importFrom parallel parLapply |
|
|
457 |
#' @importFrom lmeSplines approx.Z |
|
|
458 |
#' @param object an object inheriting from class \code{lmmspline}. |
|
|
459 |
#' @param timePredict an optional \code{numeric} vector. Vector of time points to predict fitted values. If \code{missing} uses design points. |
|
|
460 |
#' @param numCores alternative \code{numeric} value indicating the number of CPU cores to be used for parallelization. By default estimated automatically. |
|
|
461 |
#' @param ... ignored. |
|
|
462 |
#' @return \code{matrix} containing predicted values for the requested time points from argument \code{timePredict}. |
|
|
463 |
#' @examples |
|
|
464 |
#' \dontrun{ |
|
|
465 |
#' data(kidneySimTimeGroup) |
|
|
466 |
#' G1 <- which(kidneySimTimeGroup$group=="G1") |
|
|
467 |
#' testLMMSpline<- lmmSpline(data=kidneySimTimeGroup$data[G1,], |
|
|
468 |
#' time=kidneySimTimeGroup$time[G1], |
|
|
469 |
#' sampleID=kidneySimTimeGroup$sampleID[G1],keepModels=T) |
|
|
470 |
#' mat.predict <- predict(testLMMSpline, timePredict=c(seq(1,4, by=0.5)))} |
|
|
471 |
|
|
|
472 |
#' @export |
|
|
473 |
predict.lmmspline<- function(object, timePredict, numCores, ...){ |
|
|
474 |
|
|
|
475 |
if(missing(timePredict)){ |
|
|
476 |
return(object@pred.spline) |
|
|
477 |
}else{ |
|
|
478 |
|
|
|
479 |
|
|
|
480 |
|
|
|
481 |
models <- object@models |
|
|
482 |
|
|
|
483 |
if(length(models)==0) |
|
|
484 |
stop('You will need to keep the models to predict time points.') |
|
|
485 |
cl <-sapply(models,class) |
|
|
486 |
i <- which(cl=="lme")[1] |
|
|
487 |
if(length(i)>0){ |
|
|
488 |
lme.model <- models[[i]] |
|
|
489 |
t <- na.omit(lme.model$data$time) |
|
|
490 |
|
|
|
491 |
pred.spline <- rep(NA,length(timePredict)) |
|
|
492 |
pred.df <- data.frame(all=rep(1,length(timePredict)), time=timePredict) |
|
|
493 |
pred.df$Zt = approx.Z(lme.model$data$Zt, lme.model$data$time, timePredict) |
|
|
494 |
|
|
|
495 |
}else{ |
|
|
496 |
lme.model <- models[[i]] |
|
|
497 |
i <- which(cl=="lm")[1] |
|
|
498 |
t <- na.omit(lme.model$model$time) |
|
|
499 |
pred.df <- data.frame(x=timePredict) |
|
|
500 |
} |
|
|
501 |
if(min(timePredict)<min(t) | max(timePredict)>max(t)) |
|
|
502 |
stop(cat('Can only predict values within the time range',range(t)[1],'to',range(t)[2])) |
|
|
503 |
|
|
|
504 |
|
|
|
505 |
if(missing(numCores)){ |
|
|
506 |
num.Cores <- detectCores() |
|
|
507 |
}else{ |
|
|
508 |
num.Cores <- detectCores() |
|
|
509 |
if(num.Cores<numCores){ |
|
|
510 |
warning(paste('The number of cores is bigger than the number of detected cores. Using the number of detected cores',num.Cores,'instead.')) |
|
|
511 |
}else{ |
|
|
512 |
num.Cores <- numCores |
|
|
513 |
} |
|
|
514 |
|
|
|
515 |
} |
|
|
516 |
lme <- nlme::lme |
|
|
517 |
cl <- makeCluster(num.Cores,"SOCK") |
|
|
518 |
clusterExport(cl, list('models','pred.spline','pred.df','predict'),envir=environment()) |
|
|
519 |
|
|
|
520 |
new.data <- parLapply(cl,1:length(models),fun = function(i){ |
|
|
521 |
# library(nlme) |
|
|
522 |
cl <- class(models[[i]]) |
|
|
523 |
pred.spline <- switch(cl, |
|
|
524 |
lm=predict.lm(models[[i]], newdata=pred.df, level=1, na.action=na.exclude), |
|
|
525 |
lme=predict(models[[i]], newdata=pred.df, level=1, na.action=na.exclude) |
|
|
526 |
) |
|
|
527 |
return(pred.spline) |
|
|
528 |
|
|
|
529 |
}) |
|
|
530 |
|
|
|
531 |
stopCluster(cl) |
|
|
532 |
pred.spl <- matrix(unlist(new.data),nrow=length(models),ncol=length(timePredict),byrow=T) |
|
|
533 |
return(pred.spl)} |
|
|
534 |
} |