[fbf06f]: / partyMod / man / mob.Rd

Download this file

176 lines (149 with data), 6.6 kB

  1
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
\name{mob}
\encoding{latin1}
\alias{mob}
\alias{mob-class}
\alias{coef.mob}
\alias{deviance.mob}
\alias{fitted.mob}
\alias{logLik.mob}
\alias{predict.mob}
\alias{print.mob}
\alias{residuals.mob}
\alias{sctest.mob}
\alias{summary.mob}
\alias{weights.mob}
\title{Model-based Recursive Partitioning}
\description{
MOB is an algorithm for model-based recursive partitioning yielding
a tree with fitted models associated with each terminal node.
}
\usage{
mob(formula, weights, data = list(), na.action = na.omit, model = glinearModel,
control = mob_control(), \dots)
\method{predict}{mob}(object, newdata = NULL, type = c("response", "node"), \dots)
\method{summary}{mob}(object, node = NULL, \dots)
\method{coef}{mob}(object, node = NULL, \dots)
\method{sctest}{mob}(x, node = NULL, \dots)
}
\arguments{
\item{formula}{A symbolic description of the model to be fit. This
should be of type \code{y ~ x1 + \dots + xk | z1 + \dots + zl} where
the variables before the \code{|} are passed to the \code{model} and
the variables after the \code{|} are used for partitioning.}
\item{weights}{An optional vector of weights to be used in the fitting
process. Only non-negative integer valued weights are allowed (default = 1).}
\item{data}{A data frame containing the variables in the model.}
\item{na.action}{A function which indicates what should happen when the data
contain \code{NA}s, defaulting to \code{\link{na.omit}}.}
\item{model}{A model of class \code{"\linkS4class{StatModel}"}. See details
for requirements.}
\item{control}{A list with control parameters as returned by
\code{\link{mob_control}}.}
\item{\dots}{Additional arguments passed to the \code{fit} call for
the \code{model}.}
\item{object, x}{A fitted \code{mob} object.}
\item{newdata}{A data frame with new inputs; by default, the learning data
is used.}
\item{type}{A character string specifying whether the response should be
predicted (\code{"response"}, inherited from the \code{predict} method for
the \code{model}) or the ID of the associated terminal node (\code{"node"}).}
\item{node}{A vector of node IDs for which the corresponding method should
be applied.}
}
\details{
Model-based partitioning fits a model tree using the following algorithm:
\enumerate{
\item \code{fit} a \code{model} (default: a generalized linear model
\code{"\linkS4class{StatModel}"}) with formula \code{y ~ x1 + \dots + xk}
for the observations in the current node.
\item Assess the stability of the model parameters with respect to each
of the partitioning variables \code{z1}, \dots, \code{zl}. If
there is some overall instability, choose the variable \code{z}
associated with the smallest \eqn{p} value for partitioning, otherwise
stop. For performing the parameter instability fluctuation test,
an \code{\link[sandwich]{estfun}} method and a \code{\link{weights}} method are
needed.
\item Search for the locally optimal split in \code{z} by minimizing the
objective function of the \code{model}. Typically, this will be
something like \code{\link{deviance}} or the negative \code{\link{logLik}}
and can be specified in \code{\link{mob_control}}.
\item Re-fit the \code{model} in both children, using \code{\link{reweight}}
and repeat from step 2.
}
More details on the conceptual design of the algorithm can be found in
Zeileis, Hothorn, and Hornik (2008) and some illustrations are provided in
\code{vignette("MOB")}.
For the fitted MOB tree, several standard methods are inherited if they are
available for fitted \code{model}s, such as \code{print}, \code{predict},
\code{residuals}, \code{logLik}, \code{deviance}, \code{weights}, \code{coef} and
\code{summary}. By default, the latter four return the result (deviance, weights,
coefficients, summary) for all terminal nodes, but take a \code{node} argument
that can be set to any node ID. The \code{sctest} method extracts the results
of the parameter stability tests (aka structural change tests) for any given
node, by default for all nodes. Some examples are given below.
}
\value{
An object of class \code{mob} inheriting from \code{\link{BinaryTree-class}}.
Every node of the tree is additionally associated with a fitted model.
}
\references{
Achim Zeileis, Torsten Hothorn, and Kurt Hornik (2008). Model-Based
Recursive Partitioning. \emph{Journal of Computational and Graphical Statistics},
\bold{17}(2), 492--514.
}
\seealso{\code{\link{plot.mob}}, \code{\link{mob_control}}}
\examples{
set.seed(290875)
if(require("mlbench")) {
## recursive partitioning of a linear regression model
## load data
data("BostonHousing", package = "mlbench")
## and transform variables appropriately (for a linear regression)
BostonHousing$lstat <- log(BostonHousing$lstat)
BostonHousing$rm <- BostonHousing$rm^2
## as well as partitioning variables (for fluctuation testing)
BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1,
labels = c("no", "yes"))
BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE)
## partition the linear regression model medv ~ lstat + rm
## with respect to all remaining variables:
fmBH <- mob(medv ~ lstat + rm | zn + indus + chas + nox + age +
dis + rad + tax + crim + b + ptratio,
control = mob_control(minsplit = 40), data = BostonHousing,
model = linearModel)
## print the resulting tree
fmBH
## or better visualize it
plot(fmBH)
## extract coefficients in all terminal nodes
coef(fmBH)
## look at full summary, e.g., for node 7
summary(fmBH, node = 7)
## results of parameter stability tests for that node
sctest(fmBH, node = 7)
## -> no further significant instabilities (at 5\% level)
## compute mean squared error (on training data)
mean((BostonHousing$medv - fitted(fmBH))^2)
mean(residuals(fmBH)^2)
deviance(fmBH)/sum(weights(fmBH))
## evaluate logLik and AIC
logLik(fmBH)
AIC(fmBH)
## (Note that this penalizes estimation of error variances, which
## were treated as nuisance parameters in the fitting process.)
## recursive partitioning of a logistic regression model
## load data
data("PimaIndiansDiabetes", package = "mlbench")
## partition logistic regression diabetes ~ glucose
## with respect to all remaining variables
fmPID <- mob(diabetes ~ glucose | pregnant + pressure + triceps +
insulin + mass + pedigree + age,
data = PimaIndiansDiabetes, model = glinearModel,
family = binomial())
## fitted model
coef(fmPID)
plot(fmPID)
plot(fmPID, tp_args = list(cdplot = TRUE))
}
}
\keyword{tree}