|
a |
|
b/partyMod/man/mob.Rd |
|
|
1 |
\name{mob} |
|
|
2 |
\encoding{latin1} |
|
|
3 |
|
|
|
4 |
\alias{mob} |
|
|
5 |
\alias{mob-class} |
|
|
6 |
\alias{coef.mob} |
|
|
7 |
\alias{deviance.mob} |
|
|
8 |
\alias{fitted.mob} |
|
|
9 |
\alias{logLik.mob} |
|
|
10 |
\alias{predict.mob} |
|
|
11 |
\alias{print.mob} |
|
|
12 |
\alias{residuals.mob} |
|
|
13 |
\alias{sctest.mob} |
|
|
14 |
\alias{summary.mob} |
|
|
15 |
\alias{weights.mob} |
|
|
16 |
|
|
|
17 |
\title{Model-based Recursive Partitioning} |
|
|
18 |
|
|
|
19 |
\description{ |
|
|
20 |
MOB is an algorithm for model-based recursive partitioning yielding |
|
|
21 |
a tree with fitted models associated with each terminal node. |
|
|
22 |
} |
|
|
23 |
|
|
|
24 |
\usage{ |
|
|
25 |
mob(formula, weights, data = list(), na.action = na.omit, model = glinearModel, |
|
|
26 |
control = mob_control(), \dots) |
|
|
27 |
|
|
|
28 |
\method{predict}{mob}(object, newdata = NULL, type = c("response", "node"), \dots) |
|
|
29 |
\method{summary}{mob}(object, node = NULL, \dots) |
|
|
30 |
\method{coef}{mob}(object, node = NULL, \dots) |
|
|
31 |
\method{sctest}{mob}(x, node = NULL, \dots) |
|
|
32 |
} |
|
|
33 |
|
|
|
34 |
\arguments{ |
|
|
35 |
\item{formula}{A symbolic description of the model to be fit. This |
|
|
36 |
should be of type \code{y ~ x1 + \dots + xk | z1 + \dots + zl} where |
|
|
37 |
the variables before the \code{|} are passed to the \code{model} and |
|
|
38 |
the variables after the \code{|} are used for partitioning.} |
|
|
39 |
\item{weights}{An optional vector of weights to be used in the fitting |
|
|
40 |
process. Only non-negative integer-valued weights are allowed (default = 1).} |
|
|
41 |
\item{data}{A data frame containing the variables in the model.} |
|
|
42 |
\item{na.action}{A function which indicates what should happen when the data |
|
|
43 |
contain \code{NA}s, defaulting to \code{\link{na.omit}}.} |
|
|
44 |
\item{model}{A model of class \code{"\linkS4class{StatModel}"}. See details |
|
|
45 |
for requirements.} |
|
|
46 |
\item{control}{A list with control parameters as returned by |
|
|
47 |
\code{\link{mob_control}}.} |
|
|
48 |
\item{\dots}{Additional arguments passed to the \code{fit} call for |
|
|
49 |
the \code{model}.} |
|
|
50 |
\item{object, x}{A fitted \code{mob} object.} |
|
|
51 |
\item{newdata}{A data frame with new inputs; by default the learning data |
|
|
52 |
is used.} |
|
|
53 |
\item{type}{A character string specifying whether the response should be |
|
|
54 |
predicted (inherited from the \code{predict} method for the \code{model}) |
|
|
55 |
or the ID of the associated terminal node.} |
|
|
56 |
\item{node}{A vector of node IDs for which the corresponding method should |
|
|
57 |
be applied.} |
|
|
58 |
} |
|
|
59 |
|
|
|
60 |
\details{ |
|
|
61 |
Model-based partitioning fits a model tree using the following algorithm: |
|
|
62 |
\enumerate{ |
|
|
63 |
\item \code{fit} a \code{model} (default: a generalized linear model |
|
|
64 |
\code{"\linkS4class{StatModel}"}) with formula \code{y ~ x1 + \dots + xk} |
|
|
65 |
for the observations in the current node. |
|
|
66 |
\item Assess the stability of the model parameters with respect to each |
|
|
67 |
of the partitioning variables \code{z1}, \dots, \code{zl}. If |
|
|
68 |
there is some overall instability, choose the variable \code{z} |
|
|
69 |
associated with the smallest \eqn{p} value for partitioning, otherwise |
|
|
70 |
stop. For performing the parameter instability fluctuation test, |
|
|
71 |
a \code{\link[sandwich]{estfun}} method and a \code{\link{weights}} method are |
|
|
72 |
needed. |
|
|
73 |
\item Search for the locally optimal split in \code{z} by minimizing the |
|
|
74 |
objective function of the \code{model}. Typically, this will be |
|
|
75 |
something like \code{\link{deviance}} or the negative \code{\link{logLik}} |
|
|
76 |
and can be specified in \code{\link{mob_control}}. |
|
|
77 |
\item Re-fit the \code{model} in both children, using \code{\link{reweight}} |
|
|
78 |
and repeat from step 2. |
|
|
79 |
} |
|
|
80 |
|
|
|
81 |
More details on the conceptual design of the algorithm can be found in |
|
|
82 |
Zeileis, Hothorn, Hornik (2008) and some illustrations are provided in |
|
|
83 |
\code{vignette("MOB")}. |
|
|
84 |
|
|
|
85 |
For the fitted MOB tree, several standard methods are inherited if they are |
|
|
86 |
available for fitted \code{model}s, such as \code{print}, \code{predict}, |
|
|
87 |
\code{residuals}, \code{logLik}, \code{deviance}, \code{weights}, \code{coef} and |
|
|
88 |
\code{summary}. By default, the latter four return the result (deviance, weights, |
|
|
89 |
coefficients, summary) for all terminal nodes, but take a \code{node} argument |
|
|
90 |
that can be set to any node ID. The \code{sctest} method extracts the results |
|
|
91 |
of the parameter stability tests (aka structural change tests) for any given |
|
|
92 |
node, by default for all nodes. Some examples are given below. |
|
|
93 |
} |
|
|
94 |
|
|
|
95 |
\value{ |
|
|
96 |
An object of class \code{mob} inheriting from \code{\link{BinaryTree-class}}. |
|
|
97 |
Every node of the tree is additionally associated with a fitted model. |
|
|
98 |
} |
|
|
99 |
|
|
|
100 |
\references{ |
|
|
101 |
|
|
|
102 |
Achim Zeileis, Torsten Hothorn, and Kurt Hornik (2008). Model-Based |
|
|
103 |
Recursive Partitioning. \emph{Journal of Computational and Graphical Statistics}, |
|
|
104 |
\bold{17}(2), 492--514. |
|
|
105 |
|
|
|
106 |
} |
|
|
107 |
|
|
|
108 |
\seealso{\code{\link{plot.mob}}, \code{\link{mob_control}}} |
|
|
109 |
|
|
|
110 |
\examples{ |
|
|
111 |
|
|
|
112 |
set.seed(290875) |
|
|
113 |
|
|
|
114 |
if(require("mlbench")) { |
|
|
115 |
|
|
|
116 |
## recursive partitioning of a linear regression model |
|
|
117 |
## load data |
|
|
118 |
data("BostonHousing", package = "mlbench") |
|
|
119 |
## and transform variables appropriately (for a linear regression) |
|
|
120 |
BostonHousing$lstat <- log(BostonHousing$lstat) |
|
|
121 |
BostonHousing$rm <- BostonHousing$rm^2 |
|
|
122 |
## as well as partitioning variables (for fluctuation testing) |
|
|
123 |
BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1, |
|
|
124 |
labels = c("no", "yes")) |
|
|
125 |
BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE) |
|
|
126 |
|
|
|
127 |
## partition the linear regression model medv ~ lstat + rm |
|
|
128 |
## with respect to all remaining variables: |
|
|
129 |
fmBH <- mob(medv ~ lstat + rm | zn + indus + chas + nox + age + |
|
|
130 |
dis + rad + tax + crim + b + ptratio, |
|
|
131 |
control = mob_control(minsplit = 40), data = BostonHousing, |
|
|
132 |
model = linearModel) |
|
|
133 |
|
|
|
134 |
## print the resulting tree |
|
|
135 |
fmBH |
|
|
136 |
## or better visualize it |
|
|
137 |
plot(fmBH) |
|
|
138 |
|
|
|
139 |
## extract coefficients in all terminal nodes |
|
|
140 |
coef(fmBH) |
|
|
141 |
## look at full summary, e.g., for node 7 |
|
|
142 |
summary(fmBH, node = 7) |
|
|
143 |
## results of parameter stability tests for that node |
|
|
144 |
sctest(fmBH, node = 7) |
|
|
145 |
## -> no further significant instabilities (at 5\% level) |
|
|
146 |
|
|
|
147 |
## compute mean squared error (on training data) |
|
|
148 |
mean((BostonHousing$medv - fitted(fmBH))^2) |
|
|
149 |
mean(residuals(fmBH)^2) |
|
|
150 |
deviance(fmBH)/sum(weights(fmBH)) |
|
|
151 |
|
|
|
152 |
## evaluate logLik and AIC |
|
|
153 |
logLik(fmBH) |
|
|
154 |
AIC(fmBH) |
|
|
155 |
## (Note that this penalizes estimation of error variances, which |
|
|
156 |
## were treated as nuisance parameters in the fitting process.) |
|
|
157 |
|
|
|
158 |
|
|
|
159 |
## recursive partitioning of a logistic regression model |
|
|
160 |
## load data |
|
|
161 |
data("PimaIndiansDiabetes", package = "mlbench") |
|
|
162 |
## partition logistic regression diabetes ~ glucose |
|
|
163 |
## with respect to all remaining variables |
|
|
164 |
fmPID <- mob(diabetes ~ glucose | pregnant + pressure + triceps + |
|
|
165 |
insulin + mass + pedigree + age, |
|
|
166 |
data = PimaIndiansDiabetes, model = glinearModel, |
|
|
167 |
family = binomial()) |
|
|
168 |
|
|
|
169 |
## fitted model |
|
|
170 |
coef(fmPID) |
|
|
171 |
plot(fmPID) |
|
|
172 |
plot(fmPID, tp_args = list(cdplot = TRUE)) |
|
|
173 |
} |
|
|
174 |
} |
|
|
175 |
\keyword{tree} |