Diff of /partyMod/man/mob.Rd [000000] .. [fbf06f]

Switch to unified view

a b/partyMod/man/mob.Rd
1
\name{mob}
2
\encoding{latin1}
3
4
\alias{mob}
5
\alias{mob-class}
6
\alias{coef.mob}
7
\alias{deviance.mob}
8
\alias{fitted.mob}
9
\alias{logLik.mob}
10
\alias{predict.mob}
11
\alias{print.mob}
12
\alias{residuals.mob}
13
\alias{sctest.mob}
14
\alias{summary.mob}
15
\alias{weights.mob}
16
17
\title{Model-based Recursive Partitioning}
18
19
\description{
20
  MOB is an algorithm for model-based recursive partitioning yielding
21
  a tree with fitted models associated with each terminal node.
22
}
23
24
\usage{
25
mob(formula, weights, data = list(), na.action = na.omit, model = glinearModel,
26
  control = mob_control(), \dots)
27
28
\method{predict}{mob}(object, newdata = NULL, type = c("response", "node"), \dots)
29
\method{summary}{mob}(object, node = NULL, \dots)
30
\method{coef}{mob}(object, node = NULL, \dots)
31
\method{sctest}{mob}(x, node = NULL, \dots)
32
}
33
34
\arguments{
35
  \item{formula}{A symbolic description of the model to be fit. This
36
    should be of type \code{y ~ x1 + \dots + xk | z1 + \dots + zl} where
37
    the variables before the \code{|} are passed to the \code{model} and
38
    the variables after the \code{|} are used for partitioning.}
39
  \item{weights}{An optional vector of weights to be used in the fitting
40
    process. Only non-negative integer-valued weights are allowed (default = 1).}
41
  \item{data}{A data frame containing the variables in the model.}
42
  \item{na.action}{A function which indicates what should happen when the data
43
    contain \code{NA}s, defaulting to \code{\link{na.omit}}.}
44
  \item{model}{A model of class \code{"\linkS4class{StatModel}"}. See details
45
    for requirements.}
46
  \item{control}{A list with control parameters as returned by
47
    \code{\link{mob_control}}.}
48
  \item{\dots}{Additional arguments passed to the \code{fit} call for
49
    the \code{model}.}
50
  \item{object, x}{A fitted \code{mob} object.}
51
  \item{newdata}{A data frame with new inputs; by default, the learning data
52
    is used.}
53
  \item{type}{A character string specifying whether the response should be
54
    predicted (inherited from the \code{predict} method for the \code{model})
55
    or the ID of the associated terminal node.}
56
  \item{node}{A vector of node IDs for which the corresponding method should
57
    be applied.}
58
}
59
60
\details{
61
Model-based partitioning fits a model tree using the following algorithm:
62
  \enumerate{
63
    \item \code{fit} a \code{model} (default: a generalized linear model
64
          \code{"\linkS4class{StatModel}"}) with formula \code{y ~ x1 + \dots + xk}
65
      for the observations in the current node.
66
    \item Assess the stability of the model parameters with respect to each
67
          of the partitioning variables \code{z1}, \dots, \code{zl}. If
68
      there is some overall instability, choose the variable \code{z}
69
      associated with the smallest \eqn{p} value for partitioning, otherwise
70
      stop. For performing the parameter instability fluctuation test,
71
      a \code{\link[sandwich]{estfun}} method and a \code{\link{weights}} method are
72
      needed.
73
    \item Search for the locally optimal split in \code{z} by minimizing the
74
          objective function of the \code{model}. Typically, this will be
75
      something like \code{\link{deviance}} or the negative \code{\link{logLik}}
76
      and can be specified in \code{\link{mob_control}}.
77
    \item Re-fit the \code{model} in both children, using \code{\link{reweight}}
78
          and repeat from step 2.
79
  }
80
  
81
More details on the conceptual design of the algorithm can be found in 
82
Zeileis, Hothorn, and Hornik (2008), and some illustrations are provided in
83
\code{vignette("MOB")}.  
84
  
85
For the fitted MOB tree, several standard methods are inherited if they are
86
available for fitted \code{model}s, such as \code{print}, \code{predict},
87
\code{residuals}, \code{logLik}, \code{deviance}, \code{weights}, \code{coef} and
88
\code{summary}. By default, the latter four return the result (deviance, weights,
89
coefficients, summary) for all terminal nodes, but take a \code{node} argument
90
that can be set to any node ID. The \code{sctest} method extracts the results
91
of the parameter stability tests (aka structural change tests) for any given
92
node, by default for all nodes. Some examples are given below.
93
}
94
95
\value{
96
  An object of class \code{mob} inheriting from \code{\link{BinaryTree-class}}.
97
  Every node of the tree is additionally associated with a fitted model.
98
}
99
100
\references{ 
101
102
   Achim Zeileis, Torsten Hothorn, and Kurt Hornik (2008). Model-Based
103
   Recursive Partitioning. \emph{Journal of Computational and Graphical Statistics}, 
104
   \bold{17}(2), 492--514.
105
106
}
107
108
\seealso{\code{\link{plot.mob}}, \code{\link{mob_control}}}
109
110
\examples{
111
112
set.seed(290875)
113
114
if(require("mlbench")) {
115
116
## recursive partitioning of a linear regression model
117
## load data
118
data("BostonHousing", package = "mlbench")
119
## and transform variables appropriately (for a linear regression)
120
BostonHousing$lstat <- log(BostonHousing$lstat)
121
BostonHousing$rm <- BostonHousing$rm^2
122
## as well as partitioning variables (for fluctuation testing)
123
BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1, 
124
                             labels = c("no", "yes"))
125
BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE)
126
127
## partition the linear regression model medv ~ lstat + rm
128
## with respect to all remaining variables:
129
fmBH <- mob(medv ~ lstat + rm | zn + indus + chas + nox + age + 
130
                                dis + rad + tax + crim + b + ptratio,
131
  control = mob_control(minsplit = 40), data = BostonHousing, 
132
  model = linearModel)
133
134
## print the resulting tree
135
fmBH
136
## or better visualize it
137
plot(fmBH)
138
139
## extract coefficients in all terminal nodes
140
coef(fmBH)
141
## look at full summary, e.g., for node 7
142
summary(fmBH, node = 7)
143
## results of parameter stability tests for that node
144
sctest(fmBH, node = 7)
145
## -> no further significant instabilities (at 5\% level)
146
147
## compute mean squared error (on training data)
148
mean((BostonHousing$medv - fitted(fmBH))^2)
149
mean(residuals(fmBH)^2)
150
deviance(fmBH)/sum(weights(fmBH))
151
152
## evaluate logLik and AIC
153
logLik(fmBH)
154
AIC(fmBH)
155
## (Note that this penalizes estimation of error variances, which
156
## were treated as nuisance parameters in the fitting process.)
157
158
159
## recursive partitioning of a logistic regression model
160
## load data
161
data("PimaIndiansDiabetes", package = "mlbench")
162
## partition logistic regression diabetes ~ glucose 
163
## with respect to all remaining variables
164
fmPID <- mob(diabetes ~ glucose | pregnant + pressure + triceps + 
165
                                  insulin + mass + pedigree + age,
166
  data = PimaIndiansDiabetes, model = glinearModel, 
167
  family = binomial())
168
169
## fitted model
170
coef(fmPID)
171
plot(fmPID)
172
plot(fmPID, tp_args = list(cdplot = TRUE))
173
}
174
}
175
\keyword{tree}