a b/partyMod/man/ctree.Rd
1
\name{Conditional Inference Trees}
2
\alias{ctree}
3
\alias{conditionalTree}
4
\title{ Conditional Inference Trees }
5
\description{
6
  Recursive partitioning for continuous, censored, ordered, nominal and
7
  multivariate response variables in a conditional inference framework. 
8
}
9
\usage{
10
ctree(formula, data, subset = NULL, weights = NULL, 
11
      controls = ctree_control(), xtrafo = ptrafo, ytrafo = ptrafo, 
12
      scores = NULL)
13
}
14
\arguments{
15
  \item{formula}{ a symbolic description of the model to be fit. Note
16
                  that symbols like \code{:} and \code{-} will not work
17
                  and the tree will make use of all variables listed on the
18
                  rhs of \code{formula}.}
19
  \item{data}{ a data frame containing the variables in the model. }
20
  \item{subset}{ an optional vector specifying a subset of observations to be
21
                 used in the fitting process.}
22
  \item{weights}{ an optional vector of weights to be used in the fitting
23
                  process. Only non-negative integer valued weights are
24
                  allowed.}
25
  \item{controls}{an object of class \code{\link{TreeControl}}, which can be
26
                  obtained using \code{\link{ctree_control}}.}
27
  \item{xtrafo}{ a function to be applied to all input variables.
28
                By default, the \code{\link{ptrafo}} function is applied.}
29
  \item{ytrafo}{ a function to be applied to all response variables. 
30
                By default, the \code{\link{ptrafo}} function is applied.}
31
  \item{scores}{ an optional named list of scores to be attached to ordered
32
               factors.}
33
}
34
\details{
35
36
  Conditional inference trees estimate a regression relationship by binary recursive
37
  partitioning in a conditional inference framework. Roughly, the algorithm
38
  works as follows: 1) Test the global null hypothesis of independence between
39
  any of the input variables and the response (which may be multivariate as well). 
40
  Stop if this hypothesis cannot be rejected. Otherwise select the input
41
  variable with strongest association to the response. This
42
  association is measured by a p-value corresponding to a test for the
43
  partial null hypothesis of independence between a single input variable and the response.
44
  2) Implement a binary split in the selected input variable. 
45
  3) Recursively repeat steps 1) and 2). 
46
47
  The implementation utilizes a unified framework for conditional inference,
48
  or permutation tests, developed by Strasser and Weber (1999). The stop
49
  criterion in step 1) is either based on multiplicity adjusted p-values 
50
  (\code{testtype == "Bonferroni"}
51
  or \code{testtype == "MonteCarlo"} in \code{\link{ctree_control}}),
52
  on the univariate p-values (\code{testtype == "Univariate"}),
53
  or on values of the test statistic
54
  (\code{testtype == "Teststatistic"}). In both cases, the
55
  criterion is maximized, i.e., 1 - p-value is used. A split is implemented 
56
  when the criterion exceeds the value given by \code{mincriterion} as
57
  specified in \code{\link{ctree_control}}. For example, when 
58
  \code{mincriterion = 0.95}, the p-value must be smaller than
59
  \eqn{0.05} in order to split this node. This statistical approach ensures that
60
  the right sized tree is grown and no form of pruning or cross-validation
61
  whatsoever is needed. The selection of the input variable to split in
62
  is based on the univariate p-values avoiding a variable selection bias
63
  towards input variables with many possible cutpoints.
64
65
  Multiplicity-adjusted Monte-Carlo p-values are computed 
66
  following a "min-p" approach. The univariate 
67
  p-values based on the limiting distribution (chi-square
68
  or normal) are computed for each of the random 
69
  permutations of the data. This means that one should
70
  use a quadratic test statistic when factors are in
71
  play (because the evaluation of the corresponding
72
  multivariate normal distribution is time-consuming).
73
74
  By default, the scores for each ordinal factor \code{x} are
75
  \code{1:length(x)}; this may be changed using \code{scores = list(x =
76
  c(1,5,6))}, for example.
77
78
  Predictions can be computed using \code{\link{predict}} or
79
  \code{\link{treeresponse}}.  The first function accepts arguments
80
  \code{type = c("response", "node", "prob")} where \code{type = "response"}
81
  returns predicted means, predicted classes or median predicted survival
82
  times, \code{type = "node"} returns terminal node IDs (identical to
83
  \code{\link{where}}) and \code{type = "prob"} gives more information about
84
  the conditional distribution of the response, i.e., class probabilities or
85
  predicted Kaplan-Meier curves and is identical to
86
  \code{\link{treeresponse}}.  For observations with zero weights,
87
  predictions are computed from the fitted tree when \code{newdata = NULL}.
88
89
  For a general description of the methodology see Hothorn, Hornik and
90
  Zeileis (2006) and Hothorn, Hornik, van de Wiel and Zeileis (2006). 
91
  Introductions for novices can be found in Strobl et al. (2009) and
92
  at \url{http://github.com/christophM/overview-ctrees.git}.
93
94
}
95
\value{
96
  An object of class \code{\link{BinaryTree-class}}.
97
}
98
\references{ 
99
100
   Helmut Strasser and Christian Weber (1999). On the asymptotic theory of permutation
101
   statistics. \emph{Mathematical Methods of Statistics}, \bold{8}, 220--250.
102
103
   Torsten Hothorn, Kurt Hornik, Mark A. van de Wiel and Achim Zeileis (2006).
104
   A Lego System for Conditional Inference. \emph{The American Statistician},
105
   \bold{60}(3), 257--263.
106
107
   Torsten Hothorn, Kurt Hornik and Achim Zeileis (2006). Unbiased Recursive
108
   Partitioning: A Conditional Inference Framework. \emph{Journal of
109
   Computational and Graphical Statistics}, \bold{15}(3), 651--674. 
110
   Preprint available
111
   from \url{http://statmath.wu-wien.ac.at/~zeileis/papers/Hothorn+Hornik+Zeileis-2006.pdf}.
112
113
   Carolin Strobl, James Malley and Gerhard Tutz (2009).
114
   An Introduction to Recursive Partitioning: Rationale, Application, and Characteristics of 
115
   Classification and Regression Trees, Bagging, and Random Forests.
116
   \emph{Psychological Methods}, \bold{14}(4), 323--348. 
117
118
}
119
\examples{
120
121
    set.seed(290875)
122
123
    ### regression
124
    airq <- subset(airquality, !is.na(Ozone))
125
    airct <- ctree(Ozone ~ ., data = airq, 
126
                   controls = ctree_control(maxsurrogate = 3))
127
    airct
128
    plot(airct)
129
    mean((airq$Ozone - predict(airct))^2)
130
    ### extract terminal node ID, two ways
131
    all.equal(predict(airct, type = "node"), where(airct))
132
133
    ### classification
134
    irisct <- ctree(Species ~ .,data = iris)
135
    irisct
136
    plot(irisct)
137
    table(predict(irisct), iris$Species)
138
139
    ### estimated class probabilities, a list
140
    tr <- treeresponse(irisct, newdata = iris[1:10,])
141
142
    ### ordinal regression
143
    data("mammoexp", package = "TH.data")
144
    mammoct <- ctree(ME ~ ., data = mammoexp) 
145
    plot(mammoct)
146
147
    ### estimated class probabilities
148
    treeresponse(mammoct, newdata = mammoexp[1:10,])
149
150
    ### survival analysis
151
    if (require("TH.data") && require("survival")) {
152
        data("GBSG2", package = "TH.data")
153
        GBSG2ct <- ctree(Surv(time, cens) ~ .,data = GBSG2)
154
        plot(GBSG2ct)
155
        treeresponse(GBSG2ct, newdata = GBSG2[1:2,])        
156
    }
157
158
    ### if you are interested in the internals:
159
    ### generate doxygen documentation
160
    \dontrun{
161
162
        ### download src package into temp dir
163
        tmpdir <- tempdir()
164
        tgz <- download.packages("party", destdir = tmpdir)[2]
165
        ### extract
166
        untar(tgz, exdir = tmpdir)
167
        wd <- setwd(file.path(tmpdir, "party"))
168
        ### run doxygen (assuming it is there)
169
        system("doxygen inst/doxygen.cfg")
170
        setwd(wd)
171
        ### have fun
172
        browseURL(file.path(tmpdir, "party", "inst", 
173
                            "documentation", "html", "index.html")) 
174
    }
175
}
176
\keyword{tree}