Switch to unified view

a b/partyMod/tests/RandomForest-regtest.Rout.save
1
2
R Under development (unstable) (2014-06-29 r66051) -- "Unsuffered Consequences"
3
Copyright (C) 2014 The R Foundation for Statistical Computing
4
Platform: x86_64-unknown-linux-gnu (64-bit)
5
6
R is free software and comes with ABSOLUTELY NO WARRANTY.
7
You are welcome to redistribute it under certain conditions.
8
Type 'license()' or 'licence()' for distribution details.
9
10
R is a collaborative project with many contributors.
11
Type 'contributors()' for more information and
12
'citation()' on how to cite R or R packages in publications.
13
14
Type 'demo()' for some demos, 'help()' for on-line help, or
15
'help.start()' for an HTML browser interface to help.
16
Type 'q()' to quit R.
17
18
> 
19
> set.seed(290875)
20
> library("party")
21
Loading required package: grid
22
Loading required package: zoo
23
24
Attaching package: 'zoo'
25
26
The following objects are masked from 'package:base':
27
28
    as.Date, as.Date.numeric
29
30
Loading required package: sandwich
31
Loading required package: strucchange
32
Loading required package: modeltools
33
Loading required package: stats4
34
> if (!require("TH.data"))
35
+     stop("cannot load package TH.data")
36
Loading required package: TH.data
37
> if (!require("coin"))
38
+     stop("cannot load package coin")
39
Loading required package: coin
40
Loading required package: survival
41
Loading required package: splines
42
> 
43
> data("GlaucomaM", package = "TH.data")
44
> rf <- cforest(Class ~ ., data = GlaucomaM, control = cforest_unbiased(ntree = 30))
45
> stopifnot(mean(GlaucomaM$Class != predict(rf)) < 
46
+           mean(GlaucomaM$Class != predict(rf, OOB = TRUE)))
47
> 
48
> data("GBSG2", package = "TH.data")
49
> rfS <- cforest(Surv(time, cens) ~ ., data = GBSG2, control = cforest_unbiased(ntree = 30))
50
> treeresponse(rfS, newdata = GBSG2[1:2,])
51
$`1`
52
Call: survfit(formula = y ~ 1, weights = weights)
53
54
records   n.max n.start  events  median 0.95LCL 0.95UCL 
55
    143     394     394     237    1528    1306    1675 
56
57
$`2`
58
Call: survfit(formula = y ~ 1, weights = weights)
59
60
records   n.max n.start  events  median 0.95LCL 0.95UCL 
61
    145     380     380     160    2015    1807    2018 
62
63
> 
64
> ### give it a try, at least
65
> varimp(rf, pre1.0_0 = TRUE)
66
           ag            at            as            an            ai 
67
 0.0000000000  0.0004629630  0.0027777778  0.0013888889  0.0032407407 
68
          eag           eat           eas           ean           eai 
69
 0.0000000000  0.0000000000  0.0000000000  0.0027777778 -0.0004629630 
70
         abrg          abrt          abrs          abrn          abri 
71
 0.0000000000  0.0023148148  0.0013888889  0.0018518519  0.0046296296 
72
          hic          mhcg          mhct          mhcs          mhcn 
73
 0.0069444444  0.0000000000  0.0009259259  0.0000000000  0.0009259259 
74
         mhci          phcg          phct          phcs          phcn 
75
 0.0078703704  0.0097222222  0.0000000000  0.0000000000 -0.0004629630 
76
         phci           hvc          vbsg          vbst          vbss 
77
 0.0171296296  0.0018518519  0.0013888889 -0.0004629630  0.0018518519 
78
         vbsn          vbsi          vasg          vast          vass 
79
 0.0000000000  0.0000000000 -0.0023148148  0.0000000000  0.0000000000 
80
         vasn          vasi          vbrg          vbrt          vbrs 
81
 0.0000000000  0.0018518519  0.0000000000  0.0013888889 -0.0004629630 
82
         vbrn          vbri          varg          vart          vars 
83
 0.0032407407  0.0004629630  0.0351851852  0.0000000000  0.0254629630 
84
         varn          vari           mdg           mdt           mds 
85
 0.0138888889  0.0425925926  0.0000000000  0.0000000000 -0.0023148148 
86
          mdn           mdi           tmg           tmt           tms 
87
 0.0032407407  0.0004629630  0.0222222222  0.0009259259  0.0069444444 
88
          tmn           tmi            mr           rnf          mdic 
89
-0.0027777778  0.0273148148  0.0000000000  0.0055555556  0.0074074074 
90
          emd            mv 
91
 0.0000000000 -0.0013888889 
92
> 
93
> P <- proximity(rf)
94
> stopifnot(max(abs(P - t(P))) == 0)
95
> 
96
> P[1:10,1:10]
97
            2         43        25        65         70         16         6
98
2  1.00000000 0.15384615 0.7500000 0.0000000 0.07142857 0.13333333 0.7142857
99
43 0.15384615 1.00000000 0.1818182 0.0000000 0.11111111 0.45454545 0.1111111
100
25 0.75000000 0.18181818 1.0000000 0.1818182 0.11111111 0.14285714 0.8000000
101
65 0.00000000 0.00000000 0.1818182 1.0000000 0.00000000 0.00000000 0.1666667
102
70 0.07142857 0.11111111 0.1111111 0.0000000 1.00000000 0.00000000 0.1428571
103
16 0.13333333 0.45454545 0.1428571 0.0000000 0.00000000 1.00000000 0.0000000
104
6  0.71428571 0.11111111 0.8000000 0.1666667 0.14285714 0.00000000 1.0000000
105
5  0.58823529 0.09090909 0.7692308 0.5000000 0.09090909 0.08333333 0.5000000
106
12 0.44444444 0.00000000 0.5714286 0.5833333 0.07692308 0.06666667 0.3333333
107
63 0.46153846 0.10000000 0.5000000 0.2222222 0.00000000 0.18181818 0.5000000
108
            5         12        63
109
2  0.58823529 0.44444444 0.4615385
110
43 0.09090909 0.00000000 0.1000000
111
25 0.76923077 0.57142857 0.5000000
112
65 0.50000000 0.58333333 0.2222222
113
70 0.09090909 0.07692308 0.0000000
114
16 0.08333333 0.06666667 0.1818182
115
6  0.50000000 0.33333333 0.5000000
116
5  1.00000000 0.76923077 0.5454545
117
12 0.76923077 1.00000000 0.5714286
118
63 0.54545455 0.57142857 1.0000000
119
> 
120
> ### variable importances
121
> a <- cforest(Species ~ ., data = iris,
122
+              control = cforest_unbiased(mtry = 2, ntree = 10))
123
> varimp(a, pre1.0_0 = TRUE)
124
Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
125
 0.036363636  0.007272727  0.312727273  0.276363636 
126
> varimp(a, conditional = TRUE)
127
Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
128
 0.003636364 -0.003636364  0.167272727  0.194545455 
129
> 
130
> airq <- subset(airquality, complete.cases(airquality))
131
> a <- cforest(Ozone ~ ., data = airq,
132
+              control = cforest_unbiased(mtry = 2, ntree = 10))
133
> varimp(a, pre1.0_0 = TRUE)   
134
   Solar.R       Wind       Temp      Month        Day 
135
139.397699 501.974401 500.220403  28.532700   3.806919 
136
> varimp(a, conditional = TRUE)
137
   Solar.R       Wind       Temp      Month        Day 
138
 93.220640 334.737163 212.686904  14.329278   2.061793 
139
> 
140
> data("mammoexp", package = "TH.data")
141
> a <- cforest(ME ~ ., data = mammoexp, control = cforest_classical(ntree = 10))
142
> varimp(a, pre1.0_0 = TRUE)   
143
      SYMPT          PB        HIST         BSE        DECT 
144
0.027998627 0.021174836 0.018630793 0.002646901 0.005578231 
145
> varimp(a, conditional = TRUE)
146
      SYMPT          PB        HIST         BSE        DECT 
147
0.021408831 0.012420497 0.013407572 0.001282682 0.002857143 
148
> 
149
> stopifnot(all.equal(unique(sapply(a@weights, sum)), nrow(mammoexp)))
150
> 
151
> ### check user-defined weights
152
> nobs <- nrow(GlaucomaM)
153
> i <- rep(0.0, nobs)
154
> i[1:floor(.632 * nobs)] <- 1
155
> folds <- replicate(100, sample(i))
156
> rf2 <- cforest(Class ~ ., data = GlaucomaM, control = cforest_unbiased(ntree = 100), weights = folds)
157
> table(predict(rf), predict(rf2))
158
          
159
           glaucoma normal
160
  glaucoma       89      4
161
  normal          2    101
162
> 
163
> proc.time()
164
   user  system elapsed 
165
  3.132   0.052   3.185