[fbf06f]: / partyMod / tests / RandomForest-regtest.Rout.save

Download this file

166 lines (151 with data), 7.0 kB

R Under development (unstable) (2014-06-29 r66051) -- "Unsuffered Consequences"
Copyright (C) 2014 The R Foundation for Statistical Computing
Platform: x86_64-unknown-linux-gnu (64-bit)

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for an HTML browser interface to help.
Type 'q()' to quit R.

> 
> set.seed(290875)
> library("party")
Loading required package: grid
Loading required package: zoo

Attaching package: 'zoo'

The following objects are masked from 'package:base':

    as.Date, as.Date.numeric

Loading required package: sandwich
Loading required package: strucchange
Loading required package: modeltools
Loading required package: stats4
> if (!require("TH.data"))
+     stop("cannot load package TH.data")
Loading required package: TH.data
> if (!require("coin"))
+     stop("cannot load package coin")
Loading required package: coin
Loading required package: survival
Loading required package: splines
> 
> data("GlaucomaM", package = "TH.data")
> rf <- cforest(Class ~ ., data = GlaucomaM, control = cforest_unbiased(ntree = 30))
> stopifnot(mean(GlaucomaM$Class != predict(rf)) < 
+           mean(GlaucomaM$Class != predict(rf, OOB = TRUE)))
> 
> data("GBSG2", package = "TH.data")
> rfS <- cforest(Surv(time, cens) ~ ., data = GBSG2, control = cforest_unbiased(ntree = 30))
> treeresponse(rfS, newdata = GBSG2[1:2,])
$`1`
Call: survfit(formula = y ~ 1, weights = weights)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
    143     394     394     237    1528    1306    1675 

$`2`
Call: survfit(formula = y ~ 1, weights = weights)

records   n.max n.start  events  median 0.95LCL 0.95UCL 
    145     380     380     160    2015    1807    2018 

> 
> ### give it a try, at least
> varimp(rf, pre1.0_0 = TRUE)
           ag            at            as            an            ai 
 0.0000000000  0.0004629630  0.0027777778  0.0013888889  0.0032407407 
          eag           eat           eas           ean           eai 
 0.0000000000  0.0000000000  0.0000000000  0.0027777778 -0.0004629630 
         abrg          abrt          abrs          abrn          abri 
 0.0000000000  0.0023148148  0.0013888889  0.0018518519  0.0046296296 
          hic          mhcg          mhct          mhcs          mhcn 
 0.0069444444  0.0000000000  0.0009259259  0.0000000000  0.0009259259 
         mhci          phcg          phct          phcs          phcn 
 0.0078703704  0.0097222222  0.0000000000  0.0000000000 -0.0004629630 
         phci           hvc          vbsg          vbst          vbss 
 0.0171296296  0.0018518519  0.0013888889 -0.0004629630  0.0018518519 
         vbsn          vbsi          vasg          vast          vass 
 0.0000000000  0.0000000000 -0.0023148148  0.0000000000  0.0000000000 
         vasn          vasi          vbrg          vbrt          vbrs 
 0.0000000000  0.0018518519  0.0000000000  0.0013888889 -0.0004629630 
         vbrn          vbri          varg          vart          vars 
 0.0032407407  0.0004629630  0.0351851852  0.0000000000  0.0254629630 
         varn          vari           mdg           mdt           mds 
 0.0138888889  0.0425925926  0.0000000000  0.0000000000 -0.0023148148 
          mdn           mdi           tmg           tmt           tms 
 0.0032407407  0.0004629630  0.0222222222  0.0009259259  0.0069444444 
          tmn           tmi            mr           rnf          mdic 
-0.0027777778  0.0273148148  0.0000000000  0.0055555556  0.0074074074 
          emd            mv 
 0.0000000000 -0.0013888889 
> 
> P <- proximity(rf)
> stopifnot(max(abs(P - t(P))) == 0)
> 
> P[1:10,1:10]
            2         43        25        65         70         16         6
2  1.00000000 0.15384615 0.7500000 0.0000000 0.07142857 0.13333333 0.7142857
43 0.15384615 1.00000000 0.1818182 0.0000000 0.11111111 0.45454545 0.1111111
25 0.75000000 0.18181818 1.0000000 0.1818182 0.11111111 0.14285714 0.8000000
65 0.00000000 0.00000000 0.1818182 1.0000000 0.00000000 0.00000000 0.1666667
70 0.07142857 0.11111111 0.1111111 0.0000000 1.00000000 0.00000000 0.1428571
16 0.13333333 0.45454545 0.1428571 0.0000000 0.00000000 1.00000000 0.0000000
6  0.71428571 0.11111111 0.8000000 0.1666667 0.14285714 0.00000000 1.0000000
5  0.58823529 0.09090909 0.7692308 0.5000000 0.09090909 0.08333333 0.5000000
12 0.44444444 0.00000000 0.5714286 0.5833333 0.07692308 0.06666667 0.3333333
63 0.46153846 0.10000000 0.5000000 0.2222222 0.00000000 0.18181818 0.5000000
            5         12        63
2  0.58823529 0.44444444 0.4615385
43 0.09090909 0.00000000 0.1000000
25 0.76923077 0.57142857 0.5000000
65 0.50000000 0.58333333 0.2222222
70 0.09090909 0.07692308 0.0000000
16 0.08333333 0.06666667 0.1818182
6  0.50000000 0.33333333 0.5000000
5  1.00000000 0.76923077 0.5454545
12 0.76923077 1.00000000 0.5714286
63 0.54545455 0.57142857 1.0000000
> 
> ### variable importances
> a <- cforest(Species ~ ., data = iris,
+              control = cforest_unbiased(mtry = 2, ntree = 10))
> varimp(a, pre1.0_0 = TRUE)
Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
 0.036363636  0.007272727  0.312727273  0.276363636 
> varimp(a, conditional = TRUE)
Sepal.Length  Sepal.Width Petal.Length  Petal.Width 
 0.003636364 -0.003636364  0.167272727  0.194545455 
> 
> airq <- subset(airquality, complete.cases(airquality))
> a <- cforest(Ozone ~ ., data = airq,
+              control = cforest_unbiased(mtry = 2, ntree = 10))
> varimp(a, pre1.0_0 = TRUE)   
   Solar.R       Wind       Temp      Month        Day 
139.397699 501.974401 500.220403  28.532700   3.806919 
> varimp(a, conditional = TRUE)
   Solar.R       Wind       Temp      Month        Day 
 93.220640 334.737163 212.686904  14.329278   2.061793 
> 
> data("mammoexp", package = "TH.data")
> a <- cforest(ME ~ ., data = mammoexp, control = cforest_classical(ntree = 10))
> varimp(a, pre1.0_0 = TRUE)   
      SYMPT          PB        HIST         BSE        DECT 
0.027998627 0.021174836 0.018630793 0.002646901 0.005578231 
> varimp(a, conditional = TRUE)
      SYMPT          PB        HIST         BSE        DECT 
0.021408831 0.012420497 0.013407572 0.001282682 0.002857143 
> 
> stopifnot(all.equal(unique(sapply(a@weights, sum)), nrow(mammoexp)))
> 
> ### check user-defined weights
> nobs <- nrow(GlaucomaM)
> i <- rep(0.0, nobs)
> i[1:floor(.632 * nobs)] <- 1
> folds <- replicate(100, sample(i))
> rf2 <- cforest(Class ~ ., data = GlaucomaM, control = cforest_unbiased(ntree = 100), weights = folds)
> table(predict(rf), predict(rf2))
          
           glaucoma normal
  glaucoma       89      4
  normal          2    101
> 
> proc.time()
   user  system elapsed 
  3.132   0.052   3.185