R : Copyright 2005, The R Foundation for Statistical Computing
Version 2.1.1  (2005-06-20), ISBN 3-900051-07-0

R is free software and comes with ABSOLUTELY NO WARRANTY.
You are welcome to redistribute it under certain conditions.
Type 'license()' or 'licence()' for distribution details.

R is a collaborative project with many contributors.
Type 'contributors()' for more information and
'citation()' on how to cite R or R packages in publications.

Type 'demo()' for some demos, 'help()' for on-line help, or
'help.start()' for a HTML browser interface to help.
Type 'q()' to quit R.

> ### *
> ###
> attach(NULL, name = "CheckExEnv")
> assign(".CheckExEnv", as.environment(2), pos = length(search())) # base
> ## add some hooks to label plot pages for base and grid graphics
> setHook("plot.new", ".newplot.hook")
> setHook("persp", ".newplot.hook")
> setHook("grid.newpage", ".gridplot.hook")
> 
> assign("cleanEx",
+        function(env = .GlobalEnv) {
+            rm(list = ls(envir = env, all.names = TRUE), envir = env)
+            RNGkind("default", "default")
+            set.seed(1)
+            options(warn = 1)
+            delayedAssign("T", stop("T used instead of TRUE"),
+                          assign.env = .CheckExEnv)
+            delayedAssign("F", stop("F used instead of FALSE"),
+                          assign.env = .CheckExEnv)
+            sch <- search()
+            newitems <- sch[! sch %in% .oldSearch]
+            for(item in rev(newitems))
+                eval(substitute(detach(item), list(item=item)))
+            missitems <- .oldSearch[! .oldSearch %in% sch]
+            if(length(missitems))
+                warning("items ", paste(missitems, collapse=", "),
+                        " have been removed from the search path")
+        },
+        env = .CheckExEnv)
> assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now
> assign("ptime", proc.time(), env = .CheckExEnv)
> grDevices::postscript("tree-Examples.ps")
> assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv)
> options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly"))
> options(warn = 1)
> library('tree')
> 
> assign(".oldSearch", search(), env = .CheckExEnv)
> assign(".oldNS", loadedNamespaces(), env = .CheckExEnv)
> cleanEx(); ..nameEx <- "cv.tree"
> 
> ### * cv.tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: cv.tree
> ### Title: Cross-validation for Choosing Tree Complexity
> ### Aliases: cv.tree
> ### Keywords: tree
> 
> ### ** Examples
> 
> library(MASS)
> data(cpus)
> cpus.ltr <- tree(log10(perf) ~ syct + mmin + mmax + cach +
+     chmin + chmax, data=cpus)
> cv.tree(cpus.ltr, , prune.tree)
$size
[1] 10  8  7  6  5  4  3  2  1

$dev
[1] 10.68810 12.37266 12.42372 12.53046 13.79215 13.96116 21.12216 21.12216
[9] 43.44125

$k
[1]      -Inf  0.6808309  0.7243056  0.8000558  1.1607588  1.4148749  3.7783549
[8]  3.8519002 23.6820624

$method
[1] "deviance"

attr(,"class")
[1] "prune"         "tree.sequence"
> 
> 
> 
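## --- Added sketch; not part of the original check transcript ---------
## A common follow-up to cv.tree() is to prune back to the size with the
## smallest cross-validated deviance.  The names 'cpus.cv', 'best.size'
## and 'cpus.best' are introduced here purely for illustration;
## cv.tree() and prune.tree(best = ) are the documented calls used above.
cpus.cv   <- cv.tree(cpus.ltr, , prune.tree)
best.size <- cpus.cv$size[which.min(cpus.cv$dev)]
cpus.best <- prune.tree(cpus.ltr, best = best.size)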
> cleanEx(); ..nameEx <- "misclass.tree"
> 
> ### * misclass.tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: misclass.tree
> ### Title: Misclassifications by a Classification Tree
> ### Aliases: misclass.tree
> ### Keywords: tree
> 
> ### ** Examples
> 
> data(iris)
> ir.tr <- tree(Species ~., iris)
> misclass.tree(ir.tr)
[1] 4
> misclass.tree(ir.tr, detail=TRUE)
  1   2  3 6 12 24 25 13 7 14 15 
100   0 50 5  1  1  0  2 1  1  0 
> 
> 
> 
> cleanEx(); ..nameEx <- "partition.tree"
> 
> ### * partition.tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: partition.tree
> ### Title: Plot the Partitions of a simple Tree Model
> ### Aliases: partition.tree
> ### Keywords: tree hplot
> 
> ### ** Examples
> 
> data(iris)
> ir.tr <- tree(Species ~., iris)
> ir.tr
node), split, n, deviance, yval, (yprob)
      * denotes terminal node

 1) root 150 329.600 setosa ( 0.33333 0.33333 0.33333 )  
   2) Petal.Length < 2.45 50   0.000 setosa ( 1.00000 0.00000 0.00000 ) *
   3) Petal.Length > 2.45 100 138.600 versicolor ( 0.00000 0.50000 0.50000 )  
     6) Petal.Width < 1.75 54  33.320 versicolor ( 0.00000 0.90741 0.09259 )  
      12) Petal.Length < 4.95 48   9.721 versicolor ( 0.00000 0.97917 0.02083 )  
        24) Sepal.Length < 5.15 5   5.004 versicolor ( 0.00000 0.80000 0.20000 ) *
        25) Sepal.Length > 5.15 43   0.000 versicolor ( 0.00000 1.00000 0.00000 ) *
      13) Petal.Length > 4.95 6   7.638 virginica ( 0.00000 0.33333 0.66667 ) *
     7) Petal.Width > 1.75 46   9.635 virginica ( 0.00000 0.02174 0.97826 )  
      14) Petal.Length < 4.95 6   5.407 virginica ( 0.00000 0.16667 0.83333 ) *
      15) Petal.Length > 4.95 40   0.000 virginica ( 0.00000 0.00000 1.00000 ) *
> ir.tr1 <- snip.tree(ir.tr, nodes = c(12, 7))
> summary(ir.tr1)

Classification tree:
snip.tree(tree = ir.tr, nodes = c(12, 7))
Variables actually used in tree construction:
[1] "Petal.Length" "Petal.Width" 
Number of terminal nodes:  4 
Residual mean deviance:  0.1849 = 26.99 / 146 
Misclassification error rate: 0.02667 = 4 / 150 
> par(pty = "s")
> plot(iris[, 3],iris[, 4], type="n",
+      xlab="petal length", ylab="petal width")
> text(iris[, 3], iris[, 4], c("s", "c", "v")[iris[, 5]])
> partition.tree(ir.tr1, add = TRUE, cex = 1.5)
> 
> # 1D example
> ir.tr <- tree(Petal.Width ~ Petal.Length, iris)
> plot(iris[,3], iris[,4], type="n", xlab="Length", ylab="Width")
> partition.tree(ir.tr, add = TRUE, cex = 1.5)
> 
> 
> 
> graphics::par(get("par.postscript", env = .CheckExEnv))
> cleanEx(); ..nameEx <- "plot.tree.sequence"
> 
> ### * plot.tree.sequence
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: plot.tree.sequence
> ### Title: Plot a Tree Sequence
> ### Aliases: plot.tree.sequence
> ### Keywords: tree
> 
> ### ** Examples
> 
> library(MASS)
> data(cpus)
> cpus.ltr <- tree(log(perf) ~ syct + mmin + mmax + cach + chmin + chmax,
+                  data = cpus)
> plot(prune.tree(cpus.ltr))
> 
> 
> 
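## --- Added sketch; not part of the original check transcript ---------
## After inspecting the tree-sequence plot above, one might prune at an
## explicit cost-complexity penalty k.  The value k = 1.0 is only a
## placeholder chosen for illustration, not taken from the output.
cpus.pruned <- prune.tree(cpus.ltr, k = 1.0)
summary(cpus.pruned)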
> cleanEx(); ..nameEx <- "predict.tree"
> 
> ### * predict.tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: predict.tree
> ### Title: Predictions from a Fitted Tree Object
> ### Aliases: predict.tree
> ### Keywords: tree
> 
> ### ** Examples
> 
> data(shuttle, package="MASS")
> shuttle.tr <- tree(use ~ ., shuttle, subset=1:253,
+                    mindev=1e-6, minsize=2)
> shuttle.tr
node), split, n, deviance, yval, (yprob)
      * denotes terminal node

  1) root 253 345.300 auto ( 0.5731 0.4269 )  
    2) vis: no 128   0.000 auto ( 1.0000 0.0000 ) *
    3) vis: yes 125  99.410 noauto ( 0.1360 0.8640 )  
      6) stability: stab 61  72.190 noauto ( 0.2787 0.7213 )  
       12) error: MM,SS 29  39.340 auto ( 0.5862 0.4138 )  
         24) magn: Light,Medium,Strong 21  20.450 auto ( 0.8095 0.1905 )  
           48) error: MM 9  12.370 auto ( 0.5556 0.4444 )  
             96) sign: nn 3   0.000 noauto ( 0.0000 1.0000 ) *
             97) sign: pp 6   5.407 auto ( 0.8333 0.1667 )  
              194) magn: Light,Medium 4   0.000 auto ( 1.0000 0.0000 ) *
              195) magn: Strong 2   2.773 auto ( 0.5000 0.5000 )  
                390) wind: head 1   0.000 noauto ( 0.0000 1.0000 ) *
                391) wind: tail 1   0.000 auto ( 1.0000 0.0000 ) *
           49) error: SS 12   0.000 auto ( 1.0000 0.0000 ) *
         25) magn: Out 8   0.000 noauto ( 0.0000 1.0000 ) *
       13) error: LX,XL 32   0.000 noauto ( 0.0000 1.0000 ) *
      7) stability: xstab 64   0.000 noauto ( 0.0000 1.0000 ) *
> shuttle1 <- shuttle[254:256, ] # 3 missing cases
> predict(shuttle.tr, shuttle1)
    auto noauto
254    0      1
255    0      1
256    0      1
> 
> 
> 
> cleanEx(); ..nameEx <- "prune.tree"
> 
> ### * prune.tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: prune.tree
> ### Title: Cost-complexity Pruning of Tree Object
> ### Aliases: prune.tree prune.misclass
> ### Keywords: tree
> 
> ### ** Examples
> 
> library(MASS)
> data(fgl)
> fgl.tr <- tree(type ~ ., fgl)
> plot(print(fgl.tr))
node), split, n, deviance, yval, (yprob)
      * denotes terminal node

  1) root 214 645.700 WinNF ( 0.327103 0.355140 0.079439 0.060748 0.042056 0.135514 )  
    2) Mg < 2.695 61 159.200 Head ( 0.000000 0.213115 0.000000 0.213115 0.147541 0.426230 )  
      4) Na < 13.785 24  40.160 Con ( 0.000000 0.458333 0.000000 0.500000 0.000000 0.041667 )  
        8) Al < 1.38 8   6.028 WinNF ( 0.000000 0.875000 0.000000 0.000000 0.000000 0.125000 ) *
        9) Al > 1.38 16  17.990 Con ( 0.000000 0.250000 0.000000 0.750000 0.000000 0.000000 )  
         18) Fe < 0.085 10   0.000 Con ( 0.000000 0.000000 0.000000 1.000000 0.000000 0.000000 ) *
         19) Fe > 0.085 6   7.638 WinNF ( 0.000000 0.666667 0.000000 0.333333 0.000000 0.000000 ) *
      5) Na > 13.785 37  63.940 Head ( 0.000000 0.054054 0.000000 0.027027 0.243243 0.675676 )  
       10) Ba < 0.2 12  17.320 Tabl ( 0.000000 0.166667 0.000000 0.000000 0.750000 0.083333 )  
         20) RI < 1.265 7   0.000 Tabl ( 0.000000 0.000000 0.000000 0.000000 1.000000 0.000000 ) *
         21) RI > 1.265 5  10.550 WinNF ( 0.000000 0.400000 0.000000 0.000000 0.400000 0.200000 ) *
       11) Ba > 0.2 25   8.397 Head ( 0.000000 0.000000 0.000000 0.040000 0.000000 0.960000 ) *
    3) Mg > 2.695 153 319.600 WinF ( 0.457516 0.411765 0.111111 0.000000 0.000000 0.019608 )  
      6) Al < 1.42 101 189.000 WinF ( 0.633663 0.227723 0.128713 0.000000 0.000000 0.009901 )  
       12) RI < -0.93 14  28.970 Veh ( 0.214286 0.285714 0.500000 0.000000 0.000000 0.000000 )  
         24) RI < -1.885 5   6.730 WinF ( 0.600000 0.000000 0.400000 0.000000 0.000000 0.000000 ) *
         25) RI > -1.885 9  12.370 Veh ( 0.000000 0.444444 0.555556 0.000000 0.000000 0.000000 ) *
       13) RI > -0.93 87 142.200 WinF ( 0.701149 0.218391 0.068966 0.000000 0.000000 0.011494 )  
         26) K < 0.29 28  42.500 WinF ( 0.714286 0.071429 0.214286 0.000000 0.000000 0.000000 )  
           52) Ca < 9.67 17  22.070 WinF ( 0.647059 0.000000 0.352941 0.000000 0.000000 0.000000 ) *
           53) Ca > 9.67 11  10.430 WinF ( 0.818182 0.181818 0.000000 0.000000 0.000000 0.000000 ) *
         27) K > 0.29 59  80.310 WinF ( 0.694915 0.288136 0.000000 0.000000 0.000000 0.016949 )  
           54) Mg < 3.75 49  49.640 WinF ( 0.836735 0.142857 0.000000 0.000000 0.000000 0.020408 )  
            108) Fe < 0.145 38  18.440 WinF ( 0.947368 0.026316 0.000000 0.000000 0.000000 0.026316 )  
              216) RI < 1.045 33   0.000 WinF ( 1.000000 0.000000 0.000000 0.000000 0.000000 0.000000 ) *
              217) RI > 1.045 5   9.503 WinF ( 0.600000 0.200000 0.000000 0.000000 0.000000 0.200000 ) *
            109) Fe > 0.145 11  15.160 WinNF ( 0.454545 0.545455 0.000000 0.000000 0.000000 0.000000 )  
              218) Al < 1.17 5   0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
              219) Al > 1.17 6   5.407 WinF ( 0.833333 0.166667 0.000000 0.000000 0.000000 0.000000 ) *
           55) Mg > 3.75 10   0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
      7) Al > 1.42 52  80.450 WinNF ( 0.115385 0.769231 0.076923 0.000000 0.000000 0.038462 )  
       14) Mg < 3.455 17  29.710 WinNF ( 0.000000 0.647059 0.235294 0.000000 0.000000 0.117647 )  
         28) Si < 72.84 8  16.640 Veh ( 0.000000 0.250000 0.500000 0.000000 0.000000 0.250000 ) *
         29) Si > 72.84 9   0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
       15) Mg > 3.455 35  32.070 WinNF ( 0.171429 0.828571 0.000000 0.000000 0.000000 0.000000 )  
         30) Na < 12.835 7   9.561 WinF ( 0.571429 0.428571 0.000000 0.000000 0.000000 0.000000 ) *
         31) Na > 12.835 28  14.410 WinNF ( 0.071429 0.928571 0.000000 0.000000 0.000000 0.000000 )  
           62) K < 0.55 6   7.638 WinNF ( 0.333333 0.666667 0.000000 0.000000 0.000000 0.000000 ) *
           63) K > 0.55 22   0.000 WinNF ( 0.000000 1.000000 0.000000 0.000000 0.000000 0.000000 ) *
> fgl.cv <- cv.tree(fgl.tr,, prune.tree)
> for(i in 2:5) fgl.cv$dev <- fgl.cv$dev +
+     cv.tree(fgl.tr,, prune.tree)$dev
> fgl.cv$dev <- fgl.cv$dev/5
> plot(fgl.cv)
> 
> 
> 
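## --- Added sketch; not part of the original check transcript ---------
## prune.misclass(), documented on the same help page, is prune.tree()
## with the misclassification rate instead of the deviance as the
## pruning criterion, and it can be cross-validated the same way.
## 'fgl.cv.m' is a name introduced only for this illustration.
fgl.cv.m <- cv.tree(fgl.tr, FUN = prune.misclass)
plot(fgl.cv.m)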
> cleanEx(); ..nameEx <- "tile.tree"
> 
> ### * tile.tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: tile.tree
> ### Title: Add Class Barplots to a Classification Tree Plot
> ### Aliases: tile.tree
> ### Keywords: tree hplot
> 
> ### ** Examples
> 
> library(MASS)
> data(fgl)
> fgl.tr <- tree(type ~ ., fgl)
> summary(fgl.tr)

Classification tree:
tree(formula = type ~ ., data = fgl)
Number of terminal nodes:  20 
Residual mean deviance:  0.6853 = 133 / 194 
Misclassification error rate: 0.1542 = 33 / 214 
> plot(fgl.tr); text(fgl.tr, all=TRUE, cex=0.5)
> fgl.tr1 <- snip.tree(fgl.tr, node=c(108, 31, 26))
> tree.screens()
[1] 1 2
> plot(fgl.tr1)
> text(fgl.tr1)
> tile.tree(fgl.tr1, fgl$type)
Warning in data.tree(tree) : retrieving data from fgl
> close.screen(all = TRUE)
> 
> 
> 
> cleanEx(); ..nameEx <- "tree"
> 
> ### * tree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: tree
> ### Title: Fit a Classification or Regression Tree
> ### Aliases: tree print.tree summary.tree print.summary.tree residuals.tree
> ### Keywords: tree
> 
> ### ** Examples
> 
> library(MASS)
> data(cpus)
> cpus.ltr <- tree(log10(perf) ~ syct+mmin+mmax+cach+chmin+chmax, cpus)
> cpus.ltr
node), split, n, deviance, yval
      * denotes terminal node

 1) root 209 43.12000 1.753  
   2) cach < 27 143 11.79000 1.525  
     4) mmax < 6100 78  3.89400 1.375  
       8) mmax < 1750 12  0.78430 1.089 *
       9) mmax > 1750 66  1.94900 1.427 *
     5) mmax > 6100 65  4.04500 1.704  
      10) syct < 360 58  2.50100 1.756  
        20) chmin < 5.5 46  1.22600 1.699 *
        21) chmin > 5.5 12  0.55070 1.974 *
      11) syct > 360 7  0.12910 1.280 *
   3) cach > 27 66  7.64300 2.249  
     6) mmax < 28000 41  2.34100 2.062  
      12) cach < 96.5 34  1.59200 2.008  
        24) mmax < 11240 14  0.42460 1.827 *
        25) mmax > 11240 20  0.38340 2.135 *
      13) cach > 96.5 7  0.17170 2.324 *
     7) mmax > 28000 25  1.52300 2.555  
      14) cach < 56 7  0.06929 2.268 *
      15) cach > 56 18  0.65350 2.667 *
> summary(cpus.ltr)

Regression tree:
tree(formula = log10(perf) ~ syct + mmin + mmax + cach + chmin + chmax,
    data = cpus)
Variables actually used in tree construction:
[1] "cach"  "mmax"  "syct"  "chmin"
Number of terminal nodes:  10 
Residual mean deviance:  0.03187 = 6.342 / 199 
Distribution of residuals:
      Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
-4.945e-01 -1.191e-01  3.571e-04  2.385e-16  1.141e-01  4.680e-01 
> plot(cpus.ltr); text(cpus.ltr)
> 
> data(iris)
> ir.tr <- tree(Species ~., iris)
> ir.tr
node), split, n, deviance, yval, (yprob)
      * denotes terminal node

 1) root 150 329.600 setosa ( 0.33333 0.33333 0.33333 )  
   2) Petal.Length < 2.45 50   0.000 setosa ( 1.00000 0.00000 0.00000 ) *
   3) Petal.Length > 2.45 100 138.600 versicolor ( 0.00000 0.50000 0.50000 )  
     6) Petal.Width < 1.75 54  33.320 versicolor ( 0.00000 0.90741 0.09259 )  
      12) Petal.Length < 4.95 48   9.721 versicolor ( 0.00000 0.97917 0.02083 )  
        24) Sepal.Length < 5.15 5   5.004 versicolor ( 0.00000 0.80000 0.20000 ) *
        25) Sepal.Length > 5.15 43   0.000 versicolor ( 0.00000 1.00000 0.00000 ) *
      13) Petal.Length > 4.95 6   7.638 virginica ( 0.00000 0.33333 0.66667 ) *
     7) Petal.Width > 1.75 46   9.635 virginica ( 0.00000 0.02174 0.97826 )  
      14) Petal.Length < 4.95 6   5.407 virginica ( 0.00000 0.16667 0.83333 ) *
      15) Petal.Length > 4.95 40   0.000 virginica ( 0.00000 0.00000 1.00000 ) *
> summary(ir.tr)

Classification tree:
tree(formula = Species ~ ., data = iris)
Variables actually used in tree construction:
[1] "Petal.Length" "Petal.Width"  "Sepal.Length"
Number of terminal nodes:  6 
Residual mean deviance:  0.1253 = 18.05 / 144 
Misclassification error rate: 0.02667 = 4 / 150 
> ## Don't show: 
> ## tests of singlenode tree
> set.seed(1)
> Nvars <- 4;
> Nsamples <- 5
> N <- Nvars*Nsamples;
> dat <- matrix(sample(1:2, N, c(0.5,0.5), replace=TRUE), Nsamples, Nvars)
> dat <- as.data.frame(dat)
> for (i in 1:Nvars) {
+     dat[,i]<- factor(dat[,i], 1:2)
+ }
> names(dat) <- c(paste("X", 1:3, sep=""), "Y")
> tr <- tree(Y ~ ., dat)
> tr
node), split, n, deviance, yval, (yprob)
      * denotes terminal node

1) root 5 6.73 1 ( 0.6 0.4 ) *
> summary(tr)

Classification tree:
tree(formula = Y ~ ., data = dat)
Variables actually used in tree construction:
character(0)
Number of terminal nodes:  1 
Residual mean deviance:  1.683 = 6.73 / 4 
Misclassification error rate: 0.4 = 2 / 5 
> deviance(tr)
[1] 6.730117
> residuals(tr)
1 2 3 4 5 
1 0 0 1 0 
> ## End Don't show
> 
> 
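## --- Added sketch; not part of the original check transcript ---------
## predict.tree() returns class probabilities for a classification tree
## by default; type = "class" gives fitted class labels instead.  On the
## training data the cross-tabulation below should show the 4
## misclassifications reported by summary(ir.tr) above.
table(predict(ir.tr, iris, type = "class"), iris$Species)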
> cleanEx(); ..nameEx <- "tree.screens"
> 
> ### * tree.screens
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: tree.screens
> ### Title: Split Screen for Plotting Trees
> ### Aliases: tree.screens
> ### Keywords: tree hplot
> 
> ### ** Examples
> 
> library(MASS)
> data(fgl)
> fgl.tr <- tree(type ~ ., fgl)
> summary(fgl.tr)

Classification tree:
tree(formula = type ~ ., data = fgl)
Number of terminal nodes:  20 
Residual mean deviance:  0.6853 = 133 / 194 
Misclassification error rate: 0.1542 = 33 / 214 
> plot(fgl.tr); text(fgl.tr, all=TRUE, cex=0.5)
> fgl.tr1 <- snip.tree(fgl.tr, node=c(108, 31, 26))
> tree.screens()
[1] 1 2
> plot(fgl.tr1)
> tile.tree(fgl.tr1, fgl$type)
Warning in data.tree(tree) : retrieving data from fgl
> close.screen(all = TRUE)
> 
> 
> 
> ### *