R : Copyright 2005, The R Foundation for Statistical Computing Version 2.1.1 (2005-06-20), ISBN 3-900051-07-0 R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for a HTML browser interface to help. Type 'q()' to quit R. > ### *
> ### > attach(NULL, name = "CheckExEnv") > assign(".CheckExEnv", as.environment(2), pos = length(search())) # base > ## add some hooks to label plot pages for base and grid graphics > setHook("plot.new", ".newplot.hook") > setHook("persp", ".newplot.hook") > setHook("grid.newpage", ".gridplot.hook") > > assign("cleanEx", + function(env = .GlobalEnv) { + rm(list = ls(envir = env, all.names = TRUE), envir = env) + RNGkind("default", "default") + set.seed(1) + options(warn = 1) + delayedAssign("T", stop("T used instead of TRUE"), + assign.env = .CheckExEnv) + delayedAssign("F", stop("F used instead of FALSE"), + assign.env = .CheckExEnv) + sch <- search() + newitems <- sch[! sch %in% .oldSearch] + for(item in rev(newitems)) + eval(substitute(detach(item), list(item=item))) + missitems <- .oldSearch[! .oldSearch %in% sch] + if(length(missitems)) + warning("items ", paste(missitems, collapse=", "), + " have been removed from the search path") + }, + env = .CheckExEnv) > assign("..nameEx", "__{must remake R-ex/*.R}__", env = .CheckExEnv) # for now > assign("ptime", proc.time(), env = .CheckExEnv) > grDevices::postscript("hopach-Examples.ps") > assign("par.postscript", graphics::par(no.readonly = TRUE), env = .CheckExEnv) > options(contrasts = c(unordered = "contr.treatment", ordered = "contr.poly")) > options(warn = 1) > library('hopach') Loading required package: cluster > > assign(".oldSearch", search(), env = .CheckExEnv) > assign(".oldNS", loadedNamespaces(), env = .CheckExEnv) > cleanEx(); ..nameEx <- "bootplot" > > ### * bootplot > > flush(stderr()); flush(stdout()) > > ### Name: bootplot > ### Title: function to make a barplot of bootstrap estimated cluster > ### membership probabilities > ### Aliases: bootplot > ### Keywords: cluster nonparametric multivariate > > ### ** Examples > > mydata<-rbind(cbind(rnorm(10,0,0.5),rnorm(10,0,0.5),rnorm(10,0,0.5)),cbind(rnorm(15,5,0.5),rnorm(15,5,0.5),rnorm(15,5,0.5))) > dimnames(mydata)<-list(paste("Var",1:25,sep=""),paste("Exp",1:3,sep="")) > mydist<-distancematrix(mydata,d="euclid") > > #hopach clustering > clustresult<-hopach(mydata,dmat=mydist) Searching for main clusters... Level 1 Level 2 Identified 12 main clusters in level 2 with MSS = 0.1322266 Running down without collapsing from Level 2 Level 3 > > #bootstrap > myobj<-boothopach(mydata,clustresult) > > #plots > bootplot(myobj,clustresult,showclusters=FALSE) > bootplot(myobj,clustresult,labels=paste("Sample",LETTERS[1:25],sep=" ")) > > > > cleanEx(); ..nameEx <- "bootstrap" > > ### * bootstrap > > flush(stderr()); flush(stdout()) > > ### Name: boothopach > ### Title: functions to perform non-parametric bootstrap resampling of > ### hopach clustering results > ### Aliases: boothopach bootmedoids > ### Keywords: cluster nonparametric multivariate > > ### ** Examples > > > #25 variables from two groups with 3 observations per variable > mydata<-rbind(cbind(rnorm(10,0,0.5),rnorm(10,0,0.5),rnorm(10,0,0.5)),cbind(rnorm(15,5,0.5),rnorm(15,5,0.5),rnorm(15,5,0.5))) > dimnames(mydata)<-list(paste("Var",1:25,sep=""),paste("Exp",1:3,sep="")) > mydist<-distancematrix(mydata,d="cosangle") #compute the distance matrix. > > #clusters and final tree > clustresult<-hopach(mydata,dmat=mydist) Searching for main clusters... Level 1 Level 2 Level 3 Identified 13 main clusters in level 3 with MSS = 0.3101384 Running down without collapsing from Level 3 Level 4 > > #bootstrap resampling > myobj<-boothopach(mydata,clustresult) > table(apply(myobj,1,sum)) # all 1 1 25 > myobj[clustresult$clust$medoids,] # identity matrix Cluster0 Cluster1 Cluster2 Cluster3 Cluster4 Cluster5 Cluster6 Cluster7 Var23 1 0 0 0 0 0 0 0 Var11 0 1 0 0 0 0 0 0 Var24 0 0 1 0 0 0 0 0 Var25 0 0 0 1 0 0 0 0 Var22 0 0 0 0 1 0 0 0 Var13 0 0 0 0 0 1 0 0 Var20 0 0 0 0 0 0 1 0 Var21 0 0 0 0 0 0 0 1 Var2 0 0 0 0 0 0 0 0 Var5 0 0 0 0 0 0 0 0 Var10 0 0 0 0 0 0 0 0 Var8 0 0 0 0 0 0 0 0 Var3 0 0 0 0 0 0 0 0 Cluster8 Cluster9 Cluster10 Cluster11 Cluster12 Var23 0 0 0 0 0 Var11 0 0 0 0 0 Var24 0 0 0 0 0 Var25 0 0 0 0 0 Var22 0 0 0 0 0 Var13 0 0 0 0 0 Var20 0 0 0 0 0 Var21 0 0 0 0 0 Var2 1 0 0 0 0 Var5 0 1 0 0 0 Var10 0 0 1 0 0 Var8 0 0 0 1 0 Var3 0 0 0 0 1 > > > > cleanEx(); ..nameEx <- "correlationordering" > > ### * correlationordering > > flush(stderr()); flush(stdout()) > > ### Name: correlationordering > ### Title: function to compute empirical correlation between distance in a > ### list and distance by a metric > ### Aliases: correlationordering improveordering > ### Keywords: multivariate cluster > > ### ** Examples > > mydata<-matrix(rnorm(50),nrow=10) > mydist<-distancematrix(mydata,d="euclid") > image(mydist) > correlationordering(mydist) [1] -0.1125598 > neword<-improveordering(mydist,echo=TRUE) Old order: -0.1125598 New order: 0.5048191 > correlationordering(mydist[neword,neword]) [1] 0.5048191 > image(mydist[neword,neword]) > > > > cleanEx(); ..nameEx <- "disscosangle" > > ### * disscosangle > > flush(stderr()); flush(stdout()) > > ### Name: disscosangle > ### Title: Functions to compute pair-wise distances > ### Aliases: disscosangle disseuclid disscor dissabscosangle dissabseuclid > ### dissabscor vdisscosangle vdisseuclid vdisscor vdissabscosangle > ### vdissabseuclid vdissabscor > ### Keywords: multivariate internal > > ### ** Examples > > data<-matrix(rnorm(50),nr=5) > disscosangle(data) [,1] [,2] [,3] [,4] [,5] [1,] 0.000000 0.8961000 1.0776777 1.3176414 1.0332131 [2,] 0.896100 0.0000000 0.9426031 1.2382288 0.7924644 [3,] 1.077678 0.9426031 0.0000000 0.8832926 1.1244381 [4,] 1.317641 1.2382288 0.8832926 0.0000000 1.1337363 [5,] 1.033213 0.7924644 1.1244381 1.1337363 0.0000000 > > > > cleanEx(); ..nameEx <- "distancematrix" > > ### * distancematrix > > flush(stderr()); flush(stdout()) > > ### Name: distancematrix > ### Title: functions to compute pair wise distances between vectors > ### Aliases: distancematrix distancevector dissmatrix dissvector vectmatrix > ### Keywords: multivariate cluster > > ### ** Examples > > mydata<-matrix(rnorm(50),nrow=10) > deuclid<-distancematrix(mydata,d="euclid") > vdeuclid<-dissvector(deuclid) > ddaisy<-daisy(mydata) > vdeuclid [1] 0.9030511 1.1825045 2.4411623 1.3350727 1.1706898 1.2714891 1.4681477 [8] 0.8881252 0.7272679 0.8608753 1.8501919 0.6930096 0.6638780 0.5663379 [15] 1.1607106 0.8239934 0.7000490 1.6076549 1.3955974 0.7702230 0.7604702 [22] 1.2263538 1.0526821 0.6403975 2.1384664 1.7893929 1.3838738 1.4848965 [29] 1.6922662 1.9009769 0.9026475 0.8948789 1.2985060 1.2511306 1.2466693 [36] 0.7577775 1.2420813 1.0514112 0.9808063 0.7731878 0.8091249 0.7663512 [43] 0.7933131 1.0457646 0.7391256 > ddaisy/sqrt(length(mydata[1,])) Dissimilarities : 1 2 3 4 5 6 7 2 0.9030511 3 1.1825045 0.8608753 4 2.4411623 1.8501919 1.6076549 5 1.3350727 0.6930096 1.3955974 2.1384664 6 1.1706898 0.6638780 0.7702230 1.7893929 0.9026475 7 1.2714891 0.5663379 0.7604702 1.3838738 0.8948789 0.7577775 8 1.4681477 1.1607106 1.2263538 1.4848965 1.2985060 1.2420813 0.7731878 9 0.8881252 0.8239934 1.0526821 1.6922662 1.2511306 1.0514112 0.8091249 10 0.7272679 0.7000490 0.6403975 1.9009769 1.2466693 0.9808063 0.7663512 8 9 2 3 4 5 6 7 8 9 0.7933131 10 1.0457646 0.7391256 Metric : euclidean Number of objects : 10 > > d1<-distancematrix(mydata,d="abscosangle") > d2<-distancevector(mydata,mydata[1,],d="abscosangle") > d1[1,] [1] 0.0000000 0.7036935 0.9893119 0.4502015 0.9670227 0.9901421 0.6163663 [8] 0.9354477 0.6806108 0.5256130 > d2 #equal to d1[1,] [1] 0.0000000 0.7036935 0.9893119 0.4502015 0.9670227 0.9901421 0.6163663 [8] 0.9354477 0.6806108 0.5256130 > > d3<-dissvector(d1) > pair<-vectmatrix(5,10) > d1[pair[1],pair[2]] [1] 0.990142 > d3[5] [1] 0.990142 > > > > cleanEx(); ..nameEx <- "dplot" > > ### * dplot > > flush(stderr()); flush(stdout()) > > ### Name: dplot > ### Title: function to make a pseudo-color image of a distance matrix with > ### the row and column ordering based on HOPACH clustering results. > ### Aliases: dplot > ### Keywords: cluster multivariate > > ### ** Examples > > mydata<-matrix(rnorm(50),nrow=10) > mydist<-distancematrix(mydata,d="euclid") > clustresult<-hopach(mydata,dmat=mydist) Searching for main clusters... Level 1 Identified 6 main clusters in level 1 with MSS = 0.05972382 Running down without collapsing from Level 1 Level 2 > dplot(mydist,clustresult,showclusters=FALSE) > dplot(mydist,clustresult,col=topo.colors(15)) > > > > > cleanEx(); ..nameEx <- "hopach" > > ### * hopach > > flush(stderr()); flush(stdout()) > > ### Name: hopach > ### Title: function to perform HOPACH hierarchical clustering > ### Aliases: hopach > ### Keywords: cluster multivariate > > ### ** Examples > > > #25 variables from two groups with 3 observations per variable > mydata<-rbind(cbind(rnorm(10,0,0.5),rnorm(10,0,0.5),rnorm(10,0,0.5)),cbind(rnorm(15,5,0.5),rnorm(15,5,0.5),rnorm(15,5,0.5))) > dimnames(mydata)<-list(paste("Var",1:25,sep=""),paste("Exp",1:3,sep="")) > mydist<-distancematrix(mydata,d="cosangle") #compute the distance matrix. > > #clusters and final tree > clustresult<-hopach(mydata,dmat=mydist) Searching for main clusters... Level 1 Level 2 Level 3 Identified 13 main clusters in level 3 with MSS = 0.3101384 Running down without collapsing from Level 3 Level 4 > clustresult$clustering$k #number of clusters. [1] 13 > dimnames(mydata)[[1]][clustresult$clustering$medoids] #medoids of clusters. [1] "Var23" "Var11" "Var24" "Var25" "Var22" "Var13" "Var20" "Var21" "Var2" [10] "Var5" "Var10" "Var8" "Var3" > table(clustresult$clustering$labels) #equal to clustresult$clustering$sizes. 110 120 130 140 151 152 160 170 180 190 200 300 400 2 1 2 1 1 4 2 2 1 1 2 4 2 > > #faster, sometimes fewer clusters > greedyresult<-hopach(mydata,clusters="greedy",dmat=mydist) Searching for main clusters... Level 1 Identified 4 main clusters in level 1 with MSS = 0.6294898 Running down without collapsing from Level 1 Level 2 Level 3 Level 4 Level 5 > > #only get the final ordering (no partitioning into clusters) > orderonly<-hopach(mydata,clusters="none",dmat=mydist) Running down without collapsing from Level 1 Level 2 Level 3 Level 4 Level 5 > > #cluster the columns (rather than rows) > colresult<-hopach(t(mydata),dmat=distancematrix(t(mydata),d="euclid")) No strong evidence for clusters in the first level - continuing to split root node anyway. Searching for main clusters... Running down without collapsing from Level 1 > > > > > cleanEx(); ..nameEx <- "hopach.internal" > > ### * hopach.internal > > flush(stderr()); flush(stdout()) > > ### Name: hopach-internal > ### Title: Functions used internally by the hopach package > ### Aliases: collap cutdigits cutzeros digits msscollap msscomplete > ### mssinitlevel mssmulticollap mssnextlevel mssrundown msssplitcluster > ### newnextlevel newsplitcluster nonzeros orderelements paircoll > ### Keywords: internal cluster multivariate > > ### ** Examples > > #These are not user level functions > #See: hopach examples > #? hopach > > > > cleanEx(); ..nameEx <- "labelstomss" > > ### * labelstomss > > flush(stderr()); flush(stdout()) > > ### Name: labelstomss > ### Title: Functions to compute silhouettes and split silhouettes > ### Aliases: labelstomss labelstosil medstosil msscheck silcheck > ### Keywords: cluster multivariate > > ### ** Examples > > > mydata<-rbind(cbind(rnorm(10,0,0.5),rnorm(10,0,0.5),rnorm(10,0,0.5)),cbind(rnorm(15,5,0.5),rnorm(15,5,0.5),rnorm(15,5,0.5))) > mydist<-distancematrix(mydata,d="cosangle") #compute the distance matrix. > > #pam > result1<-pam(mydata,k=2) > result2<-pam(mydata,k=5) > labelstomss(result1$clust,mydist,hierarchical=FALSE) [1] 0.5741288 > labelstomss(result2$clust,mydist,hierarchical=FALSE) [1] 0.5509017 > > #hopach > result3<-hopach(mydata,dmat=mydist) Searching for main clusters... Level 1 Level 2 Level 3 Identified 13 main clusters in level 3 with MSS = 0.3101384 Running down without collapsing from Level 3 Level 4 > labelstomss(result3$clustering$labels,mydist) [1] 0.3101384 > labelstomss(result3$clustering$labels,mydist,within="mean",between="mean") [1] 0.291803 > > > > > cleanEx(); ..nameEx <- "prune" > > ### * prune > > flush(stderr()); flush(stdout()) > > ### Name: prune > ### Title: function to prune a HOPACH tree. > ### Aliases: prune > ### Keywords: cluster multivariate > > ### ** Examples > > mydata<-matrix(rnorm(600),nrow=100) > mydist<-distancematrix(mydata,d="cosangle") > clustresult<-hopach(mydata,dmat=mydist) No strong evidence for clusters in the first level - continuing to split root node anyway. Searching for main clusters... Level 1 Level 2 Level 3 Identified 16 main clusters in level 3 with MSS = 0.1871271 Running down without collapsing from Level 3 Level 4 Level 5 Level 6 Level 7 > level2<-prune(mydata,clustresult,level=2,dmat=mydist,ord="own") Pruning tree to a level above the main clusters... > clustresult$clustering$k [1] 16 > level2$clustering$k [1] 9 > > > > ### *