### Different approaches for clustering
### CSE 8331 - Spring 2012 - Michael Hahsler
###
### Walks through several clustering algorithms on the same 2-D data set and
### plots each result. Points are colored by cluster label + 1 so that label 0
### (used for noise by DBSCAN) maps to color 1 rather than to "no color".

### create data
library("mlbench")
set.seed(1234)  # make the generated data (and k-means starts) reproducible

Cassini <- mlbench.cassini(1000)
plot(Cassini)

x <- Cassini$x
k <- 3

### k-means
cl <- kmeans(x, centers = k)
plot(x, col = cl$cluster + 1)
points(cl$centers, cex = 5, lwd = 2)

### k-medoids
library("cluster")
cl <- pam(x, k = k)
# pam() stores the assignment in `clustering`; spell it out instead of relying
# on `$` partial matching of `cluster`.
plot(x, col = cl$clustering + 1)
points(cl$medoids, cex = 5, lwd = 2)

### hierarchical clustering
d <- dist(x)

hc <- hclust(d)  # default is complete linkage
plot(hc)
cl <- cutree(hc, k = k)
plot(x, col = cl + 1)

hc <- hclust(d, method = "single")
plot(hc)
cl <- cutree(hc, k = k)
plot(x, col = cl + 1)

hc <- hclust(d, method = "average")
plot(hc)
cl <- cutree(hc, k = k)
plot(x, col = cl + 1)

### density based clustering
library("fpc")
# Namespace-qualified so that an attached `dbscan` package cannot mask it.
cl <- fpc::dbscan(x, eps = 0.2, MinPts = 4)
plot(x, col = cl$cluster + 1)

### model based clustering (estimates a Gaussian mixture model using EM and BIC)
library("mclust")
cl <- Mclust(x, G = 1:(k + 1))  # try 1 to k + 1 components
plot(x, col = cl$classification + 1)

### try other datasets
### Each block below overwrites `x` and `k`; presumably the intent is to run
### ONE of them and then re-run the clustering sections above.
twonorm <- mlbench.twonorm(500, d = 2)
plot(twonorm)
x <- twonorm$x
k <- 2

Smiley <- mlbench.smiley()
plot(Smiley)
x <- Smiley$x
k <- 4

Spirals <- mlbench.spirals(500, cycles = 1, sd = 0.05)
plot(Spirals)
x <- Spirals$x
k <- 2