library("mlbench")
# Data ----
# Draw 1250 points from the Cassini benchmark problem with a fixed seed so
# the run is reproducible; `x` holds the coordinates, `k` the cluster count.
set.seed(2015)
Cassini <- mlbench.cassini(1250)
plot(Cassini)
x <- Cassini$x
dim(x)
## [1] 1250 2
k <- 3

# k-means ----
# Time the fit, then colour each point by its cluster (offset by 1 so the
# first cluster is not drawn in black) and overlay the cluster centers.
system.time(cl <- kmeans(x, centers = k))
## user system elapsed
## 0.002 0.001 0.002
plot(x, col = cl$cluster + 1)
points(cl$centers, cex = 5, lwd = 2)
# PAM (partitioning around medoids) ----
library("cluster")
system.time(cl <- pam(x, k = k))
## user system elapsed
## 0.107 0.000 0.107
# The membership vector of a pam object is named `clustering`; use the full
# name rather than `cl$cluster`, which only works through partial matching
# of `$`.
plot(x, col = cl$clustering + 1)
# Overlay the medoids found by PAM.
points(cl$medoids, cex = 5, lwd = 2)
# CLARA ----
# clara() comes from the 'cluster' package loaded earlier in the script.
system.time(cl <- clara(x, k = k))
## user system elapsed
## 0.003 0.000 0.002
# The membership vector of a clara object is named `clustering`; use the
# full name rather than `cl$cluster`, which only works through partial
# matching of `$`.
plot(x, col = cl$clustering + 1)
# Overlay the medoids found by CLARA.
points(cl$medoids, cex = 5, lwd = 2)
# Hierarchical clustering ----
# The distance matrix is computed once and reused for every linkage below.
system.time(d <- dist(x))
## user system elapsed
## 0.003 0.004 0.007

# Complete linkage (the hclust() default): dendrogram, then cut into k groups.
system.time(hc <- hclust(d))
## user system elapsed
## 0.054 0.008 0.062
plot(hc, labels = FALSE)
cl <- cutree(hc, k = k)
plot(x, col = cl + 1)

# Single linkage.
system.time(hc <- hclust(d, method = "single"))
## user system elapsed
## 0.054 0.004 0.058
plot(hc, labels = FALSE)
cl <- cutree(hc, k = k)
plot(x, col = cl + 1)

# Average linkage.
system.time(hc <- hclust(d, method = "average"))
## user system elapsed
## 0.050 0.004 0.054
plot(hc, labels = FALSE)
cl <- cutree(hc, k = k)
plot(x, col = cl + 1)
# Spectral clustering (kernlab) ----
library(kernlab)
# This is too slow! The calls are therefore left commented out.
# cl <- specc(x, centers = 3)
# plot(x, col = cl + 1)
# Model-based clustering (mclust) ----
# Estimates a Gaussian mixture model using EM and BIC. Tries different k.
library(mclust)
## Package 'mclust' version 5.4.2
## Type 'citation("mclust")' for citing this R package in publications.
# Try 1 to k + 3 components (1 to 6 for k = 3).
system.time(cl <- Mclust(x, G = seq_len(k + 3)))
## user system elapsed
## 1.639 0.008 1.648
cl
## 'Mclust' model object: (EEV,5)
##
## Available components:
## [1] "call" "data" "modelName" "n"
## [5] "d" "G" "BIC" "bic"
## [9] "loglik" "df" "hypvol" "parameters"
## [13] "z" "classification" "uncertainty"
# Colour each point by the component it was assigned to.
plot(x, col = cl$classification + 1)
# Density-based clustering (DBSCAN) ----
library(dbscan)
# DBSCAN uses the parameters Eps and MinPts. Ester et al. (1996) suggest
# using MinPts = 4 and selecting Eps from a sorted k-dist graph.
kNNdistplot(x, k = 4)
# Find the knee. Points to the right of the knee are considered noise.
system.time(cl <- dbscan(x, eps = 0.15, minPts = 4))
## user system elapsed
## 0.004 0.000 0.004
cl
## DBSCAN clustering for 1250 objects.
## Parameters: eps = 0.15, minPts = 4
## The clustering contains 3 cluster(s) and 0 noise points.
##
## 1 2 3
## 500 500 250
##
## Available fields: cluster, eps, minPts
# Colour each point by its cluster label.
plot(x, col = cl$cluster + 1)
# Alternative data: Cassini with noise ----
# 1000 Cassini points plus 250 points drawn uniformly from [-3, 3] in each
# coordinate, with the rows shuffled afterwards.
Cassini <- mlbench.cassini(1000)
# One draw of 500 values, filled column-wise into a 250 x 2 matrix.
noise <- matrix(runif(500, min = -3, max = 3), ncol = 2)
x <- rbind(Cassini$x, noise)
x <- x[sample(nrow(x)), ]
plot(x)
dim(x)
## [1] 1250 2
k <- 3
# Alternative data: larger Cassini sample ----
# Same problem as before with ten times as many points (12500).
Cassini <- mlbench.cassini(12500)
plot(Cassini)
x <- Cassini$x
dim(x)
## [1] 12500 2
k <- 3
# Alternative data: twonorm ----
# 500 points of the twonorm benchmark in 3 dimensions; clustered with k = 2.
twonorm <- mlbench.twonorm(500, d = 3)
plot(twonorm)
x <- twonorm$x
dim(x)
## [1] 500 3
k <- 2
# Alternative data: smiley ----
# Smiley benchmark with its default settings; clustered with k = 4.
Smiley <- mlbench.smiley()
plot(Smiley)
x <- Smiley$x
dim(x)
## [1] 500 2
k <- 4
# Alternative data: spirals ----
# 500 points on spirals with one cycle and noise sd 0.05; clustered with
# k = 2.
Spirals <- mlbench.spirals(500, cycles = 1, sd = 0.05)
plot(Spirals)
x <- Spirals$x
k <- 2