library("mlbench")
# ---- Data: Cassini benchmark problem ----
# Fix the RNG seed so the run is reproducible.
set.seed(2015)
Cassini <- mlbench.cassini(n = 1250)
plot(Cassini)

# Only the coordinate matrix is clustered below.
x <- Cassini$x
dim(x)
# recorded output: [1] 1250 2

# Number of clusters requested from the methods that need it.
k <- 3

# ---- k-means ----
system.time(cl <- kmeans(x, centers = k))
# recorded timing (s): user 0.003, system 0.000, elapsed 0.003

# Colour points by cluster id (palette index shifted by one), then overlay
# the fitted cluster centres as large open circles.
plot(x, col = 1 + cl$cluster)
points(cl$centers, cex = 5, lwd = 2)
# ---- k-medoids: PAM and CLARA (cluster package) ----
library("cluster")

# PAM on the full data set.
system.time(cl <- pam(x, k = k))
## user system elapsed
## 0.099 0.004 0.103
# pam objects store the assignment vector in `clustering`. The component is
# spelled out in full rather than relying on `$` partial matching of
# `cl$cluster`, which would silently break if the object ever gained another
# component starting with "cluster".
plot(x, col = cl$clustering + 1)
points(cl$medoids, cex = 5, lwd = 2)

# CLARA: the sampling-based variant of PAM.
system.time(cl <- clara(x, k = k))
## user system elapsed
## 0.002 0.000 0.003
# clara objects also name the assignment vector `clustering`.
plot(x, col = cl$clustering + 1)
points(cl$medoids, cex = 5, lwd = 2)
# ---- Agglomerative hierarchical clustering ----
# Pairwise distances between the rows of x; reused by every linkage below.
system.time(d <- dist(x))
# recorded timing (s): user 0.008, system 0.000, elapsed 0.008

# -- Complete linkage (the hclust default) --
system.time(hc <- hclust(d))
# recorded timing (s): user 0.054, system 0.000, elapsed 0.054
plot(hc, labels = FALSE)  # dendrogram without leaf labels
cl <- cutree(hc, k = k)   # cut the tree into k groups
plot(x, col = 1 + cl)

# -- Single linkage --
system.time(hc <- hclust(d, method = "single"))
# recorded timing (s): user 0.043, system 0.000, elapsed 0.042
plot(hc, labels = FALSE)
cl <- cutree(hc, k = k)
plot(x, col = 1 + cl)

# -- Average linkage --
system.time(hc <- hclust(d, method = "average"))
# recorded timing (s): user 0.043, system 0.008, elapsed 0.051
plot(hc, labels = FALSE)
cl <- cutree(hc, k = k)
plot(x, col = 1 + cl)
# ---- Spectral clustering (kernlab) ----
library(kernlab)
# This is too slow! The calls are left disabled; uncomment them to try it.
# cl <- specc(x, centers = 3)
# plot(x, col = cl + 1)
# ---- Gaussian mixture model (mclust) ----
# Estimates a Gaussian mixture model using EM and BIC. Tries different k.
library(mclust)
## Package 'mclust' version 5.4.7
## Type 'citation("mclust")' for citing this R package in publications.
system.time(cl <- Mclust(x, G = 1:(k + 3))) ### try 1 to 6 components
## user system elapsed
## 1.374 0.000 1.375
cl
## 'Mclust' model object: (EEV,5)
##
## Available components:
## [1] "call" "data" "modelName" "n"
## [5] "d" "G" "BIC" "loglik"
## [9] "df" "bic" "icl" "hypvol"
## [13] "parameters" "z" "classification" "uncertainty"
# Colour each point by the component it was assigned to.
plot(x, col = cl$classification + 1)
# ---- Density-based clustering (dbscan) ----
library(dbscan)
# DBSCAN uses two parameters, Eps and MinPts. Ester et al. (1996) suggest
# using MinPts = 4 and selecting Eps from a sorted k-dist graph.
# NOTE(review): dbscan's minPts appears to count the point itself, so the
# package documentation may recommend k = minPts - 1 here; k = 4 is kept to
# match the original text. Confirm against the kNNdistplot help page.
kNNdistplot(x, k = 4)
# Find the knee. Points to the right of the knee are considered noise.
system.time(cl <- dbscan(x, eps = .15, minPts = 4))
## user system elapsed
## 0.003 0.000 0.003
cl
## DBSCAN clustering for 1250 objects.
## Parameters: eps = 0.15, minPts = 4
## The clustering contains 3 cluster(s) and 0 noise points.
##
## 1 2 3
## 500 500 250
##
## Available fields: cluster, eps, minPts
# Colour each point by its cluster id, shifted by one palette index.
plot(x, col = cl$cluster + 1)
# ---- Alternative data set: Cassini plus uniform background noise ----
# Overwrites x and k with a new data set: 1000 Cassini points and 250 noise
# points drawn uniformly from the square [-3, 3] x [-3, 3].
Cassini <- mlbench.cassini(n = 1000)
# One call fills the two noise columns in order (first 250 draws, then the
# next 250), which is the same layout as binding two separate draws.
noise <- matrix(runif(2 * 250, min = -3, max = 3), ncol = 2)
x <- rbind(Cassini$x, noise)
# Shuffle the rows so the noise points are not grouped at the end.
x <- x[sample(nrow(x)), ]
plot(x)
dim(x)
# recorded output: [1] 1250 2
k <- 3
# ---- Further alternative data sets ----
# Each section below overwrites x and k; when the script is run top to
# bottom, only the last assignment remains in effect.

# -- Larger Cassini sample --
Cassini <- mlbench.cassini(n = 12500)
plot(Cassini)
x <- Cassini$x
dim(x)
# recorded output: [1] 12500 2
k <- 3

# -- Twonorm problem in three dimensions --
twonorm <- mlbench.twonorm(n = 500, d = 3)
plot(twonorm)
x <- twonorm$x
dim(x)
# recorded output: [1] 500 3
k <- 2

# -- Smiley face (generator defaults) --
Smiley <- mlbench.smiley()
plot(Smiley)
x <- Smiley$x
dim(x)
# recorded output: [1] 500 2
k <- 4

# -- Two spirals with a little noise --
Spirals <- mlbench.spirals(n = 500, cycles = 1, sd = 0.05)
plot(Spirals)
x <- Spirals$x
k <- 2