Introduction

Compare DBSCAN with other clustering methods (k-means, PAM, and Jarvis-Patrick clustering) on a dataset containing two clusters of different density, first without noise and then with added noise.

Dataset 1: Two squares with different density

# Fix the RNG seed so the sampled points, and every clustering result that
# depends on them, are identical on each run. Printed results recorded
# elsewhere in this document were presumably produced without this seed, so
# exact counts may differ.
set.seed(42)

# Sparse square: 100 points drawn uniformly from [-2, 2] x [-2, 2].
sparse_square <- cbind(x = runif(100, -2, 2), y = runif(100, -2, 2))
# Dense square: 200 points drawn uniformly from [2.5, 3.5] x [-0.5, 0.5].
dense_square <- cbind(x = runif(200, 2.5, 3.5), y = runif(200, -0.5, 0.5))

# Stack both squares into one 300 x 2 matrix with columns "x" and "y".
x <- rbind(sparse_square, dense_square)

plot(x)

k-means

# Partition the points into two groups with k-means.
cl <- kmeans(x, centers = 2)
# Colour every point by the cluster it was assigned to.
plot(x, col = cl$cluster)

PAM

library(cluster)
# Partition around medoids into k = 2 clusters.
cl <- pam(x, k = 2)
# pam() stores the assignment vector in the `clustering` component. Spell the
# name out in full instead of relying on `$` partial matching of `cluster`.
plot(x, col = cl$clustering)

DBSCAN

library("dbscan")
# Inspect the k-nearest-neighbour distance curve to choose eps. minPts in
# dbscan() counts the point itself, so the curve that matches minPts = 4 uses
# k = minPts - 1 = 3 neighbours.
kNNdistplot(x, k = 3)

# Small eps: in the recorded run below, 96 points are labelled as noise.
cl <- dbscan(x, eps = .15, minPts = 4)
cl
## DBSCAN clustering for 300 objects.
## Parameters: eps = 0.15, minPts = 4
## The clustering contains 2 cluster(s) and 96 noise points.
## 
##   0   1   2 
##  96   4 200 
## 
## Available fields: cluster, eps, minPts
# Cluster id 0 marks noise; adding 1 moves every id to a positive colour index.
plot(x, col = cl$cluster + 1L)

# Larger eps: in the recorded run below, only 1 point is labelled as noise.
cl <- dbscan(x, eps = .7, minPts = 4)
cl
## DBSCAN clustering for 300 objects.
## Parameters: eps = 0.7, minPts = 4
## The clustering contains 2 cluster(s) and 1 noise points.
## 
##   0   1   2 
##   1  99 200 
## 
## Available fields: cluster, eps, minPts
plot(x, col = cl$cluster + 1L)

Jarvis-Patrick clustering

# Jarvis-Patrick clustering with the parameters k = 10 and kt = 5.
cl <- jpclust(x, k = 10, kt = 5)
# Offset the cluster labels by one before using them as colour indices.
point_colours <- cl$cluster + 1L
plot(x, col = point_colours)

Dataset 2: Added noise

# Background noise: 50 points drawn uniformly from [-5, 5] x [-5, 5].
noise <- cbind(x = runif(50, -5, 5), y = runif(50, -5, 5))
# Append the noise points below the existing rows of x.
x <- rbind(x, noise)
plot(x)

k-means

# Run k-means with two centres on the current contents of x.
cl <- kmeans(x, centers = 2)
# Colour every point by the cluster it was assigned to.
plot(x, col = cl$cluster)

PAM

library(cluster)
# Partition around medoids into k = 2 clusters on the current contents of x.
cl <- pam(x, k = 2)
# pam() stores the assignment vector in the `clustering` component. Spell the
# name out in full instead of relying on `$` partial matching of `cluster`.
plot(x, col = cl$clustering)

DBSCAN

library("dbscan")
# Inspect the k-nearest-neighbour distance curve to choose eps. minPts in
# dbscan() counts the point itself, so the curve that matches minPts = 4 uses
# k = minPts - 1 = 3 neighbours.
kNNdistplot(x, k = 3)

# Small eps: in the recorded run below, 145 points are labelled as noise.
cl <- dbscan(x, eps = .15, minPts = 4)
cl
## DBSCAN clustering for 350 objects.
## Parameters: eps = 0.15, minPts = 4
## The clustering contains 2 cluster(s) and 145 noise points.
## 
##   0   1   2 
## 145   4 201 
## 
## Available fields: cluster, eps, minPts
# Cluster id 0 marks noise; adding 1 moves every id to a positive colour index.
plot(x, col = cl$cluster + 1L)

# Larger eps: in the recorded run below, 47 points are labelled as noise.
cl <- dbscan(x, eps = .5, minPts = 4)
cl
## DBSCAN clustering for 350 objects.
## Parameters: eps = 0.5, minPts = 4
## The clustering contains 2 cluster(s) and 47 noise points.
## 
##   0   1   2 
##  47 102 201 
## 
## Available fields: cluster, eps, minPts
plot(x, col = cl$cluster + 1L)

Jarvis-Patrick clustering

# Jarvis-Patrick clustering with the parameters k = 10 and kt = 5, applied to
# the current contents of x.
cl <- jpclust(x, k = 10, kt = 5)
# Offset the cluster labels by one before using them as colour indices.
point_colours <- cl$cluster + 1L
plot(x, col = point_colours)