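Several clustering methods share the same components: removing noise points, building a nearest-neighbor reachability graph, and finding the connected components of that graph.
These algorithms can be implemented by combining these building blocks.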
library(dbscan)
library(igraph)
##
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
##
## decompose, spectrum
## The following object is masked from 'package:base':
##
## union
# Drop noise points: keep only the rows of `x` whose eps-neighborhood holds at
# least `minPts` points, counting the point itself.
#
# x      : row-indexable data (one point per row) passed to frNN().
# eps    : neighborhood radius passed to frNN().
# minPts : minimum neighborhood size, the point itself included, for a row to
#          be kept.
#
# Returns the retained rows of `x` with dimensions preserved.
remove_noise <- function(x, eps, minPts) {
  # The + 1L counts the point itself, which frNN()'s neighbor lists leave out.
  n_in_eps <- lengths(frNN(x, eps)$id) + 1L
  # drop = FALSE keeps the row/column structure even when a single row
  # survives, so the result can be passed straight back to frNN().
  x[n_in_eps >= minPts, , drop = FALSE]
}
# Build the fixed-radius nearest-neighbor structure for `x` by delegating to
# frNN().
#
# x   : data handed to frNN().
# eps : neighborhood radius handed to frNN().
#
# Returns the value of frNN(), unmodified.
nn_reachability <- function(x, eps) {
  frNN(x, eps = eps)
}
# Label the connected components of a nearest-neighbor graph.
#
# nn_graph : object accepted by adjacencylist().
#
# Returns the `membership` element of components(), computed on the graph
# that graph_from_adj_list() builds with mode = "total".
connected <- function(nn_graph) {
  adj <- adjacencylist(nn_graph)
  graph <- graph_from_adj_list(adj, mode = "total")
  components(graph)$membership
}
library(mlbench)
# Generate the spirals benchmark data; the fixed seed makes the sample
# reproducible.
set.seed(1234)
x <- mlbench.spirals(n = 500, cycles = 1, sd = 0.05)
plot(x)

# From here on only the `x` element of the generated object is used.
x <- x$x

# Reference clustering with the packaged DBSCAN implementation.
cl <- dbscan(x, eps = 0.1, minPts = 4, borderPoints = FALSE)
cl
## DBSCAN clustering for 500 objects.
## Parameters: eps = 0.1, minPts = 4
## The clustering contains 2 cluster(s) and 16 noise points.
##
## 0 1 2
## 16 240 244
##
## Available fields: cluster, eps, minPts
# Plot the DBSCAN result. Noise is labelled 0, so + 1L shifts every label
# onto a valid color index.
plot(x, col = cl$cluster + 1L, xlim = c(-1, 1), ylim = c(-1, 1))

# Rebuild the clustering from the building blocks: drop noise points, build
# the eps-neighborhood graph on the remaining points, and label its connected
# components.
x2 <- remove_noise(x, eps = 0.1, minPts = 4)
g <- nn_reachability(x2, eps = 0.1)
comp <- connected(g)

# Draw the neighborhood graph, overlay all original points, then highlight
# the retained points colored by component.
plot(g, x2, xlim = c(-1, 1), ylim = c(-1, 1))
points(x)
# points() draws onto the existing plot; xlim/ylim are not graphical
# parameters for it and would only raise warnings, so they are omitted.
points(x2, col = comp + 1L, pch = 19)
# Shared-nearest-neighbor clustering using the packaged implementation.
cl <- sNNclust(x, k = 10, eps = 5, minPts = 5)
plot(x, col = cl$cluster + 1L, xlim = c(-1, 1), ylim = c(-1, 1))

# The same parameters reassembled from building blocks.
eps <- 5
minPts <- 5
# NOTE(review): nn_SNN() is not defined in the visible part of this file;
# presumably a thin wrapper around dbscan::sNN() -- confirm it exists.
g <- nn_SNN(x, k = 10, kt = eps)
# Wrap the adjacency list in an object of class "frNN" so it can be passed to
# dbscan() in place of raw data.
g2 <- structure(list(id = adjacencylist(g), eps = eps), class = "frNN")
cl <- dbscan(g2, minPts = minPts)

plot(g, x, xlim = c(-1, 1), ylim = c(-1, 1))
# points() draws onto the existing plot; xlim/ylim are not graphical
# parameters for it and would only raise warnings, so they are omitted.
points(x, col = cl$cluster + 1L, pch = 19)