ABACUS: Mining Arbitrary Shaped Clusters from Large Datasets based on Backbone Identification by Vineet Chaoji, Geng Li, Hilmi Yildirim, Mohammed J. Zaki

Incomplete test implementation for EMIS/CSE 8331 by Michael Hahsler

library("dbscan")
library("proxy")
## 
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## The following object is masked from 'package:base':
## 
##     as.matrix

Use a dataset from the CHAMELEON paper.

data("Chameleon")
x <- chameleon_ds4

plot(x)

ABACUS globbing and object movement

k <- 10

inital weights

w <- rep(1L, nrow(x))

globing radius (removes top 5% for robustness)

kNN_d <- kNNdist(x, k)[,k]
r <- mean(kNN_d[kNN_d < quantile(kNN_d, .95)])

# do one round

# make used that in each "round" each point is visited once
visited <- rep(FALSE, nrow(x))
i <- 0L
while(any(!visited)) {
  # we should start from the densest region; strategy is not quite
  # clear in the paper
  j <- sample(which(!visited), 1)
  visited[j] <- TRUE

  # glob object d <= r
  d <- as.vector(dist(x[j,], x))
  glob <- setdiff(which(d <= r), j)

  # move object for kNN which were not globbed
  kNN <- setdiff(order(d, decreasing = FALSE)[1:(k+1L)], j)

  if(length(glob)<1) mv <- kNN
  else mv <- setdiff(kNN, glob)

  if(length(mv) > 0) {
    x[j,] <- (x[j,] * w[j] + colSums(x[mv, ] * w[mv] /
        d[mv])) / (w[j] + sum(w[mv] / d[mv]))
  }

  # update weight and remove globbed objects
  if(length(glob) > 0) {
    w[j] <- sum(w[glob]) + 1L
    w <- w[-glob]

    kNN_d <- kNN_d[-glob]

    x <- x[-glob,]
    visited <- visited[-glob]
  }

  # plot after each 100 points
  i <- i + 1L
  if(! i %% 100) {
    plot(chameleon_ds4, col = "grey", cex = .1,
      sub = paste(nrow(x), "points left"))
    points(x, cex = w/max(w), pch=19)
  }
}