ABACUS: Mining Arbitrary Shaped Clusters from Large Datasets based on Backbone Identification by Vineet Chaoji, Geng Li, Hilmi Yildirim, Mohammed J. Zaki

Incomplete test implementation for EMIS/CSE 8331 by Michael Hahsler

```
library("dbscan")
library("proxy")
```

```
##
## Attaching package: 'proxy'
```

```
## The following objects are masked from 'package:stats':
##
## as.dist, dist
```

```
## The following object is masked from 'package:base':
##
## as.matrix
```

```
data("Chameleon")
x <- chameleon_ds4
plot(x)
```

`k <- 10`

inital weights

`w <- rep(1L, nrow(x))`

globing radius (removes top 5% for robustness)

```
kNN_d <- kNNdist(x, k)[,k]
r <- mean(kNN_d[kNN_d < quantile(kNN_d, .95)])
# do one round
# make used that in each "round" each point is visited once
visited <- rep(FALSE, nrow(x))
i <- 0L
while(any(!visited)) {
# we should start from the densest region; strategy is not quite
# clear in the paper
j <- sample(which(!visited), 1)
visited[j] <- TRUE
# glob object d <= r
d <- as.vector(dist(x[j,], x))
glob <- setdiff(which(d <= r), j)
# move object for kNN which were not globbed
kNN <- setdiff(order(d, decreasing = FALSE)[1:(k+1L)], j)
if(length(glob)<1) mv <- kNN
else mv <- setdiff(kNN, glob)
if(length(mv) > 0) {
x[j,] <- (x[j,] * w[j] + colSums(x[mv, ] * w[mv] /
d[mv])) / (w[j] + sum(w[mv] / d[mv]))
}
# update weight and remove globbed objects
if(length(glob) > 0) {
w[j] <- sum(w[glob]) + 1L
w <- w[-glob]
kNN_d <- kNN_d[-glob]
x <- x[-glob,]
visited <- visited[-glob]
}
# plot after each 100 points
i <- i + 1L
if(! i %% 100) {
plot(chameleon_ds4, col = "grey", cex = .1,
sub = paste(nrow(x), "points left"))
points(x, cex = w/max(w), pch=19)
}
}
```