Implementation of the Cylinder-Bell-Funnel experiment in:

Keogh and Lin, Clustering of time-series subsequences is meaningless: implications for previous and future research, KAIS, 2005.

# Fix the RNG seed so that the random draws below are reproducible.
set.seed(1234)

Create data

# Build three noise-free template shapes, each a numeric vector of length 128.

# Pattern 1: zero everywhere except a linear ramp on positions 31..90 that
# rises from 1/6 to 10.
pattern1 <- rep(0, 128)
pattern1[31:90] <- (1:60) / 6
plot(pattern1, type = "l")

# Pattern 2: pattern 1 reversed, i.e. a ramp falling from 10 to 1/6.
pattern2 <- rev(pattern1)
plot(pattern2, type = "l")

# Pattern 3: zero everywhere except a plateau of height 10 on positions 31..90.
pattern3 <- rep(0, 128)
pattern3[31:90] <- 10
plot(pattern3, type = "l")

patterns <- list(pattern1, pattern2, pattern3)

# Draw 90 template indices at random, with replacement.
# Spell out TRUE: `T` is an ordinary variable that can be reassigned.
take <- sample(length(patterns), 90, replace = TRUE)

# Stack the selected templates into a 90 x 128 matrix, one series per row.
# rbind-ing the list directly avoids relying on sapply()'s simplification.
y <- do.call(rbind, patterns[take])

Add noise

# Perturb every entry of y with independent Gaussian noise (mean 0, sd 2).
noise <- rnorm(n = length(y), mean = 0, sd = 2)
y <- y + noise

# Show the first three noisy series.
plot(y[1, ], type = "l")

plot(y[2, ], type = "l")

plot(y[3, ], type = "l")

Whole sequence clustering

# k-means on the rows of y with 3 centers; nstart = 10 tries 10 random
# initialisations and keeps the best solution.
cl <- kmeans(y, centers = 3, nstart = 10)

Look at the centroids (black) and the assigned patterns (grey).

# One panel per cluster, stacked vertically; keep the previous graphics
# settings so they can be restored afterwards.
old.par <- par(mfrow = c(3, 1), mar = c(0, 2, 0, 0))
for (i in seq_len(3)) {
  # The first plot() call only sets up the panel with a fixed y-range.
  plot(cl$centers[i, ], type = "l", ylim = c(-5, 20))

  # Every series assigned to cluster i, drawn in grey.
  members <- which(cl$cluster == i)
  for (member in members) {
    lines(y[member, ], col = "gray")
  }

  # Redraw the centroid last so it sits on top of the grey series.
  lines(cl$centers[i, ], lwd = 2)
}

par(old.par)

Note: Whole sequence clustering works as expected.

Subsequence clustering

# Concatenate the rows of y into one long signal (row 1, then row 2, ...).
y2 <- as.vector(t(y))
plot(y2[1:1000], type = "l")

# Extract sliding windows of length window.size with step 1; each window
# becomes one row of y.window.
window.size <- 128

# Window start positions are 1 .. length(y2) - window.size. The count is sized
# from y2, the vector that is actually indexed (same length as y).
# NOTE(review): this leaves out the final full window, which would start at
# length(y2) - window.size + 1; kept as-is so the number of rows is unchanged.
n.windows <- length(y2) - window.size

# Plain indexing replaces the time-series helper window(), and vapply()
# guarantees every window is a numeric vector of exactly window.size values.
y.window <- t(vapply(
  seq_len(n.windows),
  FUN = function(i) y2[i:(i + window.size - 1)],
  FUN.VALUE = numeric(window.size)
))

dim(y.window)

## [1] 11392   128

Look at some consecutive windows.

# Overlay the first five sliding windows: window 1 in black, 2..5 in grey.
plot(y.window[1, ], type = "l")
for (w in 2:5) {
  lines(y.window[w, ], col = "grey")
}

# k-means on the sliding windows: 3 centers, best of 10 random starts.
cl <- kmeans(y.window, centers = 3, nstart = 10)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

Plot the centroids, as in the paper.

# Overlay the three centroids in one panel (black, red, blue).
# The y-axis spans all centroids; scaling it from the first centroid alone
# could clip the other two.
plot(cl$centers[1, ], type = "l", ylim = range(cl$centers))
lines(cl$centers[2, ], type = "l", col = "red")
lines(cl$centers[3, ], type = "l", col = "blue")

Look at the centroids (black) and the assigned patterns (grey).

# One panel per cluster, stacked vertically; keep the previous graphics
# settings so they can be restored afterwards.
old.par <- par(mar = c(0, 2, 0, 0), mfrow = c(3, 1))
for (i in 1:3) {
  plot(cl$centers[i, ], type = "l", ylim = c(-5, 20))

  # Draw a random subset of at most 50 windows assigned to cluster i.
  # Sampling positions with sample.int() (instead of sample() on the indices)
  # keeps this safe when a cluster has fewer than 50 members, and avoids
  # sample() treating a single index n as the range 1:n.
  members <- which(cl$cluster == i)
  shown <- members[sample.int(length(members), min(50, length(members)))]
  for (j in shown) lines(y.window[j, ], col = "gray")

  # Redraw the centroid last so it sits on top of the grey windows.
  lines(cl$centers[i, ], lwd = 2)
}

par(old.par)

Try 5 clusters instead.

# Repeat the k-means clustering of the sliding windows with 5 centers
# (again keeping the best of 10 random starts).
cl <- kmeans(y.window, centers = 5, nstart = 10)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

# One panel per cluster, stacked vertically; keep the previous graphics
# settings so they can be restored afterwards.
old.par <- par(mar = c(0, 2, 0, 0), mfrow = c(5, 1))
for (i in 1:5) {
  plot(cl$centers[i, ], type = "l", ylim = c(-5, 20))

  # Draw a random subset of at most 50 windows assigned to cluster i.
  # Sampling positions with sample.int() (instead of sample() on the indices)
  # keeps this safe when a cluster has fewer than 50 members, and avoids
  # sample() treating a single index n as the range 1:n.
  members <- which(cl$cluster == i)
  shown <- members[sample.int(length(members), min(50, length(members)))]
  for (j in shown) lines(y.window[j, ], col = "gray")

  # Redraw the centroid last so it sits on top of the grey windows.
  lines(cl$centers[i, ], lwd = 2)
}

par(old.par)

Conclusion: Subsequence clustering does not work this way. The clustering just discretizes the signal into k evenly spaced positions where a mixture of all three patterns has its peak. We would need to replace the moving window with a procedure that identifies the beginning and the end of the patterns that we want to cluster.

EMIS/CSE 8331: Example for Subsequence Clustering

Michael Hahsler

Thu Feb 28 12:44:59 2019

Create data

Whole sequence clustering

Subsequence clustering