Implementation of the Cylinder-Bell-Funnel experiment in:
Keogh and Lin, Clustering of time-series subsequences is meaningless: implications for previous and future research, KAIS, 2005.
# Fix the RNG seed so the sampled data set is reproducible.
set.seed(1234)

# Three noise-free template shapes, each 128 samples long and zero outside
# the active region.
# pattern1: linear ramp rising from 1/6 to 10 over samples 31..90.
pattern1 <- rep(0, 128)
pattern1[31:90] <- (1:60) / 6
plot(pattern1, type = "l")

# pattern2: pattern1 reversed, i.e. a ramp falling from 10 to 1/6 over
# samples 39..98.
pattern2 <- rev(pattern1)
plot(pattern2, type = "l")

# pattern3: flat plateau at 10 over samples 31..90.
pattern3 <- rep(0, 128)
pattern3[31:90] <- 10
plot(pattern3, type = "l")

patterns <- list(pattern1, pattern2, pattern3)

# Draw 90 template indices uniformly at random, with replacement.
# TRUE is spelled out because T is an ordinary, reassignable variable.
take <- sample(length(patterns), 90, replace = TRUE)

# Stack the selected templates into a 90 x 128 matrix, one series per row.
# rbind() always returns a matrix, unlike sapply(), whose result type depends
# on its input.
y <- do.call(rbind, patterns[take])
Add Gaussian noise (mean 0, standard deviation 2) to every sample.
# Perturb every entry of y with independent Gaussian noise (mean 0, sd 2).
noise <- rnorm(n = length(y), mean = 0, sd = 2)
y <- y + noise

# Inspect the first three noisy series.
plot(y[1, ], type = "l")
plot(y[2, ], type = "l")
plot(y[3, ], type = "l")

# Whole-sequence clustering: k-means on the complete series with 3 clusters,
# keeping the best of 10 random starts.
cl <- kmeans(y, centers = 3, nstart = 10)
Look at the centroids (black) and the assigned patterns (grey).
# One stacked panel per cluster. par() returns the previous settings, which
# are restored once all panels are drawn.
saved_par <- par(mar = c(0, 2, 0, 0), mfrow = c(3, 1))
for (i in 1:3) {
  # The first plot() call sets up the panel and its fixed y-range.
  plot(cl$centers[i, ], type = "l", ylim = c(-5, 20))

  # Every series assigned to cluster i, drawn in grey.
  members <- which(cl$cluster == i)
  for (m in members) {
    lines(y[m, ], col = "gray")
  }

  # Redraw the centroid last so the member series do not hide it.
  lines(cl$centers[i, ], lwd = 2)
}
par(saved_par)
Note: Whole sequence clustering works as expected.
# Concatenate all series end to end into one long signal. The transpose is
# needed because as.vector() reads a matrix column by column.
y2 <- as.vector(t(y))
plot(y2[1:1000], type = "l")

# Extract sliding windows of length window.size with step 1, one per row.
window.size <- 128

# NOTE(review): the number of windows is kept as in the original script, so
# the last possible start position (length(y2) - window.size + 1) is not used.
n_windows <- length(y2) - window.size

# Row i of the index matrix holds positions i, i + 1, ..., i + window.size - 1.
# Indexing y2 once with this matrix avoids one window() call per row.
window_idx <- outer(seq_len(n_windows), seq_len(window.size) - 1L, "+")
y.window <- matrix(y2[window_idx], nrow = n_windows, ncol = window.size)
dim(y.window)
## [1] 11392 128
Look at some consecutive windows.
# Overlay the first five consecutive windows: window 1 in black, windows 2-5
# in grey.
plot(y.window[1, ], type = "l")
for (w in 2:5) {
  lines(y.window[w, ], col = "grey")
}

# Subsequence clustering: k-means on the sliding windows with 3 clusters,
# keeping the best of 10 random starts.
cl <- kmeans(y.window, centers = 3, nstart = 10)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)
Reproduce the plot of the centroids from the paper.
# Draw all three centroids in one panel: centroid 1 in the default colour,
# centroids 2 and 3 overlaid in red and blue.
plot(cl$centers[1, ], type = "l")
overlay_cols <- c("red", "blue")
for (k in seq_along(overlay_cols)) {
  lines(cl$centers[k + 1, ], col = overlay_cols[k])
}
Look at the centroids (black) and the assigned patterns (grey).
# One stacked panel per cluster: the centroid in black on top of a random
# subset of at most 50 member windows in grey. par() returns the previous
# settings, which are restored once all panels are drawn.
old.par <- par(mar = c(0, 2, 0, 0), mfrow = c(3, 1))
for (i in 1:3) {
  plot(cl$centers[i, ], type = "l", ylim = c(-5, 20))

  members <- which(cl$cluster == i)
  # Sample positions with sample.int() instead of sample(members, 50):
  # sample() treats a length-one vector x as 1:x, and a fixed size of 50
  # fails for a cluster with fewer than 50 members.
  n_shown <- min(50, length(members))
  shown <- members[sample.int(length(members), n_shown)]
  for (j in shown) {
    lines(y.window[j, ], col = "gray")
  }

  # Redraw the centroid last so the member windows do not hide it.
  lines(cl$centers[i, ], lwd = 2)
}
par(old.par)
Try 5 clusters instead.
# Repeat the subsequence clustering with 5 clusters (best of 10 random
# starts).
# NOTE(review): the recorded output below shows Quick-TRANSfer warnings from
# kmeans(); trying a different `algorithm` may avoid them -- confirm against
# the kmeans documentation before changing, since results may differ.
cl <- kmeans(y.window, centers = 5, nstart = 10)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)
## Warning: Quick-TRANSfer stage steps exceeded maximum (= 569600)

# One stacked panel per cluster: the centroid in black on top of a random
# subset of at most 50 member windows in grey. par() returns the previous
# settings, which are restored once all panels are drawn.
old.par <- par(mar = c(0, 2, 0, 0), mfrow = c(5, 1))
for (i in 1:5) {
  plot(cl$centers[i, ], type = "l", ylim = c(-5, 20))

  members <- which(cl$cluster == i)
  # Sample positions with sample.int() instead of sample(members, 50):
  # sample() treats a length-one vector x as 1:x, and a fixed size of 50
  # fails for a cluster with fewer than 50 members.
  n_shown <- min(50, length(members))
  shown <- members[sample.int(length(members), n_shown)]
  for (j in shown) {
    lines(y.window[j, ], col = "gray")
  }

  # Redraw the centroid last so the member windows do not hide it.
  lines(cl$centers[i, ], lwd = 2)
}
par(old.par)
Conclusion: Subsequence clustering does not work this way. The clustering just discretizes the signal into k evenly spaced positions where a mixture of all three patterns has its peak. We would need to replace the moving window with a procedure that identifies the beginning and the end of the patterns that we want to cluster.