TRACDS CSE 8331 - Spring 2012 - Michael Hahsler

library(rEMM)
## Loading required package: proxy
## 
## Attaching package: 'proxy'
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## The following object is masked from 'package:base':
## 
##     as.matrix
## Loading required package: igraph
## 
## Attaching package: 'igraph'
## The following objects are masked from 'package:stats':
## 
##     decompose, spectrum
## The following object is masked from 'package:base':
## 
##     union
## 
## Attaching package: 'rEMM'
## The following objects are masked from 'package:igraph':
## 
##     as.igraph, clusters

Create a synthetic data stream

# Simulate a stream with rEMM's synthetic generator: test data only
# (n_train = 0, n_test = 1000), with p_outlier = 0.1.
stream <- synthetic_stream(
  k = 5, d = 2,
  n_subseq = 100,
  p_transition = 0.5, p_swap = 0,
  n_train = 0, n_test = 1000,
  p_outlier = 0.1,
  rangeVar = c(0.005, 0.01)
)

# Inspect the structure of the generated object.
str(stream)
## List of 8
##  $ test            : num [1:1000, 1:2] 0.358 0.242 0.43 0.26 0.399 ...
##  $ train           : logi NA
##  $ sequence_test   : int [1:1000] 1 4 2 2 2 3 3 5 2 2 ...
##  $ sequence_train  : logi NA
##  $ swap_test       : logi NA
##  $ swap_train      : logi NA
##  $ outlier_position: logi [1:1000] FALSE FALSE FALSE FALSE FALSE FALSE ...
##  $ model           :List of 5
##   ..$ k     : num 5
##   ..$ d     : num 2
##   ..$ mu    : num [1:5, 1:2] 0.58 0.36 0.743 0.225 0.113 ...
##   ..$ Sigma :List of 5
##   .. ..$ : num [1:2, 1:2] 0.00695 0.00192 0.00192 0.00601
##   .. ..$ : num [1:2, 1:2] 0.00835 -0.0014 -0.0014 0.00861
##   .. ..$ : num [1:2, 1:2] 0.00587 -0.00578 -0.00578 0.00575
##   .. ..$ : num [1:2, 1:2] 6.61e-03 7.15e-05 7.15e-05 6.50e-03
##   .. ..$ : num [1:2, 1:2] 0.00808 0.0037 0.0037 0.00658
##   ..$ subseq: int [1:100] 1 4 2 2 2 3 3 5 2 2 ...
# Observations of the stream and the per-point sequence labels used as colours.
data <- stream$test
class <- stream$sequence_test
# Use the full element name: `stream$outlier` only resolved through `$`
# partial matching, which silently yields NULL if the match becomes ambiguous.
# Assigning a string coerces `class` from integer to character.
class[stream$outlier_position] <- "gray"

plot(data, col = class)

Choose a suitable threshold (the 25% quantile of the pairwise distances within a random sample of the data)

# Draw 100 random rows and take the 25% quantile of their pairwise distances.
# The row index needs the trailing comma: `data[idx]` treats the matrix as a
# flat vector and, for idx <= nrow(data), returns scalars from column 1 only,
# so the distances would not be computed between whole observations.
sample_rows <- sample(seq_len(nrow(data)), 100)
emm_threshold <- quantile(dist(data[sample_rows, , drop = FALSE]), 0.25)

Build the EMM (TRACDS + tNN)

# Create an empty EMM with the chosen threshold, then feed it the whole stream.
emm <- EMM(threshold = emm_threshold)
# Spell out `verbose` instead of relying on partial argument matching (`verb`).
emm <- build(emm, data, verbose = TRUE)
## Adding 1000 observations. 
## Added 100 observations -  18 clusters.
## Added 200 observations -  21 clusters.
## Added 300 observations -  26 clusters.
## Added 400 observations -  29 clusters.
## Added 500 observations -  32 clusters.
## Added 600 observations -  36 clusters.
## Added 700 observations -  36 clusters.
## Added 800 observations -  38 clusters.
## Added 900 observations -  39 clusters.
## Added 1000 observations -  40 clusters.
## Done - 40 clusters.
## Update for 1000 assignments 
## Resizing matrix from 10 to 20 
## Processing assignment 100 
## Resizing matrix from 20 to 40 
## Processing assignment 200 
## Processing assignment 300 
## Processing assignment 400 
## Processing assignment 500 
## Processing assignment 600 
## Processing assignment 700 
## Processing assignment 800 
## Processing assignment 900 
## Processing assignment 1000 
## Update done.

Show the micro-cluster centers

# Redraw the data, then overlay the micro-cluster centers. Each center's
# symbol size is its count relative to the largest count, scaled to cex 10.
plot(data, col = class)
rel_size <- cluster_counts(emm) / max(cluster_counts(emm))
points(cluster_centers(emm), cex = rel_size * 10)