Load an image

library("png")
library("seriation")

# Read the PNG into a numeric array. Per the png package documentation the
# dimensions are (height, width, channels).
p <- readPNG("horses.png")
dim(p)
## [1] 536 750   3

# Set up an empty plot whose user coordinates match the image size in pixels.
# nrow(p) is the image height and ncol(p) the image width, so the width goes
# on the x axis and the height on the y axis; asp = 1 keeps pixels square so
# the picture is not stretched.
plot(c(0, ncol(p)), c(0, nrow(p)), type = "n", xlab = "", ylab = "",
  asp = 1)
# Draw the image so that it fills the rectangle (0, 0) - (width, height).
rasterImage(p, 0, 0, ncol(p), nrow(p))

Show the different channels

# Plot each colour plane of the image separately. Every plot uses a 10-step
# palette that runs from black to the fully saturated colour of that channel.

# Red channel
pimage(
  p[, , 1],
  col = rgb(red = seq(0, 1, length.out = 10), green = 0, blue = 0),
  prop = TRUE
)

# Green channel
pimage(
  p[, , 2],
  col = rgb(red = 0, green = seq(0, 1, length.out = 10), blue = 0),
  prop = TRUE
)

# Blue channel
pimage(
  p[, , 3],
  col = rgb(red = 0, green = 0, blue = seq(0, 1, length.out = 10)),
  prop = TRUE
)

Prepare image

Create a data frame with RGB channels for each pixel

# Flatten each colour plane of `p` into one long vector (column-major order)
# and collect the three vectors as the columns red, green and blue. Each row
# of `df` then describes a single pixel.
df <- as.data.frame(
  lapply(
    c(red = 1L, green = 2L, blue = 3L),
    function(k) as.vector(p[, , k])
  )
)

# Peek at the first pixels and confirm there is one row per pixel.
head(df)
##         red     green      blue
## 1 0.3686275 0.3803922 0.2705882
## 2 0.3803922 0.3921569 0.2823529
## 3 0.3490196 0.3607843 0.2431373
## 4 0.3921569 0.4000000 0.2901961
## 5 0.3568627 0.3647059 0.2392157
## 6 0.2862745 0.2980392 0.1568627
dim(df)
## [1] 402000      3

Color distribution and correlation

# Histogram of the pixel intensities of each channel. Columns are selected by
# name rather than by position, and the x axis gets an explicit label instead
# of the default, which is the deparsed expression (e.g. "df[, 1]").
hist(df[["red"]], main = "red", xlab = "intensity")

hist(df[["green"]], main = "green", xlab = "intensity")

hist(df[["blue"]], main = "blue", xlab = "intensity")

The green channel appears to have a bimodal distribution.

The scatter plot below uses a random sample of 1,000 pixels, since there are too many data points to plot them all.

# Pairwise scatter plots of the channels for a random subset of pixels.
# Fix the RNG seed so the same pixels are drawn on every run, and cap the
# sample size at the number of rows so the call also works for small images.
set.seed(1234)
plot(df[sample.int(nrow(df), min(1000L, nrow(df))), ])

Cluster with \(k\)-means and different \(k\)

# Cluster the pixels into two groups by colour. k-means starts from randomly
# chosen centers, so fix the seed for reproducible labels and use several
# random starts (nstart), keeping the best solution. iter.max is raised above
# the default of 10 as a precaution for this many points.
set.seed(1234)
cl <- kmeans(df, centers = 2, nstart = 5, iter.max = 50)

Convert the cluster assignments back into a matrix with the same layout as the original image so that the clusters can be visualized.

# Show the current clustering as an image. `cl$cluster` holds one label per
# row of `df`; filling a matrix with nrow(p) rows column by column puts every
# label back at the position of its pixel.
m <- matrix(cl$cluster, nrow = nrow(p))
pimage(m, prop = TRUE)

# Repeat the clustering and the plot for larger numbers of clusters. The seed
# and the multiple random starts make the resulting images reproducible.
set.seed(1234)
for (k in c(3, 5)) {
  cl <- kmeans(df, centers = k, nstart = 5, iter.max = 50)
  m <- matrix(cl$cluster, nrow = nrow(p))
  pimage(m, prop = TRUE)
}

The horses and the sky are hard to differentiate! However, edge detection should easily be able to find the outlines of the objects.

Cluster with scaled data

# Standardize every channel (scale() centers each column and divides it by
# its standard deviation by default) so that all three channels contribute
# comparably to the distances used by k-means.
# Note: scale() returns a matrix, so `df` is no longer a data frame from here
# on; the row order (one row per pixel) is unchanged.
df <- scale(df)

# Two clusters on the standardized data. Fixed seed and several random starts
# for reproducible labels; iter.max is raised above the default of 10 as a
# precaution for this many points.
set.seed(1234)
cl <- kmeans(df, centers = 2, nstart = 5, iter.max = 50)

Convert the cluster assignments back into a matrix with the same layout as the original image so that the clusters can be visualized.

# Show the current clustering as an image. `cl$cluster` holds one label per
# row of `df`; filling a matrix with nrow(p) rows column by column puts every
# label back at the position of its pixel.
m <- matrix(cl$cluster, nrow = nrow(p))
pimage(m, prop = TRUE)

# Repeat the clustering and the plot for larger numbers of clusters. The seed
# and the multiple random starts make the resulting images reproducible.
set.seed(1234)
for (k in c(3, 5)) {
  cl <- kmeans(df, centers = k, nstart = 5, iter.max = 50)
  m <- matrix(cl$cluster, nrow = nrow(p))
  pimage(m, prop = TRUE)
}