This work is licensed under the Creative Commons Attribution 4.0 International License (CC BY 4.0). For questions, please contact Michael Hahsler.

library("recommenderlab")
## Loading required package: Matrix
## Loading required package: arules
## 
## Attaching package: 'arules'
## 
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
## 
## Loading required package: proxy
## 
## Attaching package: 'proxy'
## 
## The following object is masked from 'package:Matrix':
## 
##     as.matrix
## 
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## 
## The following object is masked from 'package:base':
## 
##     as.matrix
## 
## Loading required package: registry

Rating Data

# Load the Jester5k rating data set into the workspace.
data("Jester5k")
# Printing the object reports its dimensions, class, and number of ratings.
Jester5k
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.
# Peek at the first 10 rows and 5 columns of the underlying sparse matrix;
# a "." in the printout marks an entry with no stored rating.
getRatingMatrix(Jester5k)[1:10,1:5]
## 10 x 5 sparse Matrix of class "dgCMatrix"
##           j1    j2    j3    j4    j5
## u2841   7.91  9.17  5.34  8.16 -8.74
## u15547 -3.20 -3.50 -9.56 -8.74 -6.36
## u15221 -1.70  1.21  1.55  2.77  5.58
## u15573 -7.38 -8.93 -3.88 -7.23 -4.90
## u21505  0.10  4.17  4.90  1.55  5.53
## u15994  0.83 -4.90  0.68 -7.18  0.34
## u238    2.91  4.76  .     .     .   
## u5809  -2.77 -6.31  2.23  .     0.19
## u16636 -3.35 -5.92 -5.63 -3.01  1.70
## u12843 -1.99 -6.89  2.09 -4.42 -4.90
# The same slice coerced to a dense base matrix: missing ratings show as NA.
as(Jester5k, "matrix")[1:10, 1:5]
##           j1    j2    j3    j4    j5
## u2841   7.91  9.17  5.34  8.16 -8.74
## u15547 -3.20 -3.50 -9.56 -8.74 -6.36
## u15221 -1.70  1.21  1.55  2.77  5.58
## u15573 -7.38 -8.93 -3.88 -7.23 -4.90
## u21505  0.10  4.17  4.90  1.55  5.53
## u15994  0.83 -4.90  0.68 -7.18  0.34
## u238    2.91  4.76    NA    NA    NA
## u5809  -2.77 -6.31  2.23    NA  0.19
## u16636 -3.35 -5.92 -5.63 -3.01  1.70
## u12843 -1.99 -6.89  2.09 -4.42 -4.90
# Number of ratings in each of the first six rows (rows carry u... ids).
head(rowCounts(Jester5k))
##  u2841 u15547 u15221 u15573 u21505 u15994 
##     81     71    100    100     72    100
# Distribution of the number of ratings per row.
hist(rowCounts(Jester5k))

# Distribution of the number of ratings per column (columns carry j... ids).
hist(colCounts(Jester5k))

Original data

# Histogram of all stored rating values, before any normalization.
hist(getRatings(Jester5k))

# Image plot of the un-normalized ratings for the first 100 rows.
image(Jester5k[1:100,], main = "Raw Ratings")

Note: each user has a personal rating “style” leading to rating bias.

# Distribution of the per-row mean rating; the spread of these means is the
# per-user bias the note above refers to.
hist(rowMeans(Jester5k))

# Distribution of the per-row standard deviation of the ratings.
hist(rowSds(Jester5k))

Normalization

The goal of normalization is to make ratings comparable across users by removing each user's individual rating bias.

Center Data

# Normalize with the default method; the printed object below reports this as
# "Normalized using center on rows".
Jester5k_center <- normalize(Jester5k)
## Note: method with signature 'CsparseMatrix#Matrix#missing#replValue' chosen for function '[<-',
##  target signature 'dgCMatrix#ngCMatrix#missing#numeric'.
##  "Matrix#nsparseMatrix#missing#replValue" would also be valid
Jester5k_center
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.
## Normalized using center on rows.
# Histogram of the centered rating values.
hist(getRatings(Jester5k_center))

# Image plot of the centered ratings for the first 100 rows. The title names
# the data actually plotted (row-centered ratings, not the raw ratings).
image(Jester5k_center[1:100, ], main = "Centered Ratings")

# After centering, every row mean is zero up to floating-point error ...
summary(rowMeans(Jester5k_center))
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -5.760e-15 -2.807e-16  8.517e-18  1.095e-17  2.881e-16  6.193e-15
# ... while the row standard deviations still differ from row to row.
summary(rowSds(Jester5k_center))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.2369  3.5360  4.3660  4.4190  5.2910  8.9090

Normalize by Z-score

# Normalize with method = "Z-score"; the printed object below reports this as
# "Normalized using Z-score on rows".
Jester5k_normZ <- normalize(Jester5k, method = "Z-score")
Jester5k_normZ
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.
## Normalized using Z-score on rows.
# Histogram of the Z-score normalized rating values.
hist(getRatings(Jester5k_normZ))

# Image plot of the Z-score normalized ratings for the first 100 rows. The
# title names the data actually plotted (not the raw ratings).
image(Jester5k_normZ[1:100, ], main = "Z-Score Normalized Ratings")

# After Z-score normalization, row means are zero up to floating-point
# error ...
summary(rowMeans(Jester5k_normZ))
##       Min.    1st Qu.     Median       Mean    3rd Qu.       Max. 
## -4.649e-15 -6.468e-17 -5.050e-19  3.435e-18  6.351e-17  1.107e-14
# ... and every row has standard deviation 1.
summary(rowSds(Jester5k_normZ))
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##       1       1       1       1       1       1

To get the original ratings back, we need to denormalize the ratings again.

# Reverse the Z-score normalization. The printed result no longer carries a
# "Normalized using ..." line. The result is only printed here, not assigned.
denormalize(Jester5k_normZ)
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.

Binary/Unary 0-1 Ratings

# Turn the row-centered ratings into 0-1 data with minRating = 0.
# NOTE(review): presumably a rating becomes 1 when it is at or above the
# row's own mean (centered value >= 0) - confirm against the binarize() docs.
Jester5k_binary <- binarize(Jester5k_center, minRating = 0)
# Image plot of the binarized ratings for the first 100 rows.
image(Jester5k_binary[1:100,])

# Per-row counts after binarization for the first six rows; each is lower
# than the corresponding count of raw ratings for the same row.
head(rowCounts(Jester5k_binary))
##  u2841 u15547 u15221 u15573 u21505 u15994 
##     61     36     62     45     38     43