This work is licensed under the Creative Commons Attribution 4.0 International License. For questions, please contact Michael Hahsler.
library("recommenderlab")
## Loading required package: Matrix
## Loading required package: arules
##
## Attaching package: 'arules'
##
## The following objects are masked from 'package:base':
##
## abbreviate, write
##
## Loading required package: proxy
##
## Attaching package: 'proxy'
##
## The following object is masked from 'package:Matrix':
##
## as.matrix
##
## The following objects are masked from 'package:stats':
##
## as.dist, dist
##
## The following object is masked from 'package:base':
##
## as.matrix
##
## Loading required package: registry
# Load the Jester5k example data set into the workspace.
data("Jester5k")
# Printing the object reports its dimensions, class, and number of ratings.
Jester5k
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.
# Peek at the underlying sparse matrix (first 10 rows, first 5 columns);
# missing ratings are printed as ".".
getRatingMatrix(Jester5k)[1:10,1:5]
## 10 x 5 sparse Matrix of class "dgCMatrix"
## j1 j2 j3 j4 j5
## u2841 7.91 9.17 5.34 8.16 -8.74
## u15547 -3.20 -3.50 -9.56 -8.74 -6.36
## u15221 -1.70 1.21 1.55 2.77 5.58
## u15573 -7.38 -8.93 -3.88 -7.23 -4.90
## u21505 0.10 4.17 4.90 1.55 5.53
## u15994 0.83 -4.90 0.68 -7.18 0.34
## u238 2.91 4.76 . . .
## u5809 -2.77 -6.31 2.23 . 0.19
## u16636 -3.35 -5.92 -5.63 -3.01 1.70
## u12843 -1.99 -6.89 2.09 -4.42 -4.90
# The same slice coerced to a regular matrix; missing ratings appear as NA.
as(Jester5k, "matrix")[1:10, 1:5]
## j1 j2 j3 j4 j5
## u2841 7.91 9.17 5.34 8.16 -8.74
## u15547 -3.20 -3.50 -9.56 -8.74 -6.36
## u15221 -1.70 1.21 1.55 2.77 5.58
## u15573 -7.38 -8.93 -3.88 -7.23 -4.90
## u21505 0.10 4.17 4.90 1.55 5.53
## u15994 0.83 -4.90 0.68 -7.18 0.34
## u238 2.91 4.76 NA NA NA
## u5809 -2.77 -6.31 2.23 NA 0.19
## u16636 -3.35 -5.92 -5.63 -3.01 1.70
## u12843 -1.99 -6.89 2.09 -4.42 -4.90
# Number of ratings per row (user) for the first few users; a count of 100
# means the user rated every one of the 100 columns.
head(rowCounts(Jester5k))
## u2841 u15547 u15221 u15573 u21505 u15994
## 81 71 100 100 72 100
# Distribution of the number of ratings per row (user).
hist(rowCounts(Jester5k))
# Distribution of the number of ratings per column (item).
hist(colCounts(Jester5k))
Original data
# Distribution of the rating values in the original data.
hist(getRatings(Jester5k))
# Visualize the rating matrix for the first 100 rows (users).
image(Jester5k[1:100,], main = "Raw Ratings")
Note: Each user has a personal rating “style” (a different average rating and a different spread of ratings), which leads to rating bias.
# Distribution of the mean rating per row (user).
hist(rowMeans(Jester5k))
# Distribution of the standard deviation of the ratings per row (user).
hist(rowSds(Jester5k))
The goal of normalization is to make ratings comparable across users.
Center Data
# Center the ratings. The printed object below confirms that the default
# normalization method is "center", applied to rows (users).
Jester5k_center <- normalize(Jester5k)
## Note: method with signature 'CsparseMatrix#Matrix#missing#replValue' chosen for function '[<-',
## target signature 'dgCMatrix#ngCMatrix#missing#numeric'.
## "Matrix#nsparseMatrix#missing#replValue" would also be valid
Jester5k_center
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.
## Normalized using center on rows.
# Distribution of the centered rating values.
hist(getRatings(Jester5k_center))
# Visualize the first 100 rows (users); the title names the data that is
# actually plotted (centered ratings, not the raw ones).
image(Jester5k_center[1:100, ], main = "Centered Ratings")
# After centering, every row mean is zero up to floating-point error ...
summary(rowMeans(Jester5k_center))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -5.760e-15 -2.807e-16 8.517e-18 1.095e-17 2.881e-16 6.193e-15
# ... while the row standard deviations still differ from user to user.
summary(rowSds(Jester5k_center))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 0.2369 3.5360 4.3660 4.4190 5.2910 8.9090
Normalize by Z-score
# Normalize the ratings using Z-scores. The printed object below confirms that
# the normalization is applied to rows (users).
Jester5k_normZ <- normalize(Jester5k, method = "Z-score")
Jester5k_normZ
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.
## Normalized using Z-score on rows.
# Distribution of the Z-score normalized rating values.
hist(getRatings(Jester5k_normZ))
# Visualize the first 100 rows (users); the title names the data that is
# actually plotted (Z-score normalized ratings, not the raw ones).
image(Jester5k_normZ[1:100, ], main = "Z-Score Normalized Ratings")
# After Z-score normalization, every row mean is zero up to floating-point
# error ...
summary(rowMeans(Jester5k_normZ))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## -4.649e-15 -6.468e-17 -5.050e-19 3.435e-18 6.351e-17 1.107e-14
# ... and every row standard deviation is 1.
summary(rowSds(Jester5k_normZ))
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
To get the original ratings back, we need to denormalize the ratings again.
# Reverse the normalization; the printed result no longer carries a
# "Normalized using ..." line.
denormalize(Jester5k_normZ)
## 5000 x 100 rating matrix of class 'realRatingMatrix' with 362106 ratings.
# Convert the centered ratings into a binary rating matrix using a threshold
# of 0.
# NOTE(review): per the recommenderlab documentation, ratings >= minRating
# become 1 and all others 0, i.e., here a 1 would mark a rating at or above
# the user's own mean -- confirm.
Jester5k_binary <- binarize(Jester5k_center, minRating = 0)
# Visualize the binary matrix for the first 100 rows (users).
image(Jester5k_binary[1:100,])
# Number of entries per row (user) in the binary matrix for the first few
# users.
head(rowCounts(Jester5k_binary))
## u2841 u15547 u15221 u15573 u21505 u15994
## 61 36 62 45 38 43