Code examples for the paper:

Mohamed Azmi, George C. Runger, Abdelaziz Berrado. Interpretable regularized class association rules algorithm for classification in a categorical data space. Information Sciences, Vol. 483, pp. 313-331, 2019.

# Fix the random number generator seed for everything that follows.
set.seed(seed = 1234)

RCAR is implemented in the development version of arulesCBA.

#library("devtools")
#install_github("ianjjohnson/arulesCBA")
library("arulesCBA")
## Loading required package: Matrix
## Loading required package: arules
## 
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
## 
##     abbreviate, write
## Loading required package: discretization
## Loading required package: glmnet
## Loading required package: foreach
## Loaded glmnet 2.0-16
library("arulesViz")
## Loading required package: grid

Votes data used in the paper

library(cba)
## Loading required package: proxy
## 
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
## 
##     as.matrix
## The following objects are masked from 'package:stats':
## 
##     as.dist, dist
## The following object is masked from 'package:base':
## 
##     as.matrix
# Load the Votes data set and report its dimensions (rows, columns).
data("Votes")
dim(Votes)
## [1] 435  17
# Show the first observation.
Votes[1, ]
##   handicapped-infants water-project-cost-sharing
## 1                   n                          y
##   adoption-of-the-budget-resolution physician-fee-freeze el-salvador-aid
## 1                                 n                    y               y
##   religious-groups-in-schools anti-satellite-test-ban
## 1                           y                       n
##   aid-to-nicaraguan-contras mx-missile immigration
## 1                         n          n           y
##   synfuels-corporation-cutback education-spending superfund-right-to-sue
## 1                         <NA>                  y                      y
##   crime duty-free-exports export-administration-act-south-africa
## 1     y                 n                                      y
##        Class
## 1 republican

RCAR

# Fit an RCAR classifier that predicts Class from all other columns of Votes.
# support and confidence are the thresholds used when mining the class
# association rules; lambda is the regularization strength handed to glmnet.
rcar_model <- RCAR(
  Class ~ .,
  data = Votes,
  support = 0.1,
  confidence = 0.5,
  lambda = 0.01
)
rcar_model
## CBA Classifier Object
## Class: Class=democrat, Class=republican
## Default Class: Class=democrat
## Number of rules: 36
## Classification method: logit (weighted)
## Description: RCAR algorithm by Azmi et al. 2019
# Per-class intercepts of the fitted regularized model.
rcar_model[["biases"]]
##                           s0
## Class=democrat    0.02414597
## Class=republican -0.02414597
# Per-class weights of the retained rules (one row per rule).
rcar_model[["weights"]]
##       Class=democrat Class=republican
##  [1,]   0.3861155420    -0.3861155420
##  [2,]   0.0741464147    -0.0741464147
##  [3,]   0.1585779317    -0.1585779317
##  [4,]   1.1087970733    -1.1087970733
##  [5,]   0.2946603861    -0.2946603861
##  [6,]   0.1438230087    -0.1438230087
##  [7,]  -0.3146445433     0.3146445433
##  [8,]  -0.2025514534     0.2025514534
##  [9,]  -0.6431349029     0.6431349029
## [10,]  -0.1419677851     0.1419677851
## [11,]  -0.0998939888     0.0998939888
## [12,]   0.0290190307    -0.0290190307
## [13,]   0.0998697180    -0.0998697180
## [14,]   0.0041998171    -0.0041998171
## [15,]  -0.2371638466     0.2371638466
## [16,]   0.5092472782    -0.5092472782
## [17,]  -0.0649806420     0.0649806420
## [18,]  -0.0752837615     0.0752837615
## [19,]  -0.3854602761     0.3854602761
## [20,]  -0.1727557168     0.1727557168
## [21,]  -0.0661635882     0.0661635882
## [22,]  -0.1421705126     0.1421705126
## [23,]   0.0176267905    -0.0176267905
## [24,]   0.2079756662    -0.2079756662
## [25,]   0.3299596084    -0.3299596084
## [26,]  -0.4195364335     0.4195364335
## [27,]  -0.5601583424     0.5601583424
## [28,]  -0.1361433069     0.1361433069
## [29,]  -0.0090254833     0.0090254833
## [30,]  -0.0064944467     0.0064944467
## [31,]  -0.0473755948     0.0473755948
## [32,]   0.0460863811    -0.0460863811
## [33,]  -0.1355786753     0.1355786753
## [34,]  -0.0007402473     0.0007402473
## [35,]  -0.0053408712     0.0053408712
## [36,]   0.2597256156    -0.2597256156
# Browse the rules kept by the RCAR model in an interactive table
# (inspectDT presumably comes from arulesViz, loaded above).
inspectDT(
  rules(rcar_model)
)
# Show the first five observations for reference.
Votes[1:5, ]
##   handicapped-infants water-project-cost-sharing
## 1                   n                          y
## 2                   n                          y
## 3                <NA>                          y
## 4                   n                          y
## 5                   y                          y
##   adoption-of-the-budget-resolution physician-fee-freeze el-salvador-aid
## 1                                 n                    y               y
## 2                                 n                    y               y
## 3                                 y                 <NA>               y
## 4                                 y                    n            <NA>
## 5                                 y                    n               y
##   religious-groups-in-schools anti-satellite-test-ban
## 1                           y                       n
## 2                           y                       n
## 3                           y                       n
## 4                           y                       n
## 5                           y                       n
##   aid-to-nicaraguan-contras mx-missile immigration
## 1                         n          n           y
## 2                         n          n           n
## 3                         n          n           n
## 4                         n          n           n
## 5                         n          n           n
##   synfuels-corporation-cutback education-spending superfund-right-to-sue
## 1                         <NA>                  y                      y
## 2                            n                  y                      y
## 3                            y                  n                      y
## 4                            y                  n                      y
## 5                            y               <NA>                      y
##   crime duty-free-exports export-administration-act-south-africa
## 1     y                 n                                      y
## 2     y                 n                                   <NA>
## 3     y                 n                                      n
## 4     n                 n                                      y
## 5     y                 y                                      y
##        Class
## 1 republican
## 2 republican
## 3   democrat
## 4   democrat
## 5   democrat
# Predict the class of the first five observations with the RCAR model.
predict(rcar_model, Votes[1:5, ])
## [1] republican republican democrat   democrat   democrat  
## Levels: democrat republican

Code

arulesCBA::RCAR
## function(formula, data, support = 0.1, confidence = 0.8, verbose = FALSE,
##   maxlen = 6, lambda = 0.001, alpha = 1, balanceSupport = FALSE, disc.method = 'mdlp') {
## 
##   disc_info <- NULL
##   if(is(data, "data.frame")){
##     data <- discretizeDF.supervised(formula, data, method=disc.method)
##     disc_info <- lapply(data, attr, "discretized:breaks")
##   }
## 
##   if(!is(data, "transactions")) data <- as(data, 'transactions')
##   form <- .parseformula(formula, data)
## 
##   model_rules <- mineCARs(formula, data, balanceSupport,
##     parameter=list(supp=support,conf=confidence,maxlen=maxlen),
##     control=list(verbose=verbose))
## 
##   X <- is.superset(data,lhs(model_rules))
##   y <- factor(as(data[, form$class_ids], "matrix") %*% seq(length(form$class_ids)),
##     labels = form$class_names)
## 
##   #model <- cv.glmnet(X, y, family='multinomial', alpha=alpha)
##   model <- glmnet(X, y, family='multinomial', alpha=alpha, lambda=lambda)
## 
##   weights <- sapply(model$beta, as.vector)
##   remove <- apply(weights, MARGIN = 1, FUN = function(x) all(x==0))
## 
##   structure(list(
##     rules=model_rules[!remove],
##     weights=weights[!remove,],
##     biases=model$a0,
##     class=form$class_names,
##     default=form$class_names[which.max(model$a0)],
##     discretization=disc_info,
##     description='RCAR algorithm by Azmi et al. 2019',
##     method='logit',
##     formula = formula,
##     all_rules=model_rules,
##     reg_model=model
##     ),
##     class = 'CBA')
## }
## <bytecode: 0x55b296bc45d0>
## <environment: namespace:arulesCBA>

CBA

# Fit a CBA classifier that predicts Class from all other columns of Votes,
# using a minimum support of 0.1 and a minimum confidence of 0.5.
cba_model <- CBA(
  Class ~ .,
  data = Votes,
  support = 0.1,
  confidence = 0.5
)
cba_model
## CBA Classifier Object
## Class: Class=democrat, Class=republican
## Default Class: Class=republican
## Number of rules: 39
## Classification method: first 
## Description: CBA algorithm by Liu, et al. 1998 with support=0.1
##      and confidence=0.5
# The CBA model carries no biases or weights; both components are NULL.
cba_model[["biases"]]
## NULL
cba_model[["weights"]]
## NULL
# Browse the rules of the CBA model in an interactive table
# (inspectDT presumably comes from arulesViz, loaded above).
inspectDT(
  rules(cba_model)
)
# Show the first five observations for reference.
Votes[1:5, ]
##   handicapped-infants water-project-cost-sharing
## 1                   n                          y
## 2                   n                          y
## 3                <NA>                          y
## 4                   n                          y
## 5                   y                          y
##   adoption-of-the-budget-resolution physician-fee-freeze el-salvador-aid
## 1                                 n                    y               y
## 2                                 n                    y               y
## 3                                 y                 <NA>               y
## 4                                 y                    n            <NA>
## 5                                 y                    n               y
##   religious-groups-in-schools anti-satellite-test-ban
## 1                           y                       n
## 2                           y                       n
## 3                           y                       n
## 4                           y                       n
## 5                           y                       n
##   aid-to-nicaraguan-contras mx-missile immigration
## 1                         n          n           y
## 2                         n          n           n
## 3                         n          n           n
## 4                         n          n           n
## 5                         n          n           n
##   synfuels-corporation-cutback education-spending superfund-right-to-sue
## 1                         <NA>                  y                      y
## 2                            n                  y                      y
## 3                            y                  n                      y
## 4                            y                  n                      y
## 5                            y               <NA>                      y
##   crime duty-free-exports export-administration-act-south-africa
## 1     y                 n                                      y
## 2     y                 n                                   <NA>
## 3     y                 n                                      n
## 4     n                 n                                      y
## 5     y                 y                                      y
##        Class
## 1 republican
## 2 republican
## 3   democrat
## 4   democrat
## 5   democrat
# Predict the class of the first five observations with the CBA model.
predict(cba_model, Votes[1:5, ])
## [1] republican republican democrat   democrat   democrat  
## Levels: democrat republican

RuleFit (in package pre)

library(pre)

# pre() does not cope with missing values or with "-" in column names, so
# keep only the complete rows and make the column names syntactically valid.
Votes2 <- Votes[complete.cases(Votes), ]
names(Votes2) <- make.names(names(Votes2))

# Fit the rule ensemble for the two-class response and print it.
rulefit_model <- pre(
  Class ~ .,
  data = Votes2,
  family = "binomial"
)
rulefit_model
## 
## Final ensemble with cv error within 1se of minimum: 
##   lambda =  0.02514122
##   number of terms = 8
##   mean cv error (se) = 0.269453 (0.04704076)
## 
##   cv error type : Binomial Deviance
## 
##                    rule    coefficient
##             (Intercept)  -2.824007e+00
##   physician.fee.freezey   3.549507e+00
##                   rule2   1.525615e+00
##                  rule59   1.887381e-01
##                  rule22   1.490721e-01
##                  rule74   7.090758e-02
##                  rule39   6.527622e-02
##                  rule16   4.385787e-02
##                   rule1  -1.972537e-14
##                                                                          description
##                                                                                    1
##                                                                physician.fee.freezey
##          physician.fee.freeze %in% c("y") & synfuels.corporation.cutback %in% c("n")
##   adoption.of.the.budget.resolution %in% c("n") & superfund.right.to.sue %in% c("y")
##                            physician.fee.freeze %in% c("y") & mx.missile %in% c("n")
##                         synfuels.corporation.cutback %in% c("n") & crime %in% c("y")
##        adoption.of.the.budget.resolution %in% c("n") & duty.free.exports %in% c("n")
##     physician.fee.freeze %in% c("y") & adoption.of.the.budget.resolution %in% c("n")
##                                                     physician.fee.freeze %in% c("n")
# Show the first five rows of the cleaned data (row names are the original
# row numbers from Votes).
Votes2[1:5, ]
##    handicapped.infants water.project.cost.sharing
## 6                    n                          y
## 9                    n                          y
## 20                   y                          y
## 24                   y                          y
## 26                   y                          n
##    adoption.of.the.budget.resolution physician.fee.freeze el.salvador.aid
## 6                                  y                    n               y
## 9                                  n                    y               y
## 20                                 y                    n               n
## 24                                 y                    n               n
## 26                                 y                    n               n
##    religious.groups.in.schools anti.satellite.test.ban
## 6                            y                       n
## 9                            y                       n
## 20                           n                       y
## 24                           n                       y
## 26                           n                       y
##    aid.to.nicaraguan.contras mx.missile immigration
## 6                          n          n           n
## 9                          n          n           n
## 20                         y          y           n
## 24                         y          y           n
## 26                         y          y           y
##    synfuels.corporation.cutback education.spending superfund.right.to.sue
## 6                             n                  n                      y
## 9                             n                  y                      y
## 20                            y                  n                      n
## 24                            n                  n                      n
## 26                            n                  n                      n
##    crime duty.free.exports export.administration.act.south.africa
## 6      y                 y                                      y
## 9      y                 n                                      y
## 20     n                 y                                      y
## 24     n                 y                                      y
## 26     n                 y                                      y
##         Class
## 6    democrat
## 9  republican
## 20   democrat
## 24   democrat
## 26   democrat
# Predict class labels for the first five rows of the cleaned data.
predict(
  rulefit_model,
  Votes2[1:5, ],
  type = "class"
)
##            6            9           20           24           26 
##   "democrat" "republican"   "democrat"   "democrat"   "democrat"