Code examples for the paper:
Mohamed Azmi, George C. Runger, Abdelaziz Berrado. Interpretable regularized class association rules algorithm for classification in a categorical data space, Information Sciences, Vol. 483, pp. 313-331, 2019.
# Seed the random number generator so that repeated runs draw the same
# pseudo-random numbers.
set.seed(1234)
RCAR is implemented in the development version of arulesCBA.
#library("devtools")
#install_github("ianjjohnson/arulesCBA")
library("arulesCBA")
## Loading required package: Matrix
## Loading required package: arules
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
## Loading required package: discretization
## Loading required package: glmnet
## Loading required package: foreach
## Loaded glmnet 2.0-16
library("arulesViz")
## Loading required package: grid
The Votes data set used in the paper:
library(cba)
## Loading required package: proxy
##
## Attaching package: 'proxy'
## The following object is masked from 'package:Matrix':
##
## as.matrix
## The following objects are masked from 'package:stats':
##
## as.dist, dist
## The following object is masked from 'package:base':
##
## as.matrix
# Load the Votes data set, report its dimensions (rows, columns) and show
# the first record.
data(Votes)
dim(Votes)
## [1] 435 17
head(Votes, n = 1L)
## handicapped-infants water-project-cost-sharing
## 1 n y
## adoption-of-the-budget-resolution physician-fee-freeze el-salvador-aid
## 1 n y y
## religious-groups-in-schools anti-satellite-test-ban
## 1 y n
## aid-to-nicaraguan-contras mx-missile immigration
## 1 n n y
## synfuels-corporation-cutback education-spending superfund-right-to-sue
## 1 <NA> y y
## crime duty-free-exports export-administration-act-south-africa
## 1 y n y
## Class
## 1 republican
# Fit the RCAR classifier on the Votes data: class association rules are
# mined at the given support/confidence thresholds and then weighted by a
# regularized model with penalty lambda.
rcar_model <- RCAR(
  Class ~ .,
  data = Votes,
  support = 0.1,
  confidence = 0.5,
  lambda = 0.01
)
rcar_model
## CBA Classifier Object
## Class: Class=democrat, Class=republican
## Default Class: Class=democrat
## Number of rules: 36
## Classification method: logit (weighted)
## Description: RCAR algorithm by Azmi et al. 2019
# Intercepts of the fitted regularized model, one per class (RCAR stores
# glmnet's a0 in this element; see the RCAR source printed further down).
rcar_model$biases
## s0
## Class=democrat 0.02414597
## Class=republican -0.02414597
# Coefficients of the retained rules: one row per rule, one column per
# class. They come from glmnet's beta; rules whose coefficients are all
# zero are dropped.
rcar_model$weights
## Class=democrat Class=republican
## [1,] 0.3861155420 -0.3861155420
## [2,] 0.0741464147 -0.0741464147
## [3,] 0.1585779317 -0.1585779317
## [4,] 1.1087970733 -1.1087970733
## [5,] 0.2946603861 -0.2946603861
## [6,] 0.1438230087 -0.1438230087
## [7,] -0.3146445433 0.3146445433
## [8,] -0.2025514534 0.2025514534
## [9,] -0.6431349029 0.6431349029
## [10,] -0.1419677851 0.1419677851
## [11,] -0.0998939888 0.0998939888
## [12,] 0.0290190307 -0.0290190307
## [13,] 0.0998697180 -0.0998697180
## [14,] 0.0041998171 -0.0041998171
## [15,] -0.2371638466 0.2371638466
## [16,] 0.5092472782 -0.5092472782
## [17,] -0.0649806420 0.0649806420
## [18,] -0.0752837615 0.0752837615
## [19,] -0.3854602761 0.3854602761
## [20,] -0.1727557168 0.1727557168
## [21,] -0.0661635882 0.0661635882
## [22,] -0.1421705126 0.1421705126
## [23,] 0.0176267905 -0.0176267905
## [24,] 0.2079756662 -0.2079756662
## [25,] 0.3299596084 -0.3299596084
## [26,] -0.4195364335 0.4195364335
## [27,] -0.5601583424 0.5601583424
## [28,] -0.1361433069 0.1361433069
## [29,] -0.0090254833 0.0090254833
## [30,] -0.0064944467 0.0064944467
## [31,] -0.0473755948 0.0473755948
## [32,] 0.0460863811 -0.0460863811
## [33,] -0.1355786753 0.1355786753
## [34,] -0.0007402473 0.0007402473
## [35,] -0.0053408712 0.0053408712
## [36,] 0.2597256156 -0.2597256156
# Browse the rules kept by the RCAR model (inspectDT presumably comes from
# arulesViz and shows an interactive table — confirm), then print the first
# five records so they can be compared with the predictions below.
inspectDT(rules(rcar_model))
Votes[1:5,]
## handicapped-infants water-project-cost-sharing
## 1 n y
## 2 n y
## 3 <NA> y
## 4 n y
## 5 y y
## adoption-of-the-budget-resolution physician-fee-freeze el-salvador-aid
## 1 n y y
## 2 n y y
## 3 y <NA> y
## 4 y n <NA>
## 5 y n y
## religious-groups-in-schools anti-satellite-test-ban
## 1 y n
## 2 y n
## 3 y n
## 4 y n
## 5 y n
## aid-to-nicaraguan-contras mx-missile immigration
## 1 n n y
## 2 n n n
## 3 n n n
## 4 n n n
## 5 n n n
## synfuels-corporation-cutback education-spending superfund-right-to-sue
## 1 <NA> y y
## 2 n y y
## 3 y n y
## 4 y n y
## 5 y <NA> y
## crime duty-free-exports export-administration-act-south-africa
## 1 y n y
## 2 y n <NA>
## 3 y n n
## 4 n n y
## 5 y y y
## Class
## 1 republican
## 2 republican
## 3 democrat
## 4 democrat
## 5 democrat
# Predict the class of the first five records with the RCAR model.
predict(rcar_model, Votes[1:5,])
## [1] republican republican democrat democrat democrat
## Levels: democrat republican
Code
# Print the definition of RCAR from the installed arulesCBA namespace.
arulesCBA::RCAR
## function(formula, data, support = 0.1, confidence = 0.8, verbose = FALSE,
## maxlen = 6, lambda = 0.001, alpha = 1, balanceSupport = FALSE, disc.method = 'mdlp') {
##
## disc_info <- NULL
## if(is(data, "data.frame")){
## data <- discretizeDF.supervised(formula, data, method=disc.method)
## disc_info <- lapply(data, attr, "discretized:breaks")
## }
##
## if(!is(data, "transactions")) data <- as(data, 'transactions')
## form <- .parseformula(formula, data)
##
## model_rules <- mineCARs(formula, data, balanceSupport,
## parameter=list(supp=support,conf=confidence,maxlen=maxlen),
## control=list(verbose=verbose))
##
## X <- is.superset(data,lhs(model_rules))
## y <- factor(as(data[, form$class_ids], "matrix") %*% seq(length(form$class_ids)),
## labels = form$class_names)
##
## #model <- cv.glmnet(X, y, family='multinomial', alpha=alpha)
## model <- glmnet(X, y, family='multinomial', alpha=alpha, lambda=lambda)
##
## weights <- sapply(model$beta, as.vector)
## remove <- apply(weights, MARGIN = 1, FUN = function(x) all(x==0))
##
## structure(list(
## rules=model_rules[!remove],
## weights=weights[!remove,],
## biases=model$a0,
## class=form$class_names,
## default=form$class_names[which.max(model$a0)],
## discretization=disc_info,
## description='RCAR algorithm by Azmi et al. 2019',
## method='logit',
## formula = formula,
## all_rules=model_rules,
## reg_model=model
## ),
## class = 'CBA')
## }
## <bytecode: 0x55b296bc45d0>
## <environment: namespace:arulesCBA>
# Fit a CBA classifier on the same data with the same support and
# confidence thresholds, for comparison with the RCAR model.
cba_model <- CBA(
  Class ~ .,
  data = Votes,
  support = 0.1,
  confidence = 0.5
)
cba_model
## CBA Classifier Object
## Class: Class=democrat, Class=republican
## Default Class: Class=republican
## Number of rules: 39
## Classification method: first
## Description: CBA algorithm by Liu, et al. 1998 with support=0.1
## and confidence=0.5
# The CBA model carries no biases or weights: both elements print as NULL.
cba_model$biases
## NULL
cba_model$weights
## NULL
# Browse the CBA rules, then print the first five records so they can be
# compared with the predictions below.
inspectDT(rules(cba_model))
Votes[1:5,]
## handicapped-infants water-project-cost-sharing
## 1 n y
## 2 n y
## 3 <NA> y
## 4 n y
## 5 y y
## adoption-of-the-budget-resolution physician-fee-freeze el-salvador-aid
## 1 n y y
## 2 n y y
## 3 y <NA> y
## 4 y n <NA>
## 5 y n y
## religious-groups-in-schools anti-satellite-test-ban
## 1 y n
## 2 y n
## 3 y n
## 4 y n
## 5 y n
## aid-to-nicaraguan-contras mx-missile immigration
## 1 n n y
## 2 n n n
## 3 n n n
## 4 n n n
## 5 n n n
## synfuels-corporation-cutback education-spending superfund-right-to-sue
## 1 <NA> y y
## 2 n y y
## 3 y n y
## 4 y n y
## 5 y <NA> y
## crime duty-free-exports export-administration-act-south-africa
## 1 y n y
## 2 y n <NA>
## 3 y n n
## 4 n n y
## 5 y y y
## Class
## 1 republican
## 2 republican
## 3 democrat
## 4 democrat
## 5 democrat
# Predict the class of the first five records with the CBA model.
predict(cba_model, Votes[1:5,])
## [1] republican republican democrat democrat democrat
## Levels: democrat republican
library("pre")
# pre() does not cope with missing values or with "-" in column names, so
# keep only the complete records and turn the column names into syntactic
# R names before fitting.
Votes2 <- Votes[complete.cases(Votes), ]
names(Votes2) <- make.names(names(Votes2))
rulefit_model <- pre(
  Class ~ .,
  data = Votes2,
  family = "binomial"
)
rulefit_model
##
## Final ensemble with cv error within 1se of minimum:
## lambda = 0.02514122
## number of terms = 8
## mean cv error (se) = 0.269453 (0.04704076)
##
## cv error type : Binomial Deviance
##
## rule coefficient
## (Intercept) -2.824007e+00
## physician.fee.freezey 3.549507e+00
## rule2 1.525615e+00
## rule59 1.887381e-01
## rule22 1.490721e-01
## rule74 7.090758e-02
## rule39 6.527622e-02
## rule16 4.385787e-02
## rule1 -1.972537e-14
## description
## 1
## physician.fee.freezey
## physician.fee.freeze %in% c("y") & synfuels.corporation.cutback %in% c("n")
## adoption.of.the.budget.resolution %in% c("n") & superfund.right.to.sue %in% c("y")
## physician.fee.freeze %in% c("y") & mx.missile %in% c("n")
## synfuels.corporation.cutback %in% c("n") & crime %in% c("y")
## adoption.of.the.budget.resolution %in% c("n") & duty.free.exports %in% c("n")
## physician.fee.freeze %in% c("y") & adoption.of.the.budget.resolution %in% c("n")
## physician.fee.freeze %in% c("n")
# Print the first five complete records (row names are the original row
# numbers in Votes).
Votes2[1:5,]
## handicapped.infants water.project.cost.sharing
## 6 n y
## 9 n y
## 20 y y
## 24 y y
## 26 y n
## adoption.of.the.budget.resolution physician.fee.freeze el.salvador.aid
## 6 y n y
## 9 n y y
## 20 y n n
## 24 y n n
## 26 y n n
## religious.groups.in.schools anti.satellite.test.ban
## 6 y n
## 9 y n
## 20 n y
## 24 n y
## 26 n y
## aid.to.nicaraguan.contras mx.missile immigration
## 6 n n n
## 9 n n n
## 20 y y n
## 24 y y n
## 26 y y y
## synfuels.corporation.cutback education.spending superfund.right.to.sue
## 6 n n y
## 9 n y y
## 20 y n n
## 24 n n n
## 26 n n n
## crime duty.free.exports export.administration.act.south.africa
## 6 y y y
## 9 y n y
## 20 n y y
## 24 n y y
## 26 n y y
## Class
## 6 democrat
## 9 republican
## 20 democrat
## 24 democrat
## 26 democrat
# Predict class labels for the first five complete records with the
# rule ensemble fitted by pre().
predict(rulefit_model, Votes2[1:5,], type = "class")
## 6 9 20 24 26
## "democrat" "republican" "democrat" "democrat" "democrat"