This demo uses data from the Stop-Question-and-Frisk program in New York City. “Stop-Question-and-Frisk is a practice of the New York City Police Department by which police officers stop and question hundreds of thousands of pedestrians annually, and frisk them for weapons and other contraband.” (Wikipedia)
The data was obtained from http://www.nyclu.org/content/stop-and-frisk-data.
Download files
# Download the codebook (variable descriptions) unless a local copy exists.
# mode = "wb" forces a binary transfer; on Windows a text-mode download can
# rewrite line endings and corrupt the PDF.
if (!file.exists("SQF_Codebook.pdf")) {
  download.file(
    "http://michael.hahsler.net/research/arules_RUG_2015/demo/SQF_Codebook.pdf",
    "SQF_Codebook.pdf",
    mode = "wb"
  )
}
See the codebook downloaded above (SQF_Codebook.pdf) for a description of the data.
# Download and unpack the 2012 data unless the extracted CSV already exists.
# mode = "wb" makes the binary transfer explicit so the zip archive is not
# corrupted by a text-mode download on Windows.
if (!file.exists("SQF 2012.csv")) {
  download.file(
    "http://michael.hahsler.net/research/arules_RUG_2015/demo/Stop-and-Frisk-2012.zip",
    "Stop-and-Frisk-2012.zip",
    mode = "wb"
  )
  unzip("Stop-and-Frisk-2012.zip")
}
# Read the raw data. stringsAsFactors = TRUE is spelled out because the
# cleaning steps and the printed summaries in this demo treat text columns as
# factors; since R 4.0.0 read.csv() no longer converts them by default.
dat <- read.csv("SQF 2012.csv", stringsAsFactors = TRUE)
# Number of rows (stops) and columns (variables)
dim(dat)
## [1] 532911 101
# Per-column summary of the raw data, used to spot columns that need cleaning
summary(dat)
## year pct ser_num datestop
## Min. :2012 Min. : 1.0 Min. : 1 Min. : 1012012
## 1st Qu.:2012 1st Qu.: 41.0 1st Qu.: 1790 1st Qu.: 2292012
## Median :2012 Median : 70.0 Median : 3790 Median : 5052012
## Mean :2012 Mean : 66.5 Mean : 4980 Mean : 5563631
## 3rd Qu.:2012 3rd Qu.: 94.0 3rd Qu.: 6982 3rd Qu.: 8252012
## Max. :2012 Max. :123.0 Max. :24652 Max. :12312012
##
## timestop city sex race
## Min. : 0 Min. :1.000 Min. :0.000 Min. :1.000
## 1st Qu.:1002 1st Qu.:2.000 1st Qu.:1.000 1st Qu.:1.000
## Median :1615 Median :2.000 Median :1.000 Median :1.000
## Mean :1415 Mean :2.518 Mean :0.928 Mean :2.012
## 3rd Qu.:2030 3rd Qu.:3.000 3rd Qu.:1.000 3rd Qu.:3.000
## Max. :2359 Max. :5.000 Max. :1.000 Max. :6.000
## NA's :5 NA's :7784 NA's :13861
## dob age height weight
## Min. : 1011001 Min. : 0.00 Min. :36.0 Min. : 0.0
## 1st Qu.: 6051983 1st Qu.: 19.00 1st Qu.:67.0 1st Qu.:150.0
## Median :10311996 Median : 24.00 Median :69.0 Median :165.0
## Mean : 8947660 Mean : 28.77 Mean :68.6 Mean :169.3
## 3rd Qu.:12311900 3rd Qu.: 34.00 3rd Qu.:71.0 3rd Qu.:180.0
## Max. :12312012 Max. :999.00 Max. :95.0 Max. :999.0
##
## haircolr eyecolor build othfeatr
## Min. : 1.000 Min. :1.000 Min. :1.000 Min. :0.0
## 1st Qu.: 1.000 1st Qu.:2.000 1st Qu.:3.000 1st Qu.:2.0
## Median : 1.000 Median :2.000 Median :3.000 Median :5.0
## Mean : 1.406 Mean :1.992 Mean :3.161 Mean :3.9
## 3rd Qu.: 1.000 3rd Qu.:2.000 3rd Qu.:4.000 3rd Qu.:5.0
## Max. :10.000 Max. :8.000 Max. :4.000 Max. :8.0
## NA's :4449 NA's :3315 NA's :5398 NA's :532896
## frisked searched contrabn pistol
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.000000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000
## Median :1.0000 Median :0.00000 Median :0.00000 Median :0.000000
## Mean :0.5578 Mean :0.08303 Mean :0.01733 Mean :0.001242
## 3rd Qu.:1.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.000000
##
## riflshot asltweap knifcuti
## Min. :0.00e+00 Min. :0.00e+00 Min. :0.000000
## 1st Qu.:0.00e+00 1st Qu.:0.00e+00 1st Qu.:0.000000
## Median :0.00e+00 Median :0.00e+00 Median :0.000000
## Mean :2.44e-05 Mean :9.57e-05 Mean :0.008829
## 3rd Qu.:0.00e+00 3rd Qu.:0.00e+00 3rd Qu.:0.000000
## Max. :1.00e+00 Max. :1.00e+00 Max. :1.000000
##
## machgun othrweap arstmade arstoffn
## Min. :0.0e+00 Min. :0.000000 Min. :0.00000 :500600
## 1st Qu.:0.0e+00 1st Qu.:0.000000 1st Qu.:0.00000 CPM : 3080
## Median :0.0e+00 Median :0.000000 Median :0.00000 CPW : 2513
## Mean :5.6e-06 Mean :0.002481 Mean :0.06064 CPCS : 1369
## 3rd Qu.:0.0e+00 3rd Qu.:0.000000 3rd Qu.:0.00000 ROBBERY: 1186
## Max. :1.0e+00 Max. :1.000000 Max. :1.00000 221.1 : 1029
## (Other): 23134
## sumissue sumoffen crimsusp
## Min. :0.00000 :512042 FEL :104949
## 1st Qu.:0.00000 OPEN CONTAINER: 1613 MISD : 81113
## Median :0.00000 DISCON : 1289 FELONY : 69204
## Mean :0.05149 DIS CON : 690 CPW : 55974
## 3rd Qu.:0.00000 * : 673 ROBBERY : 48670
## Max. :1.00000 (Other) : 16603 BURGLARY: 19940
## NA's : 1 (Other) :153061
## detailcm perobs perstop pf_hands
## Min. : 1.00 Min. : 0.000 5 :236912 Min. :0.0000
## 1st Qu.: 20.00 1st Qu.: 1.000 2 : 66583 1st Qu.:0.0000
## Median : 28.00 Median : 1.000 3 : 65098 Median :0.0000
## Mean : 41.23 Mean : 2.446 10 : 63996 Mean :0.1319
## 3rd Qu.: 68.00 3rd Qu.: 2.000 1 : 27924 3rd Qu.:0.0000
## Max. :113.00 Max. :955.000 4 : 22344 Max. :1.0000
## NA's :2 (Other): 50054
## pf_wall pf_grnd pf_drwep
## Min. :0.00000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :0.000000 Median :0.000000
## Mean :0.02369 Mean :0.003419 Mean :0.002439
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.000000 Max. :1.000000
##
## pf_ptwep pf_baton pf_hcuff
## Min. :0.000000 Min. :0.000000 Min. :0.00000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.00000
## Median :0.000000 Median :0.000000 Median :0.00000
## Mean :0.002314 Mean :0.000107 Mean :0.03611
## 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.00000
## Max. :1.000000 Max. :1.000000 Max. :1.00000
##
## pf_pepsp pf_other cs_objcs cs_descr
## Min. :0.000000 Min. :0.000000 Min. :0.00000 Min. :0.000
## 1st Qu.:0.000000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.000
## Median :0.000000 Median :0.000000 Median :0.00000 Median :0.000
## Mean :0.000107 Mean :0.006549 Mean :0.02524 Mean :0.165
## 3rd Qu.:0.000000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:0.000
## Max. :1.000000 Max. :1.000000 Max. :1.00000 Max. :1.000
##
## cs_casng cs_lkout cs_cloth cs_drgtr
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.0000 Median :0.00000 Median :0.00000
## Mean :0.3563 Mean :0.1888 Mean :0.04691 Mean :0.07314
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.00000
##
## cs_furtv cs_vcrim cs_bulge cs_other
## Min. :0.0000 Min. :0.0000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :1.0000 Median :0.0000 Median :0.00000 Median :0.0000
## Mean :0.5154 Mean :0.1105 Mean :0.07304 Mean :0.1563
## 3rd Qu.:1.0000 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.0000
## Max. :1.0000 Max. :1.0000 Max. :1.00000 Max. :1.0000
##
## rf_vcrim rf_othsw rf_attir rf_vcact
## Min. :0.0000 Min. :0.00000 Min. :0.00000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.00000
## Median :0.0000 Median :0.00000 Median :0.00000 Median :0.00000
## Mean :0.1294 Mean :0.03357 Mean :0.07405 Mean :0.08189
## 3rd Qu.:0.0000 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.00000 Max. :1.00000 Max. :1.00000
##
## rf_rfcmp rf_verbl rf_knowl rf_furt
## Min. :0.00000 Min. :0.000000 Min. :0.00000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.00000 1st Qu.:0.0000
## Median :0.00000 Median :0.000000 Median :0.00000 Median :0.0000
## Mean :0.07933 Mean :0.006309 Mean :0.01875 Mean :0.3931
## 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.00000 3rd Qu.:1.0000
## Max. :1.00000 Max. :1.000000 Max. :1.00000 Max. :1.0000
##
## rf_bulg sb_hdobj sb_outln sb_admis
## Min. :0.00000 Min. :0.00000 Min. :0.000000 Min. :0.000000
## 1st Qu.:0.00000 1st Qu.:0.00000 1st Qu.:0.000000 1st Qu.:0.000000
## Median :0.00000 Median :0.00000 Median :0.000000 Median :0.000000
## Mean :0.07005 Mean :0.04354 Mean :0.005245 Mean :0.002742
## 3rd Qu.:0.00000 3rd Qu.:0.00000 3rd Qu.:0.000000 3rd Qu.:0.000000
## Max. :1.00000 Max. :1.00000 Max. :1.000000 Max. :1.000000
##
## sb_other ac_proxm ac_evasv ac_assoc
## Min. :0.00000 Min. :0.0000 Min. :0.00 Min. :0.00000
## 1st Qu.:0.00000 1st Qu.:0.0000 1st Qu.:0.00 1st Qu.:0.00000
## Median :0.00000 Median :0.0000 Median :0.00 Median :0.00000
## Mean :0.03555 Mean :0.2259 Mean :0.19 Mean :0.04538
## 3rd Qu.:0.00000 3rd Qu.:0.0000 3rd Qu.:0.00 3rd Qu.:0.00000
## Max. :1.00000 Max. :1.0000 Max. :1.00 Max. :1.00000
##
## ac_cgdir ac_incid ac_time ac_stsnd
## Min. :0.0000 Min. :0.0000 Min. :0.0000 Min. :0.00000
## 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.0000 1st Qu.:0.00000
## Median :0.0000 Median :1.0000 Median :0.0000 Median :0.00000
## Mean :0.2383 Mean :0.6002 Mean :0.4383 Mean :0.02558
## 3rd Qu.:0.0000 3rd Qu.:1.0000 3rd Qu.:1.0000 3rd Qu.:0.00000
## Max. :1.0000 Max. :1.0000 Max. :1.0000 Max. :1.00000
##
## ac_other forceuse inout trhsloc
## Min. :0.00000 Min. :1.0 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:2.0 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :3.0 Median :0.0000 Median :0.0000
## Mean :0.03439 Mean :3.4 Mean :0.2168 Mean :0.3011
## 3rd Qu.:0.00000 3rd Qu.:5.0 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :6.0 Max. :1.0000 Max. :2.0000
## NA's :465609
## premname addrnum stname
## STREET :200106 :312070 :306954
## SIDEWALK: 94800 60 : 947 BROADWAY : 2906
## : 37665 300 : 907 8 AVENUE : 2417
## LOBBY : 16055 200 : 780 SUTTER AVENUE : 2258
## MEZZ : 10960 120 : 775 PARK AVENUE : 2009
## (Other) :173299 315 : 769 AMSTERDAM AVENU: 1909
## NA's : 26 (Other):216663 (Other) :214458
## stinter crossst addrpct
## BROADWAY : 11975 BROADWAY : 5110 Min. : 1.0
## 8 AVENUE : 6571 ROCKAWAY AVENUE: 4526 1st Qu.: 41.0
## LEXINGTON AVENUE : 5925 PARK AVENUE : 3931 Median : 70.0
## 3 AVENUE : 5886 NOSTRAND AVENUE: 3657 Mean : 66.5
## 7 AVENUE : 4952 3 AVENUE : 3387 3rd Qu.: 94.0
## SAINT NICHOLAS AVENUE: 4914 EAST 112 STREET: 3227 Max. :123.0
## (Other) :492688 (Other) :509073 NA's :5
## sector beat post xcoord
## A : 55505 :337499 Min. : 1.0 Min. : 913844
## E : 51528 * : 68491 1st Qu.:10.0 1st Qu.: 996734
## B : 51467 9 : 19792 Median :22.0 Median :1005807
## H : 48822 1 : 16183 Mean :28.1 Mean :1006808
## C : 48820 2 : 15779 3rd Qu.:41.0 3rd Qu.:1016476
## G : 46189 7 : 15598 Max. :99.0 Max. :1067249
## (Other):230580 (Other): 59569 NA's :477757 NA's :15585
## ycoord typeofid othpers explnstp
## Min. :121152 Min. :1.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:183094 1st Qu.:1.000 1st Qu.:0.0000 1st Qu.:1.0000
## Median :198317 Median :1.000 Median :0.0000 Median :1.0000
## Mean :205079 Mean :1.495 Mean :0.1914 Mean :0.9993
## 3rd Qu.:234311 3rd Qu.:2.000 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :271882 Max. :4.000 Max. :1.0000 Max. :1.0000
## NA's :15585
## repcmd revcmd offunif offverb
## Min. : 1.0 Min. : 1.0 Min. :0.0000 Min. :0.0000
## 1st Qu.: 69.0 1st Qu.: 67.0 1st Qu.:1.0000 1st Qu.:0.0000
## Median :110.0 Median :107.0 Median :1.0000 Median :0.0000
## Mean :240.6 Mean :237.8 Mean :0.7526 Mean :0.1931
## 3rd Qu.:165.0 3rd Qu.:165.0 3rd Qu.:1.0000 3rd Qu.:0.0000
## Max. :985.0 Max. :879.0 Max. :1.0000 Max. :1.0000
## NA's :4
## officrid offshld ac_rept ac_inves
## Min. :0.00000 Min. :0.000 Min. :0.0000 Min. :0.0000
## 1st Qu.:0.00000 1st Qu.:0.000 1st Qu.:0.0000 1st Qu.:0.0000
## Median :0.00000 Median :0.000 Median :0.0000 Median :0.0000
## Mean :0.01201 Mean :0.244 Mean :0.1257 Mean :0.1397
## 3rd Qu.:0.00000 3rd Qu.:0.000 3rd Qu.:0.0000 3rd Qu.:0.0000
## Max. :1.00000 Max. :1.000 Max. :1.0000 Max. :1.0000
##
## radio recstat linecm
## Min. :0.0000 1:269234 Min. :0.0000
## 1st Qu.:0.0000 A:263677 1st Qu.:1.0000
## Median :0.0000 Median :1.0000
## Mean :0.2421 Mean :0.8013
## 3rd Qu.:0.0000 3rd Qu.:1.0000
## Max. :1.0000 Max. :1.0000
## NA's :2
# datestop is stored as an integer of the form MMDDYYYY; zero-pad it to eight
# digits so a leading month digit is not lost, then parse it as a Date.
dat[["datestop"]] <- as.Date(
  sprintf("%08d", dat[["datestop"]]),
  format = "%m%d%Y"
)
# timestop is stored as HHMM; zero-pad to four digits and keep only the hour.
dat[["timestop"]] <- as.integer(
  substring(sprintf("%04d", dat[["timestop"]]), first = 1, last = 2)
)
Fix observation period
# Observation periods below 1 or above 120 are treated as invalid and set to NA
dat$perobs <- replace(dat$perobs, dat$perobs < 1 | dat$perobs > 120, NA)
Fix stop period
# Peek at the first values; the printed levels show how the column was read
head(dat$perstop)
## [1] 5 5 5 5 10 5
## 87 Levels: ** 0 1 10 11 12 13 14 15 16 17 18 19 2 20 21 22 23 24 25 ... 99
# "**" is a non-numeric placeholder in the stop period; treat it as missing.
dat$perstop[dat$perstop == "**"] <- NA
# perstop may have been read as a factor. as.numeric() on a factor returns the
# internal level codes (1..number of levels), not the recorded values, so
# convert through the level labels instead.
dat$perstop <- as.numeric(as.character(dat$perstop))
The date of birth (dob) is an unreliable variable, and age is available directly, so we drop dob.
# Remove the date-of-birth column from the data frame
dat[["dob"]] <- NULL
Clean age
# Inspect the raw age distribution (histogram and exact counts) before cleaning
hist(dat$age)
table(dat$age)
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 53 712 53 37 20 95 17 6 4 13 41 117
## 12 13 14 15 16 17 18 19 20 21 22 23
## 616 2429 7682 17210 24413 28481 29548 29686 29649 28833 27618 22207
## 24 25 26 27 28 29 30 31 32 33 34 35
## 20051 19040 16546 15115 13669 13327 12989 11670 11809 9516 8889 8508
## 36 37 38 39 40 41 42 43 44 45 46 47
## 7048 6542 5927 5967 6298 6147 6512 5654 5561 5636 5254 5266
## 48 49 50 51 52 53 54 55 56 57 58 59
## 5108 5008 4843 4422 4395 3509 3144 2972 2596 2118 1645 1463
## 60 61 62 63 64 65 66 67 68 69 70 71
## 1363 1132 1121 780 665 509 407 346 291 221 214 162
## 72 73 74 75 76 77 78 79 80 81 82 83
## 143 122 70 55 58 50 39 27 17 18 18 8
## 84 85 86 87 88 89 91 95 96 97 98 99
## 9 3 2 2 3 2 3 2 2 1 1 362
## 100 108 110 113 114 115 116 117 118 119 120 121
## 77 1 2 1 1 3 2 3 6 3 15 2
## 123 124 125 127 128 129 130 131 135 136 140 144
## 2 1 6 1 1 2 14 2 8 1 17 2
## 145 146 150 154 155 156 160 161 164 165 166 168
## 15 2 32 1 5 4 42 2 2 36 1 1
## 169 170 171 174 175 176 177 178 180 181 183 185
## 4 33 3 2 16 2 2 5 44 4 1 15
## 187 188 189 190 191 195 196 198 199 200 201 202
## 1 2 7 16 1 13 5 5 1 21 1 1
## 205 206 209 210 211 212 214 215 216 217 218 219
## 5 1 1 3 2 1 3 5 2 1 2 1
## 220 221 222 223 224 225 226 227 228 229 230 231
## 2 5 4 4 3 4 4 4 1 2 3 3
## 232 233 235 236 240 241 242 243 244 245 246 247
## 4 3 5 3 2 2 1 1 1 2 1 1
## 250 251 253 254 255 256 258 260 261 263 265 269
## 3 1 1 1 4 1 1 1 1 2 5 1
## 270 274 278 285 287 296 298 300 301 305 306 309
## 1 4 5 2 2 6 2 1 1 5 1 2
## 310 312 315 319 321 323 326 332 333 334 335 338
## 1 3 1 1 1 2 1 1 2 1 5 2
## 343 345 346 347 352 354 360 361 363 365 366 375
## 2 1 1 3 1 1 1 1 1 1 1 1
## 385 387 396 401 405 410 411 412 421 422 425 433
## 4 2 1 1 1 2 2 1 2 1 1 1
## 435 437 442 447 451 452 455 461 463 469 478 479
## 1 1 1 1 1 2 1 1 1 1 1 1
## 487 488 490 496 501 505 510 511 512 515 520 521
## 1 2 1 1 1 1 3 2 1 1 3 1
## 522 523 553 555 556 565 585 622 674 691 694 699
## 2 1 2 1 1 1 1 1 1 1 1 1
## 700 851 999
## 1 1 210
# Ages below 10 or above 90 are treated as invalid entries and set to NA
is.na(dat$age) <- which(dat$age < 10 | dat$age > 90)
# Histogram of the cleaned ages
hist(dat$age, breaks = 40)
Clean height
# Exact counts per recorded height value, to choose plausible cut-offs
table(dat$height)
##
## 36 37 38 39 40 41 42 43 44 45 46 47
## 68 8 5 5 5 13 3 7 13 11 7 5
## 48 49 50 51 52 53 54 55 56 57 58 59
## 38 29 32 29 38 123 129 134 205 257 274 507
## 60 61 62 63 64 65 66 67 68 69 70 71
## 3876 3193 6843 9054 17215 32624 52838 61087 72983 69105 64694 38571
## 72 73 74 75 76 77 78 79 80 81 82 83
## 46109 23823 16836 6488 2725 1230 553 242 160 116 284 197
## 84 85 86 87 88 89 90 91 93 94 95
## 25 16 3 2 2 9 3 7 3 2 48
# Raw height counts as a bar chart
barplot(height = table(dat$height))
# Keep heights in the range 40-90; everything else becomes NA
dat$height[!(dat$height >= 40 & dat$height <= 90)] <- NA
# Histogram of the cleaned heights
hist(dat$height, breaks = 40)
Clean weight
# Exact counts per recorded weight value, to choose plausible cut-offs
table(dat$weight)
##
## 0 1 2 3 4 5 6 7 8 9 10 11
## 6 257 10 1 6 24 15 15 6 16 53 13
## 12 13 14 15 16 17 18 19 20 21 22 23
## 5 8 13 30 44 37 50 14 45 9 20 10
## 24 25 26 27 28 29 30 31 32 33 34 35
## 6 13 8 6 4 5 9 3 3 2 6 3
## 36 37 38 39 40 41 42 43 44 45 46 47
## 2 2 2 1 11 2 1 2 4 2 1 2
## 48 49 50 52 53 54 55 56 57 58 59 60
## 2 1 10 2 2 1 10 3 6 4 6 23
## 61 62 63 65 67 68 70 75 80 83 84 85
## 1 1 2 3 1 1 18 12 43 1 2 36
## 86 87 88 89 90 91 92 93 94 95 96 97
## 1 1 1 3 238 2 4 4 2 148 6 10
## 98 99 100 101 102 103 104 105 106 107 108 109
## 32 16 1877 10 30 14 21 647 27 24 47 28
## 110 111 112 113 114 115 116 117 118 119 120 121
## 4076 31 55 23 30 1949 71 43 101 43 11818 35
## 122 123 124 125 126 127 128 129 130 131 132 133
## 45 57 39 4221 70 74 122 82 18584 37 116 54
## 134 135 136 137 138 139 140 141 142 143 144 145
## 62 6728 94 103 211 107 34437 71 145 127 78 12013
## 146 147 148 149 150 151 152 153 154 155 156 157
## 121 198 340 181 59997 72 242 145 533 11098 393 233
## 158 159 160 161 162 163 164 165 166 167 168 169
## 482 249 69203 85 286 192 130 23055 129 223 538 245
## 170 171 172 173 174 175 176 177 178 179 180 181
## 54721 99 243 175 121 21041 216 83 376 159 59353 101
## 182 183 184 185 186 187 188 189 190 191 192 193
## 179 162 94 13700 152 160 239 270 26949 60 130 62
## 194 195 196 197 198 199 200 201 202 203 204 205
## 37 6776 123 84 287 205 30714 68 44 50 33 2403
## 206 207 208 209 210 211 212 213 214 215 216 217
## 39 51 75 58 10282 35 50 25 37 2628 39 35
## 218 219 220 221 222 223 224 225 226 227 228 229
## 56 28 9484 12 38 15 17 2047 15 16 22 15
## 230 231 232 233 234 235 236 237 238 239 240 241
## 4419 5 13 11 10 571 9 6 24 4 3284 2
## 242 243 244 245 246 247 248 249 250 251 252 253
## 11 6 5 306 12 8 11 12 4567 2 8 5
## 254 255 256 257 258 259 260 261 262 263 264 265
## 4 138 8 4 8 8 1313 2 7 2 4 211
## 266 267 268 269 270 271 272 273 274 275 276 277
## 2 6 8 2 632 1 4 3 3 311 3 3
## 278 279 280 281 282 283 284 285 286 287 288 289
## 4 2 958 3 5 4 1 124 4 4 3 8
## 290 291 295 296 297 298 299 300 301 302 303 304
## 246 2 50 3 1 2 5 1044 4 1 2 2
## 305 307 308 310 311 312 314 315 316 318 319 320
## 17 1 1 101 1 2 1 40 1 1 1 132
## 321 325 329 330 332 335 336 337 339 340 345 349
## 1 36 1 45 1 2 1 1 1 38 4 1
## 350 353 355 360 365 368 370 371 374 375 380 385
## 149 1 2 25 6 1 14 2 2 9 12 2
## 390 395 399 400 405 410 415 417 418 423 425 429
## 4 2 1 34 1 1 1 1 1 1 3 1
## 430 436 440 442 445 450 459 460 465 470 475 480
## 1 1 1 1 2 5 1 3 1 1 1 3
## 485 499 500 510 511 514 515 518 519 525 530 540
## 1 1 8 6 5 1 1 1 1 1 1 3
## 552 600 612 614 700 750 810 870 916 918 999
## 1 2 1 1 2 2 1 1 1 1 359
# Raw weight counts as a bar chart
barplot(height = table(dat$weight))
# Weights below 50 or above 400 are treated as invalid and set to NA
dat$weight <- replace(dat$weight, dat$weight < 50 | dat$weight > 400, NA)
# Histogram of the cleaned weights
hist(dat$weight, breaks = 40)
# Recode the integer-coded categorical variables as labelled factors.
# `levels` is given explicitly so that each label stays attached to its code:
# without it, factor() assigns the labels to the sorted unique values found in
# the data, so a code that happens to be absent would shift every later label.
dat$city <- factor(dat$city, levels = 1:5,
  labels = c("Manhattan", "Brooklyn", "Bronx", "Queens", "Staten Island"))
dat$race <- factor(dat$race, levels = 1:6,
  labels = c("Black", "Black Hispanic", "White Hispanic", "White",
    "Asian/Pacific Islander", "Am. Indian/ Native Alaskan"))
# sex, inout and trhsloc are coded starting at 0
dat$sex <- factor(dat$sex, levels = 0:1, labels = c("female", "male"))
dat$build <- factor(dat$build, levels = 1:4,
  labels = c("heavy", "muscular", "medium", "thin"))
dat$forceuse <- factor(dat$forceuse, levels = 1:6,
  labels = c("defense of other", "defense of self", "overcome resistence",
    "other", "suspected flight", "suspected weapon"))
dat$inout <- factor(dat$inout, levels = 0:1, labels = c("outside", "inside"))
dat$trhsloc <- factor(dat$trhsloc, levels = 0:2,
  labels = c("neither", "housing authority", "transit authority"))
I copied the crimecodes from the variable description PDF into Excel and saved them in crimecodes.csv
# Lookup table without a header row: the first column is matched against the
# detailcm codes, the second column supplies the corresponding labels.
crimecodes <- read.csv(file = "crimecodes.csv", header = FALSE)
dat$detailcm <- factor(
  dat$detailcm,
  levels = crimecodes[[1]],
  labels = crimecodes[[2]]
)
# Precincts and sectors are identifiers, so store them as factors
# (use names instead?)
dat[c("pct", "addrpct", "sector")] <-
  lapply(dat[c("pct", "addrpct", "sector")], as.factor)
The data contain four distinct ID type codes, so the fourth is labeled "unknown".
# Label the ID type codes. `levels` is explicit so that the labels cannot
# shift onto the wrong codes if one of the codes is absent from the data.
dat$typeofid <- factor(dat$typeofid, levels = 1:4,
  labels = c("photo id", "verbal id", "refused to provide id", "unknown"))
# Command codes are identifiers, not quantities
dat$repcmd <- as.factor(dat$repcmd)
dat$revcmd <- as.factor(dat$revcmd)
# Columns holding 0/1 indicators that are converted to logical.
# sumoffen is deliberately NOT in this list: it holds the offense text (e.g.
# "OPEN CONTAINER"), and as.logical() would turn every value into NA. It is
# left unchanged here; drop it explicitly if the text is not needed.
binary <- strsplit(
  "frisked searched contrabn pistol riflshot asltweap knifcuti machgun othrweap arstmade sumissue",
  " ")[[1]]
for (b in binary) dat[[b]] <- as.logical(dat[[b]])
cs = reason for stop
# Convert every cs_* indicator column to logical. The pattern is anchored with
# "^" so only column names that start with the prefix are converted.
for (b in grep("^cs_", colnames(dat), value = TRUE)) dat[[b]] <- as.logical(dat[[b]])
rf = reason for frisk
# Convert every rf_* indicator column to logical. The pattern is anchored with
# "^" so only column names that start with the prefix are converted.
for (b in grep("^rf_", colnames(dat), value = TRUE)) dat[[b]] <- as.logical(dat[[b]])
sb = basis of search
# Convert every sb_* indicator column to logical. The pattern is anchored with
# "^" so only column names that start with the prefix are converted.
for (b in grep("^sb_", colnames(dat), value = TRUE)) dat[[b]] <- as.logical(dat[[b]])
ac = additional circumstance
# Convert every ac_* indicator column to logical. The pattern is anchored with
# "^" so only column names that start with the prefix are converted.
for (b in grep("^ac_", colnames(dat), value = TRUE)) dat[[b]] <- as.logical(dat[[b]])
pf = force used
# Convert every pf_* indicator column to logical. The pattern is anchored with
# "^" so only column names that start with the prefix are converted.
for (b in grep("^pf_", colnames(dat), value = TRUE)) dat[[b]] <- as.logical(dat[[b]])
# Two more 0/1 indicator columns, converted to logical in one step
dat[c("othpers", "explnstp")] <-
  lapply(dat[c("othpers", "explnstp")], as.logical)
If the officer is in uniform then the officer does not need to inform the person that he/she is an officer and show his/her shield.
# offunif must be converted first: it is used below as the mask that decides
# which identification flags are not applicable.
dat$offunif <- as.logical(dat$offunif)
# For stops by an officer in uniform the identification flags are set to NA.
dat$offverb <- replace(as.logical(dat$offverb), dat$offunif, NA)
dat$officrid <- replace(as.logical(dat$officrid), dat$offunif, NA)
dat$offshld <- replace(as.logical(dat$offshld), dat$offunif, NA)
dat$radio <- as.logical(dat$radio)
Remove unused variables
# Keep every column except the ones listed here; the order of the remaining
# columns is unchanged.
dat <- dat[setdiff(names(dat), c(
  "year",      # we only have 2012
  "haircolr",
  "eyecolor",
  "ser_num",
  "othfeatr",
  "arstoffn",
  "crimsusp",
  "premname",
  "addrnum",
  "stname",
  "stinter",
  "crossst",
  "beat",
  "post",
  "recstat",
  "linecm"
))]
Check summary of cleaned data
# Per-column summary after cleaning, to verify the recoded types and NA counts
summary(dat)
## pct datestop timestop
## 75 : 24408 Min. :2012-01-01 Min. : 0.00
## 73 : 22148 1st Qu.:2012-02-29 1st Qu.:10.00
## 40 : 18276 Median :2012-05-05 Median :16.00
## 44 : 15414 Mean :2012-05-28 Mean :13.89
## 79 : 15294 3rd Qu.:2012-08-25 3rd Qu.:20.00
## 103 : 12986 Max. :2012-12-31 Max. :23.00
## (Other):424385
## city sex
## Manhattan :110366 female: 38062
## Brooklyn :188707 male :487065
## Bronx :102143 NA's : 7784
## Queens :110958
## Staten Island: 20732
## NA's : 5
##
## race age height
## Black :284229 Min. :10.00 Min. :40.0
## Black Hispanic : 35772 1st Qu.:19.00 1st Qu.:67.0
## White Hispanic :129368 Median :24.00 Median :69.0
## White : 50366 Mean :28.13 Mean :68.6
## Asian/Pacific Islander : 17058 3rd Qu.:34.00 3rd Qu.:71.0
## Am. Indian/ Native Alaskan: 2257 Max. :89.00 Max. :90.0
## NA's : 13861 NA's :2360 NA's :146
## weight build frisked searched
## Min. : 50.0 heavy : 43101 Mode :logical Mode :logical
## 1st Qu.:150.0 muscular: 2259 FALSE:235667 FALSE:488663
## Median :165.0 medium :308601 TRUE :297244 TRUE :44248
## Mean :168.9 thin :173552
## 3rd Qu.:180.0 NA's : 5398
## Max. :400.0
## NA's :1243
## contrabn pistol riflshot asltweap
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:523677 FALSE:532249 FALSE:532898 FALSE:532860
## TRUE :9234 TRUE :662 TRUE :13 TRUE :51
##
##
##
##
## knifcuti machgun othrweap arstmade
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:528206 FALSE:532908 FALSE:531589 FALSE:500596
## TRUE :4705 TRUE :3 TRUE :1322 TRUE :32315
##
##
##
##
## sumissue sumoffen detailcm
## Mode :logical Mode:logical CPW :129722
## FALSE:505472 NA's:532911 ROBBERY :116843
## TRUE :27439 BURGLARY : 60090
## GRAND LARCENY AUTO: 44639
## CRIMINAL TRESPASS : 37967
## (Other) :143648
## NA's : 2
## perobs perstop pf_hands pf_wall
## Min. : 1.000 Min. : 2.00 Mode :logical Mode :logical
## 1st Qu.: 1.000 1st Qu.:14.00 FALSE:462623 FALSE:520284
## Median : 1.000 Median :36.00 TRUE :70288 TRUE :12627
## Mean : 2.436 Mean :31.73
## 3rd Qu.: 2.000 3rd Qu.:47.00
## Max. :120.000 Max. :87.00
## NA's :3853 NA's :243
## pf_grnd pf_drwep pf_ptwep pf_baton
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:531089 FALSE:531611 FALSE:531678 FALSE:532854
## TRUE :1822 TRUE :1300 TRUE :1233 TRUE :57
##
##
##
##
## pf_hcuff pf_pepsp pf_other cs_objcs
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:513665 FALSE:532854 FALSE:529421 FALSE:519463
## TRUE :19246 TRUE :57 TRUE :3490 TRUE :13448
##
##
##
##
## cs_descr cs_casng cs_lkout cs_cloth
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:444997 FALSE:343055 FALSE:432273 FALSE:507914
## TRUE :87914 TRUE :189856 TRUE :100638 TRUE :24997
##
##
##
##
## cs_drgtr cs_furtv cs_vcrim cs_bulge
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:493933 FALSE:258266 FALSE:474002 FALSE:493989
## TRUE :38978 TRUE :274645 TRUE :58909 TRUE :38922
##
##
##
##
## cs_other rf_vcrim rf_othsw rf_attir
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:449598 FALSE:463955 FALSE:515019 FALSE:493450
## TRUE :83313 TRUE :68956 TRUE :17892 TRUE :39461
##
##
##
##
## rf_vcact rf_rfcmp rf_verbl rf_knowl
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:489273 FALSE:490633 FALSE:529549 FALSE:522919
## TRUE :43638 TRUE :42278 TRUE :3362 TRUE :9992
##
##
##
##
## rf_furt rf_bulg sb_hdobj sb_outln
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:323411 FALSE:495579 FALSE:509706 FALSE:530116
## TRUE :209500 TRUE :37332 TRUE :23205 TRUE :2795
##
##
##
##
## sb_admis sb_other ac_proxm ac_evasv
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:531450 FALSE:513967 FALSE:412500 FALSE:431677
## TRUE :1461 TRUE :18944 TRUE :120411 TRUE :101234
##
##
##
##
## ac_assoc ac_cgdir ac_incid ac_time
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:508725 FALSE:405926 FALSE:213049 FALSE:299354
## TRUE :24186 TRUE :126985 TRUE :319862 TRUE :233557
##
##
##
##
## ac_stsnd ac_other forceuse
## Mode :logical Mode :logical defense of other : 1016
## FALSE:519278 FALSE:514585 defense of self : 30733
## TRUE :13633 TRUE :18326 overcome resistence: 2120
## other : 14421
## suspected flight : 13941
## suspected weapon : 5071
## NA's :465609
## inout trhsloc addrpct
## outside:417377 neither :413990 75 : 24408
## inside :115534 housing authority: 77373 73 : 22148
## transit authority: 41548 40 : 18276
## 44 : 15414
## 79 : 15294
## (Other):437366
## NA's : 5
## sector xcoord ycoord
## A : 55505 Min. : 913844 Min. :121152
## E : 51528 1st Qu.: 996734 1st Qu.:183094
## B : 51467 Median :1005807 Median :198317
## H : 48822 Mean :1006808 Mean :205079
## C : 48820 3rd Qu.:1016476 3rd Qu.:234311
## G : 46189 Max. :1067249 Max. :271882
## (Other):230580 NA's :15585 NA's :15585
## typeofid othpers explnstp
## photo id :289131 Mode :logical Mode :logical
## verbal id :229801 FALSE:430923 FALSE:376
## refused to provide id: 8149 TRUE :101988 TRUE :532535
## unknown : 5830
##
##
##
## repcmd revcmd offunif offverb
## 165 : 35820 165 : 29202 Mode :logical Mode :logical
## 163 : 27662 163 : 23044 FALSE:131852 FALSE:28973
## 802 : 13211 802 : 13197 TRUE :401059 TRUE :102879
## 164 : 12322 120 : 11149 NA's :401059
## 162 : 11178 162 : 10791
## (Other):432714 101 : 9134
## NA's : 4 (Other):436394
## officrid offshld ac_rept ac_inves
## Mode :logical Mode :logical Mode :logical Mode :logical
## FALSE:125452 FALSE:1843 FALSE:465902 FALSE:458488
## TRUE :6400 TRUE :130009 TRUE :67009 TRUE :74423
## NA's :401059 NA's :401059
##
##
##
## radio
## Mode :logical
## FALSE:403875
## TRUE :129036
##
##
##
##
Save cleaned data
# Write the cleaned data frame to disk under the object name "dat"
save(list = "dat", file = "SFQ_clean.rda")
# To restore it in a later session:
#load("SFQ_clean.rda")
# Widen the bottom margin so the rotated borough labels fit; par() returns the
# previous settings, which are restored after plotting.
oldpar <- par(mar = c(6, 4, 4, 2) + .1)
barplot(height = table(dat$city), las = 3, ylab = "Number of Stops")
par(oldpar)
Compare to population (from Wikipedia)
# Borough populations, listed in the same order as the levels of dat$city so
# that the element-wise division below lines up.
pop <- c(
  "Manhattan"     = 1626159,
  "Brooklyn"      = 2592149,
  "Bronx"         = 1418733,
  "Queens"        = 2296175,
  "Staten Island" = 472621
)
# Widen the bottom margin for the rotated labels and restore it afterwards
oldpar <- par(mar = c(6, 4, 4, 2) + .1)
barplot(
  height = table(dat$city) / pop * 100,
  las = 3,
  ylim = c(0, 10),
  ylab = "Stops in % of Population"
)
par(oldpar)
# Number of stops per race, with the category names shortened for the axis
tbl <- table(dat$race)
names(tbl) <- abbreviate(names(tbl), minlength = 8)
barplot(height = tbl, las = 3, ylab = "Number of Stops")
Compare to NYC population (from Wikipedia)
# Population shares in percent for the four groups used in the comparison
pop <- setNames(
  c(44.6, 25.1, 27.5, 11.8),
  c("White", "Black", "Hispanic", "Other")
)
# Check the total of the shares
sum(pop)
## [1] 109
The shares add up to 109%, not 100%, so we rescale them to sum to 1.
# Rescale the shares so that they sum to 1
pop <- prop.table(pop)
aggregate data so the groups match the population data
# Collapse the six race categories of the stop data into the four groups used
# for the population shares, in the same order as `pop`.
tbl <- table(dat$race)
tbl <- c(
  White    = tbl[["White"]],
  Black    = tbl[["Black"]],
  Hispanic = tbl[["White Hispanic"]] + tbl[["Black Hispanic"]],
  Other    = tbl[["Asian/Pacific Islander"]] +
    tbl[["Am. Indian/ Native Alaskan"]]
)
# Convert the counts to proportions of all stops with a recorded race
tbl <- prop.table(tbl)
# Side-by-side bars in percent: population share vs. share of stops
barplot(
  rbind(pop, tbl) * 100,
  beside = TRUE,
  col = gray.colors(2),
  ylim = c(0, 70),
  ylab = "Proportion [%]",
  main = "Stops in NYC"
)
legend(
  x = "topright",
  legend = c("Population", "Stops"),
  fill = gray.colors(2),
  bty = "n"
)
# Install vcd only when it is missing. requireNamespace() tests for the package
# itself; `"vcd" %in% installed.packages()` searches every cell of the
# installed-packages matrix, not just the package names.
if (!requireNamespace("vcd", quietly = TRUE)) {
  install.packages("vcd", dependencies = TRUE)
}
library(vcd)
## Loading required package: grid
# Association plots (assoc() from vcd) for pairs of categorical variables.
# Sex by race of the person stopped:
assoc(~ sex + race, data=dat, shade=TRUE, abbreviate_labs=6)
# Sex by reason force was used; axis labels are rotated and the variable names
# are switched off via labeling_args:
assoc(~ sex + forceuse, data=dat, shade=TRUE,
labeling_args = list(rot_labels = c(25, 90, 0, 90), varnames=FALSE))
# Whether an arrest was made, by sex and by race:
assoc(~ arstmade + sex, data=dat, shade=TRUE)
assoc(~ arstmade + race, data=dat, shade=TRUE, abbreviate_labs=6)
Installing package proj4 is somewhat more complicated. You can skip this part.
# Install ggmap only when it is missing. requireNamespace() tests for the
# package itself; `"ggmap" %in% installed.packages()` searches every cell of
# the installed-packages matrix, not just the package names.
if (!requireNamespace("ggmap", quietly = TRUE)) {
  install.packages("ggmap", dependencies = TRUE)
}
library(ggmap)
## Loading required package: ggplot2
# Fetch a base map centred on New York City. The recorded output below shows
# the request going to the Google Maps web API, so network access is needed.
NYC <- get_map("New York City", zoom=11)
## Map from URL : http://maps.googleapis.com/maps/api/staticmap?center=New+York+City&zoom=11&size=640x640&scale=2&maptype=terrain&language=en-EN&sensor=false
## Information from URL : http://maps.googleapis.com/maps/api/geocode/json?address=New%20York%20City&sensor=false
# Wrap the downloaded map in a plot object that further layers are added to
map <- ggmap(NYC)
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning in structure(NULL, class = "waiver"): Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
# Stop locations as recorded in the data (projected x/y coordinates)
coords <- dat[, c("xcoord", "ycoord")]
# EPSG Projection 2263 - NAD83 / New York Long Island (ftUS)
# WGS84 Bounds: -74.2700, 40.4700, -71.7500, 41.3100
# Projected Bounds: 909126.0155, 110626.2880, 1610215.3590, 424498.0529
# Install proj4 only when it is missing. requireNamespace() tests for the
# package itself rather than searching the whole installed-packages matrix.
if (!requireNamespace("proj4", quietly = TRUE)) {
  install.packages("proj4", dependencies = TRUE)
}
library(proj4) ### needs PROJ.4 installed (http://trac.osgeo.org/proj/)
# Inverse projection: convert the projected x/y coordinates to longitude and
# latitude so they can be drawn on the map.
c2 <- project(coords, inverse=TRUE, proj="+proj=lcc +lat_1=41.03333333333333 +lat_2=40.66666666666666 +lat_0=40.16666666666666 +lon_0=-74 +x_0=300000.0000000001 +y_0=0 +ellps=GRS80 +datum=NAD83 +to_meter=0.3048006096012192 +no_defs")
coords <- data.frame(lon=c2[[1]], lat=c2[[2]])
# Attach the crime type and keep only one type for plotting
d2 <- coords
d2$detailcm <- dat$detailcm
d2 <- d2[d2$detailcm == "ROBBERY",]
#d2 <- d2[d2$detailcm == "CPW",]
#d2 <- d2[d2$detailcm =="MAKING GRAFFITI",]
# Drop rows with a missing coordinate or crime type (this also removes the
# all-NA rows that the comparison above produces when detailcm is NA)
d2 <- na.omit(d2)
# uses ggplot style plots
# Alternative (disabled): draw each stop as a point coloured by crime type
#map + geom_point(aes(x = lon, y = lat,
# colour = detailcm), data= d2)
# Overlay a 2D density estimate of the stop locations in d2 on the base map,
# drawn as polygons; fill colour and transparency both follow the density
# level.
map +
stat_density2d(
aes(x = lon, y = lat, fill = ..level.., alpha = ..level..),
size = 1, bins = 10, data = d2,
geom = "polygon"
)
## Warning: Removed 6769 rows containing non-finite values (stat_density2d).
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
## Warning: Calling 'structure(NULL, *)' is deprecated, as NULL cannot have attributes.
## Consider 'structure(list(), *)' instead.
# Install arules and arulesViz when they are missing, then attach arules.
# requireNamespace() checks for the named package only, whereas `%in%` on
# installed.packages() compares against every cell of the package matrix.
if (!requireNamespace("arules", quietly = TRUE)) {
  install.packages("arules", dependencies = TRUE)
}
if (!requireNamespace("arulesViz", quietly = TRUE)) {
  install.packages("arulesViz", dependencies = TRUE)
}
library(arules)
## Loading required package: Matrix
##
## Attaching package: 'arules'
## The following objects are masked from 'package:base':
##
## abbreviate, write
library(arulesViz)
#load("SFQ_clean.rda")
# Collect the variables used for association mining. The flag columns are
# selected per prefix (rf_, cs_, ac_, pf_) in that order; the patterns are
# anchored with "^" so only names that start with the prefix are picked up.
d <- dat[, c(
  grep("^rf_", colnames(dat), value = TRUE),
  grep("^cs_", colnames(dat), value = TRUE),
  grep("^ac_", colnames(dat), value = TRUE),
  grep("^pf_", colnames(dat), value = TRUE),
  "arstmade", "sumissue", "detailcm", "race",
  "pct",
  #"city", ### city and precinct are related
  "typeofid", "othpers"
)]
# Derived logical variables.
d$female <- dat$sex == "female"
#d$detailcm[!(d$arstmade | d$sumissue)] <- NA
# TRUE when any of the individual weapon flags is set.
d$weapon <- dat$pistol | dat$riflshot | dat$asltweap |
  dat$knifcuti | dat$machgun | dat$othrweap
d$no_uniform <- !dat$offunif
d$inside <- dat$inout == "inside"
# Keep trhsloc, but treat "neither" as missing. %in% is used so that missing
# values in dat$trhsloc cannot produce an NA index.
# NOTE(review): the level "neither" itself is not dropped and still appears as
# the (empty) item "trhsloc=neither" after the conversion to transactions.
d$trhsloc <- dat$trhsloc
d$trhsloc[dat$trhsloc %in% "neither"] <- NA
Continuous variables need to be discretized!
# Flag stops of persons younger than 18.
d[["minor"]] <- dat[["age"]] < 18
# Bin height into three groups using frequency-based discretization.
d[["height"]] <- discretize(dat[["height"]], method = "frequency", 3)
# Convert the prepared data frame into a transactions object and print it.
trans <- as(d, "transactions")
trans
## transactions in sparse format with
## 532911 transactions (rows) and
## 251 items (columns)
# Summarize the transactions: most frequent items and transaction sizes.
summary(trans)
## transactions as itemMatrix in sparse format with
## 532911 rows (elements/itemsets/transactions) and
## 251 columns (items) and a density of 0.04409263
##
## most frequent items:
## ac_incid typeofid=photo id race=Black cs_furtv
## 319862 289131 284229 274645
## ac_time (Other)
## 233557 4496435
##
## element (itemset/transaction) length distribution:
## sizes
## 4 5 6 7 8 9 10 11 12 13 14 15
## 1 203 5820 28460 55685 78513 86677 77282 62409 46353 32152 21577
## 16 17 18 19 20 21 22 23 24 25 26 27
## 13658 8938 5601 3631 2272 1410 917 555 337 186 127 61
## 28 29 30 31 32 33 34
## 45 19 8 8 4 1 1
##
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 4.00 9.00 11.00 11.07 13.00 34.00
##
## includes extended item information - examples:
## labels variables levels
## 1 rf_vcrim rf_vcrim TRUE
## 2 rf_othsw rf_othsw TRUE
## 3 rf_attir rf_attir TRUE
##
## includes extended transaction information - examples:
## transactionID
## 1 1
## 2 2
## 3 3
The conversion to transactions created items (binary dummy variables for nominal values)
# Compare dimensions: the 52 variables of d are expanded into 251 items.
dim(d)
## [1] 532911 52
dim(trans)
## [1] 532911 251
# List the labels of all items.
itemLabels(trans)
## [1] "rf_vcrim"
## [2] "rf_othsw"
## [3] "rf_attir"
## [4] "rf_vcact"
## [5] "rf_rfcmp"
## [6] "rf_verbl"
## [7] "rf_knowl"
## [8] "rf_furt"
## [9] "rf_bulg"
## [10] "cs_objcs"
## [11] "cs_descr"
## [12] "cs_casng"
## [13] "cs_lkout"
## [14] "cs_cloth"
## [15] "cs_drgtr"
## [16] "cs_furtv"
## [17] "cs_vcrim"
## [18] "cs_bulge"
## [19] "cs_other"
## [20] "ac_proxm"
## [21] "ac_evasv"
## [22] "ac_assoc"
## [23] "ac_cgdir"
## [24] "ac_incid"
## [25] "ac_time"
## [26] "ac_stsnd"
## [27] "ac_other"
## [28] "ac_rept"
## [29] "ac_inves"
## [30] "pf_hands"
## [31] "pf_wall"
## [32] "pf_grnd"
## [33] "pf_drwep"
## [34] "pf_ptwep"
## [35] "pf_baton"
## [36] "pf_hcuff"
## [37] "pf_pepsp"
## [38] "pf_other"
## [39] "arstmade"
## [40] "sumissue"
## [41] "detailcm=ABANDONMENT OF A CHILD"
## [42] "detailcm=ABORTION"
## [43] "detailcm=ABSCONDING"
## [44] "detailcm=ADULTERY"
## [45] "detailcm=AGGRAVATED ASSAULT"
## [46] "detailcm=AGGRAVATED HARASSMENT"
## [47] "detailcm=AGGRAVATED SEXUAL ABUSE"
## [48] "detailcm=ARSON"
## [49] "detailcm=ASSAULT"
## [50] "detailcm=AUTO STRIPPING"
## [51] "detailcm=BIGAMY"
## [52] "detailcm=BRIBE RECEIVING"
## [53] "detailcm=BRIBERY"
## [54] "detailcm=BURGLARY"
## [55] "detailcm=COERCION"
## [56] "detailcm=COMPUTER TAMPERING"
## [57] "detailcm=COMPUTER TRESPASS"
## [58] "detailcm=COURSE OF SEXUAL CONDUCT"
## [59] "detailcm=CPSP"
## [60] "detailcm=CPW"
## [61] "detailcm=CREATING A HAZARD"
## [62] "detailcm=CRIMINAL CONTEMPT"
## [63] "detailcm=CRIMINAL MISCHIEF"
## [64] "detailcm=CRIMINAL POSSESION OF CONTROLLED SUBSTANCE"
## [65] "detailcm=CRIMINAL POSSESSION OF COMPUTER MATERIAL"
## [66] "detailcm=CRIMINAL POSSESSION OF FORGED INSTRUMENT"
## [67] "detailcm=CRIMINAL POSSESSION OF MARIHUANA"
## [68] "detailcm=CRIMINAL SALE OF CONTROLLED SUBSTANCE"
## [69] "detailcm=CRIMINAL SALE OF MARIHUANA"
## [70] "detailcm=CRIMINAL TAMPERING"
## [71] "detailcm=CRIMINAL TRESPASS"
## [72] "detailcm=CUSTODIAL INTERFERENCE"
## [73] "detailcm=EAVES DROPPING"
## [74] "detailcm=ENDANGER THE WELFARE OF A CHILD"
## [75] "detailcm=ESCAPE"
## [76] "detailcm=FALSIFY BUSINESS RECORDS"
## [77] "detailcm=FORGERY"
## [78] "detailcm=FORGERY OF A VIN"
## [79] "detailcm=FORTUNE TELLING"
## [80] "detailcm=FRAUD"
## [81] "detailcm=FRAUDULENT ACCOSTING"
## [82] "detailcm=FRAUDULENT MAKE ELECTRONIC ACCESS DEVICE"
## [83] "detailcm=FRAUDULENT OBTAINING A SIGNATURE"
## [84] "detailcm=GAMBLING"
## [85] "detailcm=GRAND LARCENY"
## [86] "detailcm=GRAND LARCENY AUTO"
## [87] "detailcm=HARASSMENT"
## [88] "detailcm=HAZING"
## [89] "detailcm=HINDERING PROSECUTION"
## [90] "detailcm=INCEST"
## [91] "detailcm=INSURANCE FRAUD"
## [92] "detailcm=ISSUE A FALSE CERTIFICATE"
## [93] "detailcm=ISSUE A FALSE FINANCIAL STATEMENT"
## [94] "detailcm=ISSUING ABORTION ARTICLES"
## [95] "detailcm=JOSTLING"
## [96] "detailcm=KIDNAPPING"
## [97] "detailcm=KILLING OR INJURING A POILCE ANIMAL"
## [98] "detailcm=LOITERING"
## [99] "detailcm=MAKING GRAFFITI"
## [100] "detailcm=MENACING"
## [101] "detailcm=MISAPPLICATION OF PROPERTY"
## [102] "detailcm=MURDER"
## [103] "detailcm=OBSCENITY"
## [104] "detailcm=OBSTRUCTING FIREFIGHTING OPERATIONS"
## [105] "detailcm=OBSTRUCTING GOVERNMENTAL ADMINISTRATION"
## [106] "detailcm=OFFERING A FALSE INSTRUMENT"
## [107] "detailcm=OFFICIAL MISCONDUCT"
## [108] "detailcm=PETIT LARCENY"
## [109] "detailcm=POSSESSION OF BURGLAR TOOLS"
## [110] "detailcm=POSSESSION OF EAVES DROPPING DEVICES"
## [111] "detailcm=POSSESSION OF GRAFFITI INSTRUMENTS"
## [112] "detailcm=PROHIBITED USE OF WEAPON"
## [113] "detailcm=PROMOTING SUICIDE"
## [114] "detailcm=PROSTITUTION"
## [115] "detailcm=PUBLIC DISPLAY OF OFFENSIVE SEXUAL MATERIAL"
## [116] "detailcm=PUBLIC LEWDNESS"
## [117] "detailcm=RAPE"
## [118] "detailcm=RECKLESS ENDANGERMENT"
## [119] "detailcm=RECKLESS ENDANGERMENT PROPERTY"
## [120] "detailcm=REFUSING TO AID A PEACE OR POLICE OFFICER"
## [121] "detailcm=RENT GOUGING"
## [122] "detailcm=RESISTING ARREST"
## [123] "detailcm=REWARD OFFICIAL MISCONDUCT"
## [124] "detailcm=RIOT"
## [125] "detailcm=ROBBERY"
## [126] "detailcm=SELF ABORTION"
## [127] "detailcm=SEXUAL ABUSE"
## [128] "detailcm=SEXUAL MISCONDUCT"
## [129] "detailcm=SEXUAL PERFORMANCE BY A CHILD"
## [130] "detailcm=SODOMY"
## [131] "detailcm=SUBSTITUTION OF CHILDREN"
## [132] "detailcm=TAMPERING WITH A PUBLIC RECORD"
## [133] "detailcm=TAMPERING WITH CONSUMER PRODUCT"
## [134] "detailcm=TAMPERING WITH PRIVATE COMMUNICATIONS"
## [135] "detailcm=TERRORISM"
## [136] "detailcm=THEFT OF SERVICES"
## [137] "detailcm=TRADEMARK COUNTERFEITING"
## [138] "detailcm=UNLAWFULLY DEALING WITH FIREWORKS"
## [139] "detailcm=UNAUTHORIZED RECORDING"
## [140] "detailcm=UNAUTHORIZED USE OF A VEHICLE"
## [141] "detailcm=UNAUTHORIZED USE OF COMPUTER"
## [142] "detailcm=UNLAWFUL ASSEMBLY"
## [143] "detailcm=UNLAWFUL DUPLICATION OF COMPUTER MATERIAL"
## [144] "detailcm=UNLAWFUL POSSESSION OF RADIO DEVICES"
## [145] "detailcm=UNLAWFUL USE OF CREDIT CARD, DEBIT CARD"
## [146] "detailcm=UNLAWFUL USE OF SECRET SCIENTIFIC MATERIAL"
## [147] "detailcm=UNLAWFUL WEARING A BODY VEST"
## [148] "detailcm=UNLAWFULL IMPRISONMENT"
## [149] "detailcm=UNLAWFULLY DEALING WITH A CHILD"
## [150] "detailcm=UNLAWFULLY USE SLUGS"
## [151] "detailcm=VEHICULAR ASSAULT"
## [152] "detailcm=OTHER"
## [153] "detailcm=FORCIBLE TOUCHINGFurther Notes"
## [154] "race=Black"
## [155] "race=Black Hispanic"
## [156] "race=White Hispanic"
## [157] "race=White"
## [158] "race=Asian/Pacific Islander"
## [159] "race=Am. Indian/ Native Alaskan"
## [160] "pct=1"
## [161] "pct=5"
## [162] "pct=6"
## [163] "pct=7"
## [164] "pct=9"
## [165] "pct=10"
## [166] "pct=13"
## [167] "pct=14"
## [168] "pct=17"
## [169] "pct=18"
## [170] "pct=19"
## [171] "pct=20"
## [172] "pct=22"
## [173] "pct=23"
## [174] "pct=24"
## [175] "pct=25"
## [176] "pct=26"
## [177] "pct=28"
## [178] "pct=30"
## [179] "pct=32"
## [180] "pct=33"
## [181] "pct=34"
## [182] "pct=40"
## [183] "pct=41"
## [184] "pct=42"
## [185] "pct=43"
## [186] "pct=44"
## [187] "pct=45"
## [188] "pct=46"
## [189] "pct=47"
## [190] "pct=48"
## [191] "pct=49"
## [192] "pct=50"
## [193] "pct=52"
## [194] "pct=60"
## [195] "pct=61"
## [196] "pct=62"
## [197] "pct=63"
## [198] "pct=66"
## [199] "pct=67"
## [200] "pct=68"
## [201] "pct=69"
## [202] "pct=70"
## [203] "pct=71"
## [204] "pct=72"
## [205] "pct=73"
## [206] "pct=75"
## [207] "pct=76"
## [208] "pct=77"
## [209] "pct=78"
## [210] "pct=79"
## [211] "pct=81"
## [212] "pct=83"
## [213] "pct=84"
## [214] "pct=88"
## [215] "pct=90"
## [216] "pct=94"
## [217] "pct=100"
## [218] "pct=101"
## [219] "pct=102"
## [220] "pct=103"
## [221] "pct=104"
## [222] "pct=105"
## [223] "pct=106"
## [224] "pct=107"
## [225] "pct=108"
## [226] "pct=109"
## [227] "pct=110"
## [228] "pct=111"
## [229] "pct=112"
## [230] "pct=113"
## [231] "pct=114"
## [232] "pct=115"
## [233] "pct=120"
## [234] "pct=122"
## [235] "pct=123"
## [236] "typeofid=photo id"
## [237] "typeofid=verbal id"
## [238] "typeofid=refused to provide id"
## [239] "typeofid=unknown"
## [240] "othpers"
## [241] "female"
## [242] "weapon"
## [243] "no_uniform"
## [244] "inside"
## [245] "trhsloc=neither"
## [246] "trhsloc=housing authority"
## [247] "trhsloc=transit authority"
## [248] "minor"
## [249] "height=[40,68)"
## [250] "height=[68,71)"
## [251] "height=[71,90]"
# Show the first two transactions and the first ten items as a logical matrix.
as(trans[1:2, 1:10], "matrix")
## rf_vcrim rf_othsw rf_attir rf_vcact rf_rfcmp rf_verbl rf_knowl rf_furt
## 1 TRUE FALSE FALSE TRUE TRUE FALSE FALSE TRUE
## 2 FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE
## rf_bulg cs_objcs
## 1 FALSE FALSE
## 2 FALSE FALSE
# Bar plot of the 50 most frequent items, with shrunken axis labels.
itemFrequencyPlot(
  trans,
  topN = 50,
  cex.names = 0.5
)
We cluster the items based on their correlation (Phi coefficient). dissimilarity() converts the correlations into distances. Note: we use a sample of 50,000 transactions to speed up the computation and plot the result as a large PDF.
# Phi-based dissimilarities between items, computed on a random sample of
# 50000 transactions.
# NOTE(review): this reuses the name `d`, replacing the data frame that was
# converted to `trans` earlier.
d <- dissimilarity(sample(trans, 50000), method = "phi", which = "items")
d[is.na(d)] <- 1 # get rid of missing values
# Write the hierarchical-clustering dendrogram of the items to a wide PDF.
pdf(file="similarity.pdf", width=25)
plot(hclust(d), cex=.5)
dev.off()
## png
## 2
Click here to see the resulting dendrogram.
# Compare item frequencies within subgroups against the full data set
# (passed as `population`). With lift = TRUE the bars show the lift relative
# to the population instead of the frequencies.

# Stops of women: frequencies first, then lift.
trans_female <- subset(trans, items %in% "female")
itemFrequencyPlot(
  trans_female,
  population = trans, topN = 25, cex.names = 0.5
)
itemFrequencyPlot(
  trans_female,
  population = trans, topN = 25, cex.names = 0.5, lift = TRUE
)

# Stops of minors.
trans_minor <- subset(trans, items %in% "minor")
itemFrequencyPlot(
  trans_minor,
  population = trans, topN = 25, cex.names = 0.5, lift = TRUE
)

# Stops with race=White.
trans_white <- subset(trans, items %in% "race=White")
itemFrequencyPlot(
  trans_white,
  population = trans, topN = 25, cex.names = 0.5, lift = TRUE
)

# Stops with race=Black.
trans_black <- subset(trans, items %in% "race=Black")
itemFrequencyPlot(
  trans_black,
  population = trans, topN = 25, cex.names = 0.5, lift = TRUE
)
Note: most of the stops in the data are for this race; therefore, the lift is not very high!
Find a suitable minimum support (itemsets should be supported by at least 500 observations)
# Number of transactions in the data set.
nrow(trans)
## [1] 532911
# Relative support that corresponds to 500 transactions.
500/nrow(trans)
## [1] 0.000938243
# Mine frequent itemsets of 2 to 4 items with a minimum support of 0.001
# (the value above, rounded up).
itemsets <- apriori(trans, parameter = list(target = "frequent",
supp=0.001, minlen = 2, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 4 frequent itemsets FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 532
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[241 item(s), 532911 transaction(s)] done [0.37s].
## sorting and recoding items ... [158 item(s)] done [0.08s].
## creating transaction tree ... done [0.57s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(target = "frequent", supp =
## 0.001, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.44s].
## writing ... [135073 set(s)] done [0.01s].
## creating S4 object ... done [0.20s].
# Show the ten itemsets with the highest support (sort() without `by` orders
# the itemsets by support here).
inspect(head(sort(itemsets), n=10))
## items support count
## [1] {ac_incid,ac_time} 0.3567106 190095
## [2] {ac_incid,race=Black} 0.3304304 176090
## [3] {cs_furtv,ac_incid} 0.3293908 175536
## [4] {ac_incid,typeofid=photo id} 0.3218417 171513
## [5] {rf_furt,cs_furtv} 0.3003184 160043
## [6] {cs_furtv,race=Black} 0.2880931 153528
## [7] {race=Black,typeofid=photo id} 0.2734303 145714
## [8] {cs_furtv,typeofid=photo id} 0.2694671 143602
## [9] {ac_incid,typeofid=verbal id} 0.2627099 140001
## [10] {cs_furtv,ac_time} 0.2500080 133232
Add an additional quality measure
# Store lift as an additional quality measure of the itemsets. The argument
# is spelled out as `transactions`; `trans =` only reached it through partial
# argument matching.
quality(itemsets)$lift <- interestMeasure(
  itemsets,
  measure = "lift",
  transactions = trans
)
# Show the ten itemsets with the highest lift.
inspect(head(sort(itemsets, by = "lift"), n = 10))
## items support count lift
## [1] {pf_hands,
## pf_grnd,
## pf_hcuff,
## arstmade} 0.001077103 574 1090.6910
## [2] {pf_other,
## detailcm=PETIT LARCENY,
## inside,
## trhsloc=transit authority} 0.001769526 943 630.0595
## [3] {pf_hands,
## pf_hcuff,
## arstmade,
## weapon} 0.001285393 685 368.4790
## [4] {detailcm=PETIT LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002516368 1341 349.5777
## [5] {detailcm=PETIT LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001026438 547 333.0304
## [6] {rf_bulg,
## cs_bulge,
## arstmade,
## weapon} 0.001150286 613 306.9926
## [7] {detailcm=GRAND LARCENY,
## pct=18,
## inside,
## trhsloc=transit authority} 0.001595013 850 302.4293
## [8] {ac_other,
## pf_other,
## inside,
## trhsloc=transit authority} 0.001133398 604 297.7474
## [9] {pf_hcuff,
## arstmade,
## detailcm=CPW,
## weapon} 0.001868980 996 290.3014
## [10] {pf_hands,
## pf_wall,
## pf_hcuff,
## arstmade} 0.001833327 977 267.8753
Plot itemsets as a graph. Different subgroups with items that are related to each other can be identified.
# Graph visualization of the 50 itemsets with the highest lift.
plot(
  head(sort(itemsets, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.8)
)
#plot(head(sort(itemsets, by = "lift"), n=50), method = "graph", interactive = TRUE)
Remove pf_hcuff (always occurs together with an arrest)
# Drop the item by its exact label. Unlike a negative pmatch() index, this
# never matches a label partially and is a no-op (instead of an NA index)
# when the item has already been removed, e.g. if this step is re-run.
trans <- trans[, which(!(colnames(trans) %in% "pf_hcuff"))]
# Re-mine frequent itemsets of 2 to 4 items with a minimum support of 0.001.
itemsets <- apriori(trans, parameter = list(target = "frequent",
  supp=0.001, minlen = 2, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 4 frequent itemsets FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 532
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[240 item(s), 532911 transaction(s)] done [0.38s].
## sorting and recoding items ... [157 item(s)] done [0.08s].
## creating transaction tree ... done [0.55s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(target = "frequent", supp =
## 0.001, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.41s].
## writing ... [131662 set(s)] done [0.01s].
## creating S4 object ... done [0.20s].
# Recompute lift for the re-mined itemsets. `transactions` is named in full;
# `trans =` depended on partial argument matching.
quality(itemsets)$lift <- interestMeasure(
  itemsets,
  measure = "lift",
  transactions = trans
)
# Ten itemsets with the highest lift.
inspect(head(sort(itemsets, by = "lift"), n = 10))
## items support count lift
## [1] {pf_other,
## detailcm=PETIT LARCENY,
## inside,
## trhsloc=transit authority} 0.001769526 943 630.0595
## [2] {detailcm=PETIT LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002516368 1341 349.5777
## [3] {detailcm=PETIT LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001026438 547 333.0304
## [4] {rf_bulg,
## cs_bulge,
## arstmade,
## weapon} 0.001150286 613 306.9926
## [5] {detailcm=GRAND LARCENY,
## pct=18,
## inside,
## trhsloc=transit authority} 0.001595013 850 302.4293
## [6] {ac_other,
## pf_other,
## inside,
## trhsloc=transit authority} 0.001133398 604 297.7474
## [7] {cs_casng,
## pf_other,
## detailcm=PETIT LARCENY,
## trhsloc=transit authority} 0.001232851 657 267.1288
## [8] {pf_other,
## detailcm=GRAND LARCENY,
## inside,
## trhsloc=transit authority} 0.001163421 620 188.2330
## [9] {rf_othsw,
## arstmade,
## detailcm=CPW,
## weapon} 0.001097744 585 183.4118
## [10] {detailcm=GRAND LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001193445 636 175.9494
# Graph of the 50 highest-lift itemsets after the item removal.
plot(
  head(sort(itemsets, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.8)
)
Remove rf_bulg (always occurs together with cs_bulge)
# Drop the item by its exact label; a logical test avoids pmatch()'s partial
# matching and the NA index it yields when the item is no longer present.
trans <- trans[, which(!(colnames(trans) %in% "rf_bulg"))]
# Re-mine frequent itemsets of 2 to 4 items with a minimum support of 0.001.
itemsets <- apriori(trans, parameter = list(target = "frequent",
  supp=0.001, minlen = 2, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 4 frequent itemsets FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 532
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[239 item(s), 532911 transaction(s)] done [0.36s].
## sorting and recoding items ... [156 item(s)] done [0.08s].
## creating transaction tree ... done [0.55s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(target = "frequent", supp =
## 0.001, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.34s].
## writing ... [127176 set(s)] done [0.01s].
## creating S4 object ... done [0.20s].
# Recompute lift for the re-mined itemsets (argument name `transactions`
# written out instead of relying on partial matching of `trans =`).
quality(itemsets)$lift <- interestMeasure(
  itemsets,
  measure = "lift",
  transactions = trans
)
# Ten itemsets with the highest lift.
inspect(head(sort(itemsets, by = "lift"), n = 10))
## items support count lift
## [1] {pf_other,
## detailcm=PETIT LARCENY,
## inside,
## trhsloc=transit authority} 0.001769526 943 630.0595
## [2] {detailcm=PETIT LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002516368 1341 349.5777
## [3] {detailcm=PETIT LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001026438 547 333.0304
## [4] {detailcm=GRAND LARCENY,
## pct=18,
## inside,
## trhsloc=transit authority} 0.001595013 850 302.4293
## [5] {ac_other,
## pf_other,
## inside,
## trhsloc=transit authority} 0.001133398 604 297.7474
## [6] {cs_casng,
## pf_other,
## detailcm=PETIT LARCENY,
## trhsloc=transit authority} 0.001232851 657 267.1288
## [7] {pf_other,
## detailcm=GRAND LARCENY,
## inside,
## trhsloc=transit authority} 0.001163421 620 188.2330
## [8] {rf_othsw,
## arstmade,
## detailcm=CPW,
## weapon} 0.001097744 585 183.4118
## [9] {detailcm=GRAND LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001193445 636 175.9494
## [10] {detailcm=PETIT LARCENY,
## pct=14,
## no_uniform,
## trhsloc=transit authority} 0.001379217 735 167.8902
# Graph of the 50 highest-lift itemsets after the item removal.
plot(
  head(sort(itemsets, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.8)
)
Remove detailcm=CPW (always occurs together with weapon)
# Drop the item by its exact label; a logical test avoids pmatch()'s partial
# matching and the NA index it yields when the item is no longer present.
trans <- trans[, which(!(colnames(trans) %in% "detailcm=CPW"))]
# Re-mine frequent itemsets of 2 to 4 items with a minimum support of 0.001.
itemsets <- apriori(trans, parameter = list(target = "frequent",
  supp=0.001, minlen = 2, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 4 frequent itemsets FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 532
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[238 item(s), 532911 transaction(s)] done [0.36s].
## sorting and recoding items ... [155 item(s)] done [0.07s].
## creating transaction tree ... done [0.54s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(target = "frequent", supp =
## 0.001, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.20s].
## writing ... [117120 set(s)] done [0.01s].
## creating S4 object ... done [0.19s].
# Recompute lift for the re-mined itemsets (argument name `transactions`
# written out instead of relying on partial matching of `trans =`).
quality(itemsets)$lift <- interestMeasure(
  itemsets,
  measure = "lift",
  transactions = trans
)
# Ten itemsets with the highest lift.
inspect(head(sort(itemsets, by = "lift"), n = 10))
## items support count lift
## [1] {pf_other,
## detailcm=PETIT LARCENY,
## inside,
## trhsloc=transit authority} 0.001769526 943 630.0595
## [2] {detailcm=PETIT LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002516368 1341 349.5777
## [3] {detailcm=PETIT LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001026438 547 333.0304
## [4] {detailcm=GRAND LARCENY,
## pct=18,
## inside,
## trhsloc=transit authority} 0.001595013 850 302.4293
## [5] {ac_other,
## pf_other,
## inside,
## trhsloc=transit authority} 0.001133398 604 297.7474
## [6] {cs_casng,
## pf_other,
## detailcm=PETIT LARCENY,
## trhsloc=transit authority} 0.001232851 657 267.1288
## [7] {pf_other,
## detailcm=GRAND LARCENY,
## inside,
## trhsloc=transit authority} 0.001163421 620 188.2330
## [8] {detailcm=GRAND LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001193445 636 175.9494
## [9] {detailcm=PETIT LARCENY,
## pct=14,
## no_uniform,
## trhsloc=transit authority} 0.001379217 735 167.8902
## [10] {detailcm=GRAND LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002634586 1404 166.3092
# Graph of the 50 highest-lift itemsets after the item removal.
plot(
  head(sort(itemsets, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.8)
)
Remove rf_vcrim and rf_vcact (they always occur together with cs_vcrim)
# Drop both items in one step by their exact labels; a logical test avoids
# pmatch()'s partial matching and the NA index it yields when an item is no
# longer present.
trans <- trans[, which(!(colnames(trans) %in% c("rf_vcrim", "rf_vcact")))]
# Re-mine frequent itemsets of 2 to 4 items with a minimum support of 0.001.
itemsets <- apriori(trans, parameter = list(target = "frequent",
  supp=0.001, minlen = 2, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 4 frequent itemsets FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 532
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[236 item(s), 532911 transaction(s)] done [0.36s].
## sorting and recoding items ... [153 item(s)] done [0.07s].
## creating transaction tree ... done [0.54s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(target = "frequent", supp =
## 0.001, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.07s].
## writing ... [103189 set(s)] done [0.01s].
## creating S4 object ... done [0.19s].
# Recompute lift for the re-mined itemsets (argument name `transactions`
# written out instead of relying on partial matching of `trans =`).
quality(itemsets)$lift <- interestMeasure(
  itemsets,
  measure = "lift",
  transactions = trans
)
# Ten itemsets with the highest lift.
inspect(head(sort(itemsets, by = "lift"), n = 10))
## items support count lift
## [1] {pf_other,
## detailcm=PETIT LARCENY,
## inside,
## trhsloc=transit authority} 0.001769526 943 630.0595
## [2] {detailcm=PETIT LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002516368 1341 349.5777
## [3] {detailcm=PETIT LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001026438 547 333.0304
## [4] {detailcm=GRAND LARCENY,
## pct=18,
## inside,
## trhsloc=transit authority} 0.001595013 850 302.4293
## [5] {ac_other,
## pf_other,
## inside,
## trhsloc=transit authority} 0.001133398 604 297.7474
## [6] {cs_casng,
## pf_other,
## detailcm=PETIT LARCENY,
## trhsloc=transit authority} 0.001232851 657 267.1288
## [7] {pf_other,
## detailcm=GRAND LARCENY,
## inside,
## trhsloc=transit authority} 0.001163421 620 188.2330
## [8] {detailcm=GRAND LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001193445 636 175.9494
## [9] {detailcm=PETIT LARCENY,
## pct=14,
## no_uniform,
## trhsloc=transit authority} 0.001379217 735 167.8902
## [10] {detailcm=GRAND LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002634586 1404 166.3092
# Graph of the 50 highest-lift itemsets after the item removal.
plot(
  head(sort(itemsets, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.8)
)
Remove cs_drgtr (always occurs together with controlled substance offenses)
# Drop the item by its exact label; a logical test avoids pmatch()'s partial
# matching and the NA index it yields when the item is no longer present.
trans <- trans[, which(!(colnames(trans) %in% "cs_drgtr"))]
# Re-mine frequent itemsets of 2 to 4 items with a minimum support of 0.001.
itemsets <- apriori(trans, parameter = list(target = "frequent",
  supp=0.001, minlen = 2, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## NA 0.1 1 none FALSE TRUE 5 0.001 2
## maxlen target ext
## 4 frequent itemsets FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 532
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[235 item(s), 532911 transaction(s)] done [0.35s].
## sorting and recoding items ... [152 item(s)] done [0.07s].
## creating transaction tree ... done [0.56s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(target = "frequent", supp =
## 0.001, : Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.04s].
## writing ... [99704 set(s)] done [0.01s].
## creating S4 object ... done [0.19s].
# Recompute lift for the re-mined itemsets (argument name `transactions`
# written out instead of relying on partial matching of `trans =`).
quality(itemsets)$lift <- interestMeasure(
  itemsets,
  measure = "lift",
  transactions = trans
)
# Ten itemsets with the highest lift.
inspect(head(sort(itemsets, by = "lift"), n = 10))
## items support count lift
## [1] {pf_other,
## detailcm=PETIT LARCENY,
## inside,
## trhsloc=transit authority} 0.001769526 943 630.0595
## [2] {detailcm=PETIT LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002516368 1341 349.5777
## [3] {detailcm=PETIT LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001026438 547 333.0304
## [4] {detailcm=GRAND LARCENY,
## pct=18,
## inside,
## trhsloc=transit authority} 0.001595013 850 302.4293
## [5] {ac_other,
## pf_other,
## inside,
## trhsloc=transit authority} 0.001133398 604 297.7474
## [6] {cs_casng,
## pf_other,
## detailcm=PETIT LARCENY,
## trhsloc=transit authority} 0.001232851 657 267.1288
## [7] {pf_other,
## detailcm=GRAND LARCENY,
## inside,
## trhsloc=transit authority} 0.001163421 620 188.2330
## [8] {detailcm=GRAND LARCENY,
## pct=13,
## inside,
## trhsloc=transit authority} 0.001193445 636 175.9494
## [9] {detailcm=PETIT LARCENY,
## pct=14,
## no_uniform,
## trhsloc=transit authority} 0.001379217 735 167.8902
## [10] {detailcm=GRAND LARCENY,
## pct=14,
## inside,
## trhsloc=transit authority} 0.002634586 1404 166.3092
# Graph of the 50 highest-lift itemsets, drawn with slightly smaller labels.
plot(
  head(sort(itemsets, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.7)
)
# Mine association rules (no target given, so apriori() returns rules) with
# minimum support 0.001 and at most 4 items; the confidence threshold is left
# at its default.
r <- apriori(trans, parameter = list(supp=0.001, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.001 1
## maxlen target ext
## 4 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 532
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[235 item(s), 532911 transaction(s)] done [0.36s].
## sorting and recoding items ... [152 item(s)] done [0.07s].
## creating transaction tree ... done [0.55s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(supp = 0.001, maxlen = 4)):
## Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.01s].
## writing ... [10964 rule(s)] done [0.01s].
## creating S4 object ... done [0.17s].
# Show the ten rules with the highest lift.
inspect(head(sort(r, by="lift"), n=10))
## lhs rhs support confidence lift count
## [1] {rf_othsw,
## detailcm=CRIMINAL TRESPASS,
## trhsloc=housing authority} => {arstmade} 0.001208457 0.8224777 13.56359 644
## [2] {rf_furt,
## weapon,
## no_uniform} => {arstmade} 0.001955298 0.8140625 13.42481 1042
## [3] {ac_cgdir,
## weapon,
## no_uniform} => {arstmade} 0.001063968 0.8088445 13.33876 567
## [4] {cs_furtv,
## weapon,
## no_uniform} => {arstmade} 0.001837080 0.8077558 13.32081 979
## [5] {cs_casng,
## pf_other,
## detailcm=PETIT LARCENY} => {trhsloc=transit authority} 0.001232851 0.9969651 12.78747 657
## [6] {pf_other,
## detailcm=GRAND LARCENY,
## inside} => {trhsloc=transit authority} 0.001163421 0.9967846 12.78515 620
## [7] {pf_other,
## detailcm=PETIT LARCENY,
## inside} => {trhsloc=transit authority} 0.001769526 0.9957761 12.77222 943
## [8] {pf_other,
## detailcm=PETIT LARCENY,
## race=Black} => {trhsloc=transit authority} 0.001065844 0.9826990 12.60448 568
## [9] {pf_other,
## detailcm=PETIT LARCENY,
## typeofid=photo id} => {trhsloc=transit authority} 0.001110880 0.9817579 12.59241 592
## [10] {detailcm=GRAND LARCENY,
## pct=13,
## inside} => {trhsloc=transit authority} 0.001193445 0.9814815 12.58887 636
Default is a scatter plot. Dark (red) dots are interesting since they represent rules with high lift.
# Plot all mined rules using the default visualization.
plot(r)
Try interactive plot
#plot(r, interactive=TRUE)
# Relative support that corresponds to 100 transactions.
100/nrow(trans)
## [1] 0.0001876486
# Mine rules again with a lower minimum support of 0.00019 (the value above,
# rounded up) and at most 4 items.
r <- apriori(trans, parameter = list(supp=0.00019, maxlen=4))
## Apriori
##
## Parameter specification:
## confidence minval smax arem aval originalSupport maxtime support minlen
## 0.8 0.1 1 none FALSE TRUE 5 0.00019 1
## maxlen target ext
## 4 rules FALSE
##
## Algorithmic control:
## filter tree heap memopt load sort verbose
## 0.1 TRUE TRUE FALSE TRUE 2 TRUE
##
## Absolute minimum support count: 101
##
## set item appearances ...[0 item(s)] done [0.00s].
## set transactions ...[235 item(s), 532911 transaction(s)] done [0.37s].
## sorting and recoding items ... [171 item(s)] done [0.07s].
## creating transaction tree ... done [0.55s].
## checking subsets of size 1 2 3 4
## Warning in apriori(trans, parameter = list(supp = 0.00019, maxlen =
## 4)): Mining stopped (maxlen reached). Only patterns up to a length of 4
## returned!
## done [2.25s].
## writing ... [32331 rule(s)] done [0.03s].
## creating S4 object ... done [0.18s].
# Show the ten rules with the highest lift from the lower-support run.
inspect(head(sort(r, by="lift"), n=10))
## lhs rhs support confidence lift count
## [1] {ac_incid,
## detailcm=TERRORISM,
## no_uniform} => {pct=1} 0.0001989075 0.9464286 165.20216 106
## [2] {cs_casng,
## detailcm=TERRORISM,
## no_uniform} => {pct=1} 0.0002214253 0.8027211 140.11756 118
## [3] {cs_furtv,
## detailcm=FRAUDULENT ACCOSTING,
## typeofid=verbal id} => {pct=14} 0.0002645845 0.9400000 56.00183 141
## [4] {ac_evasv,
## detailcm=FRAUDULENT ACCOSTING,
## typeofid=verbal id} => {pct=14} 0.0002101664 0.9105691 54.24844 112
## [5] {cs_furtv,
## ac_incid,
## detailcm=FRAUDULENT ACCOSTING} => {pct=14} 0.0003190026 0.8585859 51.15146 170
## [6] {ac_incid,
## detailcm=FRAUDULENT ACCOSTING,
## typeofid=verbal id} => {pct=14} 0.0003283850 0.8536585 50.85791 175
## [7] {cs_furtv,
## ac_evasv,
## detailcm=FRAUDULENT ACCOSTING} => {pct=14} 0.0002608315 0.8527607 50.80442 139
## [8] {detailcm=FRAUDULENT ACCOSTING,
## typeofid=verbal id} => {pct=14} 0.0004428507 0.8368794 49.85827 236
## [9] {cs_furtv,
## detailcm=FRAUDULENT ACCOSTING} => {pct=14} 0.0004315918 0.8333333 49.64701 230
## [10] {cs_furtv,
## detailcm=FRAUDULENT ACCOSTING,
## no_uniform} => {pct=14} 0.0001914016 0.8292683 49.40483 102
# Plot the rules from the lower-support run using the default visualization.
plot(r)
Try interactive plot
#plot(r, interactive=TRUE)
# Keep only the rules whose item set contains the item "female",
# then show how many rules remain.
r_female <- subset(
  r,
  subset = items %in% "female"
)
print(r_female)
## set of 1472 rules
# Print the ten "female" rules with the highest lift.
inspect(
  head(
    sort(r_female, by = "lift"),
    n = 10
  )
)
## lhs rhs support confidence lift count
## [1] {rf_othsw,
## detailcm=CRIMINAL TRESPASS,
## female} => {arstmade} 0.0002420667 0.8835616 14.57093 129
## [2] {pf_other,
## detailcm=PETIT LARCENY,
## female} => {trhsloc=transit authority} 0.0002552021 0.9645390 12.37156 136
## [3] {detailcm=TERRORISM,
## female,
## inside} => {trhsloc=transit authority} 0.0002120429 0.9416667 12.07819 113
## [4] {cs_cloth,
## detailcm=PROSTITUTION,
## height=[40,68)} => {female} 0.0001951545 0.8455285 11.83835 104
## [5] {pf_other,
## female,
## inside} => {trhsloc=transit authority} 0.0004935158 0.8976109 11.51311 263
## [6] {cs_casng,
## pf_other,
## female} => {trhsloc=transit authority} 0.0003302615 0.8888889 11.40124 176
## [7] {pct=13,
## female,
## inside} => {trhsloc=transit authority} 0.0004878863 0.8813559 11.30462 260
## [8] {cs_casng,
## pct=13,
## female} => {trhsloc=transit authority} 0.0002195489 0.8478261 10.87455 117
## [9] {detailcm=CRIMINAL TRESPASS,
## pct=14,
## female} => {trhsloc=transit authority} 0.0002064135 0.8461538 10.85310 110
## [10] {detailcm=GRAND LARCENY,
## female,
## inside} => {trhsloc=transit authority} 0.0020697640 0.8262172 10.59739 1103
# Frequency of the 30 most common items appearing in the "female" rules.
itemFrequencyPlot(
  items(r_female),
  topN = 30,
  cex.names = 0.6
)

# Graph view of the 50 "female" rules with the highest lift.
plot(
  head(sort(r_female, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.7)
)

# Graph view of the 50 "female" rules with the highest support.
plot(
  head(sort(r_female, by = "support"), n = 50),
  method = "graph",
  control = list(cex = 0.7)
)
# Keep only the rules with an item whose label partially matches "sumissue"
# (%pin% does partial matching), then show how many rules remain.
r_summon <- subset(
  r,
  subset = items %pin% "sumissue"
)
print(r_summon)
## set of 1178 rules
# Print the ten "sumissue" rules with the highest lift.
inspect(
  head(
    sort(r_summon, by = "lift"),
    n = 10
  )
)
## lhs rhs support confidence lift count
## [1] {ac_proxm,
## sumissue,
## pct=20} => {detailcm=GRAND LARCENY} 0.0001932781 0.8373984 14.997271 103
## [2] {sumissue,
## detailcm=GRAND LARCENY,
## inside} => {trhsloc=transit authority} 0.0005779577 0.9418960 12.081129 308
## [3] {sumissue,
## no_uniform,
## inside} => {trhsloc=transit authority} 0.0011427799 0.8969072 11.504085 609
## [4] {sumissue,
## detailcm=GRAND LARCENY,
## no_uniform} => {trhsloc=transit authority} 0.0005010217 0.8929766 11.453669 267
## [5] {cs_other,
## sumissue,
## pct=23} => {trhsloc=housing authority} 0.0002814729 0.8928571 6.149605 150
## [6] {ac_proxm,
## sumissue,
## pct=23} => {trhsloc=housing authority} 0.0003208791 0.8592965 5.918454 171
## [7] {ac_other,
## sumissue,
## detailcm=GRAND LARCENY AUTO} => {cs_other} 0.0003396440 0.8227273 5.262569 181
## [8] {cs_bulge,
## sumissue,
## trhsloc=transit authority} => {inside} 0.0002120429 1.0000000 4.612590 113
## [9] {sumissue,
## race=White,
## trhsloc=transit authority} => {inside} 0.0002814729 1.0000000 4.612590 150
## [10] {ac_inves,
## sumissue,
## trhsloc=transit authority} => {inside} 0.0002345607 1.0000000 4.612590 125
# Frequency of the 20 most common items appearing in the "sumissue" rules.
itemFrequencyPlot(
  items(r_summon),
  topN = 20
)

# Graph view of the 50 "sumissue" rules with the highest lift.
plot(
  head(sort(r_summon, by = "lift"), n = 50),
  method = "graph",
  control = list(cex = 0.7)
)

# Graph view of the 50 "sumissue" rules with the highest support.
plot(
  head(sort(r_summon, by = "support"), n = 50),
  method = "graph",
  control = list(cex = 0.7)
)
# Keep only the rules whose item set contains "detailcm=TERRORISM",
# then show how many rules remain.
r_terrorism <- subset(
  r,
  subset = items %in% "detailcm=TERRORISM"
)
print(r_terrorism)
## set of 83 rules
# Print every terrorism-related rule, ordered by lift (no head() cut-off).
inspect(
  sort(r_terrorism, by = "lift")
)
## lhs rhs support confidence lift count
## [1] {ac_incid,
## detailcm=TERRORISM,
## no_uniform} => {pct=1} 0.0001989075 0.9464286 165.202161 106
## [2] {cs_casng,
## detailcm=TERRORISM,
## no_uniform} => {pct=1} 0.0002214253 0.8027211 140.117556 118
## [3] {detailcm=TERRORISM,
## pct=14,
## inside} => {trhsloc=transit authority} 0.0003302615 1.0000000 12.826394 176
## [4] {cs_other,
## detailcm=TERRORISM,
## pct=14} => {trhsloc=transit authority} 0.0002739669 0.9668874 12.401679 146
## [5] {detailcm=TERRORISM,
## pct=14,
## typeofid=photo id} => {trhsloc=transit authority} 0.0002702140 0.9664430 12.395978 144
## [6] {detailcm=TERRORISM,
## pct=14} => {trhsloc=transit authority} 0.0003321380 0.9619565 12.338433 177
## [7] {detailcm=TERRORISM,
## race=Black,
## inside} => {trhsloc=transit authority} 0.0002214253 0.9440000 12.108116 118
## [8] {detailcm=TERRORISM,
## female,
## inside} => {trhsloc=transit authority} 0.0002120429 0.9416667 12.078187 113
## [9] {detailcm=TERRORISM,
## inside,
## height=[71,90]} => {trhsloc=transit authority} 0.0002026605 0.9391304 12.045657 108
## [10] {detailcm=TERRORISM,
## inside,
## height=[40,68)} => {trhsloc=transit authority} 0.0004278388 0.9268293 11.887877 228
## [11] {cs_other,
## detailcm=TERRORISM,
## inside} => {trhsloc=transit authority} 0.0007280765 0.9260143 11.877424 388
## [12] {detailcm=TERRORISM,
## typeofid=photo id,
## inside} => {trhsloc=transit authority} 0.0007393355 0.9227166 11.835127 394
## [13] {detailcm=TERRORISM,
## race=White,
## inside} => {trhsloc=transit authority} 0.0003058672 0.9209040 11.811877 163
## [14] {detailcm=TERRORISM,
## inside} => {trhsloc=transit authority} 0.0008913308 0.9187621 11.784404 475
## [15] {detailcm=TERRORISM,
## inside,
## height=[68,71)} => {trhsloc=transit authority} 0.0002608315 0.8910256 11.428646 139
## [16] {detailcm=TERRORISM,
## female,
## inside} => {cs_other} 0.0002082899 0.9250000 5.916756 111
## [17] {detailcm=TERRORISM,
## female,
## trhsloc=transit authority} => {cs_other} 0.0001970310 0.9130435 5.840276 105
## [18] {detailcm=TERRORISM,
## typeofid=photo id,
## female} => {cs_other} 0.0002289313 0.8905109 5.696147 122
## [19] {detailcm=TERRORISM,
## female,
## height=[40,68)} => {cs_other} 0.0002401902 0.8888889 5.685771 128
## [20] {detailcm=TERRORISM,
## female} => {cs_other} 0.0002664610 0.8765432 5.606802 142
## [21] {detailcm=TERRORISM,
## trhsloc=transit authority,
## height=[40,68)} => {cs_other} 0.0003734207 0.8614719 5.510399 199
## [22] {detailcm=TERRORISM,
## race=White,
## trhsloc=transit authority} => {cs_other} 0.0002645845 0.8545455 5.466094 141
## [23] {detailcm=TERRORISM,
## inside,
## height=[40,68)} => {cs_other} 0.0003940620 0.8536585 5.460421 210
## [24] {detailcm=TERRORISM,
## race=White,
## inside} => {cs_other} 0.0002833494 0.8531073 5.456895 151
## [25] {detailcm=TERRORISM,
## pct=14,
## typeofid=photo id} => {cs_other} 0.0002364372 0.8456376 5.409115 126
## [26] {detailcm=TERRORISM,
## typeofid=photo id,
## trhsloc=transit authority} => {cs_other} 0.0006304993 0.8400000 5.373054 336
## [27] {detailcm=TERRORISM,
## typeofid=photo id,
## inside} => {cs_other} 0.0006661525 0.8313817 5.317927 355
## [28] {detailcm=TERRORISM,
## pct=14,
## trhsloc=transit authority} => {cs_other} 0.0002739669 0.8248588 5.276203 146
## [29] {detailcm=TERRORISM,
## pct=14,
## inside} => {cs_other} 0.0002720905 0.8238636 5.269838 145
## [30] {detailcm=TERRORISM,
## pct=14} => {cs_other} 0.0002833494 0.8206522 5.249296 151
## [31] {detailcm=TERRORISM,
## inside,
## trhsloc=transit authority} => {cs_other} 0.0007280765 0.8168421 5.224925 388
## [32] {detailcm=TERRORISM,
## trhsloc=transit authority} => {cs_other} 0.0007374590 0.8153527 5.215398 393
## [33] {detailcm=TERRORISM,
## inside} => {cs_other} 0.0007862476 0.8104449 5.184005 419
## [34] {detailcm=TERRORISM,
## race=Black,
## trhsloc=transit authority} => {inside} 0.0002214253 1.0000000 4.612590 118
## [35] {detailcm=TERRORISM,
## pct=14,
## trhsloc=transit authority} => {inside} 0.0003302615 0.9943503 4.586530 176
## [36] {detailcm=TERRORISM,
## trhsloc=transit authority,
## height=[68,71)} => {inside} 0.0002608315 0.9928571 4.579643 139
## [37] {detailcm=TERRORISM,
## race=White,
## trhsloc=transit authority} => {inside} 0.0003058672 0.9878788 4.556680 163
## [38] {cs_other,
## detailcm=TERRORISM,
## trhsloc=transit authority} => {inside} 0.0007280765 0.9872774 4.553906 388
## [39] {detailcm=TERRORISM,
## trhsloc=transit authority,
## height=[40,68)} => {inside} 0.0004278388 0.9870130 4.552686 228
## [40] {detailcm=TERRORISM,
## trhsloc=transit authority} => {inside} 0.0008913308 0.9854772 4.545602 475
## [41] {detailcm=TERRORISM,
## typeofid=photo id,
## trhsloc=transit authority} => {inside} 0.0007393355 0.9850000 4.543401 394
## [42] {detailcm=TERRORISM,
## female,
## trhsloc=transit authority} => {inside} 0.0002120429 0.9826087 4.532371 113
## [43] {detailcm=TERRORISM,
## trhsloc=transit authority,
## height=[71,90]} => {inside} 0.0002026605 0.9729730 4.487926 108
## [44] {cs_other,
## detailcm=TERRORISM,
## pct=14} => {inside} 0.0002720905 0.9602649 4.429309 145
## [45] {detailcm=TERRORISM,
## pct=14,
## typeofid=photo id} => {inside} 0.0002683375 0.9597315 4.426848 143
## [46] {detailcm=TERRORISM,
## pct=14} => {inside} 0.0003302615 0.9565217 4.412043 176
## [47] {detailcm=TERRORISM,
## female,
## inside} => {height=[40,68)} 0.0002082899 0.9250000 2.613846 111
## [48] {detailcm=TERRORISM,
## female,
## trhsloc=transit authority} => {height=[40,68)} 0.0001989075 0.9217391 2.604632 106
## [49] {detailcm=TERRORISM,
## typeofid=photo id,
## female} => {height=[40,68)} 0.0002364372 0.9197080 2.598892 126
## [50] {cs_other,
## detailcm=TERRORISM,
## female} => {height=[40,68)} 0.0002401902 0.9014085 2.547182 128
## [51] {detailcm=TERRORISM,
## female} => {height=[40,68)} 0.0002702140 0.8888889 2.511804 144
## [52] {detailcm=TERRORISM,
## female,
## height=[40,68)} => {typeofid=photo id} 0.0002364372 0.8750000 1.612754 126
## [53] {cs_other,
## detailcm=TERRORISM,
## female} => {typeofid=photo id} 0.0002289313 0.8591549 1.583549 122
## [54] {cs_other,
## detailcm=TERRORISM,
## race=Black} => {typeofid=photo id} 0.0002026605 0.8571429 1.579840 108
## [55] {cs_other,
## detailcm=TERRORISM,
## trhsloc=transit authority} => {typeofid=photo id} 0.0006304993 0.8549618 1.575821 336
## [56] {detailcm=TERRORISM,
## female,
## inside} => {typeofid=photo id} 0.0001914016 0.8500000 1.566675 102
## [57] {cs_other,
## detailcm=TERRORISM,
## inside} => {typeofid=photo id} 0.0006661525 0.8472554 1.561616 355
## [58] {detailcm=TERRORISM,
## female} => {typeofid=photo id} 0.0002570786 0.8456790 1.558711 137
## [59] {detailcm=TERRORISM,
## race=Asian/Pacific Islander} => {typeofid=photo id} 0.0003565323 0.8444444 1.556435 190
## [60] {cs_other,
## detailcm=TERRORISM,
## height=[40,68)} => {typeofid=photo id} 0.0004822569 0.8398693 1.548003 257
## [61] {detailcm=TERRORISM,
## trhsloc=transit authority,
## height=[40,68)} => {typeofid=photo id} 0.0003640383 0.8398268 1.547925 194
## [62] {cs_other,
## detailcm=TERRORISM,
## race=Asian/Pacific Islander} => {typeofid=photo id} 0.0002026605 0.8372093 1.543100 108
## [63] {cs_other,
## detailcm=TERRORISM,
## pct=14} => {typeofid=photo id} 0.0002364372 0.8344371 1.537990 126
## [64] {detailcm=TERRORISM,
## race=Black,
## inside} => {typeofid=photo id} 0.0001951545 0.8320000 1.533498 104
## [65] {detailcm=TERRORISM,
## trhsloc=transit authority} => {typeofid=photo id} 0.0007505944 0.8298755 1.529583 400
## [66] {detailcm=TERRORISM,
## inside,
## trhsloc=transit authority} => {typeofid=photo id} 0.0007393355 0.8294737 1.528842 394
## [67] {cs_other,
## detailcm=TERRORISM,
## height=[71,90]} => {typeofid=photo id} 0.0002645845 0.8294118 1.528728 141
## [68] {detailcm=TERRORISM,
## inside,
## height=[40,68)} => {typeofid=photo id} 0.0003828031 0.8292683 1.528464 204
## [69] {detailcm=TERRORISM,
## inside} => {typeofid=photo id} 0.0008012595 0.8259188 1.522290 427
## [70] {detailcm=TERRORISM,
## height=[40,68)} => {typeofid=photo id} 0.0006792879 0.8246014 1.519862 362
## [71] {cs_other,
## detailcm=TERRORISM} => {typeofid=photo id} 0.0010864854 0.8212766 1.513734 579
## [72] {detailcm=TERRORISM,
## race=Black} => {typeofid=photo id} 0.0002795964 0.8186813 1.508950 149
## [73] {detailcm=TERRORISM,
## inside,
## height=[68,71)} => {typeofid=photo id} 0.0002383137 0.8141026 1.500511 127
## [74] {detailcm=TERRORISM,
## pct=14,
## trhsloc=transit authority} => {typeofid=photo id} 0.0002702140 0.8135593 1.499510 144
## [75] {detailcm=TERRORISM,
## pct=14,
## inside} => {typeofid=photo id} 0.0002683375 0.8125000 1.497557 143
## [76] {detailcm=TERRORISM,
## race=White,
## trhsloc=transit authority} => {typeofid=photo id} 0.0002514491 0.8121212 1.496859 134
## [77] {detailcm=TERRORISM,
## pct=14} => {typeofid=photo id} 0.0002795964 0.8097826 1.492549 149
## [78] {cs_other,
## detailcm=TERRORISM,
## race=White} => {typeofid=photo id} 0.0003978150 0.8091603 1.491402 212
## [79] {detailcm=TERRORISM,
## race=White,
## inside} => {typeofid=photo id} 0.0002683375 0.8079096 1.489096 143
## [80] {detailcm=TERRORISM,
## trhsloc=transit authority,
## height=[68,71)} => {typeofid=photo id} 0.0002120429 0.8071429 1.487683 113
## [81] {cs_other,
## ac_other,
## detailcm=TERRORISM} => {typeofid=photo id} 0.0003114967 0.8058252 1.485255 166
## [82] {detailcm=TERRORISM} => {typeofid=photo id} 0.0016400487 0.8010999 1.476545 874
## [83] {detailcm=TERRORISM,
## race=Asian/Pacific Islander,
## pct=1} => {typeofid=photo id} 0.0002026605 0.8000000 1.474518 108
# Frequency of the 20 most common items appearing in the terrorism rules.
itemFrequencyPlot(
  items(r_terrorism),
  topN = 20,
  cex.names = 0.6
)

# Graph view of the 25 terrorism rules with the highest lift.
plot(
  head(sort(r_terrorism, by = "lift"), n = 25),
  method = "graph",
  control = list(cex = 0.7)
)
# Keep only the rules with an item whose label partially matches "minor"
# (%pin% does partial matching), then show how many rules remain.
r_minor <- subset(
  r,
  subset = items %pin% "minor"
)
print(r_minor)
## set of 1052 rules
# Print the twenty "minor" rules with the highest lift.
inspect(
  head(
    sort(r_minor, by = "lift"),
    n = 20
  )
)
## lhs rhs support confidence lift count
## [1] {ac_rept,
## weapon,
## minor} => {arstmade} 0.0002439432 0.8609272 14.197665 130
## [2] {cs_descr,
## weapon,
## minor} => {arstmade} 0.0002552021 0.8395062 13.844409 136
## [3] {pf_hands,
## weapon,
## minor} => {arstmade} 0.0002795964 0.8370787 13.804376 149
## [4] {weapon,
## no_uniform,
## minor} => {arstmade} 0.0004578626 0.8187919 13.502808 244
## [5] {ac_proxm,
## weapon,
## minor} => {arstmade} 0.0003058672 0.8029557 13.241650 163
## [6] {pf_other,
## detailcm=PETIT LARCENY,
## minor} => {trhsloc=transit authority} 0.0002758434 0.9865772 12.654227 147
## [7] {pct=13,
## inside,
## minor} => {trhsloc=transit authority} 0.0002946083 0.9345238 11.986570 157
## [8] {pf_other,
## inside,
## minor} => {trhsloc=transit authority} 0.0005817106 0.9309309 11.940487 310
## [9] {pct=18,
## inside,
## minor} => {trhsloc=transit authority} 0.0002552021 0.9251701 11.866595 136
## [10] {detailcm=GRAND LARCENY,
## inside,
## minor} => {trhsloc=transit authority} 0.0040138034 0.9113762 11.689670 2139
## [11] {detailcm=GRAND LARCENY,
## pct=40,
## minor} => {trhsloc=transit authority} 0.0002401902 0.8707483 11.168560 128
## [12] {cs_casng,
## pf_other,
## minor} => {trhsloc=transit authority} 0.0003659148 0.8666667 11.116208 195
## [13] {pct=14,
## inside,
## minor} => {trhsloc=transit authority} 0.0004578626 0.8299320 10.645034 244
## [14] {pct=14,
## no_uniform,
## minor} => {trhsloc=transit authority} 0.0002645845 0.8294118 10.638362 141
## [15] {pct=123,
## typeofid=photo id,
## minor} => {race=White} 0.0002871023 0.9161677 9.693758 153
## [16] {cs_casng,
## pct=123,
## minor} => {race=White} 0.0002139194 0.8906250 9.423497 114
## [17] {ac_incid,
## pct=123,
## minor} => {race=White} 0.0002420667 0.8896552 9.413236 129
## [18] {cs_furtv,
## pct=123,
## minor} => {race=White} 0.0002439432 0.8666667 9.170000 130
## [19] {pct=123,
## minor,
## height=[68,71)} => {race=White} 0.0002045370 0.8582677 9.081132 109
## [20] {ac_time,
## pct=123,
## minor} => {race=White} 0.0002007840 0.8425197 8.914506 107
# Frequency of the 30 most common items appearing in the "minor" rules.
itemFrequencyPlot(
  items(r_minor),
  topN = 30,
  cex.names = 0.6
)

# Graph view of the 30 "minor" rules with the highest lift.
plot(
  head(sort(r_minor, by = "lift"), n = 30),
  method = "graph",
  control = list(cex = 0.7)
)