Classification Examples for the Dallas Police Data

Load the cleaned data (my data is not really clean!).

# Restore the objects stored in data_clean.rda into the current environment.
# NOTE(review): the rest of the script expects this to provide a data frame
# named `data_clean` -- confirm against the file's contents.
load(file = "data_clean.rda")

Create the class variable: an incident counts as an arrest when its Status is one of the three values listed below.

# Logical class variable: TRUE when Status matches one of the three values
# below. Element-wise `==` is kept so a missing Status yields NA, not FALSE.
data_clean$arrest <- with(
  data_clean,
  Status == "Clear by Arrest" |
    Status == "Clear by Exceptional Arrest" |
    Status == "Returned for Correction"
)


library(rpart)
library(rpart.plot)

# First attempt: model arrest on every other column of data_clean and plot
# the fitted tree. arrest is still a logical vector at this point.
m <- rpart(
  formula = arrest ~ .,
  data = data_clean
)
rpart.plot(m)

This created a regression tree! We need to make the class variable a factor.

# Convert the class variable to a factor, then refit on all columns.
data_clean[["arrest"]] <- as.factor(data_clean[["arrest"]])
m <- rpart(
  formula = arrest ~ .,
  data = data_clean
)
# extra = 2 adds the per-node classification rate (correct / total).
rpart.plot(m, extra = 2)

We cannot use Status as a predictor, because the class variable arrest was derived directly from it.

# Refit with an explicit predictor list that leaves out Status.
m <- rpart(
  formula = arrest ~ UCROffDesc + PCClass + CompRace + CompSex + CompAge +
    ZipCode + City,
  data = data_clean
)
# cex shrinks the label text so the tree stays readable.
rpart.plot(m, cex = 0.8, extra = 2)

Try again with fewer features.

# Smaller model: keep only PCClass and the three Comp* columns as predictors.
m <- rpart(
  formula = arrest ~ PCClass + CompRace + CompSex + CompAge,
  data = data_clean
)
rpart.plot(m, cex = 0.8, extra = 2)

Increase the tree's complexity (this will overfit the data!).

# Same four predictors, but with a much smaller complexity parameter (cp) and
# a lower minimum node size for attempting a split (minsplit), so the tree
# is allowed to grow far deeper.
m <- rpart(
  formula = arrest ~ PCClass + CompRace + CompSex + CompAge,
  data = data_clean,
  control = rpart.control(cp = 0.0001, minsplit = 10)
)
# Smaller text (cex = 0.5) because this tree has many more nodes to label.
rpart.plot(m, cex = 0.5, extra = 2)