# Load the cleaned data (note: my data is not really clean!).
# Restore the objects stored in "data_clean.rda" into the current environment.
# The rest of the script reads `data_clean`, presumably defined by this file.
load(file = "data_clean.rda")
# Create the class variable.
# Flag each row whose Status equals one of the listed labels. The element-wise
# `==` comparisons are OR-ed together, so a missing Status yields NA.
# NOTE(review): "Returned for Correction" is counted as an arrest -- confirm.
data_clean$arrest <- Reduce(
  `|`,
  lapply(
    c(
      "Clear by Arrest",
      "Clear by Exceptional Arrest",
      "Returned for Correction"
    ),
    function(label) data_clean$Status == label
  )
)
library(rpart)
library(rpart.plot)
# Fit a tree predicting `arrest` from every other column of `data_clean`,
# then draw it with rpart.plot's default settings.
m <- rpart(formula = arrest ~ ., data = data_clean)
rpart.plot(x = m)
# This created a regression tree, because the class variable was logical
# rather than a factor! We need to make the class variable a factor.
# Turn the class variable into a factor, refit on all other columns, and plot.
# Per the rpart.plot documentation, extra = 2 labels each node of a class tree
# with its classification rate (correct / total observations in the node).
data_clean[["arrest"]] <- as.factor(data_clean[["arrest"]])
m <- rpart(formula = arrest ~ ., data = data_clean)
rpart.plot(x = m, extra = 2)
# We cannot use Status as a predictor, because the class variable was derived
# from it.
# Refit using an explicit list of predictors (Status is not among them) and
# plot with a smaller text size so the labels fit.
m <- rpart(
  formula = arrest ~ UCROffDesc + PCClass + CompRace + CompSex + CompAge +
    ZipCode + City,
  data = data_clean
)
rpart.plot(x = m, extra = 2, cex = 0.8)
# Try again with fewer features.
# Refit using only four predictors and plot the resulting tree.
m <- rpart(
  formula = arrest ~ PCClass + CompRace + CompSex + CompAge,
  data = data_clean
)
rpart.plot(x = m, extra = 2, cex = 0.8)
# Increase the tree complexity (this will overfit the data!).
# Same four predictors, but with a very small complexity parameter (cp) and a
# low minimum node size for splitting (minsplit), passed via rpart.control.
# The plot uses a smaller text size than the earlier plots.
m <- rpart(
  formula = arrest ~ PCClass + CompRace + CompSex + CompAge,
  data = data_clean,
  control = rpart.control(cp = 0.0001, minsplit = 10)
)
rpart.plot(x = m, extra = 2, cex = 0.5)