Can we predict if a university will improve its ranking over time?
# Load the Shanghai ranking table from the working directory.
# stringsAsFactors = TRUE is requested explicitly: since R 4.0.0 read.csv()
# keeps strings as character by default, whereas the summary printed below
# shows per-level counts for world_rank, university_name and national_rank,
# i.e. those columns were read as factors.
shanghai <- read.csv("shanghaiData.csv", stringsAsFactors = TRUE)
summary(shanghai)
## world_rank university_name national_rank
## 301-400: 600 Queen's University : 13 1 : 343
## 401-500: 600 University of Maryland, Baltimore : 12 2 : 206
## 201-300: 584 Aarhus University : 11 3 : 133
## 151-200: 300 Boston University : 11 4 : 122
## 201-302: 204 Brown University : 11 1-2 : 86
## 101-150: 200 California Institute of Technology: 11 2-3 : 84
## (Other):2409 (Other) :4828 (Other):3923
## total_score alumni award hici
## Min. : 23.50 Min. : 0.000 Min. : 0.000 Min. : 0.00
## 1st Qu.: 27.40 1st Qu.: 0.000 1st Qu.: 0.000 1st Qu.: 7.30
## Median : 31.30 Median : 0.000 Median : 0.000 Median : 12.60
## Mean : 36.38 Mean : 9.162 Mean : 7.692 Mean : 16.22
## 3rd Qu.: 41.80 3rd Qu.: 15.600 3rd Qu.: 13.400 3rd Qu.: 21.70
## Max. :100.00 Max. :100.000 Max. :100.000 Max. :100.00
## NA's :3796 NA's :1 NA's :2 NA's :2
## ns pub pcp year
## Min. : 0.00 Min. : 7.30 Min. : 8.30 Min. :2005
## 1st Qu.: 8.00 1st Qu.: 28.90 1st Qu.: 15.60 1st Qu.:2007
## Median : 12.80 Median : 36.00 Median : 19.00 Median :2009
## Mean : 16.08 Mean : 38.25 Mean : 21.24 Mean :2010
## 3rd Qu.: 19.80 3rd Qu.: 45.30 3rd Qu.: 24.50 3rd Qu.:2012
## Max. :100.00 Max. :100.00 Max. :100.00 Max. :2015
## NA's :22 NA's :2 NA's :2
Clean the data: rank ranges such as "101-150" are reduced to their lower bound and converted to numbers.
# Convert a rank column to numeric.
#
# Ranks are stored either as a single position ("73") or as a range
# ("101-150"). Everything from the first "-" onwards is removed, so a range
# collapses to its lower bound before the numeric conversion.
#
# x: a factor or character vector of ranks.
# Returns a numeric vector of the same length as x.
rank_lower_bound <- function(x) {
  # as.character() first, so that a factor yields its labels, not its codes
  as.numeric(sub("-.*", "", as.character(x)))
}

rnk <- rank_lower_bound(shanghai$world_rank)
summary(rnk)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.0 101.0 201.0 207.6 301.0 403.0
shanghai$world_rank <- rnk
rnk <- rank_lower_bound(shanghai$national_rank)
summary(rnk)
## Min. 1st Qu. Median Mean 3rd Qu. Max. NA's
## 1 4 10 28 35 141 1
shanghai$national_rank <- rnk
split the data by university
# One data frame per university, keyed by university name.
l <- split(shanghai, shanghai$university_name)
# Number of rows (ranking entries) available for each university.
# vapply() guarantees an integer vector even if the list is empty, which
# sapply() does not.
years <- vapply(l, nrow, integer(1))
# Distribution of "rows per university".
table(years)
## years
## 1 2 3 4 5 6 7 8 9 10 11 12 13
## 56 61 21 23 17 24 42 39 126 158 90 1 1
pick two years for comparison
# For every university, put its 2005 entry and its 2015 entry side by side.
# all = TRUE keeps universities that appear in only one of the two years;
# the columns of the missing year are filled with NA.
r <- lapply(l, FUN = function(x) {
  d_2005 <- x[x$year == 2005, ]
  d_2015 <- x[x$year == 2015, ]
  # The argument is spelled `suffixes`; the former `suffix =` only worked
  # through partial argument matching.
  merge(d_2005, d_2015,
    by = "university_name",
    all = TRUE,
    suffixes = c("_Y2005", "_Y2015")
  )
})
# Stack the per-university results into a single data frame.
r <- do.call(rbind, r)
head(r)
## university_name
## Aalborg University Aalborg University
## Aalto University Aalto University
## Aarhus University Aarhus University
## Aix Marseille University Aix Marseille University
## Aristotle University of Thessaloniki Aristotle University of Thessaloniki
## Arizona State University Arizona State University
## world_rank_Y2005 national_rank_Y2005
## Aalborg University NA NA
## Aalto University 401 3
## Aarhus University 101 2
## Aix Marseille University NA NA
## Aristotle University of Thessaloniki 301 2
## Arizona State University NA NA
## total_score_Y2005 alumni_Y2005
## Aalborg University NA NA
## Aalto University NA 0.0
## Aarhus University NA 15.4
## Aix Marseille University NA NA
## Aristotle University of Thessaloniki NA 0.0
## Arizona State University NA NA
## award_Y2005 hici_Y2005 ns_Y2005
## Aalborg University NA NA NA
## Aalto University 0.0 0.0 11.4
## Aarhus University 19.3 7.9 22.3
## Aix Marseille University NA NA NA
## Aristotle University of Thessaloniki 0.0 7.9 2.0
## Arizona State University NA NA NA
## pub_Y2005 pcp_Y2005 year_Y2005
## Aalborg University NA NA NA
## Aalto University 28.6 13.1 2005
## Aarhus University 41.6 22.4 2005
## Aix Marseille University NA NA NA
## Aristotle University of Thessaloniki 34.9 15.2 2005
## Arizona State University NA NA NA
## world_rank_Y2015 national_rank_Y2015
## Aalborg University 301 5
## Aalto University 401 4
## Aarhus University 73 2
## Aix Marseille University 101 5
## Aristotle University of Thessaloniki 401 2
## Arizona State University 93 47
## total_score_Y2015 alumni_Y2015
## Aalborg University NA 0.0
## Aalto University NA 0.0
## Aarhus University 27.3 11.5
## Aix Marseille University NA 13.6
## Aristotle University of Thessaloniki NA 0.0
## Arizona State University 24.5 0.0
## award_Y2015 hici_Y2015 ns_Y2015
## Aalborg University 0.0 11.2 4.6
## Aalto University 0.0 0.0 10.4
## Aarhus University 22.1 12.3 25.8
## Aix Marseille University 0.0 15.2 20.3
## Aristotle University of Thessaloniki 0.0 4.9 1.5
## Arizona State University 20.0 22.2 25.5
## pub_Y2015 pcp_Y2015 year_Y2015
## Aalborg University 30.4 16.8 2015
## Aalto University 33.4 17.0 2015
## Aarhus University 51.8 31.0 2015
## Aix Marseille University 50.1 27.6 2015
## Aristotle University of Thessaloniki 34.0 16.8 2015
## Arizona State University 42.6 19.1 2015
calculate improvement
# Rank change between the two years: a positive value means the 2015 rank
# number is smaller than the 2005 one. NA when either year is missing.
improved <- r[["world_rank_Y2005"]] - r[["world_rank_Y2015"]]
hist(improved)
hist(improved, breaks = 100)
# Reduce the difference to a logical flag (NA stays NA).
improved <- improved > 0
table(improved, useNA = "always")
## improved
## FALSE TRUE <NA>
## 227 152 242
Add improvement as the class variable (it has to be a factor so that rpart builds a classification tree)
# Store the logical flag as a factor column with levels FALSE / TRUE.
r[["improved"]] <- factor(improved)
dim(r)
## [1] 621 22
summary(r)
## university_name world_rank_Y2005
## Aalborg University : 1 Min. : 1
## Aalto University : 1 1st Qu.:101
## Aarhus University : 1 Median :203
## Aix Marseille University : 1 Mean :216
## Aristotle University of Thessaloniki: 1 3rd Qu.:301
## Arizona State University : 1 Max. :401
## (Other) :615 NA's :121
## national_rank_Y2005 total_score_Y2005 alumni_Y2005 award_Y2005
## Min. : 1.00 Min. : 23.90 Min. : 0.000 Min. : 0.000
## 1st Qu.: 4.00 1st Qu.: 27.62 1st Qu.: 0.000 1st Qu.: 0.000
## Median : 12.00 Median : 31.55 Median : 0.000 Median : 0.000
## Mean : 31.98 Mean : 36.68 Mean : 9.266 Mean : 6.685
## 3rd Qu.: 43.25 3rd Qu.: 42.65 3rd Qu.: 16.600 3rd Qu.: 9.100
## Max. :141.00 Max. :100.00 Max. :100.000 Max. :100.000
## NA's :121 NA's :521 NA's :121 NA's :121
## hici_Y2005 ns_Y2005 pub_Y2005 pcp_Y2005
## Min. : 0.00 Min. : 0.00 Min. : 10.10 Min. : 9.00
## 1st Qu.: 7.90 1st Qu.: 7.80 1st Qu.: 27.40 1st Qu.: 14.07
## Median : 11.10 Median : 12.45 Median : 33.90 Median : 17.25
## Mean : 15.11 Mean : 15.73 Mean : 36.63 Mean : 19.82
## 3rd Qu.: 19.20 3rd Qu.: 19.07 3rd Qu.: 43.75 3rd Qu.: 23.02
## Max. :100.00 Max. :100.00 Max. :100.00 Max. :100.00
## NA's :121 NA's :123 NA's :121 NA's :121
## year_Y2005 world_rank_Y2015 national_rank_Y2015 total_score_Y2015
## Min. :2005 Min. : 1.0 Min. : 1.00 Min. : 23.90
## 1st Qu.:2005 1st Qu.:101.0 1st Qu.: 4.00 1st Qu.: 27.00
## Median :2005 Median :201.0 Median : 10.00 Median : 30.75
## Mean :2005 Mean :215.8 Mean : 25.78 Mean : 35.87
## 3rd Qu.:2005 3rd Qu.:301.0 3rd Qu.: 29.00 3rd Qu.: 39.33
## Max. :2005 Max. :401.0 Max. :126.00 Max. :100.00
## NA's :121 NA's :121 NA's :121 NA's :521
## alumni_Y2015 award_Y2015 hici_Y2015 ns_Y2015
## Min. : 0.000 Min. : 0.00 Min. : 0.000 Min. : 0.00
## 1st Qu.: 0.000 1st Qu.: 0.00 1st Qu.: 6.275 1st Qu.: 8.00
## Median : 0.000 Median : 0.00 Median : 12.300 Median : 12.10
## Mean : 7.968 Mean : 7.47 Mean : 15.218 Mean : 15.29
## 3rd Qu.: 13.600 3rd Qu.: 13.30 3rd Qu.: 20.100 3rd Qu.: 19.00
## Max. :100.000 Max. :100.00 Max. :100.000 Max. :100.00
## NA's :121 NA's :121 NA's :121 NA's :123
## pub_Y2015 pcp_Y2015 year_Y2015 improved
## Min. : 7.80 Min. : 11.20 Min. :2015 FALSE:227
## 1st Qu.: 30.55 1st Qu.: 16.60 1st Qu.:2015 TRUE :152
## Median : 36.70 Median : 19.90 Median :2015 NA's :242
## Mean : 38.78 Mean : 21.84 Mean :2015
## 3rd Qu.: 45.02 3rd Qu.: 24.20 3rd Qu.:2015
## Max. :100.00 Max. :100.00 Max. :2015
## NA's :121 NA's :121 NA's :121
library(rpart)
library(rpart.plot)
library("caret")
## Loading required package: lattice
## Loading required package: ggplot2
# Fit a tree for `improved` using every other column of r as a predictor
# (this still includes university_name at this point).
tree <- rpart(improved ~ ., data = r)
print(tree)
## n=379 (242 observations deleted due to missingness)
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 379 152 FALSE (0.5989446 0.4010554)
## 2) university_name=Aalto University,Aristotle University of Thessaloniki,Auburn University,Autonomous University of Madrid,Bar-Ilan University,Baylor College of Medicine,Ben-Gurion University of the Negev,Boston College,Brandeis University,Brigham Young University,California Institute of Technology,Carleton University,Carnegie Mellon University,Case Western Reserve University,Catholic University of Louvain,Chalmers University of Technology,Chiba University,City University of New York City College,Clemson University,Colorado State University,Columbia University,Cornell University,Dartmouth College,Ecole Polytechnique,Eindhoven University of Technology,Emory University,Eotvos Lorand University,Federal University of Rio de Janeiro,Florida International University,Florida State University,Georgetown University,Georgia Institute of Technology,Harvard University,Hiroshima University,Hokkaido University,Indiana University Bloomington,Indian Institute of Science,Istanbul University,Jagiellonian University,Kanazawa University,Kansas State University,Karolinska Institute,Keio University,Kobe University,Kyoto University,Kyungpook National University,Kyushu University,Lancaster University,Leiden University,Linkoping University,Lund University,Mayo Medical School,McMaster University,Medical University of Graz,Medical University of South Carolina,Michigan State University,Moscow State University,Nara Institute of Science and Technology,National and Kapodistrian University of Athens,National Autonomous University of Mexico,National Cheng Kung University,National University of Singapore,North Carolina State University - Raleigh,Okayama University,Oregon State University,Osaka University,Pennsylvania State University - University Park,Pohang University of Science and Technology,Queen's University,Queen's University Belfast,Rensselaer Polytechnic Institute,Rice University,Rockefeller University,Rutgers, The State University of New Jersey - New Brunswick,Saint Louis 
University,Saint Petersburg State University,San Diego State University,Scuola Normale Superiore - Pisa,Seoul National University,State University of New York Health Science Center at Brooklyn,Stockholm School of Economics,Syracuse University,Technical University Darmstadt,Technical University of Berlin,Technical University of Braunschweig,Tel Aviv University,Temple University,Texas Tech University,The Australian National University,The Hong Kong Polytechnic University,The Imperial College of Science, Technology and Medicine,The Ohio State University - Columbus,The University of Calgary,The University of Edinburgh,The University of Georgia,The University of Glasgow,The University of Reading,The University of Sheffield,The University of Texas at Austin,The University of Texas at Dallas,The University of Texas Health Science Center at Houston,The University of Texas Health Science Center at San Antonio,The University of Texas Medical Branch at Galveston,The University of Texas Southwestern Medical Center at Dallas,The University of Tokushima,The University of Tokyo,Thomas Jefferson University,Tohoku University,Tokyo Institute of Technology,Tokyo Medical and Dental University,Tufts University,Tulane University,Umea University,University College Cork,University of Alaska - Fairbanks,University of Alberta,University of Amsterdam,University of Antwerp,University of Arizona,University of Basel,University of Bath,University of Bayreuth,University of Birmingham,University of Bochum,University of Bremen,University of Bristol,University of British Columbia,University of California, Berkeley,University of California, Davis,University of California, Irvine,University of California, Riverside,University of California, San Diego,University of California, San Francisco,University of California, Santa Barbara,University of Cambridge,University of Chicago,University of Chile,University of Cincinnati,University of Duesseldorf,University of East Anglia,University of Essex,University 
of Ferrara,University of Florida,University of Frankfurt,University of Freiburg,University of Goettingen,University of Graz,University of Halle-Wittenberg,University of Hamburg,University of Hawaii at Manoa,University of Illinois at Chicago,University of Illinois at Urbana-Champaign,University of Iowa,University of Jena,University of Jyvaskyla,University of Konstanz,University of KwaZulu-Natal,University of Leeds,University of Leicester,University of Leipzig,University of Liverpool,University of Mainz,University of Manitoba,University of Marburg,University of Maryland, Baltimore,University of Maryland, Baltimore County,University of Massachusetts Amherst,University of Miami,University of Milan,University of Missouri - Columbia,University of Muenster,University of Munich,University of Naples Federico II,University of Nebraska - Lincoln,University of New Hampshire - Durham,University of Notre Dame,University of Nottingham,University of Oklahoma - Norman,University of Oregon,University of Oxford,University of Palermo,University of Parma,University of Pavia,University of Pennsylvania,University of Perugia,University of Pisa,University of Quebec,University of Regensburg,University of Rhode Island,University of Rochester,University of Roma - Tor Vergata,University of Rostock,University of Sao Paulo,University of Saskatchewan,University of St Andrews,University of Surrey,University of Sussex,University of Sydney,University of Szeged,University of Toronto,University of Trieste,University of Tsukuba,University of Tuebingen,University of Turku,University of Twente,University of Ulm,University of Utah,University of Valencia,University of Vermont,University of Vienna,University of Virginia,University of Warsaw,University of Wisconsin - Madison,University of Wuerzburg,University of Wyoming,University of Zaragoza,Uppsala University,Utah State University,Utrecht University,Vanderbilt University,Vienna University of Technology,Virginia Polytechnic Institute and State 
University,Wake Forest University,Washington University in St. Louis,Wayne State University,Weizmann Institute of Science,Yale University 227 0 FALSE (1.0000000 0.0000000) *
## 3) university_name=Aarhus University,Autonomous University of Barcelona,Boston University,Brown University,Cardiff University,Charles University in Prague,City University of Hong Kong,Claude Bernard University Lyon 1,Complutense University of Madrid,Dalhousie University,Delft University of Technology,Drexel University,Duke University,Ecole Normale Superieure - Lyon,Ecole Normale Superieure - Paris,Erasmus University,Flinders University,Fudan University,George Mason University,Ghent University,Hannover Medical School,Hanyang University,Indiana University-Purdue University at Indianapolis,Iowa State University,Jilin University,Joseph Fourier University (Grenoble 1),King's College London,Korea Advanced Institute of Science and Technology,Korea University,Laval University,London School of Economics and Political Science,Louisiana State University - Baton Rouge,Macquarie University,Massachusetts Institute of Technology (MIT),McGill University,Monash University,Nagoya University,Nanjing University,Nanyang Technological University,National Chiao Tung University,National Taiwan University,National Tsing Hua University,Newcastle University,New York University,Northeastern University,Northwestern University,Oregon Health and Science University,Paul Sabatier University (Toulouse 3),Peking University,Pierre and Marie Curie University - Paris 6,Polytechnic Institute of Milan,Polytechnic University of Valencia,Princeton University,Purdue University - West Lafayette,Radboud University Nijmegen,RWTH Aachen University,Shanghai Jiao Tong University,Simon Fraser University,Stanford University,Stockholm University,Sungkyunkwan University,Swedish University of Agricultural Sciences,Swiss Federal Institute of Technology Zurich,Technical University Munich,Technical University of Denmark,Technion-Israel Institute of Technology,The Chinese University of Hong Kong,The George Washington University,The Hebrew University of Jerusalem,The Hong Kong University of Science and Technology,The 
University of Adelaide,The University of Auckland,The University of Dundee,The University of Hong Kong,The University of Manchester,The University of New Mexico - Albuquerque,The University of Queensland,The University of Texas M. D. Anderson Cancer Center,The University of Western Australia,Trinity College Dublin,Tsinghua University,University College Dublin,University College London,University of Aberdeen,University of Barcelona,University of Bergen,University of Bern,University of Bologna,University of Bonn,University of Buenos Aires,University of California, Los Angeles,University of California, Santa Cruz,University of Cape Town,University of Central Florida,University of Colorado at Boulder,University of Copenhagen,University of Delaware,University of Duisburg-Essen,University of Erlangen-Nuremberg,University of Exeter,University of Florence,University of Geneva,University of Giessen,University of Gothenburg,University of Granada,University of Groningen,University of Guelph,University of Helsinki,University of Houston,University of Innsbruck,University of Kentucky,University of Kiel,University of Koeln,University of Lausanne,University of Liege,University of Lisbon,University of Maryland, College Park,University of Massachusetts Medical School - Worcester,University of Minnesota, Twin Cities,University of Montreal,University of North Carolina at Chapel Hill,University of Oslo,University of Otago,University of Ottawa,University of Oulu,University of Padua,University of Paris Descartes (Paris 5),University of Science and Technology of China,University of Southampton,University of South Carolina - Columbia,University of Southern California,University of Southern Denmark,University of South Florida,University of Strasbourg,University of Stuttgart,University of Tasmania,University of Tennessee - Knoxville,University of the Witwatersrand,University of Turin,University of Victoria,University of Wageningen,University of Warwick,University of Washington,University of 
Waterloo,University of York,University of Zurich,Virginia Commonwealth University,VU University Amsterdam,Western University,Yeshiva University,Yonsei University,Zhejiang University 152 0 TRUE (0.0000000 1.0000000) *
# Draw the fitted tree.
rpart.plot(
  tree,
  extra = 2,
  under = TRUE,
  varlen = 0,
  faclen = 0
)
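Do in-sample testing (resubstitution error). NOTE: The model is evaluated on the same data it was trained on, so the result is overly optimistic; you should use separate training and test samples.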
# Predict the class for the same rows the tree was fitted on and compare
# with the observed labels; "TRUE" is treated as the positive class.
pred <- predict(tree, newdata = r, type = "class")
confusionMatrix(
  data = pred,
  reference = r$improved,
  positive = "TRUE"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction FALSE TRUE
## FALSE 227 0
## TRUE 0 152
##
## Accuracy : 1
## 95% CI : (0.9903, 1)
## No Information Rate : 0.5989
## P-Value [Acc > NIR] : < 2.2e-16
##
## Kappa : 1
## Mcnemar's Test P-Value : NA
##
## Sensitivity : 1.0000
## Specificity : 1.0000
## Pos Pred Value : 1.0000
## Neg Pred Value : 1.0000
## Prevalence : 0.4011
## Detection Rate : 0.4011
## Detection Prevalence : 0.4011
## Balanced Accuracy : 1.0000
##
## 'Positive' Class : TRUE
##
Note: Why is this bad?
# List the columns currently available to the model.
names(r)
## [1] "university_name" "world_rank_Y2005" "national_rank_Y2005"
## [4] "total_score_Y2005" "alumni_Y2005" "award_Y2005"
## [7] "hici_Y2005" "ns_Y2005" "pub_Y2005"
## [10] "pcp_Y2005" "year_Y2005" "world_rank_Y2015"
## [13] "national_rank_Y2015" "total_score_Y2015" "alumni_Y2015"
## [16] "award_Y2015" "hici_Y2015" "ns_Y2015"
## [19] "pub_Y2015" "pcp_Y2015" "year_Y2015"
## [22] "improved"
# Remove the university identifier and the two year columns, then refit the
# tree on the remaining columns.
r <- r[setdiff(names(r), c("university_name", "year_Y2015", "year_Y2005"))]
tree <- rpart(improved ~ ., data = r)
print(tree)
## n=379 (242 observations deleted due to missingness)
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 379 152 FALSE (0.5989446 0.4010554)
## 2) world_rank_Y2015>=251 123 13 FALSE (0.8943089 0.1056911)
## 4) world_rank_Y2005< 351 91 0 FALSE (1.0000000 0.0000000) *
## 5) world_rank_Y2005>=351 32 13 FALSE (0.5937500 0.4062500)
## 10) world_rank_Y2015>=351 19 0 FALSE (1.0000000 0.0000000) *
## 11) world_rank_Y2015< 351 13 0 TRUE (0.0000000 1.0000000) *
## 3) world_rank_Y2015< 251 256 117 TRUE (0.4570312 0.5429688)
## 6) world_rank_Y2005< 178 182 65 FALSE (0.6428571 0.3571429)
## 12) pcp_Y2005>=17.55 169 54 FALSE (0.6804734 0.3195266)
## 24) world_rank_Y2015>=98.5 81 13 FALSE (0.8395062 0.1604938)
## 48) world_rank_Y2005< 127 52 0 FALSE (1.0000000 0.0000000) *
## 49) world_rank_Y2005>=127 29 13 FALSE (0.5517241 0.4482759)
## 98) world_rank_Y2015>=176 16 0 FALSE (1.0000000 0.0000000) *
## 99) world_rank_Y2015< 176 13 0 TRUE (0.0000000 1.0000000) *
## 25) world_rank_Y2015< 98.5 88 41 FALSE (0.5340909 0.4659091)
## 50) ns_Y2005>=24.3 58 18 FALSE (0.6896552 0.3103448)
## 100) pcp_Y2015< 23.1 8 0 FALSE (1.0000000 0.0000000) *
## 101) pcp_Y2015>=23.1 50 18 FALSE (0.6400000 0.3600000)
## 202) award_Y2005>=6.45 43 12 FALSE (0.7209302 0.2790698) *
## 203) award_Y2005< 6.45 7 1 TRUE (0.1428571 0.8571429) *
## 51) ns_Y2005< 24.3 30 7 TRUE (0.2333333 0.7666667) *
## 13) pcp_Y2005< 17.55 13 2 TRUE (0.1538462 0.8461538) *
## 7) world_rank_Y2005>=178 74 0 TRUE (0.0000000 1.0000000) *
# Plot the refitted tree, then evaluate it on the rows it was fitted on.
rpart.plot(
  tree,
  extra = 2,
  under = TRUE,
  varlen = 0,
  faclen = 0
)
pred <- predict(tree, newdata = r, type = "class")
confusionMatrix(
  data = pred,
  reference = r$improved,
  positive = "TRUE"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction FALSE TRUE
## FALSE 217 12
## TRUE 10 140
##
## Accuracy : 0.942
## 95% CI : (0.9134, 0.9633)
## No Information Rate : 0.5989
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.8789
## Mcnemar's Test P-Value : 0.8312
##
## Sensitivity : 0.9211
## Specificity : 0.9559
## Pos Pred Value : 0.9333
## Neg Pred Value : 0.9476
## Prevalence : 0.4011
## Detection Rate : 0.3694
## Detection Prevalence : 0.3958
## Balanced Accuracy : 0.9385
##
## 'Positive' Class : TRUE
##
Note: Why is this bad, too?
# List the columns that are still in the data frame.
names(r)
## [1] "world_rank_Y2005" "national_rank_Y2005" "total_score_Y2005"
## [4] "alumni_Y2005" "award_Y2005" "hici_Y2005"
## [7] "ns_Y2005" "pub_Y2005" "pcp_Y2005"
## [10] "world_rank_Y2015" "national_rank_Y2015" "total_score_Y2015"
## [13] "alumni_Y2015" "award_Y2015" "hici_Y2015"
## [16] "ns_Y2015" "pub_Y2015" "pcp_Y2015"
## [19] "improved"
# Remove the 2015 world rank (the label is computed from it), then refit.
r <- r[names(r) != "world_rank_Y2015"]
tree <- rpart(improved ~ ., data = r)
print(tree)
## n=379 (242 observations deleted due to missingness)
##
## node), split, n, loss, yval, (yprob)
## * denotes terminal node
##
## 1) root 379 152 FALSE (0.59894459 0.40105541)
## 2) pub_Y2015< 28.4 69 6 FALSE (0.91304348 0.08695652) *
## 3) pub_Y2015>=28.4 310 146 FALSE (0.52903226 0.47096774)
## 6) world_rank_Y2005< 127 135 39 FALSE (0.71111111 0.28888889)
## 12) award_Y2015< 11.2 47 3 FALSE (0.93617021 0.06382979) *
## 13) award_Y2015>=11.2 88 36 FALSE (0.59090909 0.40909091)
## 26) ns_Y2015< 17.6 12 0 FALSE (1.00000000 0.00000000) *
## 27) ns_Y2015>=17.6 76 36 FALSE (0.52631579 0.47368421)
## 54) pcp_Y2005>=30.8 32 8 FALSE (0.75000000 0.25000000) *
## 55) pcp_Y2005< 30.8 44 16 TRUE (0.36363636 0.63636364)
## 110) ns_Y2005>=27.2 18 8 FALSE (0.55555556 0.44444444) *
## 111) ns_Y2005< 27.2 26 6 TRUE (0.23076923 0.76923077) *
## 7) world_rank_Y2005>=127 175 68 TRUE (0.38857143 0.61142857)
## 14) pcp_Y2015< 20.15 112 50 FALSE (0.55357143 0.44642857)
## 28) ns_Y2015< 7.55 23 1 FALSE (0.95652174 0.04347826) *
## 29) ns_Y2015>=7.55 89 40 TRUE (0.44943820 0.55056180)
## 58) pub_Y2015< 38.45 64 28 FALSE (0.56250000 0.43750000)
## 116) hici_Y2015< 6.2 17 1 FALSE (0.94117647 0.05882353) *
## 117) hici_Y2015>=6.2 47 20 TRUE (0.42553191 0.57446809)
## 234) hici_Y2005>=14.65 19 5 FALSE (0.73684211 0.26315789) *
## 235) hici_Y2005< 14.65 28 6 TRUE (0.21428571 0.78571429) *
## 59) pub_Y2015>=38.45 25 4 TRUE (0.16000000 0.84000000) *
## 15) pcp_Y2015>=20.15 63 6 TRUE (0.09523810 0.90476190) *
# Plot the latest tree and compute its in-sample confusion matrix.
rpart.plot(
  tree,
  extra = 2,
  under = TRUE,
  varlen = 0,
  faclen = 0
)
pred <- predict(tree, newdata = r, type = "class")
confusionMatrix(
  data = pred,
  reference = r$improved,
  positive = "TRUE"
)
## Confusion Matrix and Statistics
##
## Reference
## Prediction FALSE TRUE
## FALSE 205 32
## TRUE 22 120
##
## Accuracy : 0.8575
## 95% CI : (0.8182, 0.8911)
## No Information Rate : 0.5989
## P-Value [Acc > NIR] : <2e-16
##
## Kappa : 0.7002
## Mcnemar's Test P-Value : 0.2207
##
## Sensitivity : 0.7895
## Specificity : 0.9031
## Pos Pred Value : 0.8451
## Neg Pred Value : 0.8650
## Prevalence : 0.4011
## Detection Rate : 0.3166
## Detection Prevalence : 0.3747
## Balanced Accuracy : 0.8463
##
## 'Positive' Class : TRUE
##
Notes: