The DT library (interactive tables via `datatable()`) and the plotly library (interactive plots) are used for the interactive displays.

library("DT")
library("plotly")
## Loading required package: ggplot2
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:graphics':
## 
##     layout

Attainment

# Load the attainment data: one row per country/series pair and one X<year>
# column per year. stringsAsFactors = TRUE is set explicitly because the
# levels() calls further down need factor columns, and read.csv() stopped
# creating factors by default in R 4.0.0.
attainment <- read.csv(
  "educational_attainment_supplementary_data.csv",
  stringsAsFactors = TRUE
)
head(attainment)
##   country_name
## 1  Afghanistan
## 2  Afghanistan
## 3  Afghanistan
## 4  Afghanistan
## 5  Afghanistan
## 6  Afghanistan
##                                                        series_name X1985
## 1   Barro-Lee: Average years of primary schooling, age 15+, female  0.33
## 2    Barro-Lee: Average years of primary schooling, age 15+, total  1.03
## 3 Barro-Lee: Average years of primary schooling, age 15-19, female  0.83
## 4  Barro-Lee: Average years of primary schooling, age 15-19, total  2.34
## 5 Barro-Lee: Average years of primary schooling, age 20-24, female  0.54
## 6  Barro-Lee: Average years of primary schooling, age 20-24, total  1.52
##   X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998 X1999 X2000
## 1    NA    NA  0.44    NA    NA    NA  0.57    NA    NA    NA    NA  0.75
## 2    NA    NA  1.26    NA    NA    NA  1.54    NA    NA    NA    NA  2.01
## 3    NA    NA  0.95    NA    NA    NA  1.26    NA    NA    NA    NA  1.92
## 4    NA    NA  2.22    NA    NA    NA  2.37    NA    NA    NA    NA  3.83
## 5    NA    NA  0.92    NA    NA    NA  0.94    NA    NA    NA    NA  1.26
## 6    NA    NA  2.51    NA    NA    NA  2.27    NA    NA    NA    NA  2.48
##   X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009 X2010 X2011 X2012
## 1    NA    NA    NA    NA  0.86    NA    NA    NA    NA  1.27    NA    NA
## 2    NA    NA    NA    NA  2.18    NA    NA    NA    NA  2.64    NA    NA
## 3    NA    NA    NA    NA  1.01    NA    NA    NA    NA  2.45    NA    NA
## 4    NA    NA    NA    NA  2.26    NA    NA    NA    NA  3.55    NA    NA
## 5    NA    NA    NA    NA  2.00    NA    NA    NA    NA  1.29    NA    NA
## 6    NA    NA    NA    NA  3.93    NA    NA    NA    NA  2.64    NA    NA
##   X2013 X2015
## 1    NA    NA
## 2    NA    NA
## 3    NA    NA
## 4    NA    NA
## 5    NA    NA
## 6    NA    NA
# List every column: the two label columns followed by the X<year> columns.
names(attainment)
##  [1] "country_name" "series_name"  "X1985"        "X1986"       
##  [5] "X1987"        "X1990"        "X1991"        "X1992"       
##  [9] "X1993"        "X1995"        "X1996"        "X1997"       
## [13] "X1998"        "X1999"        "X2000"        "X2001"       
## [17] "X2002"        "X2003"        "X2004"        "X2005"       
## [21] "X2006"        "X2007"        "X2008"        "X2009"       
## [25] "X2010"        "X2011"        "X2012"        "X2013"       
## [29] "X2015"

Countries and series?

# factor() makes this work whether country_name was read as a factor or as
# plain character (the read.csv() default since R 4.0.0, for which levels()
# would return NULL).
head(levels(factor(attainment$country_name)))
## [1] ""            "Afghanistan" "Albania"     "Algeria"     "Andorra"    
## [6] "Argentina"
# factor() makes this work whether series_name was read as a factor or as
# plain character (the read.csv() default since R 4.0.0, for which levels()
# would return NULL).
head(levels(factor(attainment$series_name)))
## [1] ""                                                                
## [2] "Barro-Lee: Average years of primary schooling, age 15-19, female"
## [3] "Barro-Lee: Average years of primary schooling, age 15-19, total" 
## [4] "Barro-Lee: Average years of primary schooling, age 15+, female"  
## [5] "Barro-Lee: Average years of primary schooling, age 15+, total"   
## [6] "Barro-Lee: Average years of primary schooling, age 20-24, female"
# Preview the first rows recorded for the United States.
is_us <- attainment$country_name == "United States"
head(attainment[is_us, ])
##        country_name
## 74801 United States
## 74802 United States
## 74803 United States
## 74804 United States
## 74805 United States
## 74806 United States
##                                                            series_name
## 74801   Barro-Lee: Average years of primary schooling, age 15+, female
## 74802    Barro-Lee: Average years of primary schooling, age 15+, total
## 74803 Barro-Lee: Average years of primary schooling, age 15-19, female
## 74804  Barro-Lee: Average years of primary schooling, age 15-19, total
## 74805 Barro-Lee: Average years of primary schooling, age 20-24, female
## 74806  Barro-Lee: Average years of primary schooling, age 20-24, total
##       X1985 X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998
## 74801  5.91    NA    NA  5.92    NA    NA    NA  5.94    NA    NA    NA
## 74802  5.86    NA    NA  5.84    NA    NA    NA  5.94    NA    NA    NA
## 74803  5.94    NA    NA  5.92    NA    NA    NA  5.99    NA    NA    NA
## 74804  5.94    NA    NA  5.91    NA    NA    NA  5.98    NA    NA    NA
## 74805  5.96    NA    NA  5.97    NA    NA    NA  5.97    NA    NA    NA
## 74806  5.91    NA    NA  5.92    NA    NA    NA  5.96    NA    NA    NA
##       X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 74801    NA  5.95    NA    NA    NA    NA  5.95    NA    NA    NA    NA
## 74802    NA  5.95    NA    NA    NA    NA  5.95    NA    NA    NA    NA
## 74803    NA  5.99    NA    NA    NA    NA  6.00    NA    NA    NA    NA
## 74804    NA  5.99    NA    NA    NA    NA  5.99    NA    NA    NA    NA
## 74805    NA  5.97    NA    NA    NA    NA  5.98    NA    NA    NA    NA
## 74806    NA  5.97    NA    NA    NA    NA  5.98    NA    NA    NA    NA
##       X2010 X2011 X2012 X2013 X2015
## 74801  5.96    NA    NA    NA    NA
## 74802  5.96    NA    NA    NA    NA
## 74803  5.98    NA    NA    NA    NA
## 74804  5.98    NA    NA    NA    NA
## 74805  5.99    NA    NA    NA    NA
## 74806  5.99    NA    NA    NA    NA

College age population

# Preview the series holding the total population aged 20-24 (in thousands).
pop_series <- "Barro-Lee: Population in thousands, age 20-24, total"
head(attainment[attainment$series_name == pop_series, ])
##      country_name                                          series_name
## 336   Afghanistan Barro-Lee: Population in thousands, age 20-24, total
## 761       Albania Barro-Lee: Population in thousands, age 20-24, total
## 1186      Algeria Barro-Lee: Population in thousands, age 20-24, total
## 1611      Andorra Barro-Lee: Population in thousands, age 20-24, total
## 2036    Argentina Barro-Lee: Population in thousands, age 20-24, total
## 2461      Armenia Barro-Lee: Population in thousands, age 20-24, total
##      X1985 X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998
## 336   1182    NA    NA  1243    NA    NA    NA  1753    NA    NA    NA
## 761    300    NA    NA   316    NA    NA    NA   257    NA    NA    NA
## 1186  2043    NA    NA  2390    NA    NA    NA  2737    NA    NA    NA
## 1611    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 2036  2325    NA    NA  2467    NA    NA    NA  2797    NA    NA    NA
## 2461   346    NA    NA   283    NA    NA    NA   217    NA    NA    NA
##      X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 336     NA  2009    NA    NA    NA    NA  2586    NA    NA    NA    NA
## 761     NA   230    NA    NA    NA    NA   270    NA    NA    NA    NA
## 1186    NA  3170    NA    NA    NA    NA  3676    NA    NA    NA    NA
## 1611    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 2036    NA  3301    NA    NA    NA    NA  3234    NA    NA    NA    NA
## 2461    NA   240    NA    NA    NA    NA   282    NA    NA    NA    NA
##      X2010 X2011 X2012 X2013 X2015
## 336   3162    NA    NA    NA    NA
## 761    296    NA    NA    NA    NA
## 1186  3699    NA    NA    NA    NA
## 1611    NA    NA    NA    NA    NA
## 2036  3364    NA    NA    NA    NA
## 2461   307    NA    NA    NA    NA
# Average college-age (20-24) population per country over all year columns.
pop_rows <- attainment[
  attainment$series_name ==
    "Barro-Lee: Population in thousands, age 20-24, total",
]

# Columns 1:2 are country_name and series_name; the rest are yearly values.
# rowMeans() averages them in one vectorised call instead of apply()-ing
# mean() row by row. A country with no observations at all comes out as NaN.
age2 <- rowMeans(pop_rows[, -(1:2)], na.rm = TRUE)
age <- data.frame(country = pop_rows$country_name, college_age_pop = age2)

# Interactive table; column 2 (college_age_pop) is shown with 2 decimals.
datatable(age) %>% formatRound(2, 2)

Mean years of schooling can serve as an indicator of a country's education level.

# Preview the series holding mean years of schooling for ages 25 and over.
schooling_series <-
  "UIS: Mean years of schooling of the population age 25+. Total"
head(attainment[attainment$series_name == schooling_series, ])
##      country_name
## 363   Afghanistan
## 788       Albania
## 1213      Algeria
## 1638      Andorra
## 2063    Argentina
## 2488      Armenia
##                                                        series_name X1985
## 363  UIS: Mean years of schooling of the population age 25+. Total    NA
## 788  UIS: Mean years of schooling of the population age 25+. Total    NA
## 1213 UIS: Mean years of schooling of the population age 25+. Total    NA
## 1638 UIS: Mean years of schooling of the population age 25+. Total    NA
## 2063 UIS: Mean years of schooling of the population age 25+. Total    NA
## 2488 UIS: Mean years of schooling of the population age 25+. Total    NA
##      X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998 X1999
## 363     NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 788     NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 1213    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 1638    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 2063    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
## 2488    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA    NA
##      X2000    X2001 X2002  X2003 X2004 X2005 X2006 X2007   X2008 X2009
## 363     NA       NA    NA     NA    NA    NA    NA    NA      NA    NA
## 788     NA  8.60585    NA     NA    NA    NA    NA    NA 8.98886    NA
## 1213    NA       NA    NA     NA    NA    NA    NA    NA      NA    NA
## 1638    NA       NA    NA     NA    NA    NA    NA    NA      NA    NA
## 2063    NA  8.68699    NA 9.7898    NA    NA    NA    NA      NA    NA
## 2488    NA 10.27363    NA     NA    NA    NA    NA    NA      NA    NA
##      X2010   X2011 X2012 X2013 X2015
## 363     NA      NA    NA    NA    NA
## 788     NA 9.25558    NA    NA    NA
## 1213    NA      NA    NA    NA    NA
## 1638    NA      NA    NA    NA    NA
## 2063    NA      NA    NA    NA    NA
## 2488    NA      NA    NA    NA    NA
# Mean years of schooling (population aged 25+), one row per country.
school_rows <- attainment[
  attainment$series_name ==
    "UIS: Mean years of schooling of the population age 25+. Total",
]

# One line per country across the year columns (columns 1:2 are labels).
# The stray trailing comma of the earlier call is dropped so that no empty
# argument is handed to matplot().
matplot(t(school_rows[, -(1:2)]), type = "l")

# Average over the available years; countries with no data come out as NaN.
years2 <- rowMeans(school_rows[, -(1:2)], na.rm = TRUE)
years <- data.frame(
  country = school_rows$country_name,
  years_of_education = years2
)

# Look up each country's college-age population by name rather than by row
# position, so a missing or reordered country in `age` yields NA instead of
# silently pairing values from two different countries.
pop_lookup <- match(as.character(years$country), as.character(age$country))
merged <- cbind(years, college_age_pop = age$college_age_pop[pop_lookup])

# Interactive table; columns 2:3 are shown with 2 decimals.
datatable(merged) %>% formatRound(2:3, 2)

Expenditure

# Load the expenditure data (country x institute type x expenditure type,
# with one X<year> column per year).
# - stringsAsFactors = TRUE: summary() only reports per-level counts for
#   factors, and read.csv() stopped creating factors by default in R 4.0.0.
# - strip.white = TRUE: some labels are padded in the file (e.g. the country
#   printed as "  Brazil"), which would break matching by name. This only
#   trims ordinary blanks around unquoted fields -- TODO confirm the padding
#   is not a non-breaking space or inside quotes.
expenditure <- read.csv(
  "education_expenditure_supplementary_data.csv",
  stringsAsFactors = TRUE,
  strip.white = TRUE
)
summary(expenditure)
##       country                                   institute_type
##  Australia:  9   All Institutions                      :111   
##  Austria  :  9   Elementary and Secondary Institutions :111   
##  Belgium  :  9   Higher Education Institutions         :111   
##    Brazil :  9                                                
##  Canada   :  9                                                
##  Chile    :  9                                                
##  (Other)  :279                                                
##  direct_expenditure_type     X1995           X2000           X2005      
##  Private:111             Min.   :0.100   Min.   :0.500   Min.   :0.300  
##  Public :111             1st Qu.:1.100   1st Qu.:1.200   1st Qu.:1.200  
##  Total  :111             Median :3.400   Median :3.300   Median :3.450  
##                          Mean   :3.106   Mean   :3.063   Mean   :3.142  
##                          3rd Qu.:4.500   3rd Qu.:4.350   3rd Qu.:4.300  
##                          Max.   :6.800   Max.   :6.600   Max.   :7.200  
##                          NA's   :239     NA's   :234     NA's   :227    
##      X2009           X2010           X2011      
##  Min.   :0.300   Min.   :0.500   Min.   :0.000  
##  1st Qu.:1.350   1st Qu.:1.300   1st Qu.:0.700  
##  Median :3.700   Median :3.600   Median :1.750  
##  Mean   :3.388   Mean   :3.392   Mean   :2.573  
##  3rd Qu.:4.850   3rd Qu.:4.800   3rd Qu.:4.300  
##  Max.   :7.500   Max.   :7.600   Max.   :7.900  
##  NA's   :230     NA's   :232     NA's   :51
# Show the first six expenditure rows.
head(expenditure, n = 6L)
##        country    institute_type direct_expenditure_type X1995 X2000 X2005
## 1 OECD Average All Institutions                   Public   4.9   4.9   5.0
## 2    Australia All Institutions                   Public   4.5   4.6   4.3
## 3      Austria All Institutions                   Public   5.3   5.4   5.2
## 4      Belgium All Institutions                   Public   5.0   5.1   5.8
## 5       Canada All Institutions                   Public   5.8   5.2   4.8
## 6        Chile All Institutions                   Public    NA   4.2   3.3
##   X2009 X2010 X2011
## 1   5.4   5.4   5.3
## 2   4.5   4.6   4.3
## 3   5.7   5.6   5.5
## 4   6.4   6.4   6.4
## 5   5.0   5.2    NA
## 6   4.1   4.3   3.9
# Year columns are picked by name (X1995, X2000, ...) instead of by the
# hard-coded positions 4:9, so extra or reordered columns do not break this.
year_cols <- grep("^X[0-9]{4}$", names(expenditure))

# Average each row (country x institute type x expenditure type) over the
# available years; rows with no data at all come out as NaN.
row_means <- rowMeans(expenditure[year_cols], na.rm = TRUE)

# Add the row averages up per country. A country whose rows are all missing
# becomes NA; a plain sum(na.rm = TRUE) would report it as 0.
# NOTE(review): this adds up every institute_type / direct_expenditure_type
# row, and those categories look overlapping ("All Institutions", "Total"),
# so the result is probably an index rather than a true total -- confirm
# that this is intended.
sum_or_na <- function(v) {
  if (all(is.na(v))) NA_real_ else sum(v, na.rm = TRUE)
}
expend <- aggregate(
  row_means,
  by = list(country = expenditure$country),
  FUN = sum_or_na
)
colnames(expend)[2] <- "expenditure"

# Interactive table; column 2 (expenditure) is shown with 2 decimals.
datatable(expend) %>% formatRound(2, 2)

Interactive plots

(round first for display)

# `merged` holds country, years_of_education and college_age_pop only, so the
# per-country expenditure has to be attached before it can be plotted;
# without this, `expenditure` would refer to the raw expenditure data frame.
# Names are trimmed because the expenditure file pads some country labels.
# Countries without an expenditure figure get NA.
expend_lookup <- match(
  trimws(as.character(merged$country)),
  trimws(as.character(expend$country))
)
merged$expenditure <- expend$expenditure[expend_lookup]

# Round every numeric column to 2 decimals for display.
merged <- cbind(country = merged[, 1], round(merged[, -1], 2))

# Keep only the countries that have both coordinates.
plot_data <- merged[
  !is.na(merged$expenditure) & !is.na(merged$years_of_education),
]

# Formula mappings (~column) are required by plotly >= 4.0; bare column names
# are looked up in the calling environment there, not in the data.
# "top center" replaces "top middle", which is not a valid textposition.
plot_ly(plot_data, x = ~expenditure, y = ~years_of_education,
  text = ~country, type = "scatter",
  mode = "text+markers", textposition = "top center")

# Same scatter with marker size driven by the college-age population; rows
# without a population figure are left out.
sized_data <- plot_data[!is.na(plot_data$college_age_pop), ]
plot_ly(sized_data, x = ~expenditure, y = ~years_of_education,
  text = ~country, size = ~college_age_pop, type = "scatter",
  mode = "markers")