The DT (DataTables) and plotly libraries are used for interactive tables and plots.
library("DT")
library("plotly")
## Loading required package: ggplot2
##
## Attaching package: 'plotly'
##
## The following object is masked from 'package:ggplot2':
##
## last_plot
##
## The following object is masked from 'package:graphics':
##
## layout
# Load the educational-attainment table (one row per country/series pair,
# one column per year).  stringsAsFactors = TRUE is spelled out because
# R >= 4.0 reads text columns as character by default, which would make the
# levels() calls further down return NULL.
attainment <- read.csv(
  "educational_attainment_supplementary_data.csv",
  stringsAsFactors = TRUE
)
head(attainment)
## country_name
## 1 Afghanistan
## 2 Afghanistan
## 3 Afghanistan
## 4 Afghanistan
## 5 Afghanistan
## 6 Afghanistan
## series_name X1985
## 1 Barro-Lee: Average years of primary schooling, age 15+, female 0.33
## 2 Barro-Lee: Average years of primary schooling, age 15+, total 1.03
## 3 Barro-Lee: Average years of primary schooling, age 15-19, female 0.83
## 4 Barro-Lee: Average years of primary schooling, age 15-19, total 2.34
## 5 Barro-Lee: Average years of primary schooling, age 20-24, female 0.54
## 6 Barro-Lee: Average years of primary schooling, age 20-24, total 1.52
## X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998 X1999 X2000
## 1 NA NA 0.44 NA NA NA 0.57 NA NA NA NA 0.75
## 2 NA NA 1.26 NA NA NA 1.54 NA NA NA NA 2.01
## 3 NA NA 0.95 NA NA NA 1.26 NA NA NA NA 1.92
## 4 NA NA 2.22 NA NA NA 2.37 NA NA NA NA 3.83
## 5 NA NA 0.92 NA NA NA 0.94 NA NA NA NA 1.26
## 6 NA NA 2.51 NA NA NA 2.27 NA NA NA NA 2.48
## X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009 X2010 X2011 X2012
## 1 NA NA NA NA 0.86 NA NA NA NA 1.27 NA NA
## 2 NA NA NA NA 2.18 NA NA NA NA 2.64 NA NA
## 3 NA NA NA NA 1.01 NA NA NA NA 2.45 NA NA
## 4 NA NA NA NA 2.26 NA NA NA NA 3.55 NA NA
## 5 NA NA NA NA 2.00 NA NA NA NA 1.29 NA NA
## 6 NA NA NA NA 3.93 NA NA NA NA 2.64 NA NA
## X2013 X2015
## 1 NA NA
## 2 NA NA
## 3 NA NA
## 4 NA NA
## 5 NA NA
## 6 NA NA
# List the columns: two label columns followed by one column per year.
names(attainment)
## [1] "country_name" "series_name" "X1985" "X1986"
## [5] "X1987" "X1990" "X1991" "X1992"
## [9] "X1993" "X1995" "X1996" "X1997"
## [13] "X1998" "X1999" "X2000" "X2001"
## [17] "X2002" "X2003" "X2004" "X2005"
## [21] "X2006" "X2007" "X2008" "X2009"
## [25] "X2010" "X2011" "X2012" "X2013"
## [29] "X2015"
Which countries and which series does the data set contain?
# First few distinct country labels (levels of the country factor).
head(levels(attainment[["country_name"]]), n = 6L)
## [1] "" "Afghanistan" "Albania" "Algeria" "Andorra"
## [6] "Argentina"
# First few distinct series labels (levels of the series factor).
head(levels(attainment[["series_name"]]), n = 6L)
## [1] ""
## [2] "Barro-Lee: Average years of primary schooling, age 15-19, female"
## [3] "Barro-Lee: Average years of primary schooling, age 15-19, total"
## [4] "Barro-Lee: Average years of primary schooling, age 15+, female"
## [5] "Barro-Lee: Average years of primary schooling, age 15+, total"
## [6] "Barro-Lee: Average years of primary schooling, age 20-24, female"
# Show the first rows recorded for the United States.
head(
  attainment[attainment[["country_name"]] == "United States", ],
  n = 6L
)
## country_name
## 74801 United States
## 74802 United States
## 74803 United States
## 74804 United States
## 74805 United States
## 74806 United States
## series_name
## 74801 Barro-Lee: Average years of primary schooling, age 15+, female
## 74802 Barro-Lee: Average years of primary schooling, age 15+, total
## 74803 Barro-Lee: Average years of primary schooling, age 15-19, female
## 74804 Barro-Lee: Average years of primary schooling, age 15-19, total
## 74805 Barro-Lee: Average years of primary schooling, age 20-24, female
## 74806 Barro-Lee: Average years of primary schooling, age 20-24, total
## X1985 X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998
## 74801 5.91 NA NA 5.92 NA NA NA 5.94 NA NA NA
## 74802 5.86 NA NA 5.84 NA NA NA 5.94 NA NA NA
## 74803 5.94 NA NA 5.92 NA NA NA 5.99 NA NA NA
## 74804 5.94 NA NA 5.91 NA NA NA 5.98 NA NA NA
## 74805 5.96 NA NA 5.97 NA NA NA 5.97 NA NA NA
## 74806 5.91 NA NA 5.92 NA NA NA 5.96 NA NA NA
## X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 74801 NA 5.95 NA NA NA NA 5.95 NA NA NA NA
## 74802 NA 5.95 NA NA NA NA 5.95 NA NA NA NA
## 74803 NA 5.99 NA NA NA NA 6.00 NA NA NA NA
## 74804 NA 5.99 NA NA NA NA 5.99 NA NA NA NA
## 74805 NA 5.97 NA NA NA NA 5.98 NA NA NA NA
## 74806 NA 5.97 NA NA NA NA 5.98 NA NA NA NA
## X2010 X2011 X2012 X2013 X2015
## 74801 5.96 NA NA NA NA
## 74802 5.96 NA NA NA NA
## 74803 5.98 NA NA NA NA
## 74804 5.98 NA NA NA NA
## 74805 5.99 NA NA NA NA
## 74806 5.99 NA NA NA NA
College age population
# Preview the series holding the population aged 20-24 (in thousands).
head(
  attainment[
    attainment[["series_name"]] ==
      "Barro-Lee: Population in thousands, age 20-24, total",
  ],
  n = 6L
)
## country_name series_name
## 336 Afghanistan Barro-Lee: Population in thousands, age 20-24, total
## 761 Albania Barro-Lee: Population in thousands, age 20-24, total
## 1186 Algeria Barro-Lee: Population in thousands, age 20-24, total
## 1611 Andorra Barro-Lee: Population in thousands, age 20-24, total
## 2036 Argentina Barro-Lee: Population in thousands, age 20-24, total
## 2461 Armenia Barro-Lee: Population in thousands, age 20-24, total
## X1985 X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998
## 336 1182 NA NA 1243 NA NA NA 1753 NA NA NA
## 761 300 NA NA 316 NA NA NA 257 NA NA NA
## 1186 2043 NA NA 2390 NA NA NA 2737 NA NA NA
## 1611 NA NA NA NA NA NA NA NA NA NA NA
## 2036 2325 NA NA 2467 NA NA NA 2797 NA NA NA
## 2461 346 NA NA 283 NA NA NA 217 NA NA NA
## X1999 X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 336 NA 2009 NA NA NA NA 2586 NA NA NA NA
## 761 NA 230 NA NA NA NA 270 NA NA NA NA
## 1186 NA 3170 NA NA NA NA 3676 NA NA NA NA
## 1611 NA NA NA NA NA NA NA NA NA NA NA
## 2036 NA 3301 NA NA NA NA 3234 NA NA NA NA
## 2461 NA 240 NA NA NA NA 282 NA NA NA NA
## X2010 X2011 X2012 X2013 X2015
## 336 3162 NA NA NA NA
## 761 296 NA NA NA NA
## 1186 3699 NA NA NA NA
## 1611 NA NA NA NA NA
## 2036 3364 NA NA NA NA
## 2461 307 NA NA NA NA
# Average the 20-24 population (in thousands) over the years that have data,
# giving one value per country.
age <- attainment[
  attainment[["series_name"]] ==
    "Barro-Lee: Population in thousands, age 20-24, total",
]
# Columns 1-2 are the country and series labels; the rest are yearly values.
age2 <- apply(
  age[, -(1:2)],
  MARGIN = 1,
  FUN = function(yearly) mean(yearly, na.rm = TRUE)
)
age <- data.frame(country = age[["country_name"]], college_age_pop = age2)
# Interactive table with the averaged column shown to two decimals.
formatRound(datatable(age), columns = 2, digits = 2)
Years in school might be an indicator of education level in a country
# Preview the UIS series with mean years of schooling for ages 25 and over.
head(
  attainment[
    attainment[["series_name"]] ==
      "UIS: Mean years of schooling of the population age 25+. Total",
  ],
  n = 6L
)
## country_name
## 363 Afghanistan
## 788 Albania
## 1213 Algeria
## 1638 Andorra
## 2063 Argentina
## 2488 Armenia
## series_name X1985
## 363 UIS: Mean years of schooling of the population age 25+. Total NA
## 788 UIS: Mean years of schooling of the population age 25+. Total NA
## 1213 UIS: Mean years of schooling of the population age 25+. Total NA
## 1638 UIS: Mean years of schooling of the population age 25+. Total NA
## 2063 UIS: Mean years of schooling of the population age 25+. Total NA
## 2488 UIS: Mean years of schooling of the population age 25+. Total NA
## X1986 X1987 X1990 X1991 X1992 X1993 X1995 X1996 X1997 X1998 X1999
## 363 NA NA NA NA NA NA NA NA NA NA NA
## 788 NA NA NA NA NA NA NA NA NA NA NA
## 1213 NA NA NA NA NA NA NA NA NA NA NA
## 1638 NA NA NA NA NA NA NA NA NA NA NA
## 2063 NA NA NA NA NA NA NA NA NA NA NA
## 2488 NA NA NA NA NA NA NA NA NA NA NA
## X2000 X2001 X2002 X2003 X2004 X2005 X2006 X2007 X2008 X2009
## 363 NA NA NA NA NA NA NA NA NA NA
## 788 NA 8.60585 NA NA NA NA NA NA 8.98886 NA
## 1213 NA NA NA NA NA NA NA NA NA NA
## 1638 NA NA NA NA NA NA NA NA NA NA
## 2063 NA 8.68699 NA 9.7898 NA NA NA NA NA NA
## 2488 NA 10.27363 NA NA NA NA NA NA NA NA
## X2010 X2011 X2012 X2013 X2015
## 363 NA NA NA NA NA
## 788 NA 9.25558 NA NA NA
## 1213 NA NA NA NA NA
## 1638 NA NA NA NA NA
## 2063 NA NA NA NA NA
## 2488 NA NA NA NA NA
# Mean years of schooling (population aged 25+), one row per country.
years <- attainment[
  attainment$series_name ==
    "UIS: Mean years of schooling of the population age 25+. Total",
]
# One line per country across the year columns (columns 1-2 are labels).
matplot(t(years[, -(1:2)]), type = "l")
# Average over the years that have data; a country with no data gets NaN.
years2 <- apply(years[, -(1:2)], MARGIN = 1, mean, na.rm = TRUE)
years <- data.frame(country = years$country_name, years_of_education = years2)
# Look up each country's population by name rather than by row position, so
# the two tables stay correctly paired even if their row order or row count
# differs; countries without a population entry get NA.
merged <- cbind(
  years,
  college_age_pop = age$college_age_pop[match(years$country, age$country)]
)
datatable(merged) %>% formatRound(2:3, 2)
# Load the education-expenditure table.  stringsAsFactors = TRUE is explicit
# because R >= 4.0 reads text columns as character by default, and summary()
# only reports per-level counts for factor columns.
expenditure <- read.csv(
  "education_expenditure_supplementary_data.csv",
  stringsAsFactors = TRUE
)
summary(expenditure)
## country institute_type
## Australia: 9 All Institutions :111
## Austria : 9 Elementary and Secondary Institutions :111
## Belgium : 9 Higher Education Institutions :111
## Brazil : 9
## Canada : 9
## Chile : 9
## (Other) :279
## direct_expenditure_type X1995 X2000 X2005
## Private:111 Min. :0.100 Min. :0.500 Min. :0.300
## Public :111 1st Qu.:1.100 1st Qu.:1.200 1st Qu.:1.200
## Total :111 Median :3.400 Median :3.300 Median :3.450
## Mean :3.106 Mean :3.063 Mean :3.142
## 3rd Qu.:4.500 3rd Qu.:4.350 3rd Qu.:4.300
## Max. :6.800 Max. :6.600 Max. :7.200
## NA's :239 NA's :234 NA's :227
## X2009 X2010 X2011
## Min. :0.300 Min. :0.500 Min. :0.000
## 1st Qu.:1.350 1st Qu.:1.300 1st Qu.:0.700
## Median :3.700 Median :3.600 Median :1.750
## Mean :3.388 Mean :3.392 Mean :2.573
## 3rd Qu.:4.850 3rd Qu.:4.800 3rd Qu.:4.300
## Max. :7.500 Max. :7.600 Max. :7.900
## NA's :230 NA's :232 NA's :51
# Show the first six expenditure rows.
head(expenditure, n = 6L)
## country institute_type direct_expenditure_type X1995 X2000 X2005
## 1 OECD Average All Institutions Public 4.9 4.9 5.0
## 2 Australia All Institutions Public 4.5 4.6 4.3
## 3 Austria All Institutions Public 5.3 5.4 5.2
## 4 Belgium All Institutions Public 5.0 5.1 5.8
## 5 Canada All Institutions Public 5.8 5.2 4.8
## 6 Chile All Institutions Public NA 4.2 3.3
## X2009 X2010 X2011
## 1 5.4 5.4 5.3
## 2 4.5 4.6 4.3
## 3 5.7 5.6 5.5
## 4 6.4 6.4 6.4
## 5 5.0 5.2 NA
## 6 4.1 4.3 3.9
# Per-row average over the six year columns (4 to 9), ignoring missing years.
expend <- apply(
  expenditure[, 4:9],
  MARGIN = 1,
  FUN = function(yearly) mean(yearly, na.rm = TRUE)
)
# Add the row averages up per country.
# NOTE(review): this sums over every institute_type and
# direct_expenditure_type row, including the "All Institutions" and "Total"
# rows, so categories appear to be counted more than once -- confirm intended.
expend <- aggregate(
  expend,
  by = list(country = expenditure[["country"]]),
  FUN = sum,
  na.rm = TRUE
)
names(expend)[2] <- "expenditure"
# Interactive table with the expenditure column shown to two decimals.
formatRound(datatable(expend), columns = 2, digits = 2)
Round the numeric columns to two decimal places first, for display.
# Bring the per-country expenditure into the plotting table; the plots below
# map it to the x axis.  The guard avoids duplicating the column when it is
# already present.
if (!("expenditure" %in% names(merged))) {
  merged <- merge(merged, expend, by = "country")
}
# Round the numeric columns to two decimals for display.
merged <- cbind(country = merged[, 1], round(merged[, -1], 2))
# Formulas (~column) make plotly look the variables up in `merged`.
# Scatter plot with each point labelled by its country name.
plot_ly(
  merged,
  x = ~expenditure, y = ~years_of_education,
  text = ~country,
  type = "scatter", mode = "text+markers",
  textposition = "top center"
)
# Same scatter, with marker size driven by the college-age population.
plot_ly(
  merged,
  x = ~expenditure, y = ~years_of_education,
  text = ~country, size = ~college_age_pop,
  type = "scatter", mode = "markers"
)