1. Create x with x <- runif(100). Write a function named avg_gt with two formal arguments: a vector x and a value gt. The function computes the average of the values in x that are greater than gt. Write one version with a loop and if, and one version without loops or if statements.

The R way: select a subset and apply mean().

# Vectorized version: mean of the elements of x that are greater than gt.
#
# x:  numeric vector
# gt: numeric threshold that every element of x is compared against
avg_gt <- function(x, gt) {
  is_above <- x > gt # logical mask, one entry per element of x
  mean(x[is_above])
}

Using a loop that iterates over the vector (don’t do this, it is very slow)

# Average of the values in x that are greater than gt, computed with an
# explicit loop over the elements of x.
#
# x:  numeric vector
# gt: single numeric threshold
#
# Returns NaN (0 / 0) if no value is greater than gt, and NA if a comparison
# cannot be decided because of a missing value, like mean(x[x > gt]) does.
avg_gt_loop <- function(x, gt) {
  n_above <- 0 # number of values greater than gt
  total <- 0 # sum of those values
  for (y in x) {
    above <- y > gt
    # if () stops with an error on NA, so handle missing values explicitly
    if (is.na(above)) {
      return(NA_real_)
    }
    if (above) {
      total <- total + y
      n_above <- n_above + 1
    }
  }

  total / n_above
}

Using a loop that uses an index to iterate over the vector (don’t do this, it is even slower)

# Average of the values in x that are greater than gt, computed with a loop
# that accesses the elements of x by index.
#
# x:  numeric vector
# gt: single numeric threshold
#
# Returns NaN (0 / 0) if no value is greater than gt, and NA if a comparison
# cannot be decided because of a missing value, like mean(x[x > gt]) does.
avg_gt_loop_index <- function(x, gt) {
  n_above <- 0 # number of values greater than gt
  total <- 0 # sum of those values
  for (i in seq_along(x)) {
    above <- x[i] > gt
    # if () stops with an error on NA, so handle missing values explicitly
    if (is.na(above)) {
      return(NA_real_)
    }
    if (above) {
      total <- total + x[i]
      n_above <- n_above + 1
    }
  }

  total / n_above
}


# Test data: 100 draws from the standard uniform distribution
x <- runif(n = 100)

With gt = 0.5, we should get something close to 0.75, because the values above 0.5 are uniformly distributed between 0.5 and 1.

# All three implementations are called on the same data and threshold
avg_gt(x, gt = 0.5)
## [1] 0.7239619
avg_gt_loop(x, gt = 0.5)
## [1] 0.7239619
avg_gt_loop_index(x, gt = 0.5)
## [1] 0.7239619

How fast are the implementations on a larger set of data?

library(microbenchmark)
x <- runif(10000)

# Time all three implementations in a single microbenchmark() call instead of
# three separate runs combined with rbind(): the evaluations of the different
# expressions are then interleaved rather than run one batch after the other,
# which makes the timings directly comparable.
microbenchmark(
  avg_gt(x, .5),
  avg_gt_loop(x, .5),
  avg_gt_loop_index(x, .5)
)
## Unit: microseconds
##                       expr     min       lq     mean   median        uq
##             avg_gt(x, 0.5) 110.095 169.7895 243.6671 185.9820  227.0440
##        avg_gt_loop(x, 0.5) 501.070 539.6445 593.0590 589.0825  624.6785
##  avg_gt_loop_index(x, 0.5) 678.321 726.8700 902.9252 814.3950 1059.0055
##       max neval cld
##  4771.967   100 a  
##   827.667   100  b 
##  1502.856   100   c

2. Create a list with 5 numeric vectors (lengths and values of your choice). Sort all vectors in the list. Hint: see sort().

# Example list of five numeric vectors with different lengths
x <- list(
  a = seq_len(10),         # 1, 2, ..., 10
  b = runif(5),            # five uniform random numbers
  c = sample.int(1000, 7), # seven values drawn without replacement from 1..1000
  d = rev(seq_len(5)),     # 5, 4, ..., 1
  e = numeric()            # empty numeric vector
)

# Print the list to inspect the (still unsorted) vectors
x
## $a
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $b
## [1] 0.4886365 0.1953019 0.9646642 0.9399901 0.3711698
## 
## $c
## [1] 189 928 195 291 424 183 705
## 
## $d
## [1] 5 4 3 2 1
## 
## $e
## numeric(0)
# Compact overview of the list: type and length of every element
str(x)
## List of 5
##  $ a: int [1:10] 1 2 3 4 5 6 7 8 9 10
##  $ b: num [1:5] 0.489 0.195 0.965 0.94 0.371
##  $ c: int [1:7] 189 928 195 291 424 183 705
##  $ d: int [1:5] 5 4 3 2 1
##  $ e: num(0)

Note: R creates sequences such as 1:10 and seq_len(1000) as integer vectors, which is why str() reports a, c and d as int rather than num.

Apply sort to each element of the list

# Call sort() on every element of the list; the result is again a list
lapply(X = x, FUN = sort)
## $a
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## $b
## [1] 0.1953019 0.3711698 0.4886365 0.9399901 0.9646642
## 
## $c
## [1] 183 189 195 291 424 705 928
## 
## $d
## [1] 1 2 3 4 5
## 
## $e
## numeric(0)
# Arguments given after FUN (here: decreasing) are passed on to sort()
lapply(X = x, FUN = sort, decreasing = TRUE)
## $a
##  [1] 10  9  8  7  6  5  4  3  2  1
## 
## $b
## [1] 0.9646642 0.9399901 0.4886365 0.3711698 0.1953019
## 
## $c
## [1] 928 705 424 291 195 189 183
## 
## $d
## [1] 5 4 3 2 1
## 
## $e
## numeric(0)

3. Write a function that computes the smallest value in each column of a given matrix. Create a random 5 x 5 matrix to test the function.

# Random 5 x 5 test matrix: 25 uniform draws, filled in column by column
x <- matrix(runif(25), nrow = 5, ncol = 5)
x
##           [,1]      [,2]      [,3]      [,4]       [,5]
## [1,] 0.1609560 0.2816432 0.5304434 0.5775389 0.06985085
## [2,] 0.6830547 0.4614342 0.3029954 0.4557990 0.40016273
## [3,] 0.8151331 0.9726291 0.7100540 0.4874506 0.54477220
## [4,] 0.9955362 0.4775851 0.1104582 0.7952480 0.58517515
## [5,] 0.2751434 0.7119835 0.8565724 0.6428982 0.22446163

Apply the minimum function to columns (MARGIN = 2)

# The exercise asks for a function, so wrap the column-wise minimum in one.
#
# m: a matrix
#
# Returns a vector with the smallest value of each column of m.
col_min <- function(m) {
  apply(m, MARGIN = 2, min) # MARGIN = 2 applies min() to each column
}

col_min(x)
## [1] 0.16095602 0.28164325 0.11045822 0.45579898 0.06985085