set.seed(1234)
library("forecast")

Create sine-wave time series (no trend)

sin_ts <- function() ts(sin(seq(0, 10*pi, length.out = 500)), frequency = 500/5)

y <- sin_ts()
plot(y)

Autocorrelation function is the correlation of a signal with a delayed copy of itself.

acf(y)

Partial autocorrelation function controls for values of the time series for all shorter lags.

pacf(y)

Estimate an autroregressive model

ar <- ar(y, method="ols")
## Warning in ar.ols(x, aic = aic, order.max = order.max, na.action =
## na.action, : model order: 3 singularities in the computation of the
## projection matrix results are only valid up to model order 2
ar
## 
## Call:
## ar(x = y, method = "ols")
## 
## Coefficients:
##      1       2  
##  1.996  -1.000  
## 
## Intercept: 2.944e-17 (5.67e-16) 
## 
## Order selected 2  sigma^2 estimated as  1.601e-28

predict and plot the next 100 values

plot(forecast(ar, 1000))
## Warning in object$var.pred * vars: Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.

ARIMA(p, d, q). ARIMA contains three models AR, I (integrated) and MA (moving average) and has three parameters: * AR’s order p, * I’s degree of differencing d, and * MA window width q.

ARIMA(1,0,0) - First order model

ar <- arima(y, c(1,0,0))
ar
## 
## Call:
## arima(x = y, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.9968     0.0000
## s.e.  0.0024     0.4183
## 
## sigma^2 estimated as 0.001976:  log likelihood = 844.67,  aic = -1683.35
plot(forecast(ar, 1000))

ARIMA(2,0,0) - Second order

ar <- arima(y, c(2,0,0))
## Warning in log(s2): NaNs produced

## Warning in log(s2): NaNs produced

## Warning in log(s2): NaNs produced

## Warning in log(s2): NaNs produced
ar
## 
## Call:
## arima(x = y, order = c(2, 0, 0))
## 
## Coefficients:
##         ar1  ar2  intercept
##       1.996   -1          0
## s.e.  0.000    0          0
## 
## sigma^2 estimated as 8.615e-15:  log likelihood = 7385.84,  aic = -14763.67
plot(forecast(ar, 1000))

ARIMA(0,1,0) - Random Walk

ar <- arima(y, c(0,1,0))
ar
## 
## Call:
## arima(x = y, order = c(0, 1, 0))
## 
## 
## sigma^2 estimated as 0.001981:  log likelihood = 844.85,  aic = -1687.71
plot(forecast(ar, 1000))

Sine wave with noise

add_noise <- function(x, sd=.1) {
  x + rnorm(length(x), sd=sd)
}

y <- sin_ts()
y <- add_noise(y)
plot(y)

acf(y)

pacf(y)

ar <- ar(y, method="ols")
ar
## 
## Call:
## ar(x = y, method = "ols")
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  0.2617   0.1322   0.1270   0.1629   0.0574   0.0086   0.1726   0.0729  
##       9       10       11       12       13       14       15       16  
##  0.0144   0.0332   0.0981   0.0092   0.0110  -0.0169  -0.1069   0.0385  
##      17       18       19       20       21       22       23       24  
## -0.0061   0.0648  -0.0205  -0.0777  -0.0541  -0.0477  -0.0247  -0.0997  
## 
## Intercept: 0.0001015 (0.005028) 
## 
## Order selected 24  sigma^2 estimated as  0.01199
plot(forecast(ar, 200))
## Warning in object$var.pred * vars: Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.

Use ARIMA with a moving average of 10 to even out noise. We can even reduce order because of the averaging!

ar <- arima(y, c(2, 0, 10))
ar
## 
## Call:
## arima(x = y, order = c(2, 0, 10))
## 
## Coefficients:
##         ar1  ar2      ma1     ma2      ma3     ma4      ma5     ma6
##       1.996   -1  -1.9437  0.9788  -0.0930  0.0877  -0.0690  0.0006
## s.e.  0.000    0   0.0727  0.1816   0.1484  0.1121   0.1047  0.1176
##          ma7      ma8     ma9     ma10  intercept
##       0.1956  -0.2767  0.1568  -0.0310     0.0023
## s.e.  0.1206   0.1150  0.1088   0.0501     0.0074
## 
## sigma^2 estimated as 0.01074:  log likelihood = 416.98,  aic = -805.96
plot(forecast(ar, 200))

Sine wave with noise and trend

add_trend <- function(x, mu = .01, sd =.0) {
  x + cumsum(rnorm(length(x), sd=sd, mu))
}

y <- sin_ts()
y <- add_noise(y)
y <- add_trend(y)
plot(y)

acf(y)

pacf(y)

Note: The increasing trend creates inflated auto-correlation!

ar <- ar(y, method="ols")
ar
## 
## Call:
## ar(x = y, method = "ols")
## 
## Coefficients:
##       1        2        3        4        5        6        7        8  
##  0.2680   0.2523   0.2887   0.1821   0.1773   0.0894   0.0615   0.0348  
##       9       10       11       12       13       14       15       16  
##  0.0609   0.0258  -0.0396  -0.0761  -0.0728  -0.0994  -0.0909  -0.1251  
##      17       18       19       20       21       22       23       24  
## -0.1049   0.0228  -0.0410   0.0039  -0.0056   0.0192  -0.0267   0.0094  
##      25       26  
##  0.1069   0.0759  
## 
## Intercept: 0.007735 (0.005196) 
## 
## Order selected 26  sigma^2 estimated as  0.01226
plot(forecast(ar, 200))
## Warning in object$var.pred * vars: Recycling array of length 1 in array-vector arithmetic is deprecated.
##   Use c() or as.vector() instead.

Does not work! Time series needs to be stationary! Remove trend first. E.g., by seasonal decomposition

y_dec <- decompose(y)
plot(y_dec)

str(y_dec)
## List of 6
##  $ x       : Time-Series [1:500] from 1 to 5.99: 0.1085 -0.0396 0.2266 0.2168 0.4774 ...
##  $ seasonal: Time-Series [1:500] from 1 to 5.99: -0.037 0.1096 0.0804 0.2769 0.2667 ...
##  $ trend   : Time-Series [1:500] from 1 to 5.99: NA NA NA NA NA NA NA NA NA NA ...
##  $ random  : Time-Series [1:500] from 1 to 5.99: NA NA NA NA NA NA NA NA NA NA ...
##  $ figure  : num [1:100] -0.037 0.1096 0.0804 0.2769 0.2667 ...
##  $ type    : chr "additive"
##  - attr(*, "class")= chr "decomposed.ts"
plot(y_dec$seasonal)

The seasonal component can be modeled using AR

Use ARIMA for differencing. We use degree of differencing of 1 and MA with 3

How large should d be?

ndiffs(y)
## [1] 1
ar <- arima(y, c(10, 1, 10))
## Warning in arima(y, c(10, 1, 10)): possible convergence problem: optim gave
## code = 1
plot(forecast(ar, 400))

Note: This should work, but there seems to be an issue with arima. Do the differencing manually

ar <- arima(diff(y), c(10, 0, 10))
plot(forecast(ar, 400))

Alternative: Use double exponetial smoothing (Holt-Winters)

hw <- HoltWinters(y)
plot(hw$fitted)

plot(forecast(hw, 400))

Get some real data from Yahoo finance (package quantmod)

library(quantmod)
## Loading required package: xts
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
getSymbols("AAPL")
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
## 
## This message is shown once per session and may be disabled by setting 
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
## 
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
## 
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## [1] "AAPL"

Plot closing prices

apple <- AAPL[,4]
plot(apple)

Add 90/365 day moving average

plot(apple)

lines(rollmean(apple, k=90), col="red", lwd=2)

lines(rollmean(apple, k=365), col="blue", lwd=2)

seasonal decomposition

days_per_year <- ndays(AAPL["2014"])
plot(decompose(ts(apple, frequency=days_per_year)))

Note that the seasonal signal is much smaller than the random noise! Use double exponential smoothing

hw <- HoltWinters(ts(apple, frequency=days_per_year))
plot(hw$fitted)

plot(forecast(hw, 500))

Note that again all the volatility is the level! Use ARIMA (starting February 2013). Since stocks follow a random walk with a trend. We model no autoregression (p=0), twice differencing d=2 and a MA over 5 days.

ar <- arima(apple["2013-02-01::"], c(0, 2, 5))
plot(forecast(ar, 500))