set.seed(1234)
library("forecast")
sin_ts <- function() ts(sin(seq(0, 10*pi, length.out = 500)), frequency = 500/5)
y <- sin_ts()
plot(y)
The autocorrelation function (ACF) is the correlation of a signal with a delayed copy of itself, as a function of the lag.
acf(y)
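As a sanity check on this definition, the lag-k autocorrelation can also be computed by hand; a minimal sketch for k = 1 (the helper names below are introduced here only for illustration):
k <- 1
n <- length(y)
yc <- y - mean(y)                                # centered series
sum(yc[(k + 1):n] * yc[1:(n - k)]) / sum(yc^2)   # should match acf(y, plot = FALSE)$acf[k + 1]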
The partial autocorrelation function (PACF) gives the correlation at each lag after controlling for the values of the time series at all shorter lags.
pacf(y)
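To illustrate (a minimal sketch), the partial autocorrelation at lag 2 can be approximated by regressing the intermediate lag out of both sides and correlating the residuals:
n  <- length(y)
y0 <- as.numeric(y)[3:n]          # y_t
y1 <- as.numeric(y)[2:(n - 1)]    # y_{t-1}, the intermediate lag
y2 <- as.numeric(y)[1:(n - 2)]    # y_{t-2}
cor(resid(lm(y0 ~ y1)), resid(lm(y2 ~ y1)))   # roughly matches pacf(y, plot = FALSE)$acf[2]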
Estimate an autoregressive model
ar <- ar(y, method="ols")
## Warning in ar.ols(x, aic = aic, order.max = order.max, na.action =
## na.action, : model order: 3 singularities in the computation of the
## projection matrix results are only valid up to model order 2
ar
##
## Call:
## ar(x = y, method = "ols")
##
## Coefficients:
## 1 2
## 1.996 -1.000
##
## Intercept: 2.944e-17 (5.67e-16)
##
## Order selected 2 sigma^2 estimated as 1.601e-28
Predict and plot the next 1000 values
plot(forecast(ar, 1000))
## Warning in object$var.pred * vars: Recycling array of length 1 in array-vector arithmetic is deprecated.
## Use c() or as.vector() instead.
ARIMA(p, d, q). ARIMA combines three models, AR (autoregressive), I (integrated), and MA (moving average), and has three corresponding parameters:
* the AR order p,
* the degree of differencing d, and
* the MA window width q.
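As an aside, the forecast package also provides auto.arima(), which searches over p, d, and q automatically; a minimal sketch (the orders it selects for this series may differ from the hand-picked ones below):
fit <- auto.arima(y)   # automatic order selection by AICc
fit
plot(forecast(fit, 1000))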
ARIMA(1,0,0) - First order model
ar <- arima(y, c(1,0,0))
ar
##
## Call:
## arima(x = y, order = c(1, 0, 0))
##
## Coefficients:
## ar1 intercept
## 0.9968 0.0000
## s.e. 0.0024 0.4183
##
## sigma^2 estimated as 0.001976: log likelihood = 844.67, aic = -1683.35
plot(forecast(ar, 1000))
ARIMA(2,0,0) - Second order
ar <- arima(y, c(2,0,0))
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
## Warning in log(s2): NaNs produced
ar
##
## Call:
## arima(x = y, order = c(2, 0, 0))
##
## Coefficients:
## ar1 ar2 intercept
## 1.996 -1 0
## s.e. 0.000 0 0
##
## sigma^2 estimated as 8.615e-15: log likelihood = 7385.84, aic = -14763.67
plot(forecast(ar, 1000))
ARIMA(0,1,0) - Random Walk
ar <- arima(y, c(0,1,0))
ar
##
## Call:
## arima(x = y, order = c(0, 1, 0))
##
##
## sigma^2 estimated as 0.001981: log likelihood = 844.85, aic = -1687.71
plot(forecast(ar, 1000))
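Since d = 1 simply means modeling the first differences, a white-noise fit on diff(y) is a quick cross-check (a minimal sketch):
arima(diff(y), c(0, 0, 0))   # sigma^2 should be close to the ARIMA(0,1,0) fit above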
add_noise <- function(x, sd=.1) {
x + rnorm(length(x), sd=sd)
}
y <- sin_ts()
y <- add_noise(y)
plot(y)
acf(y)
pacf(y)
ar <- ar(y, method="ols")
ar
##
## Call:
## ar(x = y, method = "ols")
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 0.2617 0.1322 0.1270 0.1629 0.0574 0.0086 0.1726 0.0729
## 9 10 11 12 13 14 15 16
## 0.0144 0.0332 0.0981 0.0092 0.0110 -0.0169 -0.1069 0.0385
## 17 18 19 20 21 22 23 24
## -0.0061 0.0648 -0.0205 -0.0777 -0.0541 -0.0477 -0.0247 -0.0997
##
## Intercept: 0.0001015 (0.005028)
##
## Order selected 24 sigma^2 estimated as 0.01199
plot(forecast(ar, 200))
## Warning in object$var.pred * vars: Recycling array of length 1 in array-vector arithmetic is deprecated.
## Use c() or as.vector() instead.
Use ARIMA with a moving-average window of 10 to smooth out the noise. The averaging even lets us reduce the AR order!
ar <- arima(y, c(2, 0, 10))
ar
##
## Call:
## arima(x = y, order = c(2, 0, 10))
##
## Coefficients:
## ar1 ar2 ma1 ma2 ma3 ma4 ma5 ma6
## 1.996 -1 -1.9437 0.9788 -0.0930 0.0877 -0.0690 0.0006
## s.e. 0.000 0 0.0727 0.1816 0.1484 0.1121 0.1047 0.1176
## ma7 ma8 ma9 ma10 intercept
## 0.1956 -0.2767 0.1568 -0.0310 0.0023
## s.e. 0.1206 0.1150 0.1088 0.0501 0.0074
##
## sigma^2 estimated as 0.01074: log likelihood = 416.98, aic = -805.96
plot(forecast(ar, 200))
add_trend <- function(x, mu = .01, sd = 0) {
  # add a cumulative drift with mean step mu (deterministic when sd = 0)
  x + cumsum(rnorm(length(x), mean = mu, sd = sd))
}
y <- sin_ts()
y <- add_noise(y)
y <- add_trend(y)
plot(y)
acf(y)
pacf(y)
Note: The increasing trend inflates the autocorrelation!
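To see the effect (a minimal sketch), compare against the ACF of the first-differenced series, where the linear trend is removed:
acf(diff(y))   # the slow, trend-induced decay disappears after differencing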
ar <- ar(y, method="ols")
ar
##
## Call:
## ar(x = y, method = "ols")
##
## Coefficients:
## 1 2 3 4 5 6 7 8
## 0.2680 0.2523 0.2887 0.1821 0.1773 0.0894 0.0615 0.0348
## 9 10 11 12 13 14 15 16
## 0.0609 0.0258 -0.0396 -0.0761 -0.0728 -0.0994 -0.0909 -0.1251
## 17 18 19 20 21 22 23 24
## -0.1049 0.0228 -0.0410 0.0039 -0.0056 0.0192 -0.0267 0.0094
## 25 26
## 0.1069 0.0759
##
## Intercept: 0.007735 (0.005196)
##
## Order selected 26 sigma^2 estimated as 0.01226
plot(forecast(ar, 200))
## Warning in object$var.pred * vars: Recycling array of length 1 in array-vector arithmetic is deprecated.
## Use c() or as.vector() instead.
This does not work! The time series needs to be stationary, so the trend has to be removed first, e.g., by seasonal decomposition (next step).
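Stationarity can also be checked formally with a unit-root test; a minimal sketch, assuming the tseries package is installed (it is not used elsewhere in this document):
library(tseries)
adf.test(as.numeric(y))   # Augmented Dickey-Fuller test; the null hypothesis is non-stationarity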
y_dec <- decompose(y)
plot(y_dec)
str(y_dec)
## List of 6
## $ x : Time-Series [1:500] from 1 to 5.99: 0.1085 -0.0396 0.2266 0.2168 0.4774 ...
## $ seasonal: Time-Series [1:500] from 1 to 5.99: -0.037 0.1096 0.0804 0.2769 0.2667 ...
## $ trend : Time-Series [1:500] from 1 to 5.99: NA NA NA NA NA NA NA NA NA NA ...
## $ random : Time-Series [1:500] from 1 to 5.99: NA NA NA NA NA NA NA NA NA NA ...
## $ figure : num [1:100] -0.037 0.1096 0.0804 0.2769 0.2667 ...
## $ type : chr "additive"
## - attr(*, "class")= chr "decomposed.ts"
plot(y_dec$seasonal)
The seasonal component can be modeled using AR
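For example (a minimal sketch), the extracted seasonal component can be fitted and forecast on its own:
ar_seasonal <- ar(y_dec$seasonal)   # AR fit to the periodic component alone
plot(forecast(ar_seasonal, 200))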
How large should d be?
ndiffs(y)
## [1] 1
ar <- arima(y, c(10, 1, 10))
## Warning in arima(y, c(10, 1, 10)): possible convergence problem: optim gave
## code = 1
plot(forecast(ar, 400))
Note: This should work, but arima seems to have trouble here (see the convergence warning). Do the differencing manually instead
ar <- arima(diff(y), c(10, 0, 10))
plot(forecast(ar, 400))
hw <- HoltWinters(y)
plot(hw$fitted)
plot(forecast(hw, 400))
library(quantmod)
## Loading required package: xts
## Loading required package: zoo
##
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
##
## as.Date, as.Date.numeric
## Loading required package: TTR
## Version 0.4-0 included new data defaults. See ?getSymbols.
getSymbols("AAPL")
## 'getSymbols' currently uses auto.assign=TRUE by default, but will
## use auto.assign=FALSE in 0.5-0. You will still be able to use
## 'loadSymbols' to automatically load data. getOption("getSymbols.env")
## and getOption("getSymbols.auto.assign") will still be checked for
## alternate defaults.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.warning4.0"=FALSE). See ?getSymbols for details.
##
## WARNING: There have been significant changes to Yahoo Finance data.
## Please see the Warning section of '?getSymbols.yahoo' for details.
##
## This message is shown once per session and may be disabled by setting
## options("getSymbols.yahoo.warning"=FALSE).
## [1] "AAPL"
Plot closing prices
apple <- AAPL[,4]
plot(apple)
Add 90-day and 365-day moving averages
plot(apple)
lines(rollmean(apple, k=90), col="red", lwd=2)
lines(rollmean(apple, k=365), col="blue", lwd=2)
Seasonal decomposition
days_per_year <- ndays(AAPL["2014"])
plot(decompose(ts(apple, frequency=days_per_year)))
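The magnitude of the components can also be compared numerically; a minimal sketch (apple_dec is a name introduced here only for this check):
apple_dec <- decompose(ts(apple, frequency = days_per_year))
sd(apple_dec$seasonal)                # spread of the seasonal component
sd(apple_dec$random, na.rm = TRUE)    # spread of the residual (random) component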
Note that the seasonal signal is much smaller than the random noise! Use Holt-Winters exponential smoothing
hw <- HoltWinters(ts(apple, frequency=days_per_year))
plot(hw$fitted)
plot(forecast(hw, 500))
Note that again all the volatility ends up in the level component! Use ARIMA (starting February 2013). Since stock prices roughly follow a random walk with a trend, we model no autoregression (p = 0), second-order differencing (d = 2), and a moving average over 5 days (q = 5).
ar <- arima(apple["2013-02-01::"], c(0, 2, 5))
plot(forecast(ar, 500))
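As a final cross-check on the degree of differencing (a minimal sketch), the ndiffs() helper used earlier can be consulted here as well; its suggestion may differ from the d = 2 chosen above:
ndiffs(as.numeric(apple["2013-02-01::"]))   # suggested degree of differencing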