Forecasting

Zhentao Shi

Oct 25, 2021

Practice

Types of forecasts

Demonstration

\[ y_t = \phi_0 + \phi_1 y_{t-1} + v_t,\ \ v_t \sim iidN(0,\sigma_v^2) \]

\[ y_{T+1} = \phi_0 + \phi_1 y_{T} + v_{T+1} \]

\[ \hat{y}_{T+1} = \hat{\phi}_0 + \hat{\phi}_1 y_{T} + 0 = \hat{\phi}_0 + \hat{\phi}_1 y_{T} \]

Further future

\[ y_{T+2} = \phi_0 + \phi_1 y_{T+1} + v_{T+2} \]

\[ \begin{align} \hat{y}_{T+2} & = \hat{\phi}_0 + \hat{\phi_1} \hat {y}_{T+1} + 0 \\ & = \hat{\phi}_0 + \hat{\phi_1} (\hat{\phi}_0 + \hat{\phi}_1 y_{T}) \\ & = \hat{\phi}_0 (1+\hat{\phi}_1) + \hat{\phi}^2_1 y_T \end{align} \]

\[ \hat{y}_{T+H} = \hat{\phi}_0 + \hat{\phi}_1 \hat{y}_{T+H-1} \]

Example

library(forecast)
n = 100
H = 10
x <- arima.sim(model=list(ar = 0.5), n+H)
x_training <- x[1:n]
fit1 <- arima(x_training, order = c(1,0,0))
f1 <- forecast(fit1, h = H)
summary(f1)
## 
## Forecast method: ARIMA(1,0,0) with non-zero mean
## 
## Model Information:
## 
## Call:
## arima(x = x_training, order = c(1, 0, 0))
## 
## Coefficients:
##          ar1  intercept
##       0.3864     0.0212
## s.e.  0.0919     0.1510
## 
## sigma^2 estimated as 0.8694:  log likelihood = -134.98,  aic = 275.96
## 
## Error measures:
##                        ME      RMSE       MAE      MPE     MAPE      MASE
## Training set -0.003744925 0.9324423 0.7526782 119.9123 148.3727 0.7945792
##                      ACF1
## Training set -0.002086191
## 
## Forecasts:
##     Point Forecast     Lo 80    Hi 80     Lo 95    Hi 95
## 101   -0.189466793 -1.384440 1.005506 -2.017020 1.638086
## 102   -0.060186607 -1.341279 1.220906 -2.019448 1.899075
## 103   -0.010228487 -1.303689 1.283232 -1.988406 1.967949
## 104    0.009076976 -1.286221 1.304375 -1.971910 1.990064
## 105    0.016537243 -1.279034 1.312109 -1.964869 1.997943
## 106    0.019420135 -1.276192 1.315033 -1.962048 2.000889
## 107    0.020534180 -1.275085 1.316153 -1.960944 2.002012
## 108    0.020964683 -1.274655 1.316584 -1.960515 2.002444
## 109    0.021131044 -1.274489 1.316751 -1.960348 2.002611
## 110    0.021195331 -1.274424 1.316815 -1.960284 2.002675
autoplot(forecast(fit1))

AR(2)

\[ \begin{align} \hat{y}_{T+1} & = \hat{\phi}_0 + \hat{\phi_1} y_{T} + \hat{\phi}_2 y_{T-1} \\ \hat{y}_{T+2} & = \hat{\phi}_0 + \hat{\phi_1} \hat{y}_{T+1} + \hat{\phi}_2 y_{T} \\ \hat{y}_{T+3} & = \hat{\phi}_0 + \hat{\phi_1} \hat{y}_{T+2} + \hat{\phi}_2 \hat{y}_{T+1} \\ & \cdots \\ \hat{y}_{T+H} & = \hat{\phi}_0 + \hat{\phi_1} \hat{y}_{T+H-1} + \hat{\phi}_2 \hat{y}_{T+H-2} \\ \end{align} \]

Statistical Properties

\[ \begin{align} E_T [y_{T+1}] & = \phi_0 + \phi_1 y_{T} \\ E_T [y_{T+2}] & = \phi_0 + \phi_1 E_T [y_{T+1}] = \phi_0 (1+\phi_1) + \phi^2_1 y_T \\ & \cdots \\ E_T [y_{T+H}] & = \phi_0 (1+\phi_1 + \cdots + \phi_1^{H-1}) + \phi^H_1 y_T \\ \end{align} \]

\[ \lim_{H\to \infty} E_T [y_{T+H}] = \phi_0 (1+\phi_1 + \phi_1^2 + \cdots) = \phi_0/(1-\phi_1) = E[y_t] \]

Forecast error

\[ \begin{align} y_{T+1} - E_T [y_{T+1}] & = v_{T+1} \\ y_{T+2} - E_T [y_{T+2}] & = v_{T+2} + \phi_1 (y_{T+1} - E_T [y_{T+1}]) = v_{T+2} + \phi_1 v_{T+1} \\ & \cdots \\ y_{T+H} - E_T [y_{T+H}] & = v_{T+H} + \phi_1 v_{T+H-1} + \cdots + \phi_1^{H-1} v_{T+1} \end{align} \]

Example

x <- arima.sim(model=list(ar = c(0.5)), n+H)
y <- cumsum(x)
y_training = y[1:n]
fit2 <- arima(y_training, order = c(2,0,0))
autoplot(forecast(fit2))

plot(fit2)

Forecast with VAR model

Decision theory

Square loss function

Back to AR(1)

\[ \begin{align} e_{T+1} & = y_{T+1} - f_t \\ & = y_{T+1} - \hat{y}_{T+1} \\ & = (\phi_0 + \phi_1 y_T + v_{T+1} ) - (\hat{\phi}_0 + \hat{\phi}_1 y_T ) \\ & = [(\phi_0 - \hat{\phi}_0) + (\phi_1 - \hat{\phi}_1)y_T] + v_{T+1} \end{align} \]

\[ var[e_{T+1}] = var[(\phi_0 - \hat{\phi}_0) + (\phi_1 - \hat{\phi}_1)y_T] + var[v_{T+1}] \]

Real data example

d0 <- read.csv(file = "CPIAUCSL.csv", header = TRUE)# seasonally adjusted

d0[,1] <- as.Date(d0[,1], format = "%Y-%m-%d")

d0[,2] <- ts(d0[,2])
d0$r <- c(NA, diff( log(d0[,2]) ) )
d0 = d0[-1,]

plot(y = d0$r, x = d0[,1], type = "l", xlab = "Year")
abline( h = 0, lty = 2)

d0_train <- d0[ d0$DATE < as.Date("2020-01-15", format = "%Y-%m-%d"), ]
d0_test <- d0[ d0$DATE >= as.Date("2020-01-15", format = "%Y-%m-%d"), ]

ar12 <- arima(d0_train$r, order = c(12,0,0))
f.ar12 <- forecast(ar12, h = nrow(d0_test))
e.ar12 <- f.ar12$mean - d0_test$r
matplot( x = d0_test$DATE, y = cbind(f.ar12$lower[,1], f.ar12$upper[,1]), 
         type = "l", lty = 2, col = "red", ylab = "", xlab = "Time",
         ylim = c(-0.007, 0.009)) # 80% confidence interval
lines(x = d0_test$DATE, y = f.ar12$mean, col = "blue")
points(x = d0_test$DATE, y = d0_test$r, col = "black")
lines(x = d0_test$DATE, y = d0_test$r, col = "black")
abline(h = 0, lty = 3)

Real data example (continue)

ma12 <- arima(d0_train$r, order = c(0,0,12))
f.ma12 <- forecast(ma12, h = nrow(d0_test))
e.ma12 <- f.ma12$mean - d0_test$r


matplot( x = d0_test$DATE, y = cbind(f.ma12$lower[,1], f.ma12$upper[,1]), 
         type = "l", lty = 2, col = "red", ylab = "", xlab = "Time",
         ylim = c(-0.007, 0.009)) # 80% confidence interval
lines(x = d0_test$DATE, y = f.ma12$mean, col = "blue")
points(x = d0_test$DATE, y = d0_test$r, col = "black")
lines(x = d0_test$DATE, y = d0_test$r, col = "black")
abline(h = 0, lty = 3)

Metrics of forecast evalaution

Real data example (continue)

accuracy(f.ar12, x = d0_test$r)
##                         ME        RMSE         MAE      MPE     MAPE      MASE
## Training set -1.349173e-05 0.002598823 0.001816334      NaN      Inf 0.8602333
## Test set      6.850085e-04 0.003357473 0.002528049 34.92868 76.41044 1.1973081
##                     ACF1
## Training set 0.006037529
## Test set              NA
accuracy(f.ma12, x = d0_test$r)
##                         ME        RMSE         MAE      MPE     MAPE      MASE
## Training set -5.973922e-06 0.002624132 0.001844581      NaN      Inf 0.8736111
## Test set      4.172479e-04 0.003184436 0.002350124 27.06738 73.93202 1.1130412
##                       ACF1
## Training set -0.0005634359
## Test set                NA

Superior Forecast Ability

\[ t = \frac{H^{-1/2} \sum_{h=1}^H D_{T+h}}{\sqrt{ lrvar( (D_{T+h})_{h=1}^H ) }} \]

Real data example (continue)

dm.test( e1 = e.ar12, e2 = e.ma12, alternative = "two.sided", h = 1, power = 1 )
## 
##  Diebold-Mariano Test
## 
## data:  e.ar12e.ma12
## DM = 2.9038, Forecast horizon = 1, Loss function power = 1, p-value =
## 0.009102
## alternative hypothesis: two.sided
dm.test( e1 = e.ar12, e2 = e.ma12, alternative = "greater", h = 1, power = 1 )
## 
##  Diebold-Mariano Test
## 
## data:  e.ar12e.ma12
## DM = 2.9038, Forecast horizon = 1, Loss function power = 1, p-value =
## 0.004551
## alternative hypothesis: greater
dm.test( e1 = e.ar12, e2 = e.ma12, alternative = "less", h = 1, power = 1 )
## 
##  Diebold-Mariano Test
## 
## data:  e.ar12e.ma12
## DM = 2.9038, Forecast horizon = 1, Loss function power = 1, p-value =
## 0.9954
## alternative hypothesis: less

Forecast combination

Optimal forecast combination: I

\[ \min_{\mathbf{w}\in\mathbb{R}^{N}}\,\frac{1}{2}\mathbf{w}^{\prime}\widehat{\boldsymbol{\Sigma}}\mathbf{w}\ \ \text{subject to }\mathbf{w}^{\prime}\boldsymbol{1}_{N}=1. \]

\[ \widehat{\mathbf{w}}=\frac{\widehat{\boldsymbol{\Sigma}}^{-1}\boldsymbol{1}_{N}} {\ \boldsymbol{1}_{N}^{\prime}\widehat{\boldsymbol{\Sigma}}^{-1}\boldsymbol{1}_{N}}. \]

\[ \widehat{w}_i=\frac{\widehat{\sigma}^{-2}_i } {\sum_{i=1}^N \widehat{\sigma}^{-2}_i}. \] is inversely proportional to \(i\)’s variance.

Optimal forecast combination: II

\[ y_t = \sum_{i=1}^N w_i f_{it} + v_t \]

Information criterion approach

\[ \widehat{w}_i = \frac{\exp(-AIC_i/2)}{\sum_{i=1}^N \exp(-AIC_i/2)} \]

AIC(fit1)
## [1] 275.9598

Simple average

Real data example (contiue)

combine <- foreccomb(observed_vector = d0_train$r,
                     prediction_matrix = cbind(f.ar12$fitted, f.ma12$fitted),
                     newobs = d0_test$r,
                     newpreds = cbind(f.ar12$mean, f.ma12$mean))

comb_BG(combine)$Accuracy_Test
##                    ME        RMSE        MAE      MPE     MAPE
## Test set 0.0005524257 0.003267997 0.00241836 31.03612 74.34991
comb_SA(combine)$Accuracy_Test
##                    ME        RMSE         MAE      MPE     MAPE
## Test set 0.0005511282 0.003267158 0.002417605 30.99803 74.34224
comb_OLS(combine)$Accuracy_Test
##                    ME        RMSE         MAE      MPE     MAPE
## Test set 0.0006388229 0.003308018 0.002470606 33.39845 74.87377

Recent findings

Density of forecast error

\[ y_t = \mu + v_t,\ v_t \sim N(0, \sigma_v^2) \]

\[ u_t = \Phi ( (y_t - \mu)/\sigma_v ) \sim \mathrm{Uniform}[0,1] \]

where \(\Phi(\cdot)\) is the CDF of \(N(0,1)\)

\[ \begin{align} P(u_t \leq \alpha ) & = P(\Phi ( (y_t - \mu)/\sigma_v ) \leq \alpha ) \\ & = P( (y_t - \mu)/\sigma_v \leq \Phi^{-1}(\alpha) ) \\ & = \Phi (\Phi^{-1}(\alpha)) = \alpha \end{align} \]

More general model

Real data example (cotintue)

# integral transformation
hist(f.ar12$residuals/ sqrt(ar12$sigma2) )

hist( pnorm( f.ar12$residuals / sqrt(ar12$sigma2) ) )

Models with regressors

\[ y_t = \beta_0 + \beta_1 x_t + u_t \]

We have data \(\{(y_t, x_t)\}_{t=1}^T\). We want to forecast \(y_{T+1}\)

\[ \begin{align} y_t & = \beta_0 + \beta_1 x_t + u_t \\ x_t & = \phi_0 + \phi_1 x_{t-1} + v_t \\ \end{align} \]

Then, forecast for \(y_{T+1}\) can be done by first predict \(x_{T+1}\) from the second equation, and then plug it into the first equation

Predictive regression

d1 <- read.csv("goyalwelch2003.csv", header = TRUE)
d1 <- d1[d1$year>1970, ]
matplot(y = cbind(d1$DivPrc, d1$DivYield), x = d1$year, type = "l", ylab = "DP/DY ratio")

plot(y = d1$ExcessReturn, x = d1$year, type = "l", ylab = "ExcessReturn")

reg1 <- dynlm::dynlm(ts(ExcessReturn) ~ L(ts(DivPrc)), data = d1)
summary(reg1)
## 
## Time series regression with "ts" data:
## Start = 2, End = 32
## 
## Call:
## dynlm::dynlm(formula = ts(ExcessReturn) ~ L(ts(DivPrc)), data = d1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.45244 -0.10800  0.03182  0.12385  0.22642 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)
## (Intercept)    0.39490    0.24929   1.584    0.124
## L(ts(DivPrc))  0.10170    0.07027   1.447    0.159
## 
## Residual standard error: 0.1709 on 29 degrees of freedom
## Multiple R-squared:  0.06735,    Adjusted R-squared:  0.03519 
## F-statistic: 2.094 on 1 and 29 DF,  p-value: 0.1586
reg2 <- dynlm::dynlm(ts(ExcessReturn) ~ L(ts(DivYield)), data = d1)
summary(reg2)
## 
## Time series regression with "ts" data:
## Start = 2, End = 32
## 
## Call:
## dynlm::dynlm(formula = ts(ExcessReturn) ~ L(ts(DivYield)), data = d1)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.41800 -0.10684  0.03064  0.10970  0.24012 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)  
## (Intercept)       0.4214     0.2407   1.751   0.0906 .
## L(ts(DivYield))   0.1118     0.0694   1.610   0.1182  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1696 on 29 degrees of freedom
## Multiple R-squared:  0.08208,    Adjusted R-squared:  0.05043 
## F-statistic: 2.593 on 1 and 29 DF,  p-value: 0.1182

Issues of predictive regressions

Value-at-Risk

\[ \max_{x} \{x \in \mathbb{R}: F(x) \leq q \} \]

Ways to estimate VaR

Summary

Housing price forecast