Tema VI: Modelos ARIMA de Box&Jenkins - Lab 1

Curso: Series Cronológicas

Autor/a

Afiliación

Shu Wei Chou Chen

Escuela de Estadística, UCR

1 librerías

library(ggplot2)
library(forecast)
library(fpp2)
library(astsa)
library(car)
library(TSA)
library(tseries)
library(urca)

2 Modelo AR(1)

2.1 Funciones para simular un AR(1)

set.seed(1000)
#' Simulate an AR(1) process without intercept and without burn-in.
#'
#' The recursion is y[1] = a[1] and y[t] = phi1 * y[t - 1] + a[t], where the
#' innovations a[t] are drawn from a normal distribution with mean 0 and
#' variance `sigma2`. Because no burn-in is discarded, the first values still
#' reflect the arbitrary starting point y[1] = a[1].
#'
#' @param N Number of observations to generate.
#' @param phi1 Autoregressive coefficient.
#' @param sigma2 Innovation variance (not the standard deviation).
#' @return Numeric vector of length `N` (empty when `N < 1`).
gen_ar1a <- function(N = 150, phi1 = 0.8, sigma2 = 1) {
  if (N < 1) {
    return(numeric(0))
  }
  # rnorm() expects a standard deviation, so the variance is converted here.
  a <- rnorm(N, mean = 0, sd = sqrt(sigma2))
  # Preallocate instead of growing the vector inside the loop.
  y <- numeric(N)
  y[1] <- a[1]
  # seq_len(N - 1) + 1 is empty for N == 1, unlike 2:N which counts backwards.
  for (i in seq_len(N - 1) + 1) {
    y[i] <- phi1 * y[i - 1] + a[i]
  }
  y
}

#' Simulate an AR(1) process with intercept, discarding a burn-in period.
#'
#' The recursion is y[1] = a[1] and y[t] = C + phi1 * y[t - 1] + a[t], where
#' the innovations a[t] are drawn from a normal distribution with mean 0 and
#' variance `sigma2`. The first 1000 simulated values are dropped so that the
#' returned segment no longer depends on the starting value.
#'
#' @param N Number of observations to return.
#' @param C Intercept of the recursion.
#' @param phi1 Autoregressive coefficient.
#' @param sigma2 Innovation variance (not the standard deviation).
#' @return Numeric vector of length `N`.
gen_ar1b <- function(N = 150, C = 0, phi1 = 0.8, sigma2 = 1) {
  NN <- 1000  # burn-in length
  total <- NN + N
  # rnorm() expects a standard deviation, so the variance is converted here.
  a <- rnorm(total, mean = 0, sd = sqrt(sigma2))
  # Preallocate instead of growing the vector inside the loop.
  y <- numeric(total)
  y[1] <- a[1]
  for (i in 2:total) {
    y[i] <- C + phi1 * y[i - 1] + a[i]
  }
  # Keep exactly the N values after the burn-in; NN:(NN + N) would return
  # N + 1 values because it also includes the last burn-in observation.
  y[NN + seq_len(N)]
}

2.2 AR(1)

phi1=0.6
y <- gen_ar1b(N=150,C=5,phi1=phi1,sigma2=1)

2.3 Simulación y el análisis descriptivo

# descriptiva
ts.plot(y)

mean(y)  #promedio teórico: 5/(1-phi1)

[1] 12.29826

acf(y,lag.max=30)

pacf(y)

acf2(y)   #library(astsa)

     [,1] [,2] [,3]  [,4]  [,5]  [,6]  [,7] [,8] [,9] [,10] [,11] [,12] [,13]
ACF  0.55 0.37 0.32  0.22  0.15  0.03 -0.01 0.02 0.04  0.06  0.04 -0.05  0.04
PACF 0.55 0.10 0.11 -0.02 -0.01 -0.12 -0.03 0.07 0.05  0.06 -0.02 -0.14  0.10
     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
ACF   0.06  0.00  0.10  0.01 -0.01 -0.01 -0.07 -0.08 -0.12 -0.13
PACF  0.03 -0.03  0.16 -0.13 -0.03 -0.04 -0.06 -0.01 -0.02 -0.02

2.4 Estimación del modelo AR(1)

mod0a <- Arima(y, order=c(1,0,0),method="CSS-ML")
mod0b <- Arima(y, order=c(1,0,0),method="ML")
mod0c <- Arima(y, order=c(1,0,0),method="CSS")
summary(mod0a)

Series: y 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1     mean
      0.5458  12.3079
s.e.  0.0677   0.1747

sigma^2 = 0.9791:  log likelihood = -211.84
AIC=429.67   AICc=429.84   BIC=438.73

Training set error measures:
                       ME      RMSE       MAE        MPE     MAPE     MASE
Training set -0.003583648 0.9829323 0.7777108 -0.7004689 6.441715 0.862108
                    ACF1
Training set -0.05598464

summary(mod0b)

Series: y 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1     mean
      0.5458  12.3079
s.e.  0.0677   0.1747

sigma^2 = 0.9791:  log likelihood = -211.84
AIC=429.67   AICc=429.84   BIC=438.73

Training set error measures:
                      ME      RMSE       MAE        MPE     MAPE     MASE
Training set -0.00358382 0.9829323 0.7777108 -0.7004712 6.441715 0.862108
                    ACF1
Training set -0.05598279

summary(mod0c)

Series: y 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1     mean
      0.5478  12.2906
s.e.  0.0680   0.1772

sigma^2 = 0.9763:  log likelihood = -211.94

Training set error measures:
                        ME      RMSE       MAE        MPE     MAPE      MASE
Training set -9.091757e-07 0.9815181 0.7736897 -0.6675575 6.406619 0.8576505
                    ACF1
Training set -0.06819174

No está estimando el intercepto C, sino la media del proceso.

mean(y)

[1] 12.29826

5/(1-phi1) #media teórica

[1] 12.5

2.5 El paquete `tseries`

Este paquete permite la estimación del intercepto.

mod0e<-tseries::arma(y,order=c(1,0),include.intercept=TRUE)
summary(mod0e)


Call:
tseries::arma(x = y, order = c(1, 0), include.intercept = TRUE)

Model:
ARMA(1,0)

Residuals:
     Min       1Q   Median       3Q      Max 
-2.70023 -0.62982  0.01873  0.59927  2.49800 

Coefficient(s):
           Estimate  Std. Error  t value Pr(>|t|)    
ar1         0.54778     0.06799    8.056 8.88e-16 ***
intercept   5.55805     0.83982    6.618 3.64e-11 ***
---
Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Fit:
sigma^2 estimated as 0.9763,  Conditional Sum-of-Squares = 145.47,  AIC = 428.9

2.6 El diagnóstico del modelo AR(1)

#devolvemos al mod0c
res<-mod0c$residuals
ts.plot(res)

acf(res)

pacf(res)

acf2(res)

      [,1] [,2] [,3] [,4] [,5]  [,6]  [,7]  [,8] [,9] [,10] [,11] [,12] [,13]
ACF  -0.07 0.02 0.12 0.04 0.09 -0.05 -0.07  0.02 0.00  0.05  0.05 -0.15  0.06
PACF -0.07 0.02 0.12 0.06 0.09 -0.05 -0.10 -0.02 0.01  0.08  0.09 -0.14  0.01
     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
ACF   0.08 -0.11  0.17 -0.07  0.00  0.04 -0.07 -0.01 -0.05 -0.05
PACF  0.07 -0.09  0.17 -0.02 -0.02 -0.02 -0.07 -0.03 -0.02 -0.01

tsdiag(mod0c)    #library(stats)

checkresiduals(mod0c,lag=10)


    Ljung-Box test

data:  Residuals from ARIMA(1,0,0) with non-zero mean
Q* = 6.1954, df = 9, p-value = 0.7202

Model df: 1.   Total lags used: 10

checkresiduals(mod0c,lag=30)


    Ljung-Box test

data:  Residuals from ARIMA(1,0,0) with non-zero mean
Q* = 30.38, df = 29, p-value = 0.3952

Model df: 1.   Total lags used: 30

2.7 Normalidad

shapiro.test(res)


    Shapiro-Wilk normality test

data:  res
W = 0.99372, p-value = 0.7572

jarque.bera.test(res)


    Jarque Bera Test

data:  res
X-squared = 0.34556, df = 2, p-value = 0.8413

qqPlot(res)

[1] 98 12

2.8 Pronóstico

forecast(mod0c)

    Point Forecast    Lo 80    Hi 80     Lo 95    Hi 95
152       12.54741 11.28113 13.81369 10.610804 14.48402
153       12.43127 10.98746 13.87508 10.223155 14.63938
154       12.36765 10.87469 13.86061 10.084359 14.65094
155       12.33280 10.82540 13.84020 10.027432 14.63817
156       12.31371 10.80201 13.82542 10.001760 14.62567
157       12.30326 10.79026 13.81625  9.989332 14.61718
158       12.29753 10.78415 13.81091  9.983013 14.61205
159       12.29439 10.78090 13.80789  9.979699 14.60909
160       12.29267 10.77914 13.80621  9.977927 14.60742
161       12.29173 10.77819 13.80527  9.976969 14.60649

autoplot(forecast(mod0c))

2.9 Simulación con `arima.sim`

#AR(1)
m<-5 #la media del proceso
y1 <- arima.sim(n = 150, model = list(order = c(1,0,0),ar = c(0.8)),sd=3,rand.gen= rnorm) + m
ts.plot(y1)

acf2(y1)

     [,1] [,2] [,3]  [,4]  [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
ACF  0.79 0.63 0.51  0.33  0.15  0.00 -0.09 -0.17 -0.22 -0.22 -0.20 -0.16 -0.11
PACF 0.79 0.00 0.03 -0.23 -0.14 -0.13  0.08 -0.06  0.02 -0.01  0.04  0.00  0.03
     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
ACF  -0.07 -0.08 -0.05  0.00 -0.01 -0.04 -0.01  0.00 -0.02 -0.05
PACF -0.07 -0.13  0.06  0.09 -0.08 -0.07  0.06  0.01 -0.01 -0.09

mod1<- forecast::Arima(y1, order = c(1, 0, 0))
summary(mod1)

Series: y1 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1    mean
      0.7930  5.6271
s.e.  0.0489  1.1244

sigma^2 = 8.644:  log likelihood = -374.1
AIC=754.19   AICc=754.36   BIC=763.23

Training set error measures:
                     ME     RMSE      MAE       MPE     MAPE      MASE
Training set 0.02229917 2.920453 2.377549 -30.89234 154.6729 0.9571567
                     ACF1
Training set -0.001460519

3 Modelo AR(2)

y2 <- arima.sim(n = 150, model = list(order = c(2,0,0),ar = c(0.6,-0.28)),sd=1,rand.gen= rnorm) 

ts.plot(y2)

ar2.st <- arima(y2, c(2, 0, 0), include.mean=FALSE,
                transform.pars=FALSE, method="ML")
ar2.st$coef

       ar1        ar2 
 0.6302657 -0.2673028

polyroot(c(1, -ar2.st$coef))

[1] 1.178936+1.533358i 1.178936-1.533358i

Mod(polyroot(c(1, -ar2.st$coef)))

[1] 1.934186 1.934186

# Roots of the polynomial whose coefficients are c(1, -ar2.st$coef), i.e. the
# autoregressive polynomial of the fitted AR(2). They are computed once and
# reused below instead of calling polyroot() repeatedly.
ar2.roots <- polyroot(c(1, -ar2.st$coef))
root.comp <- Im(ar2.roots)
root.real <- Re(ar2.roots)
# Plot the roots together with the unit circle.
# The circle coordinates get their own names so that the simulated series
# y1 and y2 are not overwritten, and the path is traced as one closed curve
# (upper half left-to-right, lower half right-to-left).
x <- seq(-1, 1, length.out = 1000)
circ.x <- c(x, rev(x))
circ.y <- c(sqrt(1 - x^2), -sqrt(1 - rev(x)^2))
plot(circ.x, circ.y, xlab = 'Parte Real',
     ylab = 'Parte Imaginaria', type = 'l',
     main = 'Circulo unitario', ylim = c(-2, 2), xlim = c(-2, 2))
abline(h = 0)
abline(v = 0)
points(root.real, root.comp, pch = 19)
legend(-1.5, -1.5, legend = "Raíces del AR(2)", pch = 19)

#Otra posibilidad es usar el inverso de las raíces.
autoplot(ar2.st)

4 Modelo ARMA(1,1)

y3<-arima.sim(n = 150, list(order = c(1,0,1),ar = c(0.88), ma = c(-0.23)),
              sd = sqrt(2))

ts.plot(y3)

acf2(y3)

     [,1] [,2]  [,3] [,4]  [,5]  [,6]  [,7] [,8]  [,9] [,10] [,11] [,12] [,13]
ACF  0.82 0.69  0.55 0.47  0.37  0.27  0.19 0.14  0.09  0.02 -0.05 -0.03 -0.01
PACF 0.82 0.06 -0.07 0.08 -0.11 -0.05 -0.03 0.02 -0.01 -0.13 -0.04  0.20  0.01
     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
ACF   0.01 -0.02 -0.01 -0.05 -0.05 -0.06 -0.06 -0.03 -0.05 -0.05
PACF  0.00 -0.08  0.02 -0.14  0.05  0.04 -0.02  0.07 -0.12  0.06

#ARMA(1,1)
mod3<- forecast::Arima(y3, order = c(1, 0, 1))
summary(mod3)

Series: y3 
ARIMA(1,0,1) with non-zero mean 

Coefficients:
         ar1      ma1     mean
      0.8357  -0.0605  -0.6783
s.e.  0.0523   0.0920   0.6173

sigma^2 = 1.899:  log likelihood = -259.99
AIC=527.98   AICc=528.26   BIC=540.02

Training set error measures:
                     ME     RMSE      MAE       MPE     MAPE    MASE
Training set 0.00444155 1.364335 1.089917 -143.1311 358.1284 0.96821
                     ACF1
Training set 0.0003472122

checkresiduals(mod3,lag=10)


    Ljung-Box test

data:  Residuals from ARIMA(1,0,1) with non-zero mean
Q* = 5.5925, df = 8, p-value = 0.6928

Model df: 2.   Total lags used: 10

#AR(1)
mod3ar1<- forecast::Arima(y3, order = c(1, 0, 0))
summary(mod3ar1)

Series: y3 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1     mean
      0.8158  -0.6744
s.e.  0.0463   0.5890

sigma^2 = 1.892:  log likelihood = -260.21
AIC=526.41   AICc=526.58   BIC=535.45

Training set error measures:
                      ME     RMSE      MAE       MPE     MAPE      MASE
Training set 0.004989702 1.366326 1.092374 -151.9529 362.4927 0.9703933
                    ACF1
Training set -0.04392361

checkresiduals(mod3ar1,lag=10)


    Ljung-Box test

data:  Residuals from ARIMA(1,0,0) with non-zero mean
Q* = 6.5639, df = 9, p-value = 0.6824

Model df: 1.   Total lags used: 10

mod3$aic

[1] 527.9802

mod3ar1$aic

[1] 526.4143

4.1 Identificación con auto.arima

#procedimiento automático (pero tener mucho cuidado!!!)
auto.arima(y3,ic="aicc") #por defecto

Series: y3 
ARIMA(0,1,0) 

sigma^2 = 2.06:  log likelihood = -265.27
AIC=532.54   AICc=532.57   BIC=535.54

auto.arima(y3,ic="aic")

Series: y3 
ARIMA(0,1,0) 

sigma^2 = 2.06:  log likelihood = -265.27
AIC=532.54   AICc=532.57   BIC=535.54

auto.arima(y3,ic="bic")

Series: y3 
ARIMA(0,1,0) 

sigma^2 = 2.06:  log likelihood = -265.27
AIC=532.54   AICc=532.57   BIC=535.54

5 Contraste de raíz unitaria

5.1 Probamos con dos tamaños de serie

TT=150
# prueben con TT=500

5.2 AR(1)

y1 <- arima.sim(n = TT, model = list(order = c(1,0,0),ar = c(0.8)),sd=3,rand.gen= rnorm)
ts.plot(y1)

adf.test(y1)


    Augmented Dickey-Fuller Test

data:  y1
Dickey-Fuller = -2.6633, Lag order = 5, p-value = 0.3
alternative hypothesis: stationary

5.3 ARIMA(0,1,0)

# Simulate a random walk, ARIMA(0,1,0). `sd` and `rand.gen` are arguments of
# arima.sim() itself; placed inside the `model` list they were silently
# ignored and only the defaults made the call behave as intended.
y2 <- arima.sim(n = TT, model = list(order = c(0, 1, 0)),
                sd = 1, rand.gen = rnorm)
ts.plot(y2)

acf2(y2)

     [,1] [,2]  [,3] [,4]  [,5]  [,6] [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
ACF  0.98 0.96  0.94 0.92  0.89  0.87 0.85  0.83  0.80  0.77  0.74  0.71  0.68
PACF 0.98 0.03 -0.03 0.02 -0.02 -0.05 0.01 -0.05 -0.07 -0.08 -0.05 -0.03  0.01
     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23]
ACF   0.65  0.63  0.60  0.57  0.54  0.51  0.48  0.45  0.42  0.39
PACF -0.01  0.00 -0.04 -0.07  0.03 -0.01  0.02 -0.01 -0.04 -0.01

adf.test(y2)


    Augmented Dickey-Fuller Test

data:  y2
Dickey-Fuller = -2.1354, Lag order = 5, p-value = 0.52
alternative hypothesis: stationary

6 Ejemplos reales

6.1 Ejemplo con graduados de ITCR de 1975 a 2002

itcrgrad<-read.csv("ITCR.csv",sep=",") 
y<-ts(itcrgrad$graduados,start=1975)

ts.plot(y)

acf2(y)

     [,1] [,2]  [,3] [,4]  [,5]  [,6] [,7] [,8]  [,9]
ACF  0.81 0.71  0.59 0.52  0.39  0.25 0.15 0.12  0.04
PACF 0.81 0.16 -0.06 0.08 -0.20 -0.17 0.01 0.13 -0.12

Indicación de no estacionariedad. Como ejemplo vamos a ajustar un AR(1) ignorando la no estacionariedad. (1 rezago de f.a.c.p. significativo)

try(mod0 <- Arima(y, order=c(1,0,0)))

Error in stats::arima(x = x, order = order, seasonal = seasonal, include.mean = include.mean,  : 
  non-stationary AR part from CSS

try(mod0a <- Arima(y, order=c(1,0,0),method="CSS-ML"))

Error in stats::arima(x = x, order = order, seasonal = seasonal, include.mean = include.mean,  : 
  non-stationary AR part from CSS

mod0b <- Arima(y, order=c(1,0,0),method="ML")
mod0c <- Arima(y, order=c(1,0,0),method="CSS")
summary(mod0b)

Series: y 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
      ar1        mean
        1    700.0827
s.e.    0  22667.9113

sigma^2 = 9939:  log likelihood = -168.06
AIC=342.12   AICc=343.12   BIC=346.12

Training set error measures:
                   ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
Training set 37.67579 96.06853 72.60758 -3.574074 35.35438 0.9652411 -0.2506579

summary(mod0c)

Series: y 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1       mean
      1.0535  -385.7770
s.e.  0.0647   937.6708

sigma^2 = 8159:  log likelihood = -165.3

Training set error measures:
                     ME     RMSE      MAE      MPE     MAPE      MASE
Training set 0.06042778 87.04048 66.72368 -19.4297 37.70421 0.8870208
                   ACF1
Training set -0.3141227

adf.test(y) #no estacionario

Warning in adf.test(y): p-value greater than printed p-value


    Augmented Dickey-Fuller Test

data:  y
Dickey-Fuller = 0.18783, Lag order = 3, p-value = 0.99
alternative hypothesis: stationary

dif.y<-diff(y)

ts.plot(dif.y)

acf(dif.y)

pacf(dif.y)

adf.test(dif.y)

Warning in adf.test(dif.y): p-value smaller than printed p-value


    Augmented Dickey-Fuller Test

data:  dif.y
Dickey-Fuller = -5.427, Lag order = 2, p-value = 0.01
alternative hypothesis: stationary

mod1 <- Arima(y, order=c(0,1,0))
summary(mod1)

Series: y 
ARIMA(0,1,0) 

sigma^2 = 9571:  log likelihood = -162.06
AIC=326.12   AICc=326.28   BIC=327.41

Training set error measures:
                   ME     RMSE      MAE       MPE     MAPE      MASE       ACF1
Training set 37.75096 96.06824 72.53668 -3.300231 35.08901 0.9642985 -0.2508814

6.1.1 Diagnóstico

# Residuals of the ARIMA(0,1,0) fit, extracted by their full component name;
# `$res` only worked through partial matching of `$residuals`.
res <- mod1$residuals
ts.plot(res)

acf(res)

pacf(res)

tsdiag(mod1)

checkresiduals(mod1,lag=10)


    Ljung-Box test

data:  Residuals from ARIMA(0,1,0)
Q* = 8.2233, df = 10, p-value = 0.607

Model df: 0.   Total lags used: 10

6.1.1.1 Normalidad

shapiro.test(res)


    Shapiro-Wilk normality test

data:  res
W = 0.95826, p-value = 0.3168

jarque.bera.test(res)


    Jarque Bera Test

data:  res
X-squared = 0.067392, df = 2, p-value = 0.9669

qqPlot(res)

[1] 28  6

6.1.1.2 Pronóstico

forecast(mod1)

     Point Forecast    Lo 80    Hi 80    Lo 95    Hi 95
2003           1084 958.6244 1209.376 892.2545 1275.745
2004           1084 906.6921 1261.308 812.8310 1355.169
2005           1084 866.8431 1301.157 751.8871 1416.113
2006           1084 833.2488 1334.751 700.5091 1467.491
2007           1084 803.6516 1364.348 655.2441 1512.756
2008           1084 776.8937 1391.106 614.3215 1553.679
2009           1084 752.2873 1415.713 576.6892 1591.311
2010           1084 729.3842 1438.616 541.6620 1626.338
2011           1084 707.8732 1460.127 508.7636 1659.236
2012           1084 687.5275 1480.472 477.6476 1690.352

autoplot(forecast(mod1))

6.1.1.3 Identificación automática

auto.arima(y,ic="aicc", allowdrift = FALSE) #por defecto

Series: y 
ARIMA(0,1,0) 

sigma^2 = 9571:  log likelihood = -162.06
AIC=326.12   AICc=326.28   BIC=327.41

auto.arima(y,ic="aic", allowdrift = FALSE)

Series: y 
ARIMA(0,1,0) 

sigma^2 = 9571:  log likelihood = -162.06
AIC=326.12   AICc=326.28   BIC=327.41

auto.arima(y,ic="bic", allowdrift = FALSE)

Series: y 
ARIMA(0,1,0) 

sigma^2 = 9571:  log likelihood = -162.06
AIC=326.12   AICc=326.28   BIC=327.41

auto.arima(y,ic="aicc") #por defecto

Series: y 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1    drift
      -0.3872  36.5960
s.e.   0.1712  10.2477

sigma^2 = 7686:  log likelihood = -158.14
AIC=322.28   AICc=323.33   BIC=326.17

auto.arima(y,ic="aic")

Series: y 
ARIMA(0,1,1) with drift 

Coefficients:
          ma1    drift
      -0.3872  36.5960
s.e.   0.1712  10.2477

sigma^2 = 7686:  log likelihood = -158.14
AIC=322.28   AICc=323.33   BIC=326.17

auto.arima(y,ic="bic")

Series: y 
ARIMA(0,1,0) with drift 

Coefficients:
        drift
      39.1481
s.e.  17.2545

sigma^2 = 8348:  log likelihood = -159.7
AIC=323.41   AICc=323.91   BIC=326

¿Qué es un modelo con desvío? Un \(ARIMA(0,1,0)\) con desvío se define como \((1-B)Y_t = C + a_t\), donde la constante \(C\) es el desvío (*drift*).

6.2 Tasa de desempleo

Ejemplo 1-3 tomado de Pfaff (2008): Tasa de desempleo

data(npext)
y <- ts(na.omit(npext$unemploy), start=1909, end=1988,
        frequency=1)
plot(y, ylab="unemployment rate (logarithm)")

acf2(y,ylim=c(-1, 1))

     [,1]  [,2] [,3]  [,4] [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
ACF  0.75  0.46 0.32  0.17 0.04 -0.01 -0.05 -0.16 -0.22 -0.23 -0.27 -0.30 -0.28
PACF 0.75 -0.23 0.15 -0.19 0.00  0.04 -0.10 -0.17  0.03 -0.09 -0.07 -0.08 -0.03
     [,14] [,15] [,16] [,17] [,18] [,19]
ACF  -0.25 -0.22 -0.10 -0.01 -0.03 -0.03
PACF -0.04  0.01  0.14 -0.10 -0.09 -0.01

6.2.1 ARMA(2,0)

arma20 <- Arima(y, order=c(2, 0, 0))
summary(arma20)

Series: y 
ARIMA(2,0,0) with non-zero mean 

Coefficients:
         ar1      ar2    mean
      0.9297  -0.2356  1.6988
s.e.  0.1079   0.1077  0.1586

sigma^2 = 0.2026:  log likelihood = -48.59
AIC=105.18   AICc=105.71   BIC=114.71

Training set error measures:
                      ME      RMSE       MAE       MPE     MAPE      MASE
Training set 0.003310401 0.4415365 0.3455724 -14.36729 31.38812 0.9642293
                   ACF1
Training set 0.04651987

6.2.1.1 Log-verosimilitud

#AIC
loglik <- arma20$loglik
(aic<- -2*loglik+2*(2+1+1))

[1] 105.1803

arma20$aic

[1] 105.1803

#AICC
TT <- length(y)
(aicc <- aic+(2*(2+1+1)*(2+1+2))/(TT-2-1-2))

[1] 105.7136

arma20$aicc

[1] 105.7136

#BIC
(bic <- aic+(log(TT)-2)*(2+1+1))

[1] 114.7084

arma20$bic

[1] 114.7084

6.2.1.2 Diagnóstico

res20 <- residuals(arma20)
tsdiag(arma20)

checkresiduals(arma20)


    Ljung-Box test

data:  Residuals from ARIMA(2,0,0) with non-zero mean
Q* = 11.648, df = 8, p-value = 0.1676

Model df: 2.   Total lags used: 10

# Raíces del polinomio autorregresivo
arma20$coef

       ar1        ar2  intercept 
 0.9296974 -0.2355995  1.6988330

autoplot(arma20)

#Normalidad
shapiro.test(res20)


    Shapiro-Wilk normality test

data:  res20
W = 0.99313, p-value = 0.9501

# Q-Q plot of the ARMA(2,0) residuals; `res` still holds the residuals of a
# model fitted earlier to a different series.
qqPlot(res20)

[1] 28  6

6.2.2 ARMA(1,1)

arma11 <- Arima(y, order = c(1, 0, 1))
summary(arma11)

Series: y 
ARIMA(1,0,1) with non-zero mean 

Coefficients:
         ar1     ma1    mean
      0.5272  0.5487  1.6934
s.e.  0.1221  0.1456  0.1546

sigma^2 = 0.1917:  log likelihood = -46.51
AIC=101.01   AICc=101.55   BIC=110.54

Training set error measures:
                      ME      RMSE       MAE       MPE     MAPE      MASE
Training set 0.005141342 0.4295382 0.3319403 -13.06918 29.72265 0.9261926
                   ACF1
Training set -0.0428558

tsdiag(arma11)

c(arma20$aic,arma20$aicc,arma20$bic)

[1] 105.1803 105.7136 114.7084

c(arma11$aic,arma11$aicc,arma11$bic)

[1] 101.0149 101.5482 110.5430

res11 <- residuals(arma11)
ts.plot(res11)

shapiro.test(res11)


    Shapiro-Wilk normality test

data:  res11
W = 0.98617, p-value = 0.5456

tsdiag(arma11)

checkresiduals(arma11)


    Ljung-Box test

data:  Residuals from ARIMA(1,0,1) with non-zero mean
Q* = 6.2487, df = 8, p-value = 0.6194

Model df: 2.   Total lags used: 10

autoplot(arma11)

arma20$aic

[1] 105.1803

arma11$aic

[1] 101.0149

6.2.3 `auto.arima()`

arma.auto<-auto.arima(y, max.p = 3, max.q = 3, start.p = 1,
                      start.q = 1, ic = "aic")

arma.auto

Series: y 
ARIMA(1,0,1) with non-zero mean 

Coefficients:
         ar1     ma1    mean
      0.5272  0.5487  1.6934
s.e.  0.1221  0.1456  0.1546

sigma^2 = 0.1917:  log likelihood = -46.51
AIC=101.01   AICc=101.55   BIC=110.54

6.2.3.1 Pronóstico

plot(forecast(arma11))

6.2.3.2 Comprobación de cálculo del pronóstico

# Manual reconstruction of the ARMA(1,1) forecast plot: point forecasts from
# predict() plus bands of +/- 2 standard errors, drawn after the observed
# series. Each forecast path is padded with NA over the sample and anchored
# at the last observed value so the lines join the data.
n.ahead <- 10
arma11.pred <- predict(arma11, n.ahead = n.ahead)
# Time attributes are taken from y instead of being hard-coded.
y.start <- tsp(y)[1]
y.end <- tsp(y)[2]
y.freq <- frequency(y)
y.last <- y[length(y)]
na.pad <- rep(NA, length(y) - 1)
# Named pred.path rather than `predict` to avoid masking the generic.
pred.path <- ts(c(na.pad, y.last, arma11.pred$pred),
                start = y.start, frequency = y.freq)
upper <- ts(c(na.pad, y.last, arma11.pred$pred + 2 * arma11.pred$se),
            start = y.start, frequency = y.freq)
lower <- ts(c(na.pad, y.last, arma11.pred$pred - 2 * arma11.pred$se),
            start = y.start, frequency = y.freq)
observed <- ts(c(y, rep(NA, n.ahead)), start = y.start,
               frequency = y.freq)
## Plot of actual and forecasted values
plot(observed, type = "l",
     ylab = "Actual and predicted values", xlab = "")
lines(pred.path, col = "blue", lty = 2)
lines(lower, col = "red", lty = 5)
lines(upper, col = "red", lty = 5)
# Vertical marker at the last observed time point.
abline(v = y.end, col = "gray", lty = 3)

6.3 Producto nacional bruto

Producto nacional bruto de EE. UU. (en miles de millones; datos trimestrales de 1947 a 2002). Los datos fueron ajustados estacionalmente. (Ejemplo 3.40, Shumway & Stoffer)

y<-astsa::gnp
ts.plot(y)

acf2(y, 50)

     [,1] [,2]  [,3] [,4] [,5]  [,6]  [,7]  [,8]  [,9] [,10] [,11] [,12] [,13]
ACF  0.99 0.97  0.96 0.94 0.93  0.91  0.90  0.88  0.87  0.85  0.83  0.82  0.80
PACF 0.99 0.00 -0.02 0.00 0.00 -0.02 -0.02 -0.02 -0.01 -0.02  0.00 -0.01  0.01
     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25]
ACF   0.79  0.77  0.76  0.74  0.73  0.72   0.7  0.69  0.68  0.66  0.65  0.64
PACF  0.00  0.00  0.00  0.01  0.00 -0.01   0.0 -0.01 -0.01  0.00  0.00  0.00
     [,26] [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37]
ACF   0.62  0.61  0.60  0.59  0.57  0.56  0.55  0.54  0.52  0.51   0.5  0.49
PACF -0.01  0.00 -0.01 -0.01 -0.01 -0.01 -0.01  0.00 -0.01  0.00   0.0  0.00
     [,38] [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49]
ACF   0.48  0.47  0.45  0.44  0.43  0.42  0.41  0.40  0.38  0.37  0.36  0.35
PACF -0.01 -0.01 -0.01  0.00 -0.01 -0.01 -0.01 -0.01 -0.01 -0.01 -0.02 -0.02
     [,50]
ACF   0.33
PACF -0.01

6.3.1 Contraste de Dickey-Fuller

adf.test(y)

Warning in adf.test(y): p-value greater than printed p-value


    Augmented Dickey-Fuller Test

data:  y
Dickey-Fuller = -0.29497, Lag order = 6, p-value = 0.99
alternative hypothesis: stationary

ts.plot(diff(y))

ts.plot(log(y))

dif.log.y = diff(log(y))      # growth rate (first difference of the log series)
plot(dif.log.y)

acf2(dif.log.y, 24)

     [,1] [,2]  [,3]  [,4]  [,5]  [,6]  [,7]  [,8] [,9] [,10] [,11] [,12] [,13]
ACF  0.35 0.19 -0.01 -0.12 -0.17 -0.11 -0.09 -0.04 0.04  0.05  0.03 -0.12 -0.13
PACF 0.35 0.08 -0.11 -0.12 -0.09  0.01 -0.03 -0.02 0.05  0.01 -0.03 -0.17 -0.06
     [,14] [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24]
ACF  -0.10 -0.11  0.05  0.07  0.10  0.06  0.07 -0.09 -0.05 -0.10 -0.05
PACF  0.02 -0.06  0.10  0.00  0.02 -0.04  0.01 -0.11  0.03 -0.03  0.00

adf.test(dif.log.y)

Warning in adf.test(dif.log.y): p-value smaller than printed p-value


    Augmented Dickey-Fuller Test

data:  dif.log.y
Dickey-Fuller = -6.1756, Lag order = 6, p-value = 0.01
alternative hypothesis: stationary

6.3.2 Identificación de modelos

#AR(1)
moda<-Arima(dif.log.y, order=c(1,0,0))
summary(moda)

Series: dif.log.y 
ARIMA(1,0,0) with non-zero mean 

Coefficients:
         ar1    mean
      0.3467  0.0083
s.e.  0.0627  0.0010

sigma^2 = 9.112e-05:  log likelihood = 718.61
AIC=-1431.22   AICc=-1431.11   BIC=-1421.01

Training set error measures:
                       ME        RMSE        MAE  MPE MAPE      MASE
Training set 5.572162e-06 0.009502405 0.00713417 -Inf  Inf 0.6207883
                    ACF1
Training set -0.02706632

autoplot(moda)

#MA(2)
modb<-Arima(dif.log.y, order=c(0,0,2))
summary(modb)

Series: dif.log.y 
ARIMA(0,0,2) with non-zero mean 

Coefficients:
         ma1     ma2    mean
      0.3028  0.2035  0.0083
s.e.  0.0654  0.0644  0.0010

sigma^2 = 9.041e-05:  log likelihood = 719.96
AIC=-1431.93   AICc=-1431.75   BIC=-1418.32

Training set error measures:
                       ME       RMSE         MAE  MPE MAPE      MASE       ACF1
Training set 9.940243e-06 0.00944414 0.007108452 -Inf  Inf 0.6185504 0.01725908

autoplot(modb)

checkresiduals(moda)


    Ljung-Box test

data:  Residuals from ARIMA(1,0,0) with non-zero mean
Q* = 9.8979, df = 7, p-value = 0.1944

Model df: 1.   Total lags used: 8

checkresiduals(modb)


    Ljung-Box test

data:  Residuals from ARIMA(0,0,2) with non-zero mean
Q* = 7.6054, df = 6, p-value = 0.2685

Model df: 2.   Total lags used: 8

c(moda$aic,moda$aicc,moda$bic)

[1] -1431.221 -1431.111 -1421.013

c(modb$aic,modb$aicc,modb$bic)

[1] -1431.929 -1431.745 -1418.319

modc<-Arima(log(y),order=c(1,1,0),include.drift=TRUE)
summary(modc)

Series: log(y) 
ARIMA(1,1,0) with drift 

Coefficients:
         ar1   drift
      0.3467  0.0083
s.e.  0.0627  0.0010

sigma^2 = 9.136e-05:  log likelihood = 718.61
AIC=-1431.22   AICc=-1431.11   BIC=-1421.01

Training set error measures:
                       ME        RMSE         MAE          MPE       MAPE
Training set 3.827644e-05 0.009493664 0.007134913 0.0009659935 0.08772583
                  MASE        ACF1
Training set 0.1928199 -0.02800154

modd<-Arima(y,order=c(1,1,0),include.drift=TRUE,lambda=0)
summary(modd)

Series: y 
ARIMA(1,1,0) with drift 
Box Cox transformation: lambda= 0 

Coefficients:
         ar1   drift
      0.3467  0.0083
s.e.  0.0627  0.0010

sigma^2 = 9.136e-05:  log likelihood = 718.61
AIC=-1431.22   AICc=-1431.11   BIC=-1421.01

Training set error measures:
                    ME     RMSE      MAE           MPE      MAPE      MASE
Training set -1.361224 37.96823 28.76763 -0.0006770822 0.7134792 0.1831012
                    ACF1
Training set -0.07755085

mode<-Arima(y,order=c(0,1,2),include.drift=TRUE,lambda=0)
summary(mode)

Series: y 
ARIMA(0,1,2) with drift 
Box Cox transformation: lambda= 0 

Coefficients:
         ma1     ma2   drift
      0.3028  0.2035  0.0083
s.e.  0.0654  0.0644  0.0010

sigma^2 = 9.066e-05:  log likelihood = 719.96
AIC=-1431.93   AICc=-1431.74   BIC=-1418.32

Training set error measures:
                    ME     RMSE      MAE           MPE      MAPE      MASE
Training set -1.367236 37.76604 28.61229 -0.0001880699 0.7108687 0.1821125
                    ACF1
Training set -0.02703248

checkresiduals(modd,lag=10)


    Ljung-Box test

data:  Residuals from ARIMA(1,1,0) with drift
Q* = 10.69, df = 9, p-value = 0.2976

Model df: 1.   Total lags used: 10

checkresiduals(mode,lag=10)


    Ljung-Box test

data:  Residuals from ARIMA(0,1,2) with drift
Q* = 8.6404, df = 8, p-value = 0.3735

Model df: 2.   Total lags used: 10

# Residuals of the ARIMA(1,1,0)-with-drift fit, extracted by their full
# component name; `$res` only worked through partial matching of `$residuals`.
res <- modd$residuals

ts.plot(res)

acf(res)

pacf(res)

#Normalidad
shapiro.test(res)


    Shapiro-Wilk normality test

data:  res
W = 0.97558, p-value = 0.0006627

jarque.bera.test(res)


    Jarque Bera Test

data:  res
X-squared = 23.489, df = 2, p-value = 7.933e-06

qqPlot(res)

[1]  13 126