-
-
Save hliang/070cb7ddc0e5f86b49b5ea342c93f1b2 to your computer and use it in GitHub Desktop.
time series analysis - demo script
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# king death | |
kings <- scan("http://robjhyndman.com/tsdldata/misc/kings.dat",skip=3) | |
kings | |
# store the data in a time series object | |
kingstimeseries <- ts(kings) | |
kingstimeseries | |
# the number of births per month in New York city, from January 1946 to December 1959 (originally collected by Newton) | |
births <- scan("http://robjhyndman.com/tsdldata/data/nybirths.dat") | |
birthstimeseries <- ts(births, frequency=12, start=c(1946,1)) | |
birthstimeseries | |
# monthly sales for a souvenir shop at a beach resort town in Queensland, Australia, for January 1987-December 1993 (original data from Wheelwright and Hyndman, 1998). | |
souvenir <- scan("http://robjhyndman.com/tsdldata/data/fancy.dat") | |
souvenirtimeseries <- ts(souvenir, frequency=12, start=c(1987,1)) | |
souvenirtimeseries | |
# this time series could probably be described using an additive model, since the random fluctuations in the data are roughly constant in size over time. | |
plot.ts(kingstimeseries) | |
# seasonal variation | |
plot.ts(birthstimeseries) | |
# the size of the seasonal fluctuations and random fluctuations seem to increase with the level of the time series. we may need to transform the time series in order to get a transformed time series that can be described using an additive model. | |
plot.ts(souvenirtimeseries) | |
logsouvenirtimeseries <- log(souvenirtimeseries) | |
plot.ts(logsouvenirtimeseries) | |
## Decomposing Time Series | |
# Decomposing Non-Seasonal Data | |
library("TTR") | |
kingstimeseriesSMA3 <- SMA(kingstimeseries,n=3) | |
plot.ts(kingstimeseriesSMA3) | |
kingstimeseriesSMA8 <- SMA(kingstimeseries,n=8) | |
plot.ts(kingstimeseriesSMA8) | |
# Decomposing Seasonal Data | |
birthstimeseriescomponents <- decompose(birthstimeseries) | |
birthstimeseriescomponents$seasonal | |
plot(birthstimeseriescomponents) | |
# Seasonally Adjusting | |
birthstimeseriesseasonallyadjusted <- birthstimeseries - birthstimeseriescomponents$seasonal | |
plot(birthstimeseriesseasonallyadjusted) | |
kingtimeseriesdiff1 <- diff(kingstimeseries, differences=1) | |
plot.ts(kingtimeseriesdiff1) | |
acf(kingtimeseriesdiff1, lag.max=20) | |
acf(kingtimeseriesdiff1, lag.max=20, plot=FALSE) | |
pacf(kingtimeseriesdiff1, lag.max=20) # plot a partial correlogram | |
pacf(kingtimeseriesdiff1, lag.max=20, plot=FALSE) # get the partial autocorrelation values | |
auto.arima(kings) | |
# | |
# | |
setwd("~/") | |
## Monthly Retail Sales - Sporting goods, hobby, book, and music stores | |
hobby_adj = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=3, nrows=25, row.names=1) | |
hobby_fac = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=36, nrows=25, row.names=1) | |
hobby = hobby_adj * hobby_fac # raw value before seasonal adjustment | |
# rownames(hobby) = hobby[, 1] | |
hobby = as.vector(t(hobby)); hobby | |
hobby_ts = ts(hobby, frequency=12, start=c(1992,1)) | |
plot(hobby_ts, main="Monthly Retail Sales - Sporting goods, hobby, book, and music stores", ylab="Million Dollars") | |
# A seasonal time series consists of a trend component, a seasonal component and an irregular component. Decomposing the time series means separating the time series into these three components: that is, estimating these three components. | |
hobby_components <- decompose(hobby_ts) | |
plot(hobby_components$seasonal) | |
# Seasonally Adjusting | |
# estimate the seasonal component using "decompose()", and then subtract the seasonal component from the original time series | |
hobby_ts_seasonallyadjusted <- hobby_ts - hobby_components$seasonal | |
plot(hobby_ts_seasonallyadjusted) | |
## Forecasting | |
# Holt-Winters Exponential Smoothing | |
hobby_fc_1 = HoltWinters(hobby_ts) | |
hobby_fc_1 | |
hobby_fc_1$SSE | |
plot(hobby_fc_1) | |
hobby_fc_2 = forecast.HoltWinters(hobby_fc_1, h=36) | |
plot(hobby_fc_2) | |
acf(hobby_fc_2$residuals, lag.max=36, na.action=na.pass) | |
Box.test(hobby_fc_2$residuals, lag=36, type="Ljung-Box") | |
plot.ts(hobby_fc_2$residuals) # make a time plot | |
plotForecastErrors(hobby_fc_2$residuals) # make a histogram | |
hist(hobby_fc_2$residuals) | |
# arima | |
hobby_arima_fit = auto.arima(hobby) | |
hobby_fc_3 = forecast.Arima(hobby_arima_fit, h=36) | |
plot.forecast(hobby_fc_3) | |
############################### | |
## Step 0: Prepare Time Series | |
setwd("~/") | |
## Monthly Retail Sales - Sporting goods, hobby, book, and music stores | |
hobby_adj = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=3, nrows=25, row.names=1) | |
hobby_fac = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=36, nrows=25, row.names=1) | |
hobby = hobby_adj * hobby_fac # raw value before seasonal adjustment | |
# rownames(hobby) = hobby[, 1] | |
hobby = as.vector(t(hobby)); hobby | |
hobby_ts = ts(hobby, frequency=12, start=c(1992,1)) | |
## Step 1: Visualize the Time Series | |
plot(hobby_ts) | |
hobby_ts_components <- decompose(hobby_ts) | |
plot(hobby_ts_components) | |
## Step 2: Stationarize the Series | |
# test for stationary | |
adf.test(hobby_ts, alternative="stationary", k=0) | |
## Step 3: Find Optimal Parameters | |
par(mfrow=c(2,1)) | |
acf(hobby_ts) | |
pacf(hobby_ts) | |
par(mfrow=c(1,1)) | |
auto.arima(hobby_ts) | |
## Step 4: Build ARIMA Model | |
hobby_autoarima_fit = auto.arima(hobby_ts) | |
# # manually setting p, d, q | |
# fit <- arima(hobby_ts, order=c(1, 1, 2), seasonal = list(order = c(2, 1, 2), period = 12)) | |
# fit <- arima(hobby_ts, order=c(0, 1, 2), seasonal = list(order = c(2, 1, 2), period = 12)) | |
## Step 5: Make Predictions | |
hobby_fc_4 = forecast.Arima(hobby_autoarima_fit, h=12*3) | |
plot.forecast(hobby_fc_4) | |
# pred <- predict(fit, n.ahead = 12*3) | |
# ts.plot(hobby_ts, pred$pred, lty = c(1,3)) | |
# plot(pred$pred) |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
--- | |
title: "Time Series Analysis Demo" | |
author: "H. Liang" | |
date: "April 28, 2017" | |
output: html_document | |
--- | |
```{r setup, include=FALSE} | |
knitr::opts_chunk$set(echo = TRUE) | |
``` | |
## Data set 1: Monthly Retail Sales - Sporting goods, hobby, book, and music stores | |
Source: [U.S. Bureau of the Censu](http://www.census.gov/retail/) | |
### Step 0: Prepare Time Series | |
Read data and create time series objects: | |
```{r, echo=FALSE} | |
library(forecast) | |
library(tseries) | |
hobby_adj = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=3, nrows=25, row.names=1) | |
hobby_fac = read.table("~/SportingGoodsHobbyBookandMusicStores.txt", sep="", header=TRUE, stringsAsFactors=FALSE, skip=36, nrows=25, row.names=1) | |
hobby = hobby_adj * hobby_fac # raw value before seasonal adjustment | |
hobby = as.vector(t(hobby)) | |
``` | |
```{r, echo=TRUE} | |
hobby_ts = ts(hobby, frequency=12, start=c(1992,1)) | |
``` | |
### Step 1: Visualize the Time Series | |
Plot the data: | |
```{r} | |
plot(hobby_ts, main="Monthly Retail Sales", ylab="Million Dollars") | |
``` | |
```{r, echo=FALSE} | |
hobby_ts_components <- decompose(hobby_ts) | |
plot(hobby_ts_components) | |
``` | |
### Step 2: Stationarize the Series | |
Stationarize the Series if necessary. Then test for stationary: | |
```{r} | |
adf.test(hobby_ts, alternative="stationary", k=0) | |
``` | |
### Step 3: Find Optimal Parameters | |
check out the acf and pacf plots: | |
```{r, echo=FALSE} | |
# par(mfrow=c(2,1)) | |
acf(hobby_ts) | |
pacf(hobby_ts) | |
# par(mfrow=c(1,1)) | |
# auto.arima(hobby_ts) | |
``` | |
### Step 4: Build the Model | |
Build a model manually, or let `auto.arima()` search for the best one (the search could be slow). | |
```{r} | |
hobby_autoarima_fit = auto.arima(hobby_ts) | |
hobby_autoarima_fit | |
``` | |
### Step 5: Make Predictions | |
Predict the future 3 years: | |
```{r} | |
hobby_fc_4 = forecast.Arima(hobby_autoarima_fit, h=12*3) | |
plot.forecast(hobby_fc_4) | |
``` | |
## Data set 2: User login frequency | |
We extracted hourly login counts from `last` output. Results of one login node are used, sessions of 5 seconds or shorter are ignored. | |
```{r, echo=FALSE} | |
suppressMessages(library(lubridate)) | |
suppressMessages(library(zoo)) | |
suppressWarnings(library(xts)) | |
login_raw = read.table("~/time_series/last_ln0_0426", header=FALSE, stringsAsFactors=FALSE, nrows=35851) | |
# head(login_raw) | |
login = login_raw[grep("^\\(00:0[0-5]\\)$", login_raw$V10, invert=TRUE), ] # remove login sessions of 5 seconds or shorter | |
login$datetime = paste(login$V5, login$V6, login$V7, sep=" ") | |
login$datetime = strptime(login$datetime, format="%b %e %R") | |
login2 = aggregate(list(counts=login$datetime), | |
list(hourofday = cut(login$datetime, "1 hour")), | |
length) | |
login2$hourofday = as.POSIXct(strptime(login2$hourofday, format="%Y-%m-%d %H:%M:%S")) # convert to dat/time format (POSIXlt to POSIXct) | |
# str(login2) | |
allhours = seq(min(login$datetime), max(login$datetime), by="1 hour") # POSIXct | |
allhours = data.frame(allhours=floor_date(allhours, unit="hour")) | |
# str(allhours) | |
# allhours$allhours = as.POSIXlt(allhours$allhours) | |
# head(login); dim(login) | |
# head(login$datetime) | |
login3 = merge(login2, allhours, by.x="hourofday", by.y="allhours", all=TRUE) | |
login3$counts[is.na(login3$counts)] = 0 | |
head(login3) | |
#dim(login3) | |
#plot(login2$hourofday, login2$x, type="l", xlab="time", ylab="logins") | |
#plot(login3$hourofday, login3$counts, type="l", xlab="time", ylab="logins") | |
## convert to zoo boject | |
login4 = zoo(login3$counts, order.by=login3$hourofday) | |
login5 = ts(login4, frequency=12, start=c(1992,1)) | |
login_ts = xts(login3$counts, order.by=login3$hourofday) | |
attr(login_ts, 'frequency') = 24 # set frequency attribute | |
#str(login_ts) | |
# | |
``` | |
### Step 1: Visualize the Time Series | |
Plot the data: | |
```{r} | |
plot(login_ts) | |
# Decomposing | |
login_ts_comp <- decompose(as.ts(login_ts)) | |
plot(login_ts_comp) | |
``` | |
### Step 2: Stationarize the Series | |
Stationarize the Series if necessary. Then test for stationary: | |
```{r} | |
adf.test(login_ts, alternative="stationary", k=0) | |
``` | |
### Step 3: Find Optimal Parameters | |
check out the acf and pacf plots: | |
```{r, echo=FALSE} | |
# par(mfrow=c(2,1)) | |
acf(login_ts) | |
pacf(login_ts) | |
# par(mfrow=c(1,1)) | |
# auto.arima(hobby_ts) | |
``` | |
### Step 4: Build the Model | |
Build a model manually, or let `auto.arima()` search for the best one (the search could be slow). | |
```{r} | |
#login_fit = auto.arima(login_ts) # auto.arima: ARIMA(1,0,1)(1,0,0)[24] with non-zero mean | |
#login_fit | |
login_fit2 <- arima(login_ts, order=c(1, 0, 1), seasonal = list(order = c(1, 0, 0), period = 24)) | |
``` | |
### Step 5: Make Predictions | |
Predict the future 7 days: | |
```{r} | |
#login_fc = forecast.Arima(login_fit, h=24*7) | |
#plot.forecast(login_fc) | |
login_fc2 = forecast.Arima(login_fit2, h=24*7) | |
plot.forecast(login_fc2) | |
``` | |
### Also Try Exponential Smoothing method | |
```{r} | |
login_fit3 <- HoltWinters(login_ts) # Holt-Winters Exponential Smoothing | |
#login_fit3 | |
plot(login_fit3) | |
login_fc3 <- forecast.HoltWinters(login_fit3, h=24*7) | |
plot.forecast(login_fc3) | |
``` |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment