# changepoint-skyjacks.R
#
# Simple example of a Poisson changepoint model using MCMCpack and R
#
# Patrick T. Brandt
#
# 20100210 : Initial version

# Read in the data
load("hostages-monthly.RData")

# Get the monthly skyjacks variable
skyjacks <- sm[,"incident"]

# Load the MCMCpack package and the zoo package
library(MCMCpack)

# Plot the data
plot(skyjacks)

# Fit the changepoint models to the data
set.seed(1986)
m1 <- MCMCpoissonChange(skyjacks ~ 1, m=1, c0=1, d0=1, marginal.likelihood="Chib95")
m2 <- MCMCpoissonChange(skyjacks ~ 1, m=2, c0=1, d0=1, marginal.likelihood="Chib95")
m3 <- MCMCpoissonChange(skyjacks ~ 1, m=3, c0=1, d0=1, marginal.likelihood="Chib95")
m4 <- MCMCpoissonChange(skyjacks ~ 1, m=4, c0=1, d0=1, marginal.likelihood="Chib95")
m5 <- MCMCpoissonChange(skyjacks ~ 1, m=5, c0=1, d0=1, marginal.likelihood="Chib95")
m6 <- MCMCpoissonChange(skyjacks ~ 1, m=6, c0=1, d0=1, marginal.likelihood="Chib95")

# Check the convergence of the samples
plot(m1)
plot(m2)
plot(m3)
plot(m4)
plot(m5)
plot(m6)

# Compute the Bayes factors
BayesFactor(m1,m2,m3,m4,m5,m6)

# So we select the 5 changepoint model.

# Now look at when the changepoints occur.  Here's a function that
# will do this and make a prettier plot than the default one produced
# by the code in MCMCpack.


# First, plot where the changepoints are.  Note that we are going to
# do this like a technical analysis plot so we can see the uncertainty
# as well.  Do this with a custom function so we can control more than
# what is done in plotState() and plotChangepoint() in MCMCpack /
# Jong Hee Park's code.


# Load the zoo library so we can get the time indices
library(zoo)

plotChange <- function(x, verbose=TRUE, name=NULL, start=0, freq=1)
{
    # Setup data for plot
    m <- attr(x, "m")
    out <- ts(cbind(attr(x, "y"), attr(x, "prob.state")), start=start, freq=freq)

    # Compute the location of the changepoints
    cp.means <- rep(NA, m+1)      # regime means
    pr.st <- diff(out[,3:(m+2)])  # regime probs
    pr.st[pr.st<0] <- 0           # zeros
    cp.store <- rep(NA, m)        # storage for CPs dates
    cp.idx <- rep(NA, m)

    for(i in 1:m)
    {
        tmp <- cumsum(pr.st[,i])
        cp.idx[i] <- which(tmp>0.5)[1]-1
        cp.store[i] <- index(pr.st)[cp.idx[i]]
    }

    # Now begin plotting the first two panels: data and the regimes
    par(mfrow=c(2+m, 1), mai=c(0.4, 0.6, 0.1, 0.05), xpd=TRUE)
    plot(out[,1], xlab="", ylab=name)
    plot(out[,2:(m+2)], plot.type="single", col=1:(m+1),
         xlab="", lwd=2, ylab="Regime Probabilities")

    # Add the text for the means
    cp.start <- c(1, cp.idx+1)
    cp.end <- c(cp.idx, nrow(out))

    for(i in 1:(m+1))
    {
        cp.mn <- mean(out[cp.start[i]:cp.end[i],1])
        tp <- tsp(as.ts(out))
        cp.mid <- tp[1] + cp.start[i]/tp[3] + (0.5*(cp.end[i]-cp.start[i])/tp[3])
        text(x=cp.mid, y=0.5, round(cp.mn, 1), cex=1.5)
    }

    # Now plot the densities for the changepoints
    par(xpd=FALSE)
    for(i in 1:m)
    {
        plot(pr.st[,i], type="h", main="", xlab="",
             ylab=paste("Pr(Regime ", i+1, ")", sep=""))
        abline(v = cp.store[i], lty = 3, lwd=2, col = i+1)
    }

    if (verbose == TRUE) {
        cat("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n")
        cat("Expected changepoint(s): \n")
        for (i in 1:(m)) print(cp.store[i])
        cat("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n")
    }
}


# Same as the above plotting function, but it only plots the first two
# panels.
plotChange2 <- function(x, verbose=TRUE, name=NULL, start=0, freq=1)
{
    # Setup data for plot
    m <- attr(x, "m")
    out <- ts(cbind(attr(x, "y"), attr(x, "prob.state")), start=start, freq=freq)

    # Compute the location of the changepoints
    cp.means <- rep(NA, m+1)      # regime means
    pr.st <- diff(out[,3:(m+2)])  # regime probs
    pr.st[pr.st<0] <- 0           # zeros
    cp.store <- rep(NA, m)        # storage for CPs dates
    cp.idx <- rep(NA, m)

    for(i in 1:m)
    {
        tmp <- cumsum(pr.st[,i])
        cp.idx[i] <- which(tmp>0.5)[1]-1
        cp.store[i] <- index(pr.st)[cp.idx[i]]
    }

    # Now begin plotting the first two panels: data and the regimes
    par(mfrow=c(2, 1), mai=c(0.5, 0.85, 0.1, 0.1), xpd=TRUE)
    plot(out[,1], xlab="", ylab=name)
    plot(out[,2:(m+2)], plot.type="single", col=1:(m+1),
         xlab="", lwd=2, ylab="Regime Probabilities")

    # Add the text for the means
    cp.start <- c(1, cp.idx+1)
    cp.end <- c(cp.idx, nrow(out))

    for(i in 1:(m+1))
    {
        cp.mn <- mean(out[cp.start[i]:cp.end[i],1])
        tp <- tsp(as.ts(out))
        cp.mid <- tp[1] + cp.start[i]/tp[3] + (0.5*(cp.end[i]-cp.start[i])/tp[3])
        text(x=cp.mid, y=0.5, round(cp.mn, 1), cex=1.25)
    }

    if (verbose == TRUE) {
        cat("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n")
        cat("Expected changepoint(s): \n")
        for (i in 1:(m)) print(cp.store[i])
        cat("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@\n")
    }
}


# Plot the results and get the dates of the changepoints
plotChange(m5, name="Skyjacks", start=c(1968,1), freq=12)

plotChange2(m5, name="Skyjacks", start=c(1968,1), freq=12)