Comparison with other R packages

Data setup

Univariate mean change

# Univariate mean change
set.seed(1)
p <- 1
mean_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_1)

Univariate mean and/or variance change

# Univariate mean and/or variance change
set.seed(1)
p <- 1
mv_data_1 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_1)

Multivariate mean change

# Multivariate mean change
set.seed(1)
p <- 3
mean_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(400, mean = rep(50, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(2, p), sigma = diag(100, p))
)

plot.ts(mean_data_3)

Multivariate mean and/or variance change

# Multivariate mean and/or variance change
set.seed(1)
p <- 4
mv_data_3 <- rbind(
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(100, p)),
  mvtnorm::rmvnorm(300, mean = rep(0, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(400, mean = rep(10, p), sigma = diag(1, p)),
  mvtnorm::rmvnorm(300, mean = rep(10, p), sigma = diag(100, p))
)

plot.ts(mv_data_3)

Linear regression

# Linear regression
set.seed(1)
n <- 300
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(c(1, 3.2, -1, 0), c(-1, -0.5, 2.5, -2), c(0.8, 0, 1, 2))
y <- c(
  x[1:100, ] %*% theta_0[1, ] + rnorm(100, 0, 3),
  x[101:200, ] %*% theta_0[2, ] + rnorm(100, 0, 3),
  x[201:n, ] %*% theta_0[3, ] + rnorm(100, 0, 3)
)
lm_data <- data.frame(y = y, x = x)

plot.ts(lm_data)

Logistic regression

# Logistic regression
set.seed(1)
n <- 500
p <- 4
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta <- rbind(rnorm(p, 0, 1), rnorm(p, 2, 1))
y <- c(
  rbinom(300, 1, 1 / (1 + exp(-x[1:300, ] %*% theta[1, ]))),
  rbinom(200, 1, 1 / (1 + exp(-x[301:n, ] %*% theta[2, ])))
)
binomial_data <- data.frame(y = y, x = x)

plot.ts(binomial_data)

Poisson regression

# Poisson regression
set.seed(1)
n <- 1100
p <- 3
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
delta <- rnorm(p)
theta_0 <- c(1, 0.3, -1)
y <- c(
  rpois(500, exp(x[1:500, ] %*% theta_0)),
  rpois(300, exp(x[501:800, ] %*% (theta_0 + delta))),
  rpois(200, exp(x[801:1000, ] %*% theta_0)),
  rpois(100, exp(x[1001:1100, ] %*% (theta_0 - delta)))
)
poisson_data <- data.frame(y = y, x = x)

plot.ts(log(poisson_data$y))

plot.ts(poisson_data[, -1])

Lasso

# Lasso
set.seed(1)
n <- 480
p_true <- 6
p <- 50
x <- mvtnorm::rmvnorm(n, rep(0, p), diag(p))
theta_0 <- rbind(
  runif(p_true, -5, -2),
  runif(p_true, -3, 3),
  runif(p_true, 2, 5),
  runif(p_true, -5, 5)
)
theta_0 <- cbind(theta_0, matrix(0, ncol = p - p_true, nrow = 4))
y <- c(
  x[1:80, ] %*% theta_0[1, ] + rnorm(80, 0, 1),
  x[81:200, ] %*% theta_0[2, ] + rnorm(120, 0, 1),
  x[201:320, ] %*% theta_0[3, ] + rnorm(120, 0, 1),
  x[321:n, ] %*% theta_0[4, ] + rnorm(160, 0, 1)
)
lasso_data <- data.frame(y = y, x = x)

plot.ts(lasso_data[, seq_len(p_true + 1)])

AR(3)

# AR(3)
set.seed(1)
n <- 1000
x <- rep(0, n + 3)
for (i in 1:600) {
  x[i + 3] <- 0.6 * x[i + 2] - 0.2 * x[i + 1] + 0.1 * x[i] + rnorm(1, 0, 3)
}
for (i in 601:1000) {
  x[i + 3] <- 0.3 * x[i + 2] + 0.4 * x[i + 1] + 0.2 * x[i] + rnorm(1, 0, 3)
}
ar_data <- x[-seq_len(3)]

plot.ts(ar_data)

GARCH(1, 1)

# GARCH(1, 1)
set.seed(1)
n <- 400
sigma_2 <- rep(1, n + 1)
x <- rep(0, n + 1)
for (i in seq_len(200)) {
  sigma_2[i + 1] <- 20 + 0.5 * x[i]^2 + 0.1 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
for (i in 201:400) {
  sigma_2[i + 1] <- 1 + 0.1 * x[i]^2 + 0.5 * sigma_2[i]
  x[i + 1] <- rnorm(1, 0, sqrt(sigma_2[i + 1]))
}
garch_data <- x[-1]

plot.ts(garch_data)

VAR(2)

# VAR(2)
set.seed(1)
n <- 800
p <- 2
theta_1 <- matrix(c(-0.3, 0.6, -0.5, 0.4, 0.2, 0.2, 0.2, -0.2), nrow = p)
theta_2 <- matrix(c(0.3, -0.4, 0.1, -0.5, -0.5, -0.2, -0.5, 0.2), nrow = p)
x <- matrix(0, n + 2, p)
for (i in 1:500) {
  x[i + 2, ] <- theta_1 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
for (i in 501:n) {
  x[i + 2, ] <- theta_2 %*% c(x[i + 1, ], x[i, ]) + rnorm(p, 0, 1)
}
var_data <- x[-seq_len(2), ]

plot.ts(var_data)

Univariate mean change

The true change points are 300 and 700. Some methods are plotted due to the un-retrievable change points.

(fastcpd_result <- fastcpd::fastcpd.mean(mean_data_1, r.progress = FALSE)@cp_set)
#> error: fastcpd_class_public.cc:85: Invalid family.
#> error: fastcpd_class_public.cc:99: Invalid family.
#> [1] 300 700
(CptNonPar_result <- CptNonPar::np.mojo(mean_data_1, G = floor(length(mean_data_1) / 6))$cpts)
#> [1] 300 700
strucchange::breakpoints(y ~ 1, data = data.frame(y = mean_data_1))$breakpoints
#> [1] 300 700
ecp::e.divisive(mean_data_1)$estimates
#> [1]    1  301  701 1001
(changepoint_result <- changepoint::cpt.mean(c(mean_data_1))@cpts)
#> [1]  300 1000
(breakfast_result <- breakfast::breakfast(mean_data_1)$cptmodel.list[[6]]$cpts)
#> [1] 300 700
(wbs_result <- wbs::wbs(mean_data_1)$cpt$cpt.ic$mbic.penalty)
#> [1] 300 700
mosum::mosum(c(mean_data_1), G = 40)$cpts.info$cpts
#> [1] 300 700
(fpop_result <- fpop::Fpop(mean_data_1, nrow(mean_data_1))$t.est)
#> [1]  300  700 1000
gfpop::gfpop(
  data = mean_data_1,
  mygraph = gfpop::graph(
    penalty = 2 * log(nrow(mean_data_1)) * gfpop::sdDiff(mean_data_1) ^ 2,
    type = "updown"
  ),
  type = "mean"
)$changepoints
#> [1]  300  700 1000
invisible(
  suppressMessages(
    capture.output(
      result_InspectChangepoint <- InspectChangepoint::inspect(
        t(mean_data_1),
        threshold = InspectChangepoint::compute.threshold(
          nrow(mean_data_1), ncol(mean_data_1)
        )
      )
    )
  )
)
result_InspectChangepoint$changepoints[, "location"]
#> [1] 300 700
(jointseg_result <- jointseg::jointSeg(mean_data_1, K = 2)$bestBkp)
#> [1] 300 700
Rbeast::beast(
  mean_data_1, season = "none", print.progress = FALSE, quiet = TRUE
)$trend$cp
#>  [1] 701 301 NaN NaN NaN NaN NaN NaN NaN NaN
(stepR_result <- stepR::stepFit(mean_data_1, alpha = 0.5)$rightEnd)
#> [1]  300  700 1000
(cpm_result <- cpm::processStream(mean_data_1, cpmType = "Student")$changePoints)
#> [1] 299 699
(segmented_result <- segmented::stepmented(
  as.numeric(mean_data_1), npsi = 2
)$psi[, "Est."])
#> psi1.index psi2.index 
#>   298.1981   699.1524
plot(
  mcp::mcp(
    list(y ~ 1, ~ 1, ~ 1),
    data = data.frame(y = mean_data_1, x = seq_len(nrow(mean_data_1))),
    par_x = "x"
  )
)