## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>",
  fig.width = 8, fig.height = 4.5, out.width = "100%"
)
library(MacroFilters)
library(data.table)
library(ggplot2)
data("us_gdp_vintage", package = "MacroFilters")

## ----expanding-window, cache=TRUE---------------------------------------------
T_max    <- nrow(us_gdp_vintage)   # full-sample size (316 rows as of 2025)
ref_date <- as.Date("2019-10-01")  # 2019 Q4 — last pre-COVID quarter
ref_idx  <- which(us_gdp_vintage$date == ref_date)

# d calibrated on the output-gap scale (not the growth-rate scale).
# Using mad(diff(y_log)) sets d ~0.006 which is far below a typical output
# gap (0.01–0.05), so MBH over-smooths to a near-straight line.  As
# post-COVID recovery data arrive, that straight line tilts upward and
# revises the 2019 Q4 trend estimate — precisely the instability we want
# to avoid.  Fixing d = mad(hp_cycle) anchors the threshold to the right
# scale and keeps the backward revision near zero.
d_fixed <- stats::mad(hp_filter(us_gdp_vintage$gdp_log, freq = 4)$cycle)

# df = 10: avoids the extreme endpoint under-fit that df = 4 (default)
# produces.  With df = 4, the cycle at the 2019 Q4 endpoint is ~20 ppts;
# with df = 10 it is ~0.003 ppts (same as HP), removing the spurious
# upward drift that would otherwise accumulate as recovery data arrives.
# mstop = 1000 gives the additional boosting budget needed at df = 10.
fixed_df     <- 10L
fixed_mstop  <- 1000L

# Freeze the B-spline domain so every vintage uses the same basis — knot
# count and domain are set once from the full sample and never change.
fixed_knots  <- 50L                            # invariant functional space
fixed_bounds <- c(1L, T_max)                   # global B-spline domain

# Vintages: 2016 Q1 – 2022 Q4 (28 publication dates)
eval_dates   <- us_gdp_vintage[date >= "2016-01-01" & date <= "2022-10-01", date]
eval_indices <- which(us_gdp_vintage$date %in% eval_dates)

fan_list  <- vector("list", length(eval_indices))
back_list <- vector("list", length(eval_indices))

for (k in seq_along(eval_indices)) {
  i         <- eval_indices[k]
  y_current <- us_gdp_vintage$gdp_log[seq_len(i)]

  hp_res  <- hp_filter(y_current, freq = 4)

  # Single call per vintage — all parameters frozen outside the loop so
  # both the fan chart and the backward-revision series use the same basis.
  mbh_res <- mbh_filter(
    y_current,
    d              = d_fixed,
    knots          = fixed_knots,
    df             = fixed_df,
    mstop          = fixed_mstop,
    boundary.knots = fixed_bounds
  )

  n_cur    <- length(y_current)
  tail_idx <- max(1L, n_cur - 27L):n_cur   # 28-obs trailing window

  fan_list[[k]] <- data.table(
    vintage_date = us_gdp_vintage$date[i],
    obs_date     = us_gdp_vintage$date[tail_idx],
    hp_trend     = hp_res$trend[tail_idx],
    mbh_trend    = mbh_res$trend[tail_idx],
    gdp_log      = y_current[tail_idx]
  )

  if (i >= ref_idx) {
    back_list[[k]] <- data.table(
      vintage_date = us_gdp_vintage$date[i],
      hp_at_ref    = hp_res$trend[ref_idx],
      mbh_at_ref   = mbh_res$trend[ref_idx]
    )
  }
}

revisions_dt <- rbindlist(fan_list)
backward_dt  <- rbindlist(Filter(Negate(is.null), back_list))

## ----boundary-knots-demo------------------------------------------------------
n_demo  <- 200L   # truncated sample
y_demo  <- us_gdp_vintage$gdp_log[seq_len(n_demo)]

res_free      <- mbh_filter(y_demo, knots = fixed_knots, df = fixed_df,
                             mstop = fixed_mstop, boundary.knots = NULL)
res_anchored  <- mbh_filter(y_demo, knots = fixed_knots, df = fixed_df,
                             mstop = fixed_mstop, boundary.knots = fixed_bounds)

# Extend by one observation and refit
y_demo_p1 <- us_gdp_vintage$gdp_log[seq_len(n_demo + 1L)]

res_free_p1     <- mbh_filter(y_demo_p1, knots = fixed_knots, df = fixed_df,
                               mstop = fixed_mstop, boundary.knots = NULL)
res_anchored_p1 <- mbh_filter(y_demo_p1, knots = fixed_knots, df = fixed_df,
                               mstop = fixed_mstop, boundary.knots = fixed_bounds)

# Revision at the final shared observation (position n_demo)
rev_free     <- abs(res_free_p1$trend[n_demo]     - res_free$trend[n_demo])
rev_anchored <- abs(res_anchored_p1$trend[n_demo] - res_anchored$trend[n_demo])

cat(sprintf(
  "Revision at obs %d after adding one data point:\n  free domain   : %.6f\n  anchored domain: %.6f\n",
  n_demo, rev_free, rev_anchored
))

## ----fan-chart----------------------------------------------------------------
# Show only the shared overlap window (2018 Q1 onward) where every one of
# the 28 vintages contributes data — this eliminates the staircase / accordion
# artefact that arises when staggered trailing windows are plotted together.
fan_shared <- revisions_dt[obs_date >= as.Date("2018-01-01")]

p1 <- ggplot(fan_shared, aes(x = obs_date)) +
  geom_line(aes(y = hp_trend,  group = vintage_date),
            colour = "#0072B2", alpha = 0.4, linewidth = 0.6) +
  geom_line(aes(y = mbh_trend, group = vintage_date),
            colour = "#E69F00", alpha = 0.4, linewidth = 0.6) +
  geom_line(
    data = us_gdp_vintage[date >= as.Date("2018-01-01") & date <= as.Date("2022-10-01")],
    aes(x = date, y = gdp_log),
    colour = "black", linewidth = 0.8, linetype = "dashed"
  ) +
  annotate("rect",
           xmin = as.Date("2020-01-01"), xmax = as.Date("2020-10-01"),
           ymin = -Inf, ymax = Inf, alpha = 0.08, fill = "firebrick") +
  annotate("text", x = as.Date("2020-04-01"), y = Inf,
           label = "COVID\nshock", vjust = 1.4, size = 3.2, colour = "firebrick") +
  labs(
    title    = "Vintage Fan Chart: HP (blue) vs MBH (orange)",
    subtitle = "Each line = one vintage estimate (2018–2022 overlap window). Dashed = observed.",
    x        = NULL,
    y        = "Log Real GDP"
  ) +
  theme_minimal(base_size = 12)

print(p1)

## ----backward-revision--------------------------------------------------------
# backward_dt was built in the expanding-window loop above.
# MBH used df = 10 (correct endpoint estimate), knots = 50, and
# boundary.knots = c(1, T_max) (frozen basis) — so all vintages are
# numerically comparable and the baseline trend is unbiased.
back_dt <- backward_dt[order(vintage_date)]
setnames(back_dt, c("hp_at_ref", "mbh_at_ref"), c("hp_trend", "mbh_trend"))

# Normalise to the base vintage (2019 Q4 = first vintage that includes ref_date)
base_hp  <- back_dt[vintage_date == ref_date, hp_trend]
base_mbh <- back_dt[vintage_date == ref_date, mbh_trend]

back_dt[, hp_revision  := (hp_trend  - base_hp)  * 100]   # × 100 = ppts log GDP
back_dt[, mbh_revision := (mbh_trend - base_mbh) * 100]

p2 <- ggplot(back_dt, aes(x = vintage_date)) +
  geom_hline(yintercept = 0, linetype = "dashed", colour = "grey60") +
  geom_line(aes(y = hp_revision),  colour = "#0072B2", linewidth = 1) +
  geom_point(aes(y = hp_revision), colour = "#0072B2", size = 2.5) +
  geom_line(aes(y = mbh_revision),  colour = "#E69F00", linewidth = 1) +
  geom_point(aes(y = mbh_revision), colour = "#E69F00", size = 2.5) +
  annotate("rect",
           xmin = as.Date("2020-01-01"), xmax = as.Date("2020-10-01"),
           ymin = -Inf, ymax = Inf, alpha = 0.1, fill = "firebrick") +
  annotate("text", x = as.Date("2020-04-01"), y = Inf,
           label = "COVID\nshock", vjust = 1.4, size = 3.2, colour = "firebrick") +
  annotate("text", x = max(back_dt$vintage_date),
           y = tail(back_dt$hp_revision, 1), label = "HP",
           hjust = -0.3, colour = "#0072B2", fontface = "bold") +
  annotate("text", x = max(back_dt$vintage_date),
           y = tail(back_dt$mbh_revision, 1), label = "MBH",
           hjust = -0.3, colour = "#E69F00", fontface = "bold") +
  scale_x_date(
    date_breaks = "6 months",
    labels      = function(x) paste0(format(x, "%Y"), " ", quarters(x)),
    expand      = expansion(add = c(30, 90))
  ) +
  labs(
    title    = "Backward Revision at 2019 Q4 as New Data Arrive",
    subtitle = "How does each filter re-estimate the pre-COVID trend as COVID data are published?",
    x        = "Vintage (publication date)",
    y        = "Revision vs 2019 Q4 baseline (log pts x100)"
  ) +
  theme_minimal(base_size = 11) +
  theme(axis.title = element_text(size = 9))

print(p2)

## ----spread-table-------------------------------------------------------------
knitr::kable(
  back_dt[, .(
    Vintage              = format(vintage_date),
    `HP trend at 2019Q4`  = round(hp_trend,  5),
    `MBH trend at 2019Q4` = round(mbh_trend, 5),
    `HP revision (ppts)`  = round(hp_revision,  3),
    `MBH revision (ppts)` = round(mbh_revision, 3)
  )],
  caption = "Backward revision of 2019 Q4 trend estimate across vintages"
)