## ----include = FALSE----------------------------------------------------------
knitr::opts_chunk$set(
  collapse = TRUE,
  comment = "#>"
)

## ----setup, message = FALSE, warning = FALSE----------------------------------
library(openNCAI)
library(ggplot2)
library(dplyr)
library(tibble)

## ----inspect_measurement-data-------------------------------------------------
head(ns_habitat_extent[,1:3])

## ----metadata-table, echo = FALSE---------------------------------------------
d_subset <- data.frame(
  `Object Name` = c(
    "ns_habitats_label_tree", 
    "ns_es_label_tree", 
    "ns_year_list"
  ),
  `Description` = c(
    "Habitats Label Tree: a hierarchy of natural habitats (based on EUNIS[^1] level 2) grouped within their respective broad habitat categories (EUNIS level 1) found in Scotland.",
    "Ecosystem Services Label Tree: a hierarchy of individual CICES[^2] ecosystem services (ES) organized by their SEEA[^3] service type groups (Provisioning, Regulation & Maintenance, and Cultural).",
    "Year List: the specific range of years over which the index is calculated."
  ),
  `Data Format` = c(
    "A named list of character vectors where names are broad habitats and vectors contain the specific habitats within them.",
    "A named list of character vectors where names are service types and vectors contain the individual services.",
    "A character vector of years."
  ),
  check.names = FALSE
)

knitr::kable(d_subset)

## ----view-metadata------------------------------------------------------------
# The label trees are nested lists.
# E.g. See the first couple of broad habitats and the habitats within them.
ns_habitats_label_tree[1:2]

# Or the names of the ES label tree:
names(ns_es_label_tree)

# And the labels within the last group 'Cultural':
ns_es_label_tree[3]

## ----measurement_table, echo = FALSE------------------------------------------
d_subset <- data.frame(
  `Object Name` = c(
    "ns_habitat_extent", 
    "ns_ci_scores"
  ),
  `Description` = c(
    "Habitat Extent for Scotland: measurement in hectares of the area of each natural habitat over the years.",
    "Condition Indicator Score Matrix: yearly condition score on each of the Condition Indicators (CIs)."
  ),
  `Data Format` = c(
    "A data frame where rows are habitats and columns are years.",
    "A data frame where rows are years and columns are CIs."
  ),
  check.names = FALSE
)

knitr::kable(d_subset)

## ----view-measurement---------------------------------------------------------
# E.g. The habitat extent data has 31 rows and 23 columns: 
dim(ns_habitat_extent)

# The 31 rows match the habitats in the (unlisted) Habitats label tree:
all_habitats <- unlist(ns_habitats_label_tree, use.names = FALSE)
length(all_habitats)
all.equal(all_habitats, rownames(ns_habitat_extent))

# The condition score data has one row per year and these years match the year list: 
dim(ns_ci_scores)
length(ns_year_list)

# The row names match the year list too:
all.equal(rownames(ns_ci_scores), ns_year_list)

## ----weights_table, echo = FALSE----------------------------------------------
d_subset <- data.frame(
  `Object Name` = c(
    "ns_provision_per_unit_scores", 
    "ns_between_importance_scores", 
    "ns_within_importance_scores", 
    "ns_ci_relevance_matrices", 
    "ns_indicator_directory", 
    "ns_custom_weight_matrix"
  ),
  `Description` = c(
    "Ecosystem Service Provision Potential per Unit Scores: scores denoting the relative potential of one area unit of a habitat to provide each of the ecosystem services.",
    "Ecosystem Service Importance Scores (**between**-service-type): scores denoting the relative importance of the different ecosystem service **types** ('Provisioning', 'Regulation & Maintenance', 'Cultural') to Scotland.",
    "Ecosystem Service Importance Score (**within**-service-type): scores denoting the relative importance of individual ecosystem services within their service type group.",
    "Condition Indicator Relevance Matrices: a set of matrices marking whether an indicator is relevant to specific habitat/ecosystem service combinations (e.g., 'Woodland bird index' relevance to 'Pollination').",
    "Indicator Directory: a table recording the salience of each condition indicator (between 0 and 1) in representing the capacity of habitats to provide each ecosystem service type.",
    "NatureScot's Custom Divisor Matrix: weights used in this particular case to adjust potential provision scores to reflect scale and context dependence (e.g., adjusting scores by dividing by 1 instead of 5)."
  ),
  `Data Format` = c(
    "A data frame where rows are habitats and columns are ecosystem services.",
    "A named list of numeric values where names match the top-level names of the Service Label Tree.",
    "A nested list of named lists, with a hierarchy matching the Service Label Tree.",
    "A named list of data frames (one per CI) with binary (1/0) values; rows are habitats and columns are ecosystem services.",
    "A data frame with a `ci_id` column and columns for each ecosystem service type containing salience scores.",
    "A data frame where row names are habitats and column names are ecosystem services."
  ),
  check.names = FALSE
)

knitr::kable(d_subset)

## ----view-weights-------------------------------------------------------------
all_habitats <- unlist(ns_habitats_label_tree, use.names = FALSE)
all_ecosystem_services <- unlist(ns_es_label_tree, use.names = FALSE)

# The Provision Per Unit Scores, any custom divisor matrix, and each matrix in the CIRMs list need to match:
one_ci_relevance_matrix <- ns_ci_relevance_matrices[[4]]

length(all_habitats)
nrow(ns_provision_per_unit_scores)
nrow(ns_custom_divisor_matrix)
nrow(one_ci_relevance_matrix)

all.equal(all_habitats, rownames(ns_provision_per_unit_scores))
all.equal(all_habitats, rownames(ns_custom_divisor_matrix))
all.equal(all_habitats, rownames(one_ci_relevance_matrix))

length(all_ecosystem_services)
ncol(ns_provision_per_unit_scores)
ncol(ns_custom_divisor_matrix)
ncol(one_ci_relevance_matrix)

all.equal(all_ecosystem_services, colnames(ns_provision_per_unit_scores))
all.equal(all_ecosystem_services, colnames(ns_custom_divisor_matrix))
all.equal(all_ecosystem_services, colnames(one_ci_relevance_matrix))

## ----calc-overall-------------------------------------------------------------
overall_ncai <- get_ncai(
                  habitat_extent = ns_habitat_extent,
                  ci_scores = ns_ci_scores,
                  habitats_label_tree = ns_habitats_label_tree,
                  es_label_tree = ns_es_label_tree,
                  year_list = ns_year_list,
                  provision_per_unit_scores = ns_provision_per_unit_scores,
                  custom_divisor_matrix = ns_custom_divisor_matrix,
                  between_importance_scores = ns_between_importance_scores,
                  within_importance_scores = ns_within_importance_scores,
                  ci_relevance_matrices = ns_ci_relevance_matrices,
                  indicator_directory = ns_indicator_directory
                  )

## ----show-head-overall--------------------------------------------------------
head(overall_ncai)

## ----selct-raw-overall--------------------------------------------------------
raw_index <- overall_ncai[, "raw_index", drop = FALSE]
raw_index

## ----build-overall-plot-------------------------------------------------------
# Make some nice display labels:
graph_labels <- c(
  "overall"
  = "Overall",
  # --- Service Types ---
  "provisioning"
  = "Provisioning",
  "regulation_and_maintenance"
  = "Regulation & Maintenance",
  "cultural"
  = "Cultural",
  # --- Broad Habitats ---
  "b_coastal_habitats"
  = "Coastal",
  "c_inland_surface_waters"
  = "Freshwater",
  "d_mires_bogs_and_fens"
  = "Mires, Bogs & Fens",
  "e_grasslands_and_lands_dominated_by_forbs_mosses_or_lichens"
  = "Grasslands",
  "f_heathland_scrub_and_tundra"
  = "Heathland",
  "g_woodland_forest_and_other_wooded_land"
  = "Woodland",
  "h_inland_unvegetated_or_sparsely_vegetated_habitats"
  = "Inland Unvegetated",
  "i_cultivated_agricultural_horticultural_and_domestic_habitats"
  = "Agri/Horticultural",
  "j_constructed_industrial_and_other_artificial_habitats"
  = "Constructed",
  "k_montane"
  = "Montane"
)


# Prepare overall index data:
main_index_for_plot <- overall_ncai |>
  rownames_to_column(var = "year") |>
  mutate(
    year = as.numeric(year),
    breakdown = "overall",
    display_name = recode(breakdown, !!!graph_labels)
  )

# Plot the Overall Index
overall_plot <- ggplot(main_index_for_plot, aes(x = year, y = raw_index)) +
  geom_line() +
  scale_y_continuous(
    limits = c(90, 110),           # <--- Forces the 90-110 range
    breaks = seq(90, 110, by = 2)
  ) +
  scale_x_continuous(
    breaks = seq(2000, 2022, by = 3)
  ) +
  labs(
    title = "NCAI (Overall)",
    x = "Year",
    y = "Index (Base = 100)"
  ) +
  theme_classic() +
  theme(plot.title = element_text(face = "bold", hjust = 0.5))


## ----show-overall-plot, fig.width = 7, fig.height = 4.5, fig.align = "center"----
overall_plot

## ----index-breakdown-st-------------------------------------------------------
ncai_by_ecosystem_service_type <- get_ncai(
    habitat_extent = ns_habitat_extent,
    ci_scores = ns_ci_scores,
    habitats_label_tree = ns_habitats_label_tree,
    es_label_tree = ns_es_label_tree,
    year_list = ns_year_list,
    provision_per_unit_scores = ns_provision_per_unit_scores,
    custom_divisor_matrix = ns_custom_divisor_matrix,
    between_importance_scores = ns_between_importance_scores,
    within_importance_scores = ns_within_importance_scores,
    ci_relevance_matrices = ns_ci_relevance_matrices,
    indicator_directory = ns_indicator_directory,
    return = "by_ecosystem_service_type"
  )

# A named list of data frames is returned, with names as per the ES label tree:
names(ncai_by_ecosystem_service_type)

# See the contribution of cultural services to the index:
cultural_breakdown <- ncai_by_ecosystem_service_type[["cultural"]]
cultural_breakdown

## ----build-st-plot------------------------------------------------------------
# Set up the data:
by_st_for_plot <- ncai_by_ecosystem_service_type[names(ns_es_label_tree)] |>
  lapply(function(df) {
    df |>
      rownames_to_column(var = "year") |>
      mutate(year = as.numeric(year)) # Convert here!
  }) |>
  bind_rows(.id = "breakdown") |>
  bind_rows(main_index_for_plot) |>
  mutate(
    display_name = recode(breakdown, !!!graph_labels)
  )

# Build the plot
st_plot <- ggplot(by_st_for_plot, aes(x = year, y = raw_index, color = display_name)) +
  # Basic lines
  geom_line() +

  # Point marker for overall line to distinguish trend
  geom_point(
    data = filter(by_st_for_plot, breakdown == "overall"),
    shape = 18,
    size = 3
  ) +

  # Fix the Y scale to match the 90-110 range
  scale_y_continuous(
    limits = c(90, 110),           # Forces the axis to show the full range
    breaks = seq(90, 110, by = 2)   # Sets consistent tick marks
  ) +

  # Consistent X scale
  scale_x_continuous(
    breaks = seq(2000, 2022, by = 3)
  ) +

  # Labels
  labs(
    title = "NCAI by Ecosystem Service Type",
    x = "Year",
    y = "Index (Base = 100)",
    color = "Service Type"
  ) +

  # Classic theme (solid axis lines, no grid)
  theme_classic() +

  # Minimal centering for the title
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5)
  )


## ----show-st-plot, fig.width = 7, fig.height = 5.25, fig.align = "center"-----
st_plot + 
  theme(legend.position = "bottom") +
  guides(color = guide_legend(nrow = 1))

## ----index-breakdown-bh-------------------------------------------------------
ncai_by_broad_habitat <- get_ncai(
                          habitat_extent = ns_habitat_extent,
                          ci_scores = ns_ci_scores,
                          habitats_label_tree = ns_habitats_label_tree,
                          es_label_tree = ns_es_label_tree,
                          year_list = ns_year_list,
                          provision_per_unit_scores = ns_provision_per_unit_scores,
                          custom_divisor_matrix = ns_custom_divisor_matrix,
                          between_importance_scores = ns_between_importance_scores,
                          within_importance_scores = ns_within_importance_scores,
                          ci_relevance_matrices = ns_ci_relevance_matrices,
                          indicator_directory = ns_indicator_directory,
                          return = "by_broad_habitat"
                        )

# We get a list of data frames with names as per the top level of the habitats label tree:
names(ncai_by_broad_habitat)

# See the contribution to the index of the broad habitat group Mires, Bogs and Fens:
bogs_breakdown <- ncai_by_broad_habitat[["d_mires_bogs_and_fens"]]
bogs_breakdown

## ----build-bh-plot------------------------------------------------------------
# Specify custom list of broad habitats to use:
ns_bh_breakdown_list <- c(names(ns_habitats_label_tree)[c(1:6, 8)])

# Prepare the data:
by_bh_for_plot <- ncai_by_broad_habitat[ns_bh_breakdown_list] |>
  lapply(function(df) {
    df |>
      rownames_to_column(var = "year") |>
      mutate(year = as.numeric(year))
  }) |>
  bind_rows(.id = "breakdown") |>
  bind_rows(main_index_for_plot) |>
  mutate(
    display_name = recode(breakdown, !!!graph_labels)
  )

# Build the plot:
bh_plot <- ggplot(by_bh_for_plot, aes(x = year, y = raw_index, color = display_name)) +
  # Lines for all habitats
  geom_line() +

  # Diamond marker on the overall trend line
  # We filter by 'breakdown' because that remains "overall" internally
  geom_point(
    data = filter(by_bh_for_plot, breakdown == "overall"),
    shape = 18,
    size = 3
  ) +

  # Fix the Y scale 
  scale_y_continuous(
    limits = c(90, 120),           # Forces the axis to show the full range
    breaks = seq(90, 120, by = 2)   # Sets consistent tick marks
  ) +

  # Consistent X scale
  scale_x_continuous(
    breaks = seq(2000, 2022, by = 3)
  ) +

  # Labels
  labs(
    title = "NCAI by Broad Habitat",
    x = "Year",
    y = "Index (Base = 100)",
    color = "Habitat Type"
  ) +

  # The requested classic theme
  theme_classic() +

  # Minimal centering for the title
  theme(
    plot.title = element_text(face = "bold", hjust = 0.5)
  )

## ----show-bh-plot, fig.width = 7, fig.height = 6, fig.align = "center"--------
bh_plot + 
  theme(legend.position = "bottom") +
  guides(color = guide_legend(nrow = 3))

## ----return-options, echo = FALSE---------------------------------------------
# Define the return options and their descriptions
ncai_options <- data.frame(
  `Value` = c(
    "**\"overall_ncai\"**", 
    "**\"by_ecosystem_service_type\"**", 
    "**\"by_broad_habitat\"**", 
    "**\"wellbeing_index\"**",
    "**\"flow_of_es_index\"**",
    "**\"yearly_ncai_matrices\"**", 
    "**\"yearly_wellbeing_matrices\"**",
    "**\"yearly_flow_of_es_matrices\"**",
    "**\"es_potential_base\"**", 
    "**\"wellbeing_potential_base\"**", 
    "**\"flow_of_es_base\"**",
    "**\"everything\"**"
  ),
  `Output Type` = c(
    "Data Frame", "Data Frame", "Data Frame", "Data Frame", "Data Frame",
    "List of Matrices", "List of Matrices", "List of Matrices",
    "Matrix", "Matrix", "Matrix", "Named List"
  ),
  `Description` = c(
    "The standard overall NCAI data frame (default).",
    "NCAI results broken down specifically by Ecosystem Service Type.",
    "NCAI results broken down specifically by Broad Habitat category.",
    "The indexed potential wellbeing contribution of habitats over time (habitat extent weighted by provision-per-unit and importance).",
    "The indexed likely flow of ecosystem services over time, derived from relevance-weighted condition indicator data.",
    "Unaggregated annual matrices of asset value for every habitat/ecosystem service combination.",
    "Unaggregated annual matrices representing potential wellbeing values per habitat and ecosystem service. Calculated as yearly Habitat Extent * Wellbeing Base",
    "Unaggregated annual matrices representing the likely flow of ecosystem services per habitat and ecosystem service, for each year.",
    "Ecosystem Service Potential Base: habitat extent weighted by exemplary provision-per-unit scores in year one.",
    "Wellbeing Potential Base: the potential wellbeing matrix for the baseline year (Year One).",
    "The likely service flow matrix for the baseline year (Year One).",
    "A comprehensive named list containing all of the objects listed above."
  )
)

# Render the table
knitr::kable(ncai_options,
             col.names = c("Argument Value", "Output Type", "Description"),
             escape = FALSE)

## ----echo=FALSE, fig.cap="NCAI Calculation Process Diagram", out.width="100%"----
knitr::include_graphics("../man/figures/ncai_calculation_process.png")

## ----get-everything-----------------------------------------------------------
# Generate all intermediate and final objects
ncai_all_objects <- get_ncai(
  habitat_extent = ns_habitat_extent,
  ci_scores = ns_ci_scores,
  habitats_label_tree = ns_habitats_label_tree,
  es_label_tree = ns_es_label_tree,
  year_list = ns_year_list,
  provision_per_unit_scores = ns_provision_per_unit_scores,
  custom_divisor_matrix = ns_custom_divisor_matrix,
  between_importance_scores = ns_between_importance_scores,
  within_importance_scores = ns_within_importance_scores,
  ci_relevance_matrices = ns_ci_relevance_matrices,
  indicator_directory = ns_indicator_directory,
  return = "everything"
)

## ----build-extent-condition-plot----------------------------------------------
library(ggplot2)
library(tidyr)
library(dplyr)

# 1. Prepare the data
# Extract years and values into a long-format data frame
plot_data <- data.frame(
  Year = as.numeric(rownames(ncai_all_objects$overall_ncai)),
  Overall = ncai_all_objects$overall_ncai$raw_index,
  Wellbeing = ncai_all_objects$wellbeing_index$raw_index,
  Flow = ncai_all_objects$flow_of_es_index$raw_index
)

# Calculate correlations for the annotation
cor_wellbeing <- round(cor(plot_data$Overall, plot_data$Wellbeing, use = "complete.obs"), 3)
cor_flow <- round(cor(plot_data$Overall, plot_data$Flow, use = "complete.obs"), 3)

# Pivot to long format for ggplot
plot_data_long <- plot_data %>%
  tidyr::pivot_longer(cols = -Year, names_to = "Index", values_to = "Value")

# 2. Create the ggplot
driver_plot <- ggplot(plot_data_long, aes(x = Year, y = Value, color = Index, shape = Index)) +
  geom_line(linewidth = 1) +
  geom_point(size = 3) +
  geom_hline(yintercept = 100, linetype = "dashed", color = "gray50") +
  ylim(90, 110) +
  scale_color_manual(values = c("Overall" = "#BCEE68",
                                "Wellbeing" = "#9AC0CD",
                                "Flow" = "#EEA2AD")) +
  labs(title = "NCAI Components & Correlation",
       subtitle = "Comparing overall index trends with area (Wellbeing) and quality (Flow) drivers",
       x = "Year",
       y = "Index (Base Year = 100)",
       color = "Metric",
       shape = "Metric") +
  theme_minimal() +
  annotate("label", x = Inf, y = -Inf,
           label = paste0("Correlations: Overall/Wellbeing: ", cor_wellbeing,
               "\nOverall/Flow: ", cor_flow),
           hjust = 1.1, vjust = -0.5, size = 3.5,
           fill = "white", label.size = 0.5)


## ----show-driver-plot, fig.width = 7, fig.height = 6, fig.align = "center"----
driver_plot