## ----setup, include = FALSE---------------------------------------------------
# Vignette-wide chunk defaults: collapse source and output, prefix printed
# output with "#>", and skip evaluation — the examples depend on optional
# packages (xgboost, gbm, lightgbm); set eval = TRUE locally to run them.
local({
  chunk_defaults <- list(
    collapse = TRUE,
    comment = "#>",
    eval = FALSE
  )
  do.call(knitr::opts_chunk$set, chunk_defaults)
})

## ----xgb-clf------------------------------------------------------------------
# library(e2tree)
# if (!requireNamespace("xgboost", quietly = TRUE)) {
#   install.packages("xgboost", repos = "https://cran.r-project.org")
# }
# library(xgboost)
# 
# data(iris)
# set.seed(42)
# n  <- floor(0.75 * nrow(iris))
# tr <- iris[sample(nrow(iris), n), ]
# va <- iris[setdiff(seq_len(nrow(iris)), as.integer(rownames(tr))), ]
# 
# # XGBoost requires a numeric matrix and 0-indexed integer labels
# X_tr <- as.matrix(tr[, 1:4])
# y_tr <- as.integer(tr$Species) - 1L
# dm_tr <- xgb.DMatrix(data = X_tr, label = y_tr)
# 
# ensemble <- xgb.train(
#   params  = list(objective  = "multi:softmax",
#                  num_class  = 3,
#                  max_depth  = 4,
#                  eta        = 0.3),
#   data    = dm_tr,
#   nrounds = 100,
#   verbose = 0
# )
# 
# # Attach the response back to the data.frame so the formula in e2tree()
# # can find it; createDisMatrix() will use it to annotate the dissimilarity
# # matrix (in classification, `label` is optional but recommended).
# tr_xgb         <- tr[, 1:4]
# tr_xgb$Species <- tr$Species
# 
# D <- createDisMatrix(ensemble, data = tr_xgb, label = "Species",
#                      parallel = list(active = FALSE, no_cores = 1))
# 
# setting  <- list(impTotal = 0.1, maxDec = 0.01, n = 2, level = 5)
# tree_xgb <- e2tree(Species ~ ., data = tr_xgb, D = D,
#                    ensemble = ensemble, setting = setting)
# print(tree_xgb)

## ----xgb-reg------------------------------------------------------------------
# library(xgboost)
# 
# data(mtcars)
# set.seed(42)
# n  <- floor(0.75 * nrow(mtcars))
# tr <- mtcars[sample(nrow(mtcars), n), ]
# 
# X_tr  <- as.matrix(tr[, -1])
# y_tr  <- tr$mpg
# dm_tr <- xgb.DMatrix(data = X_tr, label = y_tr)
# 
# ensemble <- xgb.train(
#   params  = list(objective = "reg:squarederror", max_depth = 4, eta = 0.3),
#   data    = dm_tr,
#   nrounds = 100,
#   verbose = 0
# )
# 
# # `data = tr` carries the response column too; the XGBoost adapter
# # automatically trims the matrix to the features used at training time.
# D    <- createDisMatrix(ensemble, data = tr, label = "mpg",
#                         parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(mpg ~ ., data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 1e-6, n = 2, level = 5))
# print(tree)

## ----gbm-clf------------------------------------------------------------------
# if (!requireNamespace("gbm", quietly = TRUE)) {
#   install.packages("gbm", repos = "https://cran.r-project.org")
# }
# library(gbm)
# 
# data(iris)
# set.seed(42)
# df <- iris
# df$is_setosa     <- as.integer(df$Species == "setosa")
# df$is_setosa_fct <- factor(df$is_setosa, levels = c(0L, 1L))
# n  <- floor(0.75 * nrow(df))
# tr <- df[sample(nrow(df), n), ]
# 
# ensemble <- gbm(is_setosa ~ Sepal.Length + Sepal.Width + Petal.Length + Petal.Width,
#                 data              = tr,
#                 distribution      = "bernoulli",
#                 n.trees           = 200,
#                 interaction.depth = 4,
#                 verbose           = FALSE)
# 
# D    <- createDisMatrix(ensemble,
#                         data     = tr[, c("Sepal.Length","Sepal.Width",
#                                           "Petal.Length","Petal.Width",
#                                           "is_setosa_fct")],
#                         label    = "is_setosa_fct",
#                         parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(is_setosa_fct ~ Sepal.Length + Sepal.Width +
#                                Petal.Length + Petal.Width,
#                data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 0.01, n = 2, level = 5))
# print(tree)

## ----gbm-reg------------------------------------------------------------------
# library(gbm)
# 
# data(mtcars)
# set.seed(42)
# n  <- floor(0.75 * nrow(mtcars))
# tr <- mtcars[sample(nrow(mtcars), n), ]
# 
# ensemble <- gbm(mpg ~ ., data = tr,
#                 distribution      = "gaussian",
#                 n.trees           = 200,
#                 interaction.depth = 4,
#                 n.minobsinnode    = 2,
#                 bag.fraction      = 0.8,
#                 verbose           = FALSE)
# 
# D    <- createDisMatrix(ensemble, data = tr, label = "mpg",
#                         parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(mpg ~ ., data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 1e-6, n = 2, level = 5))
# print(tree)

## ----lgb-clf------------------------------------------------------------------
# if (!requireNamespace("lightgbm", quietly = TRUE)) {
#   install.packages("lightgbm", repos = "https://cran.r-project.org")
# }
# library(lightgbm)
# 
# data(iris)
# set.seed(42)
# n  <- floor(0.75 * nrow(iris))
# tr <- iris[sample(nrow(iris), n), ]
# 
# X_tr <- as.matrix(tr[, 1:4])
# y_tr <- as.integer(tr$Species) - 1L
# ds   <- lgb.Dataset(X_tr, label = y_tr)
# 
# ensemble <- lgb.train(
#   params  = list(objective  = "multiclass",
#                  num_class  = 3,
#                  num_leaves = 15,
#                  verbose    = -1),
#   data    = ds,
#   nrounds = 100
# )
# 
# tr_lgb         <- tr[, 1:4]
# tr_lgb$Species <- tr$Species
# 
# D <- createDisMatrix(ensemble, data = tr_lgb, label = "Species",
#                      parallel = list(active = FALSE, no_cores = 1))
# 
# tree <- e2tree(Species ~ ., data = tr_lgb, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 0.01, n = 2, level = 5))
# print(tree)

## ----lgb-reg------------------------------------------------------------------
# library(lightgbm)
# 
# data(mtcars)
# set.seed(42)
# n  <- floor(0.75 * nrow(mtcars))
# tr <- mtcars[sample(nrow(mtcars), n), ]
# 
# X_tr <- as.matrix(tr[, -1])
# y_tr <- tr$mpg
# ds   <- lgb.Dataset(X_tr, label = y_tr)
# 
# ensemble <- lgb.train(
#   params  = list(objective        = "regression",
#                  num_leaves       = 8,
#                  min_data_in_leaf = 2,
#                  learning_rate    = 0.1,
#                  verbose          = -1),
#   data    = ds,
#   nrounds = 200
# )
# 
# # Pass the response column to createDisMatrix() via `label`. The
# # LightGBM adapter selects the columns it needs through the booster's
# # stored feature names, so any extra columns in `data` are ignored.
# D    <- createDisMatrix(ensemble, data = tr, label = "mpg",
#                         parallel = list(active = FALSE, no_cores = 1))
# tree <- e2tree(mpg ~ ., data = tr, D = D, ensemble = ensemble,
#                setting = list(impTotal = 0.1, maxDec = 1e-6, n = 2, level = 5))
# print(tree)

