## ----setup, include = FALSE---------------------------------------------------
# Vignette-wide chunk defaults.
# Output: merge source and results into one block, prefix results with "#>".
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
# Figures: default device size of 7 x 4 (width x height).
knitr::opts_chunk$set(fig.width = 7, fig.height = 4)
library(ensembleML)

## ----fit----------------------------------------------------------------------
# Load the built-in iris data and make a reproducible 120/30 train/test split.
data("iris")
set.seed(42)
# Draw 120 distinct row indices; the 30 rows not drawn form the test set.
idx   <- sample.int(nrow(iris), size = 120)
train <- iris[idx, , drop = FALSE]
test  <- iris[setdiff(seq_len(nrow(iris)), idx), , drop = FALSE]

# Fit a random-forest classifier for Species on all other iris columns,
# using the training rows only; verbose output is switched on.
rf <- em_fit(
  Species ~ .,
  data    = train,
  method  = "random_forest",
  verbose = TRUE
)

## ----xgb, eval = FALSE--------------------------------------------------------
# xgb <- em_fit(Species ~ ., data = train, method = "xgboost")
# ada <- em_fit(Species ~ ., data = train, method = "adaboost")
# bag <- em_fit(Species ~ ., data = train, method = "bagging")

## ----predict------------------------------------------------------------------
# Predict for the held-out rows with the fitted model `rf`, then show the
# first few predictions.
# NOTE(review): with no `type` given this presumably returns predicted class
# labels -- confirm against the em_predict() documentation.
preds <- em_predict(rf, newdata = test)
head(preds)

## ----prob---------------------------------------------------------------------
# Same prediction call, but requesting type = "prob"; show the first 3 entries.
# NOTE(review): presumably one probability per class and row -- confirm the
# return shape against the em_predict() documentation.
probs <- em_predict(rf, newdata = test, type = "prob")
head(probs, 3)

## ----evaluate-----------------------------------------------------------------
# Evaluate `rf` on the held-out rows without naming any metrics
# (presumably the package's default metric set is used -- confirm).
em_evaluate(rf, newdata = test)

## ----metrics------------------------------------------------------------------
# Evaluate on the same held-out rows, this time requesting an explicit set of
# metrics by name.
em_evaluate(rf, newdata = test, metrics = c("accuracy", "f1", "kappa"))

## ----cv, eval = FALSE---------------------------------------------------------
# cv_res <- em_cv(Species ~ ., data = iris, method = "random_forest",
#                 cv_folds = 5, repeats = 3)
# cv_res$summary
# em_plot_cv(cv_res, metric = "accuracy")

## ----tune, eval = FALSE-------------------------------------------------------
# grid <- list(ntree = c(100, 300, 500), mtry = c(1, 2, 3))
# 
# tuned <- em_tune(
#   Species ~ ., data = train, method = "random_forest",
#   param_grid = grid, cv_folds = 5
# )
# 
# tuned$best_params
# tuned$best_score
# tuned$results

## ----compare, eval = FALSE----------------------------------------------------
# cmp <- em_compare(Species ~ ., train = train, test = test)
# cmp$table

## ----importance---------------------------------------------------------------
# Feature importance for the fitted model. iris has four predictors for
# Species, so top_n = 4 presumably lists all of them -- confirm how top_n
# is interpreted.
em_importance(rf, top_n = 4)

## ----partial, eval = FALSE----------------------------------------------------
# em_partial(rf, data = train, feature = "Petal.Length")

## ----confusion, eval = FALSE--------------------------------------------------
# em_confusion(rf, newdata = test)
# em_confusion(rf, newdata = test, normalise = TRUE)

## ----regression---------------------------------------------------------------
# Simulate a small regression problem: two standard-normal predictors and a
# response that depends linearly on x1 (intercept 3, slope 2) plus unit-variance
# noise. x2 carries no signal.
set.seed(7)
reg_data <- data.frame(x1 = rnorm(200), x2 = rnorm(200))
# Build y from the stored x1 column: arguments of data.frame() cannot refer to
# each other, and a fresh rnorm() draw in place of x1 would make the response
# independent of both predictors, leaving the model nothing to learn.
reg_data$y <- 3 + 2 * reg_data$x1 + rnorm(200)
# First 160 rows for training, last 40 held out for evaluation.
reg_train <- reg_data[1:160, ]
reg_test  <- reg_data[161:200, ]

# Fit a random forest to the numeric response y using all other columns of
# the training rows, then evaluate it and inspect residuals on the held-out
# rows.
# NOTE(review): the held-out data is passed positionally here, whereas the
# classification chunks pass it as `newdata =`; presumably the second
# parameter of both functions is `newdata` -- confirm.
reg_model <- em_fit(y ~ ., data = reg_train, method = "random_forest")
em_evaluate(reg_model, reg_test)
em_residuals(reg_model, reg_test)

## ----citation, eval = FALSE---------------------------------------------------
# citation("ensembleML")

## ----session------------------------------------------------------------------
# Print the R version, platform and attached/loaded packages so the vignette
# output can be tied to a specific environment.
sessionInfo()