## ----message = FALSE, warning= FALSE------------------------------------------
library(gendercoder)

# Tabulate the distinct values of a vector, most frequent first.
#
# Arguments:
#   x    Vector to tabulate. Missing values get their own row when any are
#        present (table(..., useNA = "ifany")).
#   name Name given to the first (value) column of the result.
#
# Returns a data frame with one row per distinct value: the values in a column
# called `name` and their frequencies in an integer column `count`, sorted by
# decreasing count. Empty input yields a zero-row data frame that still has
# both columns.
count_values <- function(x, name = "value") {
  counts <- sort(table(x, useNA = "ifany"), decreasing = TRUE)
  out <- data.frame(
    # names() of an empty table is NULL and data.frame() silently drops NULL
    # columns, which would leave only `count` and then mislabel it below.
    # Coercing to character keeps the value column for empty input.
    value = as.character(names(counts)),
    count = as.integer(counts)
  )
  names(out)[1] <- name
  out
}

# Frequency table of the raw `Gender` column of `sample`, before any recoding.
knitr::kable(
  x = count_values(x = sample[["Gender"]], name = "Gender"),
  caption = "Summary of gender categories before coding"
)

## -----------------------------------------------------------------------------
# Recode the raw responses with the manylevels_en dictionary, storing the
# result in a new column next to the original `Gender` column.
manylevels_sample <- sample
manylevels_sample$recoded_gender <- recode_gender(
  gender = manylevels_sample$Gender,
  dictionary = manylevels_en
)

# Preview the first ten rows; the caption states the same row count that is
# passed to head() (a bare head() would show only six).
knitr::kable(
  head(manylevels_sample, 10),
  caption = "The manylevels_en dictionary applied to `head(sample, 10)`"
)

# Frequency table of the recoded values; na.omit() drops NA entries first so
# they do not appear as a category here.
knitr::kable(
  count_values(na.omit(manylevels_sample$recoded_gender), "recoded_gender"),
  caption = "Summary of gender categories after use of the *manylevels_en* dictionary"
)


## -----------------------------------------------------------------------------

# Show every row whose recoded value is NA after applying manylevels_en.
knitr::kable(
  x = manylevels_sample[
    which(is.na(manylevels_sample[["recoded_gender"]])),
  ],
  caption = "All responses not classified by the built-in dictionary"
)


## -----------------------------------------------------------------------------
# Recode the raw responses with the fewlevels_en dictionary, storing the
# result in a new column next to the original `Gender` column.
fewlevels_sample <- sample
fewlevels_sample$recoded_gender <- recode_gender(
  gender = fewlevels_sample$Gender,
  dictionary = fewlevels_en
)

# Preview the first ten rows; the caption states the same row count that is
# passed to head() (a bare head() would show only six).
knitr::kable(
  head(fewlevels_sample, 10),
  caption = "The fewlevels_en dictionary applied to `head(sample, 10)`"
)

# Frequency table of the recoded values; NA entries are kept and counted as
# their own row when present.
knitr::kable(
  count_values(fewlevels_sample$recoded_gender, "recoded_gender"),
  caption = "Summary of gender categories after use of the *fewlevels_en* dictionary"
)

## -----------------------------------------------------------------------------
# A custom dictionary is a named character vector: the name of each element is
# a value as entered by the user, and the element itself is the replacement
# value for that name, as a lower case string.
custom_dictionary <- c(
  "masculino" = "man", "hombre" = "man",
  "mujer" = "woman", "femenina" = "woman"
)

# Print the structure to show that it is a plain named character vector.
str(object = custom_dictionary)

## -----------------------------------------------------------------------------

# Recode with fewlevels_en extended by the custom entries; c() concatenates
# the two named vectors into a single dictionary.
combined_sample <- sample
combined_sample[["recoded_gender"]] <- recode_gender(
  gender = combined_sample[["Gender"]],
  dictionary = c(fewlevels_en, custom_dictionary)
)

# Frequency table of the recoded values (NA is counted when present).
knitr::kable(
  x = count_values(
    x = combined_sample[["recoded_gender"]],
    name = "recoded_gender"
  ),
  caption = "Summary of gender categories after use of the combined dictionaries"
)

# Rows that still have no recoded value after the combined dictionary.
knitr::kable(
  x = combined_sample[
    which(is.na(combined_sample[["recoded_gender"]])),
  ],
  caption = "All responses not classified by the combined dictionaries"
)

## -----------------------------------------------------------------------------
# Same combined dictionary as before, this time calling recode_gender() with
# retain_unmatched = TRUE.
retained_sample <- sample
retained_sample[["recoded_gender"]] <- recode_gender(
  gender = retained_sample[["Gender"]],
  dictionary = c(fewlevels_en, custom_dictionary),
  retain_unmatched = TRUE
)

# Frequency table of the resulting column.
knitr::kable(
  x = count_values(
    x = retained_sample[["recoded_gender"]],
    name = "recoded_gender"
  ),
  caption = "Summary of gender categories after use of the combined dictionary and `retain_unmatched = TRUE`"
)

