Following are some examples of how the bagyo dataset can
be used to demonstrate various data wrangling approaches, particularly
those using the tidyverse packages.
## Get number of cyclone categories per year ----
# Tally cyclones for every year/category pair, then expand each year to the
# full set of category levels. Categories not observed in a year are kept
# with `n = NA`.
bagyo |>
  count(year, category_name) |>
  group_by(year) |>
  complete(category_name) |>
  ungroup()
#> # A tibble: 20 × 3
#>     year category_name             n
#>    <dbl> <fct>                 <int>
#>  1  2017 Tropical Depression       5
#>  2  2017 Tropical Storm            9
#>  3  2017 Severe Tropical Storm     5
#>  4  2017 Typhoon                   3
#>  5  2017 Super Typhoon            NA
#>  6  2018 Tropical Depression       4
#>  7  2018 Tropical Storm            7
#>  8  2018 Severe Tropical Storm     4
#>  9  2018 Typhoon                   6
#> 10  2018 Super Typhoon            NA
#> 11  2019 Tropical Depression       8
#> 12  2019 Tropical Storm            2
#> 13  2019 Severe Tropical Storm     3
#> 14  2019 Typhoon                   8
#> 15  2019 Super Typhoon            NA
#> 16  2020 Tropical Depression       6
#> 17  2020 Tropical Storm            7
#> 18  2020 Severe Tropical Storm     3
#> 19  2020 Typhoon                   4
#> 20  2020 Super Typhoon             2

## Get yearly mean cyclone pressure and speed ----
# Average pressure and speed of the cyclones recorded in each year. The
# `.names` template yields the columns `mean_pressure` and `mean_speed`.
bagyo |>
  group_by(year) |>
  summarise(
    across(c(pressure, speed), mean, .names = "mean_{.col}")
  )
#> # A tibble: 4 × 3
#>    year mean_pressure mean_speed
#>   <dbl>         <dbl>      <dbl>
#> 1  2017          986.       88.0
#> 2  2018          961.       66.7
#> 3  2019          976.       59.0
#> 4  2020          973.       62.0

## Get cyclone category mean pressure and speed ----
# For each cyclone category: number of cyclones, then average pressure and
# speed (columns `n`, `mean_pressure`, `mean_speed`, in that order).
bagyo |>
  group_by(category_name) |>
  summarise(
    n = n(),
    across(c(pressure, speed), mean, .names = "mean_{.col}")
  )
#> # A tibble: 5 × 4
#>   category_name             n mean_pressure mean_speed
#>   <fct>                 <int>         <dbl>      <dbl>
#> 1 Tropical Depression      23          996.       39.8
#> 2 Tropical Storm           25          986.       61.6
#> 3 Severe Tropical Storm    15          978.       75  
#> 4 Typhoon                  21          941.      102. 
#> 5 Super Typhoon             2          908.      112.

## Get cyclone category mean duration (in hours) ----
# Duration is the elapsed time between `start` and `end`. The unit is pinned
# to hours: a plain `end - start` lets R choose the difftime unit from the
# data, so the result would not be guaranteed to be expressed in hours.
bagyo |>
  mutate(duration = difftime(end, start, units = "hours")) |>
  group_by(category_name) |>
  summarise(mean_duration = mean(duration))
#> # A tibble: 5 × 2
#>   category_name         mean_duration  
#>   <fct>                 <drtn>         
#> 1 Tropical Depression    46.69565 hours
#> 2 Tropical Storm         57.48000 hours
#> 3 Severe Tropical Storm  79.13333 hours
#> 4 Typhoon               106.66667 hours
#> 5 Super Typhoon          77.50000 hours

## Get number of cyclones per month by year ----
# Count cyclones by the (labelled) month of their `start` and by year, add the
# month/year combinations that have no cyclones with `n = 0`, and sort the
# result chronologically.
bagyo |>
  count(month = month(start, label = TRUE), year) |>
  complete(month, year, fill = list(n = 0)) |>
  arrange(year, month)
#> # A tibble: 48 × 3
#>    month  year     n
#>    <ord> <dbl> <int>
#>  1 Jan    2017     1
#>  2 Feb    2017     1
#>  3 Mar    2017     0
#>  4 Apr    2017     2
#>  5 May    2017     0
#>  6 Jun    2017     0
#>  7 Jul    2017     4
#>  8 Aug    2017     2
#>  9 Sep    2017     4
#> 10 Oct    2017     3
#> # ℹ 38 more rows

Following are some examples of how the bagyo dataset can
be used to demonstrate various data visualisation approaches,
particularly those using the tidyverse and
ggplot2 packages.
## Get cyclone category mean duration (in hours) ----
# Horizontal bar chart of the mean cyclone duration per category.
# Duration is computed explicitly in hours so the plotted values always match
# the "hours" axis label; a plain `end - start` lets R choose the difftime
# unit from the data.
bagyo |>
  mutate(duration = difftime(end, start, units = "hours")) |>
  group_by(category_name) |>
  summarise(mean_duration = mean(duration)) |>
  ggplot(mapping = aes(x = mean_duration, y = category_name)) +
  geom_col(colour = "#4b876e", fill = "#4b876e", alpha = 0.5) +
  labs(
    title = "Mean duration of cyclones",
    subtitle = "By cyclone categories",
    x = "mean duration (hours)",
    y = NULL
  ) +
  theme_minimal() +
  theme(
    # Drop every grid line except the major vertical ones, which are the
    # only ones useful for reading bar lengths.
    panel.grid.minor.x = element_blank(),
    panel.grid.major.y = element_blank(),
    panel.grid.minor.y = element_blank()
  )

## Cyclone speed by pressure ----
# Scatter plot of wind speed against central pressure, one panel per year,
# with points coloured by cyclone category.
bagyo |>
  mutate(year = factor(year)) |>
  ggplot(mapping = aes(x = speed, y = pressure)) +
  geom_point(mapping = aes(colour = category_name), size = 3, alpha = 0.5) +
  scale_colour_manual(
    name = NULL,
    values = c("#9c5e60", "#4b876e", "#465b92", "#e5be72", "#5d0505")
  ) +
  labs(
    title = "Cyclone maximum sustained wind speed and maximum central pressure",
    subtitle = "By cyclone categories and year",
    x = "wind speed (km/h)",
    y = "central pressure (hPa)"
  ) +
  facet_wrap(. ~ year, ncol = 4) +
  theme_bw() +
  theme(
    legend.position = "top",
    strip.background = element_rect(
      fill = alpha("#465b92", 0.7), colour = "#465b92"
    ),
    panel.border = element_rect(colour = "#465b92"),
    panel.grid.minor = element_blank()
  )

## Cyclone speed by duration ----
# Scatter plot of wind speed against cyclone duration, coloured and shaped by
# year, with one straight-line (lm) fit per year and no confidence band.
bagyo |>
  mutate(
    year = factor(year),
    # Numeric duration with the unit pinned to hours so it always matches the
    # "duration (hours)" axis label; a plain `end - start` lets R choose the
    # difftime unit from the data.
    duration = as.numeric(difftime(end, start, units = "hours"))
  ) |>
  ggplot(mapping = aes(x = speed, y = duration)) +
  geom_point(
    mapping = aes(colour = year, shape = year), size = 3, alpha = 0.5
  ) +
  geom_smooth(
    mapping = aes(colour = year), method = "lm", se = FALSE, linewidth = 0.75
  ) +
  scale_colour_manual(
    values = c("#9c5e60", "#4b876e", "#465b92", "#e5be72")
  ) +
  scale_shape_manual(values = 15:18) +
  labs(
    title = "Maximum sustained wind speed by duration of cyclones",
    subtitle = "2017-2020",
    x = "speed (km/h)", y = "duration (hours)",
    # Same title for both legends so ggplot2 merges them into one.
    colour = "Year", shape = "Year"
  ) +
  theme_minimal() +
  theme(legend.position = "top")

## Get number of cyclones per month by year and plot ----
# Bar chart of the number of cyclones per month, one panel per year.
# Cyclones are counted by the (labelled) month of their `start` and by year;
# month/year combinations with no cyclones are added with `n = 0` so every
# panel shows all months.
bagyo |>
  count(month = month(start, label = TRUE), year) |>
  complete(month, year, fill = list(n = 0)) |>
  arrange(year, month) |>
  ggplot(mapping = aes(x = month, y = n)) +
  geom_col(colour = "#4b876e", fill = "#4b876e", alpha = 0.5) +
  scale_y_continuous(breaks = seq(from = 0, to = 6, by = 1)) +
  labs(
    title = "Number of cyclones over time",
    subtitle = "2017-2020",
    x = NULL,
    y = "n"
  ) +
  facet_wrap(. ~ year, ncol = 4) +
  theme_bw() +
  theme(
    strip.background = element_rect(
      fill = alpha("#465b92", 0.7), colour = "#465b92"
    ),
    panel.border = element_rect(colour = "#465b92"),
    panel.grid.minor.y = element_blank(),
    panel.grid.major.x = element_blank(),
    # Rotate the month labels so they fit in the narrow panels.
    axis.text.x = element_text(size = 10, angle = 90, hjust = 1, vjust = 0.5)
  )

## Distribution of cyclone speed by year: box plot ----
# `year` is converted to a factor so each year gets its own box.
bagyo |>
  mutate(year = factor(year)) |>
  ggplot(mapping = aes(x = year, y = speed)) +
  geom_boxplot(colour = "#4b876e", fill = "#4b876e", alpha = 0.5) +
  labs(
    title = "Distribution of tropical cyclone maximum sustained wind speed",
    subtitle = "2017-2020",
    x = NULL, y = "speed (km/h)"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

## Distribution of cyclone speed by year: box plot with jittered points ----
# Same box plot, unfilled, with the individual cyclones overlaid as
# horizontally jittered points.
bagyo |>
  mutate(year = factor(year)) |>
  ggplot(mapping = aes(x = year, y = speed)) +
  geom_boxplot(colour = "#4b876e") +
  geom_jitter(
    colour = "#4b876e", fill = "#4b876e", alpha = 0.5,
    shape = 21, size = 2, width = 0.2
  ) +
  labs(
    title = "Distribution of tropical cyclone maximum sustained wind speed",
    subtitle = "2017-2020",
    x = NULL, y = "speed (km/h)"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())

## Distribution of cyclone speed by year: violin plot with jittered points ----
# Violin outline of the speed distribution per year with the individual
# cyclones overlaid as horizontally jittered points.
bagyo |>
  mutate(year = factor(year)) |>
  ggplot(mapping = aes(x = year, y = speed)) +
  geom_violin(colour = "#4b876e", fill = "#4b876e", alpha = 0.5) +
  geom_jitter(colour = "#4b876e", size = 3, width = 0.2) +
  labs(
    title = "Distribution of tropical cyclone maximum sustained wind speed",
    subtitle = "2017-2020",
    x = NULL, y = "speed (km/h)"
  ) +
  theme_minimal() +
  theme(panel.grid.major.x = element_blank())