Sampling recordings - Multiple Time Periods

This brief vignette shows an example of a basic workflow selecting recordings for different times of day by site and year.

First we’ll load the packages we want to work with

library(ARUtools)
library(dplyr)
library(purrr)
library(tidyr)
library(glue)
library(lubridate)

Next we’ll prepare our metadata on the recordings, by cleaning, adding site-level information and calculating the time to sunrise/sunset for each file. We’ll also define recordings as either ‘early’ (starting before 6am) or ‘late’ (starting at or after 6am).

# Clean the site index, using `date_time_start` and `date_time_end` as the
# date columns
s <- clean_site_index(
  example_sites_clean,
  name_date = c("date_time_start", "date_time_end")
)

# Clean the recording metadata, attach the site details and sun times, then
# label each recording with its time period and year
m <- clean_metadata(project_files = example_files) |>
  add_sites(s) |>
  calc_sun() |>
  mutate(
    # Recordings starting before 06:00 are "early"; everything else is "late"
    time_period = if_else(hour(date_time) < 6, "early", "late"),
    year = year(date)
  )
#> Extracting ARU info...
#> Extracting Dates and Times...
#> Joining by columns `date_time_start` and `date_time_end`
m
#> # A tibble: 42 × 18
#>   file_name     type  path  aru_id manufacturer model aru_type site_id tz_offset
#>   <chr>         <chr> <chr> <chr>  <chr>        <chr> <chr>    <chr>   <chr>    
#> 1 P01_1_202005… wav   a_BA… BARLT… Frontier La… BAR-… BARLT    P01_1   -0400    
#> 2 P01_1_202005… wav   a_BA… BARLT… Frontier La… BAR-… BARLT    P01_1   -0400    
#> 3 P02_1_202005… wav   a_S4… S4A01… Wildlife Ac… Song… SongMet… P02_1   <NA>     
#> 4 P02_1_202005… wav   a_S4… S4A01… Wildlife Ac… Song… SongMet… P02_1   <NA>     
#> # ℹ 38 more rows
#> # ℹ 9 more variables: date_time <dttm>, date <date>, longitude <dbl>,
#> #   latitude <dbl>, tz <chr>, t2sr <dbl>, t2ss <dbl>, time_period <chr>,
#> #   year <dbl>

Time to do some sampling!

First we define the selection parameters for each time frame we’re interested in sampling. This might be “dawn” and “dusk”, or in this example, “early” and “late” morning.

The sim_selection_weights() function will also simulate the selection weights so we can see what we’ve defined.

# One set of selection parameters per time period, named after the period it
# applies to
p <- list(
  # Early recordings: only the minute range is set, other values are defaults
  early = sim_selection_weights(min_range = c(-70, 240)),
  # Late recordings: later minute range, with the mean moved to 200 minutes
  late = sim_selection_weights(min_mean = 200, min_range = c(100, 300))
)

p
#> $early
#> $early$min_range
#> [1] -70 240
#> 
#> $early$min_mean
#> [1] 30
#> 
#> $early$min_sd
#> [1] 60
#> 
#> $early$day_range
#> [1] 120 201
#> 
#> $early$day_mean
#> [1] 161
#> 
#> $early$day_sd
#> [1] 20
#> 
#> $early$offset
#> [1] 0
#> 
#> $early$return_log
#> [1] TRUE
#> 
#> $early$selection_fun
#> [1] "norm"
#> 
#> 
#> $late
#> $late$min_range
#> [1] 100 300
#> 
#> $late$min_mean
#> [1] 200
#> 
#> $late$min_sd
#> [1] 60
#> 
#> $late$day_range
#> [1] 120 201
#> 
#> $late$day_mean
#> [1] 161
#> 
#> $late$day_sd
#> [1] 20
#> 
#> $late$offset
#> [1] 0
#> 
#> $late$return_log
#> [1] TRUE
#> 
#> $late$selection_fun
#> [1] "norm"

Now we can calculate selection weights

Here we’ll calculate a separate set of selection weights for early and late recordings in each year. Then we’ll group recordings by site, year, and time period.

# Calculate selection weights separately for every time period x year group,
# then label each recording with its site/year/time-period selection group
w <- m |>
  # One row per time period and year; all other columns are nested in `data`
  nest(data = c(-time_period, -year)) |>
  mutate(
    # Look the parameters up by name so that every group gets the set matching
    # its own time period, whatever the row order or the number of years
    # (assigning `p` directly only works for exactly two groups in the same
    # order as `p`)
    params = p[time_period],
    sel = map2(data, params, calc_selection_weights)
  ) |>
  unnest(sel) |>
  # The nested inputs are no longer needed once the weights are unnested
  select(-"data", -"params") |>
  mutate(selection_group = glue("{site_id}_{year}_{time_period}"))
w
#> # A tibble: 21 × 27
#>   time_period  year file_name     type  path  aru_id manufacturer model aru_type
#>   <chr>       <dbl> <chr>         <chr> <chr> <chr>  <chr>        <chr> <chr>   
#> 1 early        2020 P01_1_202005… wav   a_BA… BARLT… Frontier La… BAR-… BARLT   
#> 2 early        2020 P02_1_202005… wav   a_S4… S4A01… Wildlife Ac… Song… SongMet…
#> 3 early        2020 P06_1_202005… wav   a_BA… BARLT… Frontier La… BAR-… BARLT   
#> 4 early        2020 P07_1_202005… wav   a_S4… S4A01… Wildlife Ac… Song… SongMet…
#> # ℹ 17 more rows
#> # ℹ 18 more variables: site_id <chr>, tz_offset <chr>, date_time <dttm>,
#> #   date <date>, longitude <dbl>, latitude <dbl>, tz <chr>, t2sr <dbl>,
#> #   t2ss <dbl>, doy <dbl>, psel_by <chr>, psel_min <dbl>, psel_doy <dbl>,
#> #   psel <dbl>, psel_scaled <dbl>, psel_std <dbl>, psel_normalized <dbl>,
#> #   selection_group <glue>

This w data set contains the original recordings, but now also includes new columns containing various measures of the probability of selection.

We’ll define the number of samples we’d like to have.

# Sample sizes per selection group: `n` base samples and `n_os` oversamples
n <- w |>
  # Count how many recordings are available in each selection group
  summarize(n_recordings = n(), .by = c("selection_group", "time_period")) |>
  mutate(
    # Target number of base samples: 5 for early groups, 2 for late groups
    n = if_else(time_period == "early", 5, 2),
    # Oversamples: about a third of the target, but never more than the
    # recordings left once the target is met, and never negative
    n_os = pmax(0, pmin(n_recordings - n, round(n / 3))),
    # A group cannot supply more base samples than it has recordings
    n = pmin(n, n_recordings)
  )
n
#> # A tibble: 7 × 5
#>   selection_group  time_period n_recordings     n  n_os
#>   <glue>           <chr>              <int> <dbl> <dbl>
#> 1 P01_1_2020_early early                  3     3     0
#> 2 P02_1_2020_early early                  3     3     0
#> 3 P06_1_2020_early early                  3     3     0
#> 4 P07_1_2020_early early                  3     3     0
#> # ℹ 3 more rows

And finally sample the recordings!

# Draw the sample: each selection group is treated as its own site (stratum)
# and recordings are weighted by their normalized selection probability
g <- sample_recordings(
  w,
  n,
  col_site_id = selection_group,
  col_sel_weights = psel_normalized
)
g
#> Summary of Site Counts: 
#> 
#> siteuse by total: 
#>       Base Over
#> total   19    2
#> 
#> siteuse by stratum: 
#>                  Base Over
#> P01_1_2020_early    3    0
#> P02_1_2020_early    3    0
#> P03_1_2020_late     2    1
#> P06_1_2020_early    3    0
#> P07_1_2020_early    3    0
#> P08_1_2020_late     2    1
#> P09_1_2020_early    3    0

The recordings selected for sampling…

g$sites_base
#> Simple feature collection with 19 features and 35 fields
#> Geometry type: POINT
#> Dimension:     XY
#> Bounding box:  xmin: 124 ymin: -53.21667 xmax: 132 ymax: 238.3167
#> Projected CRS: WGS 84 / World Mercator
#> First 10 features:
#>       siteID siteuse replsite   lon_WGS84     lat_WGS84          stratum wgt ip
#> 1  sample-01    Base     None 0.001113911 -4.812753e-04 P01_1_2020_early   1  1
#> 2  sample-02    Base     None 0.001113911 -4.812753e-04 P01_1_2020_early   1  1
#> 3  sample-03    Base     None 0.001113911 -4.812753e-04 P01_1_2020_early   1  1
#> 4  sample-04    Base     None 0.001122894 -4.273146e-04 P02_1_2020_early   1  1
#> 5  sample-05    Base     None 0.001122894 -4.273146e-04 P02_1_2020_early   1  1
#> 6  sample-06    Base     None 0.001122894 -4.273146e-04 P02_1_2020_early   1  1
#> 7  sample-07    Base     None 0.001167810  3.240657e-05 P06_1_2020_early   1  1
#> 8  sample-08    Base     None 0.001167810  3.240657e-05 P06_1_2020_early   1  1
#> 9  sample-09    Base     None 0.001167810  3.240657e-05 P06_1_2020_early   1  1
#> 10 sample-10    Base     None 0.001167810 -3.701886e-04 P07_1_2020_early   1  1
#>    caty   aux time_period year                          file_name type
#> 1  None 0.001       early 2020 P01_1_20200503T052000-0400_ARU.wav  wav
#> 2  None 0.001       early 2020 P01_1_20200503T052000-0400_ARU.wav  wav
#> 3  None 0.001       early 2020 P01_1_20200503T052000-0400_ARU.wav  wav
#> 4  None 0.001       early 2020      P02_1_20200504T052500_ARU.wav  wav
#> 5  None 0.001       early 2020      P02_1_20200504T052500_ARU.wav  wav
#> 6  None 0.001       early 2020      P02_1_20200504T052500_ARU.wav  wav
#> 7  None 0.001       early 2020 P06_1_20200509T052000-0400_ARU.wav  wav
#> 8  None 0.001       early 2020 P06_1_20200509T052000-0400_ARU.wav  wav
#> 9  None 0.001       early 2020 P06_1_20200509T052000-0400_ARU.wav  wav
#> 10 None 0.001       early 2020      P07_1_20200509T052500_ARU.wav  wav
#>                                                     path     aru_id
#> 1  a_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
#> 2  j_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
#> 3  o_BARLT10962_P01_1/P01_1_20200503T052000-0400_ARU.wav BARLT10962
#> 4         a_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav   S4A01234
#> 5         j_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav   S4A01234
#> 6         o_S4A01234_P02_1/P02_1_20200504T052500_ARU.wav   S4A01234
#> 7  a_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
#> 8  j_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
#> 9  o_BARLT10962_P06_1/P06_1_20200509T052000-0400_ARU.wav BARLT10962
#> 10        a_S4A01234_P07_1/P07_1_20200509T052500_ARU.wav   S4A01234
#>          manufacturer        model  aru_type site_id tz_offset
#> 1       Frontier Labs       BAR-LT     BARLT   P01_1     -0400
#> 2       Frontier Labs       BAR-LT     BARLT   P01_1     -0400
#> 3       Frontier Labs       BAR-LT     BARLT   P01_1     -0400
#> 4  Wildlife Acoustics Song Meter 4 SongMeter   P02_1      <NA>
#> 5  Wildlife Acoustics Song Meter 4 SongMeter   P02_1      <NA>
#> 6  Wildlife Acoustics Song Meter 4 SongMeter   P02_1      <NA>
#> 7       Frontier Labs       BAR-LT     BARLT   P06_1     -0400
#> 8       Frontier Labs       BAR-LT     BARLT   P06_1     -0400
#> 9       Frontier Labs       BAR-LT     BARLT   P06_1     -0400
#> 10 Wildlife Acoustics Song Meter 4 SongMeter   P07_1      <NA>
#>              date_time       date longitude latitude               tz     t2ss
#> 1  2020-05-03 05:20:00 2020-05-03    -85.03    50.01  America/Toronto 498.4167
#> 2  2020-05-03 05:20:00 2020-05-03    -85.03    50.01  America/Toronto 498.4167
#> 3  2020-05-03 05:20:00 2020-05-03    -85.03    50.01  America/Toronto 498.4167
#> 4  2020-05-04 05:25:00 2020-05-04    -87.45    52.68  America/Toronto 483.4167
#> 5  2020-05-04 05:25:00 2020-05-04    -87.45    52.68  America/Toronto 483.4167
#> 6  2020-05-04 05:25:00 2020-05-04    -87.45    52.68  America/Toronto 483.4167
#> 7  2020-05-09 05:20:00 2020-05-09    -90.08    52.00 America/Winnipeg 521.9333
#> 8  2020-05-09 05:20:00 2020-05-09    -90.08    52.00 America/Winnipeg 521.9333
#> 9  2020-05-09 05:20:00 2020-05-09    -90.08    52.00 America/Winnipeg 521.9333
#> 10 2020-05-09 05:25:00 2020-05-09    -86.03    50.45  America/Toronto 488.7500
#>    psel_by   psel_min   psel_doy      psel psel_scaled psel_std psel_normalized
#> 1     t2sr -0.5359972 -0.9351720 0.2296568   0.8502302        1           0.001
#> 2     t2sr -0.5359972 -0.9351720 0.2296568   0.8502302        1           0.001
#> 3     t2sr -0.5359972 -0.9351720 0.2296568   0.8502302        1           0.001
#> 4     t2sr -0.5240265 -0.9200039 0.2359748   0.8736204        1           0.001
#> 5     t2sr -0.5240265 -0.9200039 0.2359748   0.8736204        1           0.001
#> 6     t2sr -0.5240265 -0.9200039 0.2359748   0.8736204        1           0.001
#> 7     t2sr -0.4585242 -0.8503970 0.2701113   1.0000000        1           0.001
#> 8     t2sr -0.4585242 -0.8503970 0.2701113   1.0000000        1           0.001
#> 9     t2sr -0.4585242 -0.8503970 0.2701113   1.0000000        1           0.001
#> 10    t2sr -0.5129536 -0.8503970 0.2558023   0.9470254        1           0.001
#>     selection_group              geometry
#> 1  P01_1_2020_early POINT (124 -53.21667)
#> 2  P01_1_2020_early POINT (124 -53.21667)
#> 3  P01_1_2020_early POINT (124 -53.21667)
#> 4  P02_1_2020_early    POINT (125 -47.25)
#> 5  P02_1_2020_early    POINT (125 -47.25)
#> 6  P02_1_2020_early    POINT (125 -47.25)
#> 7  P06_1_2020_early  POINT (130 3.583333)
#> 8  P06_1_2020_early  POINT (130 3.583333)
#> 9  P06_1_2020_early  POINT (130 3.583333)
#> 10 P07_1_2020_early POINT (130 -40.93333)