rquery re-maps a number of symbols during
SQL translation.
During expression parsing the internal rquery function
tokenize_call_for_SQL() implements the following
re-mappings from R idioms to SQL notation.
library("rquery")
library("wrapr")
show_translation <- function(strings) {
  vapply(strings,
         function(si) {
           format(rquery::tokenize_for_SQL(parse(text = si, keep.source = FALSE)[[1]], colnames = NULL)$parsed_toks)
         }, character(1))
}
mapping_table <- data.frame(
  example = c('!x', 'is.na(x)', 'ifelse(a, b, c)', 'a^b', 'a%%b', 
               'a==b', 'a&&b', 'a&b', 'a||b', 'a|b', 
              'pmin(a, b)', 'pmax(a, b)'),
  stringsAsFactors = FALSE)
mapping_table$translation <- show_translation(mapping_table$example)
knitr::kable(mapping_table)| example | translation | 
|---|---|
| !x | ( NOT ( x ) ) | 
| is.na(x) | ( ( x ) IS NULL ) | 
| ifelse(a, b, c) | ( CASE WHEN ( a ) THEN ( b ) WHEN NOT ( a ) THEN ( c ) ELSE NULL END ) | 
| a^b | POWER ( a , b ) | 
| a%%b | MOD ( a , b ) | 
| a==b | a = b | 
| a&&b | a AND b | 
| a&b | a AND b | 
| a||b | a OR b | 
| a|b | a OR b | 
| pmin(a, b) | ( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) <= ( b ) THEN ( a ) ELSE ( b ) END ) | 
| pmax(a, b) | ( CASE WHEN ( a ) IS NULL THEN ( b ) WHEN ( b ) IS NULL THEN ( a ) WHEN ( a ) >= ( b ) THEN ( a ) ELSE ( b ) END ) | 
Note: not all possible mappings are implemented. For example we
currently do not re-map %in%, preferring the user to
explicitly work with set_indicator()
directly.
In addition to this the database connectors can specify additional re-mappings. This can be found by building a formal connector and inspecting the re-mappings.
raw_RSQLite_connection <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")
RSQLite::initExtension(raw_RSQLite_connection)
db <- rquery_db_info(
  connection = raw_RSQLite_connection,
  is_dbi = TRUE,
  connection_options = rq_connection_tests(raw_RSQLite_connection))
fn_name_map <- db$connection_options[[paste0("rquery.", rq_connection_name(db), ".", "fn_name_map")]]
fn_name_map## NULLWe see above that “mean” is re-mapped to
“avg”.
In all cases we can see what re-mappings happen by examining a query.
d_local <- build_frame(
   "subjectID", "surveyCategory"     , "assessmentTotal", "irrelevantCol1", "irrelevantCol2" |
   1L         , "withdrawal behavior", 5                , "irrel1"        , "irrel2"         |
   1L         , "positive re-framing", 2                , "irrel1"        , "irrel2"         |
   3L         , "withdrawal behavior", 3                , "irrel1"        , "irrel2"         |
   3L         , "positive re-framing", 2                , "irrel1"        , "irrel2"         |
   3L         , "other"              , 1                , "irrel1"        , "irrel2"         )
table_handle <- rq_copy_to(db, 'd',
            d_local,
            temporary = TRUE, 
            overwrite = TRUE)
print(table_handle)## [1] "mk_td(\"d\", c( \"subjectID\", \"surveyCategory\", \"assessmentTotal\", \"irrelevantCol1\", \"irrelevantCol2\"))"
##   subjectID      surveyCategory assessmentTotal irrelevantCol1 irrelevantCol2
## 1         1 withdrawal behavior               5         irrel1         irrel2
## 2         1 positive re-framing               2         irrel1         irrel2
## 3         3 withdrawal behavior               3         irrel1         irrel2
## 4         3 positive re-framing               2         irrel1         irrel2
## 5         3               other               1         irrel1         irrel2ops <- table_handle %.>% 
  project(., 
          avg_total := avg(pmax(0, assessmentTotal)),
          groupby = "subjectID")
cat(to_sql(ops, db))## SELECT `subjectID`, avg ( ( CASE WHEN ( 0 ) IS NULL THEN ( `assessmentTotal` ) WHEN ( `assessmentTotal` ) IS NULL THEN ( 0 ) WHEN ( 0 ) >= ( `assessmentTotal` ) THEN ( 0 ) ELSE ( `assessmentTotal` ) END ) ) AS `avg_total` FROM (
##  SELECT
##   `subjectID`,
##   `assessmentTotal`
##  FROM
##   `d`
##  ) tsql_47208226594582856777_0000000000
## GROUP BY
##  `subjectID`| subjectID | avg_total | 
|---|---|
| 1 | 3.5 | 
| 3 | 2.0 | 
The basic mappings are stored in database option structures, and
depend on the database. For example MOD is re-mapped back
to % for RSQLite.
| fn_name | sql_mapping | 
|---|---|
| as.Date | to_date ( .(3) , ‘YYYY-MM-DD’ ) | 
| n | COUNT ( 1 ) | 
| mean | AVG ( .(3) ) | 
| cumsum | SUM ( .(3) ) | 
| shift | LAG ( .(3) ) | 
| MOD | ( .(3) % .(5) ) | 
| rand | ABS ( ( RANDOM ( ) % 268435456 ) / 268435455.0 ) | 
ops <- table_handle %.>% 
  project(., groupby = "subjectID",
          n := 5, 
          count := n(),
          mean := mean(assessmentTotal)) %.>% 
  extend(., was_n := n)
                 
cat(to_sql(ops, db))## SELECT
##  `subjectID`,
##  `n`,
##  `count`,
##  `mean`,
##  `n`  AS `was_n`
## FROM (
##  SELECT `subjectID`, 5 AS `n`, COUNT ( 1 ) AS `count`, AVG ( `assessmentTotal` ) AS `mean` FROM (
##   SELECT
##    `subjectID`,
##    `assessmentTotal`
##   FROM
##    `d`
##   ) tsql_15438122330756525169_0000000000
##  GROUP BY
##   `subjectID`
##  ) tsql_15438122330756525169_0000000001| subjectID | n | count | mean | was_n | 
|---|---|---|---|---|
| 1 | 5 | 2 | 3.5 | 5 | 
| 3 | 5 | 3 | 2.0 | 5 | 
Additional function re-mappings can be specified by user code. One
such example is re-writing MOD as % for
RSQLite.
ops <- table_handle %.>% 
  extend(., z := 1 + subjectID %% 3) %.>%
  select_columns(., c("subjectID", "z"))
                 
cat(to_sql(ops, db))## SELECT
##  `subjectID`,
##  `z`
## FROM (
##  SELECT
##   `subjectID`,
##   1 + ( `subjectID` % 3 )  AS `z`
##  FROM (
##   SELECT
##    `subjectID`
##   FROM
##    `d`
##   ) tsql_93440808473188090527_0000000000
## ) tsql_93440808473188090527_0000000001| subjectID | z | 
|---|---|
| 1 | 2 | 
| 1 | 2 | 
| 3 | 1 | 
| 3 | 1 | 
| 3 | 1 | 
rqdatatable also supplies some re-mappings (described here).
This can allow us to use a uniform notation for tasks such as random
number generation to allow portable pipelines.