Make a general SQL node.
sql_node(
source,
exprs,
...,
mods = NULL,
orig_columns = TRUE,
expand_braces = TRUE,
translate_quotes = TRUE,
env = parent.frame()
)
source to work from.
SQL expressions
force later arguments to bind by name
SQL modifiers (GROUP BY, ORDER BY, and so on)
logical if TRUE select all original columns.
logical if TRUE use col notation to ensure col is a column name.
logical if TRUE translate quotes to SQL choice (simple replacement, no escaping).
environment to look to.
sql node.
if (requireNamespace("DBI", quietly = TRUE) && requireNamespace("RSQLite", quietly = TRUE)) {
# example database connection
my_db <- DBI::dbConnect(RSQLite::SQLite(),
":memory:")
# load up example data
d <- rq_copy_to(
my_db, 'd',
data.frame(v1 = c(1, 2, NA, 3),
v2 = c(NA, "b", NA, "c"),
v3 = c(NA, NA, 7, 8),
stringsAsFactors = FALSE))
# look at table
execute(my_db, d)
# get list of columns
vars <- column_names(d)
print(vars)
# build a NA/NULLs per-row counting expression.
# names are "quoted" by wrapping them with as.name().
# constants can be quoted by an additional list wrapping.
expr <- lapply(vars,
function(vi) {
list("+ (CASE WHEN (",
as.name(vi),
"IS NULL ) THEN 1.0 ELSE 0.0 END)")
})
expr <- unlist(expr, recursive = FALSE)
expr <- c(list(0.0), expr)
cat(paste(unlist(expr), collapse = " "))
# instantiate the operator node
op_tree <- d %.>%
sql_node(., "num_missing" %:=% list(expr))
cat(format(op_tree))
# examine produced SQL
sql <- to_sql(op_tree, my_db)
cat(sql)
# execute
execute(my_db, op_tree) %.>%
print(.)
# whole process wrapped in convenience node
op_tree2 <- d %.>%
count_null_cols(., vars, "nnull")
execute(my_db, op_tree2) %.>%
print(.)
# sql_node also allows marking variable in quoted expressions
ops <- d %.>%
sql_node(., qae(sqrt_v1 = sqrt(.[v1])))
execute(my_db, ops) %.>%
print(.)
# marking variables allows for error-checking of column names
tryCatch({
ops <- d %.>%
sql_node(., qae(sqrt_v1 = sqrt(.[v1_misspellled])))
},
error = function(e) {print(e)})
DBI::dbDisconnect(my_db)
}
#> [1] "v1" "v2" "v3"
#> 0 + (CASE WHEN ( v1 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v2 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v3 IS NULL ) THEN 1.0 ELSE 0.0 END)mk_td("d", c(
#> "v1",
#> "v2",
#> "v3")) %.>%
#> sql_node(.,
#> num_missing %:=% 0 + (CASE WHEN ( v1 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v2 IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( v3 IS NULL ) THEN 1.0 ELSE 0.0 END),
#> *=TRUE)
#> SELECT
#> `v3` AS `v3`,
#> 0 + (CASE WHEN ( `v1` IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( `v2` IS NULL ) THEN 1.0 ELSE 0.0 END) + (CASE WHEN ( `v3` IS NULL ) THEN 1.0 ELSE 0.0 END) AS `num_missing`,
#> `v1` AS `v1`,
#> `v2` AS `v2`
#> FROM (
#> SELECT
#> `v1`,
#> `v2`,
#> `v3`
#> FROM
#> `d`
#> ) tsql_45099785557350063084_0000000000
#> v3 num_missing v1 v2
#> 1 NA 2 1 <NA>
#> 2 NA 1 2 b
#> 3 7 2 NA <NA>
#> 4 8 0 3 c
#> v3 nnull v1 v2
#> 1 NA 2 1 <NA>
#> 2 NA 1 2 b
#> 3 7 2 NA <NA>
#> 4 8 0 3 c
#> v3 sqrt_v1 v1 v2
#> 1 NA 1.000000 1 <NA>
#> 2 NA 1.414214 2 b
#> 3 7 NA NA <NA>
#> 4 8 1.732051 3 c
#> <simpleError in sql_node.relop(., qae(sqrt_v1 = sqrt(.[v1_misspellled]))): rquery::sql_node.relop undefined columns: v1_misspellled>