Complete an experimental design.

complete_design(design_table, data_table)

Arguments

design_table

optree for the experimental design.

data_table

optree for data.

Value

optree for the joined table, annotated with the indicator columns row_in_design_table and row_in_data_table.

Examples


# Run the example only when the optional DBI and RSQLite packages are
# installed; otherwise the whole block is skipped.
if (requireNamespace("DBI", quietly = TRUE) &&
    requireNamespace("RSQLite", quietly = TRUE)) {
  # throw-away in-memory SQLite database for the demonstration
  my_db <- DBI::dbConnect(RSQLite::SQLite(), ":memory:")

  # example experimental design: expand_grid() builds the design table
  # in the database from the listed key values (nums x lets)
  values <- list(nums = 1:3, lets = c("a", "b"))
  design <- expand_grid(my_db, values)

  # not quite matching data: key (2, "a") is absent, key (77, "a") is not
  # among the design values, and key (3, "a") appears twice
  data <- build_frame(
    "nums", "lets"   |
      1L    , "a"    |
      1L    , "b"    |
      77L   , "a"    |  # out of place ID
      2L    , "b"    |
      3L    , "a"    |
      3L    , "a"    | # duplicated
      3L    , "b"    )
  # record each data row's original position before copying it to the database
  data$row_number <- seq_len(nrow(data))
  # copy the local frame into the database as table "data"
  data <- rq_copy_to(my_db, "data", data)

  # compare/augment: build the operator tree joining the design to the data
  res <- complete_design(design, data)
  # show the generated pipeline as text
  cat(format(res))
  # NOTE(review): presumably runs the pipeline and stores the result as a
  # database table, returning a reference to it -- confirm against rquery docs
  res <- materialize(my_db, res)

  print("completed data design")
  print(execute(my_db, res))

  # look for dups (can use extend_se(partitionby) on
  # databases with window fns).
  # Here: count rows per design key and keep the keys seen more than once.
  print("duplicate key rows:")
  res %.>%
    project_se(.,
               groupby = column_names(design),
               "count" %:=% "SUM(1)") %.>%
    select_rows_se(., "count>1") %.>%
    execute(my_db, .) %.>%
    print(.)

  # look for data that was not in design:
  # left-join the data onto the completed design by the design key columns
  # and keep the rows where row_in_design_table is missing (no match).
  print("data rows not in design:")
  data %.>%
    natural_join(., res,
                 jointype = "LEFT",
                 by = column_names(design)) %.>%
    select_rows_se(., "is.na(row_in_design_table)") %.>%
    execute(my_db, .) %.>%
    print(.)

  # release the database connection
  DBI::dbDisconnect(my_db)
}
#> mk_td("eg_94539510986991312440_0000000000", c(
#>   "nums",
#>   "lets")) %.>%
#>  extend(.,
#>   row_in_design_table := 1) %.>%
#>  natural_join(.,
#>   mk_td("data", c(
#>     "nums",
#>     "lets",
#>     "row_number")) %.>%
#>    extend(.,
#>     row_in_data_table := 1),
#>   jointype = "LEFT", by = c('nums', 'lets')) %.>%
#>  null_replace(.; row_in_design_table,
#>   row_in_data_table: 0)
#> [1] "completed data design"
#>   nums lets row_in_design_table row_number row_in_data_table
#> 1    1    a                   1          1                 1
#> 2    1    b                   1          2                 1
#> 3    2    a                   1         NA                 0
#> 4    2    b                   1          4                 1
#> 5    3    a                   1          5                 1
#> 6    3    a                   1          6                 1
#> 7    3    b                   1          7                 1
#> [1] "duplicate key rows:"
#>   nums lets count
#> 1    3    a     2
#> [1] "data rows not in design:"
#>   nums lets row_number row_in_design_table row_in_data_table
#> 1   77    a          3                  NA                NA