progress

ddsjoberg · ddsjoberg · commit a956317d73b0 · 2024-05-02T09:31:33.000-07:00
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -45,7 +45,7 @@ BugReports: https://github.com/ddsjoberg/gtsummary/issues
 Depends: 
     R (>= 4.1)
 Imports: 
-    cards (>= 0.1.0.9016),
+    cards (>= 0.1.0.9020),
     cli (>= 3.6.1),
     dplyr (>= 1.1.3),
     glue (>= 1.6.2),
@@ -55,9 +55,10 @@ Imports:
     tibble (>= 3.2.1),
     tidyr (>= 1.3.0)
 Suggests: 
-    cardx (>= 0.1.0.9031),
+    cardx (>= 0.1.0.9033),
     knitr,
-    testthat (>= 3.2.0)
+    testthat (>= 3.2.0),
+    withr
 VignetteBuilder: 
     knitr
 RdMacros: 
diff --git a/NAMESPACE b/NAMESPACE
@@ -74,3 +74,4 @@ importFrom(dplyr,select)
 importFrom(dplyr,starts_with)
 importFrom(dplyr,vars)
 importFrom(dplyr,where)
+importFrom(glue,glue)
diff --git a/NEWS.md b/NEWS.md
@@ -10,7 +10,7 @@
 
 * Added a family of functions `styfn_*()` that are similar to the `style_*()` functions, except they return a styling _function_, rather than a styled value.
 
-* Previously, in a `tbl_summary()` variables that were `c(0, 1)`, `c("no", "yes")`, `c("No", "Yes")`, and `c("NO", "YES")` would default to a dichotomous summary with the `1` and `yes` level being shown in the table. This would occur even in the case when, for example, only `0` was observed. In this release, the line shown for dichotomous variables must be observed OR the unobserved level must be either explicitly defined in a factor or be a logical vector. This means that a vector of all `"yes"` values will default to a categorical summary.
+* Previously, in `tbl_summary()`, variables that were `c(0, 1)`, `c("no", "yes")`, `c("No", "Yes")`, and `c("NO", "YES")` would default to a dichotomous summary with the `1` and `yes` level being shown in the table. This would occur even in the case when, for example, only `0` was observed. In this release, the line shown for dichotomous variables must be observed OR the unobserved level must be either explicitly defined in a factor or be a logical vector. This means that a character vector of all `"yes"` or all `"no"` values will default to a categorical summary instead of dichotomous.
 
 * Previously, indentation was handled with `modify_table_styling(text_format = c("indent", "indent2"))`, which would indent a cell 4 and 8 spaces, respectively. Handling of indentation has been migrated to `modify_table_styling(indentation = integer())`, and by default, the label column is indented to zero spaces. This makes it easier to indent a group of rows.
 
@@ -22,7 +22,7 @@
 
 * The values passed in `tbl_summary(value)` are now only checked for columns that are summary type `"dichotomous"`. 
 
-* Previously, the gtsummary selecting functions, e.g. `all_categorical()`, `all_continuous()`, etc., would error if used out of context. Now they won't select the columns silently.
+* Previously, the gtsummary selecting functions, e.g. `all_categorical()`, `all_continuous()`, etc., would error if used out of context. They will now select no columns when used out-of-context.
 
 #### Internal Updates
 
@@ -38,6 +38,12 @@
 
 * The `modify_header(stat_by)` argument was deprecated in v1.3.6 (2021-01-08), and has now been fully removed from the package.
 
+* Use of the `vars()` selector was first removed in v1.2.5 (2020-02-11), and the messaging about the deprecation was kicked up in June 2022. This use is now defunct and the function will soon no longer be exported.
+
+* The `add_p(test = ~'aov')` test is now deprecated as identical results can be obtained with `add_p(test = ~'oneway.test', test.args = ~list(var.equal = TRUE))`.
+
+* Previously, `add_p.tbl_summary()` would coerce various data types to classes compatible with some base R tests. One example is that we would convert `difftime` classes to general numeric before passing to `wilcox.test()`. We have eliminated type- and class-specific handling in these functions, and it is now left to the user to pass data compatible with the functions that calculate the p-values or to create a custom test that wraps `wilcox.test()` and performs the conversion. This change is effective immediately.
+
 # gtsummary 1.7.2
 
 * Removed messaging about the former auto-removal of the `tbl_summary(group)` variable from the table: a change that occurred 3+ years ago in gtsummary v1.3.1
diff --git a/R/add_p.R b/R/add_p.R
@@ -87,7 +87,7 @@ add_p.tbl_summary <- function(x,
   # checking that input x has a by var
   if (is_empty(x$inputs$by)) {
     "Cannot run {.fun add_p} when {.code tbl_summary(by)} argument not included." |>
-      cli::cli_abort()
+      cli::cli_abort(call = get_cli_abort_call())
   }
 
   cards::process_selectors(
@@ -133,16 +133,34 @@ add_p.tbl_summary <- function(x,
     )
 
   # add all available test meta data to a data frame ---------------------------
-  df_test_meta_data <- .test_meta_data(test)
+  df_test_meta_data <-
+    imap(
+      test,
+      ~dplyr::tibble(variable = .y, fun_to_run = list(.x), test_name = attr(.x, "test_name") %||% NA_character_)
+    ) |>
+    dplyr::bind_rows()
 
   # add test names to `.$table_body` so it can be used in selectors ------------
-  x$table_body <-
-    dplyr::left_join(
-      x$table_body,
-      df_test_meta_data[c("variable", "test_name")],
-      by = "variable"
-    ) |>
-    dplyr::relocate("test_name", .after = "variable")
+  if (!"test_name" %in% names(x$table_body)) {
+    x$table_body <-
+      dplyr::left_join(
+        x$table_body,
+        df_test_meta_data[c("variable", "test_name")],
+        by = "variable"
+      ) |>
+      dplyr::relocate("test_name", .after = "variable")
+  }
+  else {
+    x$table_body <-
+      dplyr::rows_update(
+        x$table_body,
+        df_test_meta_data[c("variable", "test_name")],
+        by = "variable",
+        unmatched = "ignore"
+      ) |>
+      dplyr::relocate("test_name", .after = "variable")
+  }
+
 
   # now process the `test.args` argument ---------------------------------------
   cards::process_formula_selectors(
@@ -185,8 +203,7 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
                 df_test_meta_data |>
                 dplyr::filter(.data$variable %in% .env$variable) |>
                 dplyr::pull("fun_to_run") %>%
-                getElement(1) |>
-                eval(),
+                getElement(1),
               args = list(
                 data = x$inputs$data,
                 variable = variable,
@@ -198,6 +215,14 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
             )
           )
 
+        # if there was a warning captured, print it now
+        if (!is.null(lst_captured_results[["warning"]])) {
+          cli::cli_inform(c(
+            "The following warning was returned in {.fun {calling_fun}} for variable {.val {variable}}",
+            "!" = lst_captured_results[["warning"]]
+          ))
+        }
+
         # if test evaluated without error, return the result
         if (!is.null(lst_captured_results[["result"]])) return(lst_captured_results[["result"]]) # styler: off
         # otherwise, construct a {cards}-like object with error
@@ -206,7 +231,7 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
           variable = variable,
           stat_name = switch(calling_fun, "add_p" = "p.value", "add_difference" = "estimate"),
           stat = list(NULL),
-          warning = lst_captured_results["result"],
+          warning = lst_captured_results["warning"],
           error = lst_captured_results["error"]
         ) %>%
           structure(., class = c("card", class(.)))
@@ -217,10 +242,14 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
   # print any errors or warnings
   lst_results |>
     map(\(x) if (inherits(x, "card")) x else NULL) |>
-    dplyr::bind_rows() |>
-    dplyr::filter(.data$stat_name %in% c("estimate", "std.error", "parameter", "statistic",
-                                         "conf.low", "conf.high", "p.value")) |>
-    cards::print_ard_conditions()
+    dplyr::bind_rows() %>%
+    {switch(
+      !is_empty(.),
+      dplyr::filter(., .data$stat_name %in% c("estimate", "std.error", "parameter", "statistic",
+                                           "conf.low", "conf.high", "p.value")) |>
+        cards::print_ard_conditions()
+    )}
+
 
   # combine results into a single data frame
   df_results <-
@@ -251,6 +280,16 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
     ) |>
     dplyr::bind_rows()
 
+  # remove new columns that already exist in gtsummary table
+  new_columns <- names(df_results) |> setdiff(names(x$table_body))
+  if (is_empty(new_columns)) {
+    cli::cli_abort(
+      c("Columns {.val {names(df_results) |> setdiff('variable')}} are already present in table (although, some may be hidden), and no new columns were added.",
+        i = "Use {.code tbl |> modify_table_body(\\(x) dplyr::select(x, -p.value))} to remove columns and they will be replaced by the new columns from the current call."),
+      call = get_cli_abort_call()
+    )
+  }
+
   # create default footnote text
   footnote <- map(
     lst_results,
@@ -270,6 +309,7 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
     unlist() |>
     unique() |>
     paste(collapse = "; ")
+  if (footnote == "" || is_empty(footnote)) footnote <- NULL
 
   # add results to `.$table_body` ----------------------------------------------
   x <- x |>
@@ -284,11 +324,21 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
   x <-
     modify_table_styling(
       x,
-      columns = any_of("p.value"),
+      columns = intersect("p.value", new_columns),
       label = "**p-value**",
       hide = FALSE,
       fmt_fun = styfn_pvalue(),
       footnote = footnote
+    ) |>
+    modify_table_styling(
+      columns =
+        intersect(
+          c("estimate", "std.error", "parameter", "statistic", "conf.low", "conf.high"),
+          new_columns
+        ),
+      hide = TRUE,
+      fmt_fun = styfn_sigfig(),
+      footnote = footnote
     )
 
   # adding labels for hidden columns
@@ -298,8 +348,6 @@ calculate_and_add_test_results <- function(x, include, group, test.args, adj.var
     ) |>
     tidyr::fill("modify_stat_N", .direction = "downup") # fill missing N for new cols
 
-
-
   # add raw results to `.$card`
   x$cards[[calling_fun]] <- lst_results
 
diff --git a/R/assign_tests.R b/R/assign_tests.R
@@ -52,30 +52,77 @@ assign_tests.tbl_summary <- function(x, test = NULL, group = NULL, include,
   lapply(
     include,
     function(variable) {
-      # if there is a user-supplied test, use that one
-      if (!is.null(test[[variable]])) return(test[[variable]]) # styler: off
-
-      if (calling_fun %in% "add_p") {
-        default_test <-
-          add_p_tbl_summary_default_test(data, variable = variable,
-                                         by = by, group = group,
-                                         summary_type = summary_type[[variable]])
+      if (is.null(test[[variable]]) && calling_fun %in% "add_p") {
+        test[[variable]] <-
+          .add_p_tbl_summary_default_test(data, variable = variable,
+                                          by = by, group = group,
+                                          summary_type = summary_type[[variable]])
       }
 
-      if (is.null(default_test)) {
+      if (is.null(test[[variable]])) {
         cli::cli_abort(c(
           "There is no default test set for column {.val {variable}}.",
           i = "Set a value in the {.arg test} argument for column {.val {variable}} or exclude with {.code include = -{variable}}."),
           call = get_cli_abort_call()
         )
       }
-      default_test
+
+      test[[variable]] <-
+        .process_test_argument_value(
+          test = test[[variable]],
+          class = "tbl_summary",
+          calling_fun = calling_fun
+        )
     }
   ) |>
     stats::setNames(include)
 }
 
-add_p_tbl_summary_default_test <- function(data, variable, by, group, summary_type) {
+
+.process_test_argument_value <- function(test, class, calling_fun) {
+  # subset the data frame
+  df_tests <-
+    df_add_p_tests |>
+    dplyr::filter(.data$class %in% .env$class, .data[[calling_fun]])
+
+  # if the test is character and it's an internal test
+  if (is.character(test) && test %in% df_tests$test_name) {
+    test_to_return <- df_tests$fun_to_run[df_tests$test_name %in% test][[1]] |> eval()
+    attr(test_to_return, "test_name") <- df_tests$test_name[df_tests$test_name %in% test]
+    return(test_to_return)
+  }
+
+  # if the test is character and it's NOT an internal test
+  if (is.character(test)) {
+    return(eval(parse_expr(test), envir = attr(test, ".Environment")))
+  }
+
+  # if passed test is a function and it's an internal test
+  internal_test_index <- df_tests$test_fun |>
+    map_lgl(~identical_no_attr(eval(.x), test)) |>
+    which()
+  if (is.function(test) && !is_empty(internal_test_index)) {
+    test_to_return <- df_add_p_tests$fun_to_run[[internal_test_index]] |> eval()
+    attr(test_to_return, "test_name") <- df_add_p_tests$test_name[internal_test_index]
+    return(test_to_return)
+  }
+
+  # otherwise, if it's a function, return it
+  return(eval(test, envir = attr(test, ".Environment")))
+
+}
+
+# compare after removing attributes
+identical_no_attr <- function(x, y) {
+  tryCatch({
+    attributes(x) <- NULL
+    attributes(y) <- NULL
+    identical(x, y)},
+    error = \(x) FALSE
+  )
+}
+
+.add_p_tbl_summary_default_test <- function(data, variable, by, group, summary_type) {
   # for continuous data, default to non-parametric tests
   if (is_empty(group) && summary_type %in% c("continuous", "continuous2") && length(unique(data[[by]])) == 2) {
     test_func <-
@@ -131,4 +178,6 @@ add_p_tbl_summary_default_test <- function(data, variable, by, group, summary_ty
       return(test_func)
     }
   }
+
+  return(NULL)
 }
diff --git a/R/gtsummary-package.R b/R/gtsummary-package.R
@@ -1,6 +1,7 @@
 #' @keywords internal
 #' @import rlang
 #' @importFrom dplyr across
+#' @importFrom glue glue
 "_PACKAGE"
 
 ## usethis namespace: start
diff --git a/R/reexport.R b/R/reexport.R
@@ -59,6 +59,7 @@ dplyr::where
 #' @export
 dplyr::one_of
 
+# Remove after Jan 1, 2025
 #' @importFrom dplyr vars
 #' @export
 dplyr::vars
diff --git a/R/utils-add_p_tests.R b/R/utils-add_p_tests.R
diff --git a/tests/testthat/_snaps/add_p.tbl_summary.md b/tests/testthat/_snaps/add_p.tbl_summary.md
diff --git a/tests/testthat/test-add_p.tbl_summary.R b/tests/testthat/test-add_p.tbl_summary.R