Skip to content

Commit b829943

Browse files
boshekjonkeane
authored andcommitted
ARROW-16038: [R] different behavior from dplyr when mutate's .keep option is set
This PR does two things to match some dplyr behaviour around column order: 1) Mimics dplyr implementation of `mutate(..., .keep = "none")` to append new columns after the existing columns (if suggested) as [per](tidyverse/dplyr#6086) 2) As per this [discussion](tidyverse/dplyr#6086), this required a bespoke approach to `transmute` as it not simply a wrapper for `mutate(..., .keep = "none")`. This cascades into needing to catch a couple edge cases. I have also added some tests which will test for this behaviour. Closes #12818 from boshek/mutate-keep Authored-by: SAm Albers <[email protected]> Signed-off-by: Jonathan Keane <[email protected]>
1 parent f0b5c49 commit b829943

File tree

4 files changed

+46
-8
lines changed

4 files changed

+46
-8
lines changed

r/NAMESPACE

+1
Original file line numberDiff line numberDiff line change
@@ -331,6 +331,7 @@ importFrom(rlang,":=")
331331
importFrom(rlang,.data)
332332
importFrom(rlang,abort)
333333
importFrom(rlang,as_function)
334+
importFrom(rlang,as_label)
334335
importFrom(rlang,as_quosure)
335336
importFrom(rlang,call2)
336337
importFrom(rlang,caller_env)

r/R/arrow-package.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@
2323
#' @importFrom rlang eval_tidy new_data_mask syms env new_environment env_bind set_names exec
2424
#' @importFrom rlang is_bare_character quo_get_expr quo_get_env quo_set_expr .data seq2 is_interactive
2525
#' @importFrom rlang expr caller_env is_character quo_name is_quosure enexpr enexprs as_quosure
26-
#' @importFrom rlang is_list call2 is_empty as_function
26+
#' @importFrom rlang is_list call2 is_empty as_function as_label
2727
#' @importFrom tidyselect vars_pull vars_rename vars_select eval_select
2828
#' @useDynLib arrow, .registration = TRUE
2929
#' @keywords internal

r/R/dplyr-mutate.R

+15-2
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,10 @@ mutate.arrow_dplyr_query <- function(.data,
9494

9595
# Respect .keep
9696
if (.keep == "none") {
97-
.data$selected_columns <- .data$selected_columns[new_vars]
97+
## for consistency with dplyr, this appends new columns after existing columns
98+
## by specifying the order
99+
new_cols_last <- c(intersect(old_vars, new_vars), setdiff(new_vars, old_vars))
100+
.data$selected_columns <- .data$selected_columns[new_cols_last]
98101
} else if (.keep != "all") {
99102
# "used" or "unused"
100103
used_vars <- unlist(lapply(exprs, all.vars), use.names = FALSE)
@@ -112,7 +115,17 @@ mutate.Dataset <- mutate.ArrowTabular <- mutate.RecordBatchReader <- mutate.arro
112115

113116
transmute.arrow_dplyr_query <- function(.data, ...) {
114117
dots <- check_transmute_args(...)
115-
dplyr::mutate(.data, !!!dots, .keep = "none")
118+
has_null <- map_lgl(dots, quo_is_null)
119+
.data <- dplyr::mutate(.data, !!!dots, .keep = "none")
120+
if (is_empty(dots) | any(has_null)) {
121+
return(.data)
122+
}
123+
124+
## keeping with: https://github.com/tidyverse/dplyr/issues/6086
125+
cur_exprs <- map_chr(dots, as_label)
126+
transmute_order <- names(cur_exprs)
127+
transmute_order[!nzchar(transmute_order)] <- cur_exprs[!nzchar(transmute_order)]
128+
dplyr::select(.data, all_of(transmute_order))
116129
}
117130
transmute.Dataset <- transmute.ArrowTabular <- transmute.RecordBatchReader <- transmute.arrow_dplyr_query
118131

r/tests/testthat/test-dplyr-mutate.R

+29-5
Original file line numberDiff line numberDiff line change
@@ -74,6 +74,16 @@ test_that("transmute", {
7474
)
7575
})
7676

77+
test_that("transmute respect bespoke dplyr implementation", {
78+
## see: https://github.com/tidyverse/dplyr/issues/6086
79+
compare_dplyr_binding(
80+
.input %>%
81+
transmute(dbl, int = int + 6L) %>%
82+
collect(),
83+
tbl
84+
)
85+
})
86+
7787
test_that("transmute() with NULL inputs", {
7888
compare_dplyr_binding(
7989
.input %>%
@@ -92,6 +102,20 @@ test_that("empty transmute()", {
92102
)
93103
})
94104

105+
test_that("transmute with unnamed expressions", {
106+
compare_dplyr_binding(
107+
.input %>%
108+
select(int, padded_strings) %>%
109+
transmute(
110+
int, # bare column name
111+
nchar(padded_strings) # expression
112+
) %>%
113+
filter(int > 5) %>%
114+
collect(),
115+
tbl
116+
)
117+
})
118+
95119
test_that("transmute() with unsupported arguments", {
96120
expect_error(
97121
tbl %>%
@@ -329,13 +353,13 @@ test_that("dplyr::mutate's examples", {
329353
#> <chr> <chr> <dbl>
330354
#> 1 a b 3
331355
compare_dplyr_binding(
332-
.input %>% mutate(z = x + y, .keep = "none") %>% collect(), # same as transmute()
356+
.input %>% mutate(z = x + y, x, .keep = "none") %>% collect(),
333357
df
334358
)
335-
#> # A tibble: 1 x 1
336-
#> z
337-
#> <dbl>
338-
#> 1 3
359+
#> # A tibble: 1 × 2
360+
#> x z
361+
#> <dbl> <dbl>
362+
#> 1 1 3
339363

340364
# Grouping ----------------------------------------
341365
# The mutate operation may yield different results on grouped

0 commit comments

Comments
 (0)