Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions r/R/dataset-write.R
Original file line number Diff line number Diff line change
Expand Up @@ -105,8 +105,8 @@
#' # output directory will be different.
#' library(dplyr)
#' two_levels_tree_2 <- tempfile()
#' mtcars %>%
#' group_by(cyl, gear) %>%
#' mtcars |>
#' group_by(cyl, gear) |>
#' write_dataset(two_levels_tree_2)
#' list.files(two_levels_tree_2, recursive = TRUE)
#'
Expand All @@ -115,8 +115,8 @@
#'
#' # Write a structure X/Y/part-Z.parquet.
#' two_levels_tree_no_hive <- tempfile()
#' mtcars %>%
#' group_by(cyl, gear) %>%
#' mtcars |>
#' group_by(cyl, gear) |>
#' write_dataset(two_levels_tree_no_hive, hive_style = FALSE)
#' list.files(two_levels_tree_no_hive, recursive = TRUE)
#' @export
Expand Down
8 changes: 4 additions & 4 deletions r/R/dplyr-funcs-augmented.R
Original file line number Diff line number Diff line change
Expand Up @@ -28,16 +28,16 @@
#' augmented column.
#'
#' @examples \dontrun{
#' open_dataset("nyc-taxi") %>% mutate(
#' open_dataset("nyc-taxi") |> mutate(
#' file =
#' add_filename()
#' )
#'
#' # To use a verb like mutate() with add_filename() we need to first call
#' # compute()
#' open_dataset("nyc-taxi") %>%
#' mutate(file = add_filename()) %>%
#' compute() %>%
#' open_dataset("nyc-taxi") |>
#' mutate(file = add_filename()) |>
#' compute() |>
#' mutate(filename_length = nchar(file))
#' }
#'
Expand Down
4 changes: 2 additions & 2 deletions r/R/dplyr-funcs-type.R
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@ register_bindings_type <- function() {
#'
#' @examples
#' \dontrun{
#' mtcars %>%
#' arrow_table() %>%
#' mtcars |>
#' arrow_table() |>
#' mutate(cyl = cast(cyl, string()))
#' }
#' @keywords internal
Expand Down
6 changes: 3 additions & 3 deletions r/R/dplyr-summarize.R
Original file line number Diff line number Diff line change
Expand Up @@ -57,8 +57,8 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
# and aggregations, but that's not how Acero works. For example, for us to do
# summarize(mean = sum(x) / n())
# we basically have to translate it into
# summarize(..temp0 = sum(x), ..temp1 = n()) %>%
# mutate(mean = ..temp0 / ..temp1) %>%
# summarize(..temp0 = sum(x), ..temp1 = n()) |>
# mutate(mean = ..temp0 / ..temp1) |>
# select(-starts_with("..temp"))
# That is, "first aggregate, then transform the result further."
#
Expand Down Expand Up @@ -97,7 +97,7 @@ do_arrow_summarize <- function(.data, ..., .groups = NULL) {
# One last check: it's possible that an expression like y - mean(y) would
# successfully evaluate, but it's not supported. It gets transformed to:
# nolint start
# summarize(..temp0 = mean(y)) %>%
# summarize(..temp0 = mean(y)) |>
# mutate(y - ..temp0)
# nolint end
# but y is not in the schema of the data after summarize(). To catch this
Expand Down
8 changes: 4 additions & 4 deletions r/R/dplyr.R
Original file line number Diff line number Diff line change
Expand Up @@ -279,10 +279,10 @@ tail.arrow_dplyr_query <- function(x, n = 6L, ...) {
#'
#' @examplesIf arrow_with_dataset() && requireNamespace("dplyr", quietly = TRUE)
#' library(dplyr)
#' mtcars %>%
#' arrow_table() %>%
#' filter(mpg > 20) %>%
#' mutate(x = gear / carb) %>%
#' mtcars |>
#' arrow_table() |>
#' filter(mpg > 20) |>
#' mutate(x = gear / carb) |>
#' show_exec_plan()
show_exec_plan <- function(x) {
result <- as_record_batch_reader(as_adq(x))
Expand Down
20 changes: 10 additions & 10 deletions r/R/duckdb.R
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,10 @@
#'
#' ds <- InMemoryDataset$create(mtcars)
#'
#' ds %>%
#' filter(mpg < 30) %>%
#' group_by(cyl) %>%
#' to_duckdb() %>%
#' ds |>
#' filter(mpg < 30) |>
#' group_by(cyl) |>
#' to_duckdb() |>
#' slice_min(disp)
to_duckdb <- function(.data,
con = arrow_duck_connection(),
Expand Down Expand Up @@ -154,12 +154,12 @@ duckdb_disconnector <- function(con, tbl_name) {
#'
#' ds <- InMemoryDataset$create(mtcars)
#'
#' ds %>%
#' filter(mpg < 30) %>%
#' to_duckdb() %>%
#' group_by(cyl) %>%
#' summarize(mean_mpg = mean(mpg, na.rm = TRUE)) %>%
#' to_arrow() %>%
#' ds |>
#' filter(mpg < 30) |>
#' to_duckdb() |>
#' group_by(cyl) |>
#' summarize(mean_mpg = mean(mpg, na.rm = TRUE)) |>
#' to_arrow() |>
#' collect()
to_arrow <- function(.data) {
# If this is an Arrow object already, return quickly since we're already Arrow
Expand Down
4 changes: 2 additions & 2 deletions r/R/type.R
Original file line number Diff line number Diff line change
Expand Up @@ -408,11 +408,11 @@ NestedType <- R6Class("NestedType", inherit = DataType)
#' # You can also use `cast()` in an Arrow dplyr query.
#' if (requireNamespace("dplyr", quietly = TRUE)) {
#' library(dplyr, warn.conflicts = FALSE)
#' arrow_table(mtcars) %>%
#' arrow_table(mtcars) |>
#' transmute(
#' col1 = cast(cyl, string()),
#' col2 = cast(cyl, int8())
#' ) %>%
#' ) |>
#' compute()
#' }
int8 <- function() Int8__initialize()
Expand Down
6 changes: 3 additions & 3 deletions r/R/udf.R
Original file line number Diff line number Diff line change
Expand Up @@ -66,9 +66,9 @@
#' auto_convert = TRUE
#' )
#'
#' as_arrow_table(mtcars) %>%
#' transmute(mpg, mpg_predicted = mtcars_predict_mpg(disp, cyl)) %>%
#' collect() %>%
#' as_arrow_table(mtcars) |>
#' transmute(mpg, mpg_predicted = mtcars_predict_mpg(disp, cyl)) |>
#' collect() |>
#' head()
#'
register_scalar_function <- function(name, fun, in_type, out_type,
Expand Down
28 changes: 14 additions & 14 deletions r/data-raw/codegen.R
Original file line number Diff line number Diff line change
Expand Up @@ -41,13 +41,13 @@ suppressPackageStartupMessages({
})

get_exported_functions <- function(decorations, export_tag) {
# Keep only the decorations tagged `<export_tag>::export` and parse the C++
# function found in each decoration's context into a `functions` list column.
out <- decorations |>
  filter(decoration %in% paste0(export_tag, "::export")) |>
  mutate(functions = map(context, decor:::parse_cpp_function))
# Row-bind the parsed functions and attach them as extra columns.
# This step cannot stay inside the pipeline: the native pipe `|>` rejects a
# braced block (`{ ... }`) as its right-hand side at parse time and does not
# bind `.` the way magrittr's `%>%` did, so the intermediate value is named
# and passed explicitly instead.
out <- vec_cbind(out, vec_rbind(!!!pull(out, functions))) |>
  select(-functions) |>
  mutate(decoration = sub("::export", "", decoration))
message(glue("*** > {n} functions decorated with [[{tags}::export]]", n = nrow(out), tags = paste0(export_tag, collapse = "|")))
out
Expand Down Expand Up @@ -123,8 +123,8 @@ extern "C" SEXP {sexp_signature}{{
#endif\n\n')
}

cpp_functions_definitions <- arrow_exports %>%
select(name, return_type, args, file, line, decoration) %>%
cpp_functions_definitions <- arrow_exports |>
select(name, return_type, args, file, line, decoration) |>
pmap_chr(function(name, return_type, args, file, line, decoration) {
sexp_params <- glue_collapse_data(args, "SEXP {name}_sexp")
sexp_signature <- glue("_arrow_{name}({sexp_params})")
Expand All @@ -147,14 +147,14 @@ cpp_functions_definitions <- arrow_exports %>%
",
sep = "\n"
)
}) %>%
}) |>
glue_collapse(sep = "\n")

cpp_functions_registration <- arrow_exports %>%
select(name, return_type, args) %>%
cpp_functions_registration <- arrow_exports |>
select(name, return_type, args) |>
pmap_chr(function(name, return_type, args) {
glue('\t\t{{ "_arrow_{name}", (DL_FUNC) &_arrow_{name}, {nrow(args)}}}, ')
}) %>%
}) |>
glue_collapse(sep = "\n")

cpp_file_header <- '// Generated by using data-raw/codegen.R -> do not edit by hand
Expand Down Expand Up @@ -198,8 +198,8 @@ static const R_CallMethodDef CallEntries[] = {

write_if_modified(arrow_exports_cpp, "src/arrowExports.cpp")

r_functions <- arrow_exports %>%
select(name, return_type, args) %>%
r_functions <- arrow_exports |>
select(name, return_type, args) |>
pmap_chr(function(name, return_type, args) {
params <- if (nrow(args)) {
paste0(", ", glue_collapse_data(args, "{name}"))
Expand All @@ -220,7 +220,7 @@ r_functions <- arrow_exports %>%
list_params = glue_collapse_data(args, "{name}"),
sep = "\n"
)
}) %>%
}) |>
glue_collapse(sep = "\n")

arrow_exports_r <- glue::glue("
Expand Down
16 changes: 8 additions & 8 deletions r/data-raw/docgen.R
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,8 @@ render_fun <- function(fun, pkg_fun, notes) {

# This renders a bulleted list under a package heading
render_pkg <- function(df, pkg) {
bullets <- df %>%
transmute(render_fun(fun, pkg_fun, notes)) %>%
bullets <- df |>
transmute(render_fun(fun, pkg_fun, notes)) |>
pull()
header <- paste0("## ", pkg, "\n#'")
# Some packages have global notes to include
Expand Down Expand Up @@ -158,7 +158,7 @@ docs <- c(docs, setNames(rep(list(NULL), length(tidyselect)), tidyselect))
fun_df <- tibble::tibble(
pkg_fun = names(docs),
notes = docs
) %>%
) |>
mutate(
has_pkg = grepl("::", pkg_fun),
fun = sub("^.*?:{+}", "", pkg_fun),
Expand All @@ -167,7 +167,7 @@ fun_df <- tibble::tibble(
pkg = if_else(has_pkg, pkg, "base"),
# Flatten notes to a single string
notes = map_chr(notes, ~ paste(., collapse = "\n#' "))
) %>%
) |>
arrange(pkg, fun)

# Group by package name and render the lists
Expand All @@ -182,13 +182,13 @@ dplyr_verbs <- c(
verb_bullets <- tibble::tibble(
fun = names(dplyr_verbs),
notes = dplyr_verbs
) %>%
) |>
mutate(
pkg_fun = paste0("dplyr::", fun),
notes = map_chr(notes, ~ paste(., collapse = " "))
) %>%
arrange(fun) %>%
transmute(render_fun(fun, pkg_fun, notes)) %>%
) |>
arrange(fun) |>
transmute(render_fun(fun, pkg_fun, notes)) |>
pull()

writeLines(
Expand Down
2 changes: 1 addition & 1 deletion r/tests/testthat/_snaps/dataset-dplyr.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# dplyr method not implemented messages

Code
ds %>% filter(int > 6, dbl > max(dbl))
filter(ds, int > 6, dbl > max(dbl))
Condition
Error in `dbl > max(dbl)`:
! Expression not supported in filter() in Arrow
Expand Down
2 changes: 1 addition & 1 deletion r/tests/testthat/_snaps/dplyr-across.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# expand_across correctly expands quosures

Code
InMemoryDataset$create(example_data) %>% mutate(across(c(dbl, dbl2), round,
mutate(InMemoryDataset$create(example_data), across(c(dbl, dbl2), round,
digits = -1))
Condition
Error in `mutate.Dataset()`:
Expand Down
4 changes: 2 additions & 2 deletions r/tests/testthat/_snaps/dplyr-funcs-datetime.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# `as.Date()` and `as_date()`

Code
test_df %>% InMemoryDataset$create() %>% transmute(date_char_ymd = as.Date(
character_ymd_var, tryFormats = c("%Y-%m-%d", "%Y/%m/%d"))) %>% collect()
collect(transmute(InMemoryDataset$create(test_df), date_char_ymd = as.Date(
character_ymd_var, tryFormats = c("%Y-%m-%d", "%Y/%m/%d"))))
Condition
Error in `as.Date()`:
! `as.Date()` with multiple `tryFormats` not supported in Arrow
Expand Down
9 changes: 4 additions & 5 deletions r/tests/testthat/_snaps/dplyr-glimpse.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,7 +86,7 @@
# glimpse() calls print() instead of showing data for RBR

Code
example_data %>% as_record_batch_reader() %>% glimpse()
glimpse(as_record_batch_reader(example_data))
Message
Cannot glimpse() data from a RecordBatchReader because it can only be read one time; call `as_arrow_table()` to consume it first.
Output
Expand All @@ -103,7 +103,7 @@
---

Code
example_data %>% as_record_batch_reader() %>% select(int) %>% glimpse()
glimpse(select(as_record_batch_reader(example_data), int))
Message
Cannot glimpse() data from a RecordBatchReader because it can only be read one time. Call `compute()` to evaluate the query first.
Output
Expand Down Expand Up @@ -131,7 +131,7 @@
# glimpse() on Dataset query only shows data for streaming eval

Code
ds %>% summarize(max(int)) %>% glimpse()
glimpse(summarize(ds, max(int)))
Message
This query requires a full table scan, so glimpse() may be expensive. Call `compute()` to evaluate the query first.
Output
Expand All @@ -143,8 +143,7 @@
# glimpse() on in-memory query shows data even if aggregating

Code
example_data %>% arrow_table() %>% summarize(sum(int, na.rm = TRUE)) %>%
glimpse()
glimpse(summarize(arrow_table(example_data), sum(int, na.rm = TRUE)))
Output
Table (query)
?? rows x 1 columns
Expand Down
4 changes: 2 additions & 2 deletions r/tests/testthat/_snaps/dplyr-mutate.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# transmute() defuses dots arguments (ARROW-13262)

Code
tbl %>% Table$create() %>% transmute(a = stringr::str_c(padded_strings,
padded_strings), b = stringr::str_squish(a)) %>% collect()
collect(transmute(Table$create(tbl), a = stringr::str_c(padded_strings,
padded_strings), b = stringr::str_squish(a)))
Condition
Warning:
In stringr::str_squish(a):
Expand Down
8 changes: 4 additions & 4 deletions r/tests/testthat/_snaps/dplyr-summarize.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Functions that take ... but we only accept a single arg

Code
InMemoryDataset$create(tbl) %>% summarize(distinct = n_distinct())
summarize(InMemoryDataset$create(tbl), distinct = n_distinct())
Condition
Error in `n_distinct()`:
! n_distinct() with 0 arguments not supported in Arrow
Expand All @@ -16,7 +16,7 @@
# Expressions on aggregations

Code
record_batch(tbl) %>% summarise(any(any(lgl)))
summarise(record_batch(tbl), any(any(lgl)))
Condition
Warning:
In any(any(lgl)):
Expand All @@ -31,8 +31,8 @@
# Can use across() within summarise()

Code
data.frame(x = 1, y = 2) %>% arrow_table() %>% group_by(x) %>% summarise(across(
everything())) %>% collect()
collect(summarise(group_by(arrow_table(data.frame(x = 1, y = 2)), x), across(
everything())))
Condition
Warning:
In y:
Expand Down
Loading
Loading