Skip to contents

Excluding irrelevant columns and duplicates, then pivot from long to wide

Usage

exclude_cols_then_pivot_wider(
  data,
  ...,
  exclude_cols = NULL,
  avoid_list_cols = FALSE
)

Arguments

data

A data frame to pivot.

...

Arguments passed to tidyr::pivot_wider().

exclude_cols

A character vector giving regular expressions matching column names to exclude. If lengh > 1, the union is taken.

avoid_list_cols

Logical. Avoid all list-columns, duplicates, and the associated warning?

Value

A data frame giving the result you get from tidyr::pivot_wider() if data lacks the excluded columns and the resulting duplicates.

Examples

library(tidyr, warn.conflicts = FALSE)
library(dplyr, warn.conflicts = FALSE)

# styler: off
data <- tribble(
  ~to_exclude,  ~id, ~name,  ~value,
            1, "id",   "a",       1,
            2, "id",   "a",       1,
            1, "id",   "b",       2,
            2, "id",   "b",       2,
)
# styler: on

# `exclude_cols_then_pivot_wider()` excludes columns and duplicates
data |> exclude_cols_then_pivot_wider(exclude_cols = "exclude")
#> # A tibble: 1 × 3
#>   id        a     b
#>   <chr> <dbl> <dbl>
#> 1 id        1     2

# Why is this useful?
# `pivot_wider()` defaults to using all columns
data |> pivot_wider()
#> # A tibble: 2 × 4
#>   to_exclude id        a     b
#>        <dbl> <chr> <dbl> <dbl>
#> 1          1 id        1     2
#> 2          2 id        1     2

# You may specify relevant columns but the result may still have duplicates
data |>
  pivot_wider(id_cols = id, names_from = "name", values_from = "value") |>
  unnest(c(a, b))
#> Warning: Values from `value` are not uniquely identified; output will contain list-cols.
#>  Use `values_fn = list` to suppress this warning.
#>  Use `values_fn = {summary_fun}` to summarise duplicates.
#>  Use the following dplyr code to identify duplicates.
#>   {data} |>
#>   dplyr::summarise(n = dplyr::n(), .by = c(id, name)) |>
#>   dplyr::filter(n > 1L)
#> # A tibble: 2 × 3
#>   id        a     b
#>   <chr> <dbl> <dbl>
#> 1 id        1     2
#> 2 id        1     2

# styler: off
data <- tribble(
   ~id, ~name, ~value, ~to_exclude, ~yields_duplicates,
  "id",   "a",      1,           1,                  1,
  "id",   "a",      1,           2,                  2
)
# styler: on

# `data` may have columns that yield duplicates and thus list-columns
with_list_cols <- exclude_cols_then_pivot_wider(
  data,
  exclude_cols = "to_exclude",
  id_cols = "id"
)
#> Warning: Values from `value` are not uniquely identified; output will contain list-cols.
#>  Use `values_fn = list` to suppress this warning.
#>  Use `values_fn = {summary_fun}` to summarise duplicates.
#>  Use the following dplyr code to identify duplicates.
#>   {data} |>
#>   dplyr::summarise(n = dplyr::n(), .by = c(id, name)) |>
#>   dplyr::filter(n > 1L)
# You can handle it after the fact
with_list_cols |>
  tidyr::unnest(everything()) |>
  distinct()
#> # A tibble: 1 × 2
#>   id        a
#>   <chr> <dbl>
#> 1 id        1

# But also you can avoid it with `avoid_list_cols = TRUE`
exclude_cols_then_pivot_wider(
  data,
  exclude_cols = "to_exclude",
  id_cols = "id",
  avoid_list_cols = TRUE
)
#> # A tibble: 1 × 2
#>   id        a
#>   <chr> <dbl>
#> 1 id        1