dplyr::select
dplyr::select
は、tidyverse コレクションの dplyr パッケージに含まれる関数で、データフレームから指定した列を選択するために使用される。
列を選択する様々な方法を利用できるため、柔軟に列の選択をすることができる。
クイックリファレンス
library(tidyverse)
df %>%
select(col1, col2, ...)
penguins %>%
select(species, starts_with("bill"))
# # A tibble: 344 × 3
# species bill_length_mm bill_depth_mm
# <fct> <dbl> <dbl>
# 1 Adelie 39.1 18.7
# 2 Adelie 39.5 17.4
# 3 Adelie 40.3 18
# 4 Adelie NA NA
# 5 Adelie 36.7 19.3
# 6 Adelie 39.3 20.6
# 7 Adelie 38.9 17.8
# 8 Adelie 39.2 19.6
# 9 Adelie 34.1 18.1
# 10 Adelie 42 20.2
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
基本構文
select(.data, ...)
引数 | 説明 |
---|---|
.data | データフレーム(または tibble)。 |
... | 選択する列名や列番号。<tidy-select> も指定可能。 |
使用例
1. 指定した列を選択
penguins %>%
select(species, island)
# # A tibble: 344 × 2
# species island
# <fct> <fct>
# 1 Adelie Torgersen
# 2 Adelie Torgersen
# 3 Adelie Torgersen
# 4 Adelie Torgersen
# 5 Adelie Torgersen
# 6 Adelie Torgersen
# 7 Adelie Torgersen
# 8 Adelie Torgersen
# 9 Adelie Torgersen
# 10 Adelie Torgersen
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
列名を変更して選択することも出来る。
penguins %>%
select(SPECIES = species, ISLAND = island)
# # A tibble: 344 × 2
# SPECIES ISLAND
# <fct> <fct>
# 1 Adelie Torgersen
# 2 Adelie Torgersen
# 3 Adelie Torgersen
# 4 Adelie Torgersen
# 5 Adelie Torgersen
# 6 Adelie Torgersen
# 7 Adelie Torgersen
# 8 Adelie Torgersen
# 9 Adelie Torgersen
# 10 Adelie Torgersen
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
2. 指定した列以外を選択
penguins %>%
select(!c(species, island))
# # A tibble: 344 × 5
# bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
# <dbl> <dbl> <int> <int> <fct>
# 1 39.1 18.7 181 3750 male
# 2 39.5 17.4 186 3800 female
# 3 40.3 18 195 3250 female
# 4 NA NA NA NA NA
# 5 36.7 19.3 193 3450 female
# 6 39.3 20.6 190 3650 male
# 7 38.9 17.8 181 3625 female
# 8 39.2 19.6 195 4675 male
# 9 34.1 18.1 193 3475 NA
# 10 42 20.2 190 4250 NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
3. 連続する列を範囲選択
penguins %>%
select(1:3)
# # A tibble: 344 × 3
# species island bill_length_mm
# <fct> <fct> <dbl>
# 1 Adelie Torgersen 39.1
# 2 Adelie Torgersen 39.5
# 3 Adelie Torgersen 40.3
# 4 Adelie Torgersen NA
# 5 Adelie Torgersen 36.7
# 6 Adelie Torgersen 39.3
# 7 Adelie Torgersen 38.9
# 8 Adelie Torgersen 39.2
# 9 Adelie Torgersen 34.1
# 10 Adelie Torgersen 42
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(species:bill_length_mm)
# # A tibble: 344 × 3
# species island bill_length_mm
# <fct> <fct> <dbl>
# 1 Adelie Torgersen 39.1
# 2 Adelie Torgersen 39.5
# 3 Adelie Torgersen 40.3
# 4 Adelie Torgersen NA
# 5 Adelie Torgersen 36.7
# 6 Adelie Torgersen 39.3
# 7 Adelie Torgersen 38.9
# 8 Adelie Torgersen 39.2
# 9 Adelie Torgersen 34.1
# 10 Adelie Torgersen 42
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
4. 全ての列を選択
tidyselect::everything
を使用すると、全ての列を選択できる。
penguins %>%
select(everything())
# # A tibble: 344 × 7
# species island bill_length_mm bill_depth_mm flipper_length_mm body_mass_g sex
# <fct> <fct> <dbl> <dbl> <int> <int> <fct>
# 1 Adelie Torgersen 39.1 18.7 181 3750 male
# 2 Adelie Torgersen 39.5 17.4 186 3800 female
# 3 Adelie Torgersen 40.3 18 195 3250 female
# 4 Adelie Torgersen NA NA NA NA NA
# 5 Adelie Torgersen 36.7 19.3 193 3450 female
# 6 Adelie Torgersen 39.3 20.6 190 3650 male
# 7 Adelie Torgersen 38.9 17.8 181 3625 female
# 8 Adelie Torgersen 39.2 19.6 195 4675 male
# 9 Adelie Torgersen 34.1 18.1 193 3475 NA
# 10 Adelie Torgersen 42 20.2 190 4250 NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
5. 最後の列を選択
tidyselect::last_col
を使用すると、最後の列を選択する。
penguins %>%
select(last_col())
# # A tibble: 344 × 1
# sex
# <fct>
# 1 male
# 2 female
# 3 female
# 4 NA
# 5 female
# 6 male
# 7 female
# 8 male
# 9 NA
# 10 NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
offset
パラメータに、数値 n
を指定した場合、最後の列から n
列前の列(最後から n + 1 番目の列)が選択される。
penguins %>%
select(last_col(offset = 2))
# # A tibble: 344 × 1
# flipper_length_mm
# <int>
# 1 181
# 2 186
# 3 195
# 4 NA
# 5 193
# 6 190
# 7 181
# 8 195
# 9 193
# 10 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
6. グループ化した列を選択
tidyselect::group_cols
を使用すると、グループ化された列を選択する。
penguins %>%
group_by(species, island) %>%
select(group_cols())
# # A tibble: 344 × 2
# # Groups: species, island [5]
# species island
# <fct> <fct>
# 1 Adelie Torgersen
# 2 Adelie Torgersen
# 3 Adelie Torgersen
# 4 Adelie Torgersen
# 5 Adelie Torgersen
# 6 Adelie Torgersen
# 7 Adelie Torgersen
# 8 Adelie Torgersen
# 9 Adelie Torgersen
# 10 Adelie Torgersen
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
7. パターンに一致する列を選択
ヘルパー関数 | パターン |
---|---|
tidyselect::starts_with | 前方一致 |
tidyselect::ends_with | 後方一致 |
tidyselect::contains | 部分一致 |
tidyselect::matches | 正規表現と一致(貪欲マッチ) |
tidyselect::num_range | 数値範囲と一致 |
penguins %>%
select(starts_with("bill"))
# # A tibble: 344 × 2
# bill_length_mm bill_depth_mm
# <dbl> <dbl>
# 1 39.1 18.7
# 2 39.5 17.4
# 3 40.3 18
# 4 NA NA
# 5 36.7 19.3
# 6 39.3 20.6
# 7 38.9 17.8
# 8 39.2 19.6
# 9 34.1 18.1
# 10 42 20.2
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(ends_with("_mm"))
# # A tibble: 344 × 3
# bill_length_mm bill_depth_mm flipper_length_mm
# <dbl> <dbl> <int>
# 1 39.1 18.7 181
# 2 39.5 17.4 186
# 3 40.3 18 195
# 4 NA NA NA
# 5 36.7 19.3 193
# 6 39.3 20.6 190
# 7 38.9 17.8 181
# 8 39.2 19.6 195
# 9 34.1 18.1 193
# 10 42 20.2 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(contains("length"))
# # A tibble: 344 × 2
# bill_length_mm flipper_length_mm
# <dbl> <int>
# 1 39.1 181
# 2 39.5 186
# 3 40.3 195
# 4 NA NA
# 5 36.7 193
# 6 39.3 190
# 7 38.9 181
# 8 39.2 195
# 9 34.1 193
# 10 42 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(matches(r"(^\w{3,7}$)"))
# # A tibble: 344 × 3
# species island sex
# <fct> <fct> <fct>
# 1 Adelie Torgersen male
# 2 Adelie Torgersen female
# 3 Adelie Torgersen female
# 4 Adelie Torgersen NA
# 5 Adelie Torgersen female
# 6 Adelie Torgersen male
# 7 Adelie Torgersen female
# 8 Adelie Torgersen male
# 9 Adelie Torgersen NA
# 10 Adelie Torgersen NA
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
billboard %>%
select(num_range("wk", 10:15))
# # A tibble: 317 × 6
# wk10 wk11 wk12 wk13 wk14 wk15
# <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
# 1 NA NA NA NA NA NA
# 2 NA NA NA NA NA NA
# 3 51 51 51 47 44 38
# 4 61 61 59 61 66 72
# 5 57 64 70 75 76 78
# 6 6 7 22 29 36 47
# 7 NA NA NA NA NA NA
# 8 36 37 37 38 49 61
# 9 10 9 8 6 1 2
# 10 59 66 68 61 67 59
# # ℹ 307 more rows
# # ℹ Use `print(n = ...)` to see more rows
8. 文字ベクトルから列を選択
ヘルパー関数 | 説明 |
---|---|
tidyselect::all_of | 文字ベクトル内の列の欠落を許可しない。 |
tidyselect::any_of | 文字ベクトル内の列の欠落を許可する。 |
vars <- c("flipper_length_mm", "flipper_depth_mm")
penguins %>%
select(all_of(vars))
# Error in `all_of()`:
# ! Can't subset columns that don't exist.
# ✖ Column `flipper_depth_mm` doesn't exist.
# Run `rlang::last_trace()` to see where the error occurred.
vars <- c("flipper_length_mm", "flipper_depth_mm")
penguins %>%
select(any_of(vars))
# # A tibble: 344 × 1
# flipper_length_mm
# <int>
# 1 181
# 2 186
# 3 195
# 4 NA
# 5 193
# 6 190
# 7 181
# 8 195
# 9 193
# 10 190
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
9. 関数で列を選択
tidyselect::where
を使用すると、関数(または、~ で始まる purrr 形式のラムダ式)が
TRUE
を返す列を選択する。
penguins %>%
select(where(is.numeric))
# # A tibble: 344 × 4
# bill_length_mm bill_depth_mm flipper_length_mm body_mass_g
# <dbl> <dbl> <int> <int>
# 1 39.1 18.7 181 3750
# 2 39.5 17.4 186 3800
# 3 40.3 18 195 3250
# 4 NA NA NA NA
# 5 36.7 19.3 193 3450
# 6 39.3 20.6 190 3650
# 7 38.9 17.8 181 3625
# 8 39.2 19.6 195 4675
# 9 34.1 18.1 193 3475
# 10 42 20.2 190 4250
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows
penguins %>%
select(where(~ is.numeric(.x) && mean(.x, na.rm = TRUE) > 100))
# # A tibble: 344 × 2
# flipper_length_mm body_mass_g
# <int> <int>
# 1 181 3750
# 2 186 3800
# 3 195 3250
# 4 NA NA
# 5 193 3450
# 6 190 3650
# 7 181 3625
# 8 195 4675
# 9 193 3475
# 10 190 4250
# # ℹ 334 more rows
# # ℹ Use `print(n = ...)` to see more rows