# Keep only the rows of `flights` that satisfy every condition listed
# below; conditions passed to filter() as separate arguments must all hold.
filter(
  flights,
  arr_time >= 2200,
  carrier == "UA",
  arr_delay > 120,
  dest != "HNL"
)
## # A tibble: 330 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 2 2131 1512 379 2340
## 2 2013 1 9 1906 1727 99 2246
## 3 2013 1 13 2021 1730 171 2328
## 4 2013 1 23 1947 1629 198 2247
## 5 2013 1 24 2016 1727 169 2342
## 6 2013 1 25 2012 1900 72 2302
## 7 2013 1 27 2025 1810 135 2203
## 8 2013 1 30 1923 1529 234 2216
## 9 2013 1 30 2039 1730 189 2340
## 10 2013 1 31 1809 1629 100 2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
Alternative variant:
# The same selection written as a single logical expression: the four
# conditions are joined with the element-wise `&` operator.
filter(
  flights,
  arr_time >= 2200 &
    carrier == "UA" &
    arr_delay > 120 &
    dest != "HNL"
)
## # A tibble: 330 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 2 2131 1512 379 2340
## 2 2013 1 9 1906 1727 99 2246
## 3 2013 1 13 2021 1730 171 2328
## 4 2013 1 23 1947 1629 198 2247
## 5 2013 1 24 2016 1727 169 2342
## 6 2013 1 25 2012 1900 72 2302
## 7 2013 1 27 2025 1810 135 2203
## 8 2013 1 30 1923 1529 234 2216
## 9 2013 1 30 2039 1730 189 2340
## 10 2013 1 31 1809 1629 100 2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
Storing each step in an intermediate variable works, but naming is hard!
# Apply one condition per step and store every intermediate result under
# its own name; each step filters the result of the previous one.
late_flights <-
  filter(flights, arr_time >= 2200)
late_ua_flights <-
  filter(late_flights, carrier == "UA")
late_late_ua_flights <-
  filter(late_ua_flights, arr_delay > 120)
late_late_ua_flights_not_honolulu <-
  filter(late_late_ua_flights, dest != "HNL")

# Print the final result.
late_late_ua_flights_not_honolulu
## # A tibble: 330 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 2 2131 1512 379 2340
## 2 2013 1 9 1906 1727 99 2246
## 3 2013 1 13 2021 1730 171 2328
## 4 2013 1 23 1947 1629 198 2247
## 5 2013 1 24 2016 1727 169 2342
## 6 2013 1 25 2012 1900 72 2302
## 7 2013 1 27 2025 1810 135 2203
## 8 2013 1 30 1923 1529 234 2216
## 9 2013 1 30 2039 1730 189 2340
## 10 2013 1 31 1809 1629 100 2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
Nested function calls are difficult to read.
# The same four filters written as nested calls: the innermost filter()
# works on `flights`, and each enclosing call filters that result further.
filter(
  filter(
    filter(
      filter(
        flights,
        arr_time >= 2200
      ),
      carrier == "UA"
    ),
    arr_delay > 120
  ),
  dest != "HNL"
)
## # A tibble: 330 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 2 2131 1512 379 2340
## 2 2013 1 9 1906 1727 99 2246
## 3 2013 1 13 2021 1730 171 2328
## 4 2013 1 23 1947 1629 198 2247
## 5 2013 1 24 2016 1727 169 2342
## 6 2013 1 25 2012 1900 72 2302
## 7 2013 1 27 2025 1810 135 2203
## 8 2013 1 30 1923 1529 234 2216
## 9 2013 1 30 2039 1730 189 2340
## 10 2013 1 31 1809 1629 100 2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# The same four filters as a pipeline: `%>%` hands the result of each
# step to the next filter() as its first argument, so the steps read
# top to bottom in the order they are applied.
flights %>%
  filter(arr_time >= 2200) %>%
  filter(carrier == "UA") %>%
  filter(arr_delay > 120) %>%
  filter(dest != "HNL")
## # A tibble: 330 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 2 2131 1512 379 2340
## 2 2013 1 9 1906 1727 99 2246
## 3 2013 1 13 2021 1730 171 2328
## 4 2013 1 23 1947 1629 198 2247
## 5 2013 1 24 2016 1727 169 2342
## 6 2013 1 25 2012 1900 72 2302
## 7 2013 1 27 2025 1810 135 2203
## 8 2013 1 30 1923 1529 234 2216
## 9 2013 1 30 2039 1730 189 2340
## 10 2013 1 31 1809 1629 100 2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
Two options for assigning the result of a pipe to a variable:
# Left-assignment: the target name and `<-` come first, and the pipeline
# whose result is stored follows on the indented lines.
late_late_ua_flights_not_honolulu <-
  flights %>%
  filter(arr_time >= 2200) %>%
  filter(carrier == "UA") %>%
  filter(arr_delay > 120) %>%
  filter(dest != "HNL")
# Right-assignment: the pipeline comes first, and `->` stores its result
# in the name written on the last line.
flights %>%
  filter(arr_time >= 2200) %>%
  filter(carrier == "UA") %>%
  filter(arr_delay > 120) %>%
  filter(dest != "HNL") ->
  late_late_ua_flights_not_honolulu
Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.