Combined

# Keep only the rows of `flights` that satisfy all four conditions at once.
# Conditions passed to filter() as separate arguments are combined with AND.
filter(
  flights,
  # arr_time appears to be encoded as HHMM (e.g. 2340), so this presumably
  # selects arrivals at 22:00 or later -- confirm against the dataset docs.
  arr_time >= 2200,
  carrier == "UA",
  # presumably minutes of arrival delay -- confirm against the dataset docs.
  arr_delay > 120,
  dest != "HNL"
)
## # A tibble: 330 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     2     2131           1512       379     2340
##  2  2013     1     9     1906           1727        99     2246
##  3  2013     1    13     2021           1730       171     2328
##  4  2013     1    23     1947           1629       198     2247
##  5  2013     1    24     2016           1727       169     2342
##  6  2013     1    25     2012           1900        72     2302
##  7  2013     1    27     2025           1810       135     2203
##  8  2013     1    30     1923           1529       234     2216
##  9  2013     1    30     2039           1730       189     2340
## 10  2013     1    31     1809           1629       100     2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Alternative variant:

# Same selection written as one logical expression: the four conditions are
# joined explicitly with the elementwise `&` operator and passed to filter()
# as a single argument.
filter(
  flights,
  arr_time >= 2200 &
    carrier == "UA" &
    arr_delay > 120 &
    dest != "HNL"
)
## # A tibble: 330 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     2     2131           1512       379     2340
##  2  2013     1     9     1906           1727        99     2246
##  3  2013     1    13     2021           1730       171     2328
##  4  2013     1    23     1947           1629       198     2247
##  5  2013     1    24     2016           1727       169     2342
##  6  2013     1    25     2012           1900        72     2302
##  7  2013     1    27     2025           1810       135     2203
##  8  2013     1    30     1923           1529       234     2216
##  9  2013     1    30     2039           1730       189     2340
## 10  2013     1    31     1809           1629       100     2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Intermediate variables

Naming is hard! Every intermediate result needs its own name, and the names grow longer with each step.

# Apply one condition per step, storing each intermediate result in its own
# variable. Every filter() call takes the previous step's result as its input.
late_flights <- filter(flights, arr_time >= 2200)
late_ua_flights <- filter(late_flights, carrier == "UA")
late_late_ua_flights <- filter(late_ua_flights, arr_delay > 120)
late_late_ua_flights_not_honolulu <- filter(late_late_ua_flights, dest != "HNL")
# Print the final result.
late_late_ua_flights_not_honolulu
## # A tibble: 330 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     2     2131           1512       379     2340
##  2  2013     1     9     1906           1727        99     2246
##  3  2013     1    13     2021           1730       171     2328
##  4  2013     1    23     1947           1629       198     2247
##  5  2013     1    24     2016           1727       169     2342
##  6  2013     1    25     2012           1900        72     2302
##  7  2013     1    27     2025           1810       135     2203
##  8  2013     1    30     1923           1529       234     2216
##  9  2013     1    30     2039           1730       189     2340
## 10  2013     1    31     1809           1629       100     2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Nested expressions

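Difficult to read: the calls must be read inside-out, and each condition ends up far away from the filter() it belongs to.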

# The same four filters written as nested calls. Evaluation runs inside-out:
# the innermost filter() (on arr_time) is applied first, and each enclosing
# call filters the result of the call it wraps.
filter(
  filter(
    filter(
      filter(
        flights,
        arr_time >= 2200
      ),
      carrier == "UA"
    ),
    arr_delay > 120
  ),
  # Condition of the outermost call, applied last.
  dest != "HNL"
)
## # A tibble: 330 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     2     2131           1512       379     2340
##  2  2013     1     9     1906           1727        99     2246
##  3  2013     1    13     2021           1730       171     2328
##  4  2013     1    23     1947           1629       198     2247
##  5  2013     1    24     2016           1727       169     2342
##  6  2013     1    25     2012           1900        72     2302
##  7  2013     1    27     2025           1810       135     2203
##  8  2013     1    30     1923           1529       234     2216
##  9  2013     1    30     2039           1730       189     2340
## 10  2013     1    31     1809           1629       100     2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Pipe

# The same four filters as a pipeline. `%>%` passes the value on its left as
# the first argument of the call on its right, so the steps read top to bottom
# in the order they are applied.
flights %>% 
  filter(arr_time >= 2200) %>% 
  filter(carrier == "UA") %>% 
  filter(arr_delay > 120) %>% 
  filter(dest != "HNL")
## # A tibble: 330 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     2     2131           1512       379     2340
##  2  2013     1     9     1906           1727        99     2246
##  3  2013     1    13     2021           1730       171     2328
##  4  2013     1    23     1947           1629       198     2247
##  5  2013     1    24     2016           1727       169     2342
##  6  2013     1    25     2012           1900        72     2302
##  7  2013     1    27     2025           1810       135     2203
##  8  2013     1    30     1923           1529       234     2216
##  9  2013     1    30     2039           1730       189     2340
## 10  2013     1    31     1809           1629       100     2220
## # ... with 320 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Two options for assigning the result of a pipe to a variable:

# Option 1: name the target variable first with `<-`, then write the pipeline
# whose result is stored in it.
late_late_ua_flights_not_honolulu <-
  flights %>% 
  filter(arr_time >= 2200) %>% 
  filter(carrier == "UA") %>% 
  filter(arr_delay > 120) %>% 
  filter(dest != "HNL")
# Option 2: write the pipeline first and assign its result at the very end
# with the rightward assignment operator `->`.
flights %>% 
  filter(arr_time >= 2200) %>% 
  filter(carrier == "UA") %>% 
  filter(arr_delay > 120) %>% 
  filter(dest != "HNL") ->
  late_late_ua_flights_not_honolulu

Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.