All flights on this day x years ago

Be careful to use the equality operator == (not a single =) for comparisons:

# Intentionally wrong: a single `=` turns each condition into a named
# argument instead of a comparison, so filter() fails with the error below.
flights %>% 
  filter(month = 6, day = 2)
## Error: `month` (`month = 6`), `day` (`day = 2`) must not be named, do you need `==`?
# Correct form: `==` compares each row's value, so only rows with
# month 6 and day 2 are kept.
filter(flights, month == 6, day == 2)
## # A tibble: 911 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     6     2       14           2359        15      339
##  2  2013     6     2       20           2155       145      222
##  3  2013     6     2       24           2245        99      133
##  4  2013     6     2       33           2059       214      150
##  5  2013     6     2       35           2130       185      332
##  6  2013     6     2       36           1914       322      223
##  7  2013     6     2       44           2359        45      420
##  8  2013     6     2      128           2159       209      325
##  9  2013     6     2      131           2146       225      229
## 10  2013     6     2      219           2055       324      322
## # ... with 901 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Can we make a dynamic query?

# Derive month and day from the current system date, so the query follows
# the calendar instead of hard-coding a date.
flights %>%
  filter(
    month == lubridate::month(Sys.Date()) &
      day == lubridate::day(Sys.Date())
  )
## # A tibble: 982 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     5    16        8           2127       161      159
##  2  2013     5    16       13           2145       148      255
##  3  2013     5    16       58           2359        59      426
##  4  2013     5    16      456            500        -4      629
##  5  2013     5    16      506            515        -9      745
##  6  2013     5    16      537            545        -8      839
##  7  2013     5    16      538            540        -2      833
##  8  2013     5    16      548            602       -14      653
##  9  2013     5    16      548            600       -12      832
## 10  2013     5    16      551            600        -9      814
## # ... with 972 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Can we use arguments?

# Read the clock once so month_ and day_ always describe the same date,
# even if the code happens to run across midnight.
today <- Sys.Date()
month_ <- lubridate::month(today)
day_ <- lubridate::day(today)

# Plain variables defined outside the data frame can be used inside filter()
# next to column names.
flights %>%
  filter(
    month == month_,
    day == day_
  )
## # A tibble: 982 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     5    16        8           2127       161      159
##  2  2013     5    16       13           2145       148      255
##  3  2013     5    16       58           2359        59      426
##  4  2013     5    16      456            500        -4      629
##  5  2013     5    16      506            515        -9      745
##  6  2013     5    16      537            545        -8      839
##  7  2013     5    16      538            540        -2      833
##  8  2013     5    16      548            602       -14      653
##  9  2013     5    16      548            600       -12      832
## 10  2013     5    16      551            600        -9      814
## # ... with 972 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

All flights between 8:00 AM and 10:00 PM

# Range check written as two explicit comparisons; both bounds are inclusive
# (800 <= dep_time <= 2200).
flights %>%
  filter(dep_time >= 800 & dep_time <= 2200)
## # A tibble: 267,608 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      800            800         0     1022
##  2  2013     1     1      800            810       -10      949
##  3  2013     1     1      801            805        -4      900
##  4  2013     1     1      803            810        -7      903
##  5  2013     1     1      803            800         3     1132
##  6  2013     1     1      804            810        -6     1103
##  7  2013     1     1      805            805         0     1015
##  8  2013     1     1      805            800         5     1118
##  9  2013     1     1      805            815       -10     1006
## 10  2013     1     1      807            810        -3     1043
## # ... with 267,598 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Same range check using between(), which includes both bounds.
flights %>%
  filter(between(dep_time, left = 800, right = 2200))
## # A tibble: 267,608 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      800            800         0     1022
##  2  2013     1     1      800            810       -10      949
##  3  2013     1     1      801            805        -4      900
##  4  2013     1     1      803            810        -7      903
##  5  2013     1     1      803            800         3     1132
##  6  2013     1     1      804            810        -6     1103
##  7  2013     1     1      805            805         0     1015
##  8  2013     1     1      805            800         5     1118
##  9  2013     1     1      805            815       -10     1006
## 10  2013     1     1      807            810        -3     1043
## # ... with 267,598 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Flights in winter months

# `%in%` keeps rows whose month matches any value in the set
# (December, January, February).
filter(flights, month %in% c(12, 1, 2))
## # A tibble: 80,090 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      517            515         2      830
##  2  2013     1     1      533            529         4      850
##  3  2013     1     1      542            540         2      923
##  4  2013     1     1      544            545        -1     1004
##  5  2013     1     1      554            600        -6      812
##  6  2013     1     1      554            558        -4      740
##  7  2013     1     1      555            600        -5      913
##  8  2013     1     1      557            600        -3      709
##  9  2013     1     1      557            600        -3      838
## 10  2013     1     1      558            600        -2      753
## # ... with 80,080 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Store the set of months in a variable, then reuse it inside filter().
winter_months <- c(12, 1, 2)
filter(flights, month %in% winter_months)
## # A tibble: 80,090 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      517            515         2      830
##  2  2013     1     1      533            529         4      850
##  3  2013     1     1      542            540         2      923
##  4  2013     1     1      544            545        -1     1004
##  5  2013     1     1      554            600        -6      812
##  6  2013     1     1      554            558        -4      740
##  7  2013     1     1      555            600        -5      913
##  8  2013     1     1      557            600        -3      709
##  9  2013     1     1      557            600        -3      838
## 10  2013     1     1      558            600        -2      753
## # ... with 80,080 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Departure time later than arrival time

# Compare two columns row by row: keep flights whose recorded arrival time
# is smaller than their departure time.
# NOTE(review): presumably these are flights that landed after midnight -
# confirm against the data documentation.
flights %>%
  filter(arr_time < dep_time)
## # A tibble: 10,633 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1     1929           1920         9        3
##  2  2013     1     1     1939           1840        59       29
##  3  2013     1     1     2058           2100        -2        8
##  4  2013     1     1     2102           2108        -6      146
##  5  2013     1     1     2108           2057        11       25
##  6  2013     1     1     2120           2130       -10       16
##  7  2013     1     1     2121           2040        41        6
##  8  2013     1     1     2128           2135        -7       26
##  9  2013     1     1     2134           2045        49       20
## 10  2013     1     1     2136           2145        -9       25
## # ... with 10,623 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.