All flights on this day x years ago

Be careful to compare with the equality operator == rather than a single =:

# Intentional mistake: a single `=` passes named arguments to filter()
# instead of comparing values, so the call stops with the error shown below.
flights %>% 
  filter(month = 6, day = 2)
## Error: `month` (`month = 6`), `day` (`day = 2`) must not be named, do you need `==`?
# Corrected version: `==` compares the month and day columns with 6 and 2.
flights %>% 
  filter(month == 6, day == 2)
## # A tibble: 911 x 19
##     year month   day dep_t… sched_… dep_… arr_… sched… arr_de… carr… flig…
##    <int> <int> <int>  <int>   <int> <dbl> <int>  <int>   <dbl> <chr> <int>
##  1  2013     6     2     14    2359  15.0   339    341 -  2.00 B6      727
##  2  2013     6     2     20    2155 145     222     15  127    UA     1548
##  3  2013     6     2     24    2245  99.0   133      1   92.0  B6       30
##  4  2013     6     2     33    2059 214     150   2224  206    EV     4119
##  5  2013     6     2     35    2130 185     332     17  195    B6      383
##  6  2013     6     2     36    1914 322     223   2121  302    EV     4670
##  7  2013     6     2     44    2359  45.0   420    350   30.0  B6      739
##  8  2013     6     2    128    2159 209     325     10  195    EV     5855
##  9  2013     6     2    131    2146 225     229   2251  218    EV     4575
## 10  2013     6     2    219    2055 324     322   2230  292    WN      579
## # ... with 901 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Can we make a dynamic query?

# Read the current date once so that month and day always come from the
# same date, even if the query happens to run across midnight.
today <- Sys.Date()
flights %>%
  filter(
    month == lubridate::month(today),
    day == lubridate::day(today)
  )
## # A tibble: 915 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013    10    20    517     520 - 3.00   754    803 - 9.00 UA     1096
##  2  2013    10    20    542     545 - 3.00   922    933 -11.0  B6     1403
##  3  2013    10    20    543     545 - 2.00   843    855 -12.0  AA     2243
##  4  2013    10    20    549     600 -11.0    703    716 -13.0  EV     5716
##  5  2013    10    20    550     550   0      921    932 -11.0  B6      939
##  6  2013    10    20    554     600 - 6.00   713    730 -17.0  UA      279
##  7  2013    10    20    554     600 - 6.00   905    858   7.00 B6      371
##  8  2013    10    20    554     600 - 6.00   836    840 - 4.00 B6       27
##  9  2013    10    20    557     600 - 3.00   650    700 -10.0  US     1739
## 10  2013    10    20    557     600 - 3.00   840    815  25.0  FL      345
## # ... with 905 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Can we use variables?

# Take a single snapshot of today's date and derive both filter values from
# it; two separate Sys.Date() calls could straddle midnight and disagree.
today <- Sys.Date()
month_ <- lubridate::month(today)
day_ <- lubridate::day(today)
# The precomputed values are compared against the month and day columns.
flights %>%
  filter(
    month == month_,
    day == day_
  )
## # A tibble: 915 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013    10    20    517     520 - 3.00   754    803 - 9.00 UA     1096
##  2  2013    10    20    542     545 - 3.00   922    933 -11.0  B6     1403
##  3  2013    10    20    543     545 - 2.00   843    855 -12.0  AA     2243
##  4  2013    10    20    549     600 -11.0    703    716 -13.0  EV     5716
##  5  2013    10    20    550     550   0      921    932 -11.0  B6      939
##  6  2013    10    20    554     600 - 6.00   713    730 -17.0  UA      279
##  7  2013    10    20    554     600 - 6.00   905    858   7.00 B6      371
##  8  2013    10    20    554     600 - 6.00   836    840 - 4.00 B6       27
##  9  2013    10    20    557     600 - 3.00   650    700 -10.0  US     1739
## 10  2013    10    20    557     600 - 3.00   840    815  25.0  FL      345
## # ... with 905 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

All flights between 8:00 AM and 10:00 PM

# Keep departures whose dep_time lies in the closed range 800..2200,
# written as a single combined logical condition.
flights %>%
  filter(dep_time >= 800 & dep_time <= 2200)
## # A tibble: 267,608 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    800     800   0     1022   1014   8.00 DL     2119
##  2  2013     1     1    800     810 -10.0    949    955 - 6.00 MQ     4406
##  3  2013     1     1    801     805 - 4.00   900    919 -19.0  B6     1172
##  4  2013     1     1    803     810 - 7.00   903    925 -22.0  AA     1838
##  5  2013     1     1    803     800   3.00  1132   1144 -12.0  UA      223
##  6  2013     1     1    804     810 - 6.00  1103   1116 -13.0  DL     1959
##  7  2013     1     1    805     805   0     1015   1005  10.0  B6      219
##  8  2013     1     1    805     800   5.00  1118   1106  12.0  B6        3
##  9  2013     1     1    805     815 -10.0   1006   1010 - 4.00 MQ     4490
## 10  2013     1     1    807     810 - 3.00  1043   1043   0    DL      269
## # ... with 267,598 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# between() tests dep_time against a range that includes both bounds
# (800 and 2200); the bounds are passed by name for readability.
flights %>%
  filter(between(dep_time, left = 800, right = 2200))
## # A tibble: 267,608 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    800     800   0     1022   1014   8.00 DL     2119
##  2  2013     1     1    800     810 -10.0    949    955 - 6.00 MQ     4406
##  3  2013     1     1    801     805 - 4.00   900    919 -19.0  B6     1172
##  4  2013     1     1    803     810 - 7.00   903    925 -22.0  AA     1838
##  5  2013     1     1    803     800   3.00  1132   1144 -12.0  UA      223
##  6  2013     1     1    804     810 - 6.00  1103   1116 -13.0  DL     1959
##  7  2013     1     1    805     805   0     1015   1005  10.0  B6      219
##  8  2013     1     1    805     800   5.00  1118   1106  12.0  B6        3
##  9  2013     1     1    805     815 -10.0   1006   1010 - 4.00 MQ     4490
## 10  2013     1     1    807     810 - 3.00  1043   1043   0    DL      269
## # ... with 267,598 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Flights in winter months

# %in% keeps rows whose month matches any value in the vector (12, 1 or 2).
flights %>%
  filter(month %in% c(12, 1, 2))
## # A tibble: 80,090 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    517     515   2.00   830    819  11.0  UA     1545
##  2  2013     1     1    533     529   4.00   850    830  20.0  UA     1714
##  3  2013     1     1    542     540   2.00   923    850  33.0  AA     1141
##  4  2013     1     1    544     545  -1.00  1004   1022 -18.0  B6      725
##  5  2013     1     1    554     600  -6.00   812    837 -25.0  DL      461
##  6  2013     1     1    554     558  -4.00   740    728  12.0  UA     1696
##  7  2013     1     1    555     600  -5.00   913    854  19.0  B6      507
##  8  2013     1     1    557     600  -3.00   709    723 -14.0  EV     5708
##  9  2013     1     1    557     600  -3.00   838    846 - 8.00 B6       79
## 10  2013     1     1    558     600  -2.00   753    745   8.00 AA      301
## # ... with 80,080 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Store the set of months in a variable, then use that variable on the
# right-hand side of %in% inside filter().
winter_months <- c(12, 1, 2)
flights %>%
  filter(month %in% winter_months)
## # A tibble: 80,090 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    517     515   2.00   830    819  11.0  UA     1545
##  2  2013     1     1    533     529   4.00   850    830  20.0  UA     1714
##  3  2013     1     1    542     540   2.00   923    850  33.0  AA     1141
##  4  2013     1     1    544     545  -1.00  1004   1022 -18.0  B6      725
##  5  2013     1     1    554     600  -6.00   812    837 -25.0  DL      461
##  6  2013     1     1    554     558  -4.00   740    728  12.0  UA     1696
##  7  2013     1     1    555     600  -5.00   913    854  19.0  B6      507
##  8  2013     1     1    557     600  -3.00   709    723 -14.0  EV     5708
##  9  2013     1     1    557     600  -3.00   838    846 - 8.00 B6       79
## 10  2013     1     1    558     600  -2.00   753    745   8.00 AA      301
## # ... with 80,080 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Departure time later than arrival time

# Rows whose recorded arrival time is numerically smaller than the
# departure time (presumably flights that land after midnight; confirm).
flights %>%
  filter(arr_time < dep_time)
## # A tibble: 10,633 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1   1929    1920   9.00     3      7 - 4.00 UA     1071
##  2  2013     1     1   1939    1840  59.0     29   2151  NA    9E     3325
##  3  2013     1     1   2058    2100 - 2.00     8   2359   9.00 UA     1241
##  4  2013     1     1   2102    2108 - 6.00   146    158 -12.0  UA     1180
##  5  2013     1     1   2108    2057  11.0     25     39 -14.0  UA     1517
##  6  2013     1     1   2120    2130 -10.0     16     18 - 2.00 B6      383
##  7  2013     1     1   2121    2040  41.0      6   2323  43.0  B6      227
##  8  2013     1     1   2128    2135 - 7.00    26     50 -24.0  AA      185
##  9  2013     1     1   2134    2045  49.0     20   2352  28.0  UA     1106
## 10  2013     1     1   2136    2145 - 9.00    25     39 -14.0  B6      515
## # ... with 10,623 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Copyright © 2017 Kirill Müller. Licensed under CC BY-NC 4.0.