Today’s flights, flown by “US”

## Keep only the rows where month is 6, day is 2 and carrier is "US".
## The three comparisons are combined explicitly with the element-wise `&`.
flights %>%
  filter((month == 6) & (day == 2) & (carrier == "US"))
## # A tibble: 50 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     6     2    515     500  15.0    649    640   9.00 US     1431
##  2  2013     6     2    554     600 - 6.00   649    655 - 6.00 US     1289
##  3  2013     6     2    613     615 - 2.00   800    808 - 8.00 US     1447
##  4  2013     6     2    625     630 - 5.00   810    835 -25.0  US     1433
##  5  2013     6     2    631     629   2.00   801    818 -17.0  US     1989
##  6  2013     6     2    639     645 - 6.00   906    906   0    US      334
##  7  2013     6     2    641     645 - 4.00   822    834 -12.0  US     1173
##  8  2013     6     2    643     645 - 2.00   841    908 -27.0  US      654
##  9  2013     6     2    729     730 - 1.00   856    833  23.0  US     1821
## 10  2013     6     2    748     755 - 7.00   939    950 -11.0  US     1733
## # ... with 40 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## Same selection written as comma-separated conditions: filter() combines
## its arguments with AND, so a row must satisfy every one of them.
flights %>%
  filter(month == 6, day == 2, carrier == "US")
## # A tibble: 50 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     6     2    515     500  15.0    649    640   9.00 US     1431
##  2  2013     6     2    554     600 - 6.00   649    655 - 6.00 US     1289
##  3  2013     6     2    613     615 - 2.00   800    808 - 8.00 US     1447
##  4  2013     6     2    625     630 - 5.00   810    835 -25.0  US     1433
##  5  2013     6     2    631     629   2.00   801    818 -17.0  US     1989
##  6  2013     6     2    639     645 - 6.00   906    906   0    US      334
##  7  2013     6     2    641     645 - 4.00   822    834 -12.0  US     1173
##  8  2013     6     2    643     645 - 2.00   841    908 -27.0  US      654
##  9  2013     6     2    729     730 - 1.00   856    833  23.0  US     1821
## 10  2013     6     2    748     755 - 7.00   939    950 -11.0  US     1733
## # ... with 40 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Inconvenient flights

## Logical OR: |
## Keep flights whose dep_time is below 600 or at least 2200.
## NOTE(review): dep_time looks like an HHMM integer (e.g. 517) -- confirm.

flights %>% 
  filter(dep_time < 600 | dep_time >= 2200)
## # A tibble: 16,858 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    517     515   2.00   830    819  11.0  UA     1545
##  2  2013     1     1    533     529   4.00   850    830  20.0  UA     1714
##  3  2013     1     1    542     540   2.00   923    850  33.0  AA     1141
##  4  2013     1     1    544     545  -1.00  1004   1022 -18.0  B6      725
##  5  2013     1     1    554     600  -6.00   812    837 -25.0  DL      461
##  6  2013     1     1    554     558  -4.00   740    728  12.0  UA     1696
##  7  2013     1     1    555     600  -5.00   913    854  19.0  B6      507
##  8  2013     1     1    557     600  -3.00   709    723 -14.0  EV     5708
##  9  2013     1     1    557     600  -3.00   838    846 - 8.00 B6       79
## 10  2013     1     1    558     600  -2.00   753    745   8.00 AA      301
## # ... with 16,848 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Flown by an airline other than UA or WN

## Logical NOT and OR: ! |
## The inner OR matches carrier "UA" or "WN"; the outer ! inverts that test,
## so only rows with any other carrier are kept.

flights %>% 
  filter(!(carrier == "UA" | carrier == "WN"))
## # A tibble: 265,836 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    542     540   2.00   923    850  33.0  AA     1141
##  2  2013     1     1    544     545  -1.00  1004   1022 -18.0  B6      725
##  3  2013     1     1    554     600  -6.00   812    837 -25.0  DL      461
##  4  2013     1     1    555     600  -5.00   913    854  19.0  B6      507
##  5  2013     1     1    557     600  -3.00   709    723 -14.0  EV     5708
##  6  2013     1     1    557     600  -3.00   838    846 - 8.00 B6       79
##  7  2013     1     1    558     600  -2.00   753    745   8.00 AA      301
##  8  2013     1     1    558     600  -2.00   849    851 - 2.00 B6       49
##  9  2013     1     1    558     600  -2.00   853    856 - 3.00 B6       71
## 10  2013     1     1    559     600  -1.00   941    910  31.0  AA      707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## Logical NOT and %in%
## carrier %in% c("UA", "WN") tests membership in the set; negating it keeps
## the rows whose carrier is neither "UA" nor "WN".

flights %>% 
  filter(!(carrier %in% c("UA", "WN")))
## # A tibble: 265,836 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    542     540   2.00   923    850  33.0  AA     1141
##  2  2013     1     1    544     545  -1.00  1004   1022 -18.0  B6      725
##  3  2013     1     1    554     600  -6.00   812    837 -25.0  DL      461
##  4  2013     1     1    555     600  -5.00   913    854  19.0  B6      507
##  5  2013     1     1    557     600  -3.00   709    723 -14.0  EV     5708
##  6  2013     1     1    557     600  -3.00   838    846 - 8.00 B6       79
##  7  2013     1     1    558     600  -2.00   753    745   8.00 AA      301
##  8  2013     1     1    558     600  -2.00   849    851 - 2.00 B6       49
##  9  2013     1     1    558     600  -2.00   853    856 - 3.00 B6       71
## 10  2013     1     1    559     600  -1.00   941    910  31.0  AA      707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## Logical AND: &
## De Morgan form of the negated OR: carrier is not "UA" and also not "WN".

flights %>%
  filter(carrier != "UA" & carrier != "WN")
## # A tibble: 265,836 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    542     540   2.00   923    850  33.0  AA     1141
##  2  2013     1     1    544     545  -1.00  1004   1022 -18.0  B6      725
##  3  2013     1     1    554     600  -6.00   812    837 -25.0  DL      461
##  4  2013     1     1    555     600  -5.00   913    854  19.0  B6      507
##  5  2013     1     1    557     600  -3.00   709    723 -14.0  EV     5708
##  6  2013     1     1    557     600  -3.00   838    846 - 8.00 B6       79
##  7  2013     1     1    558     600  -2.00   753    745   8.00 AA      301
##  8  2013     1     1    558     600  -2.00   849    851 - 2.00 B6       49
##  9  2013     1     1    558     600  -2.00   853    856 - 3.00 B6       71
## 10  2013     1     1    559     600  -1.00   941    910  31.0  AA      707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## Two chained filter() calls: the second only sees the rows that passed the
## first, so the result keeps carriers that are neither "UA" nor "WN".
flights %>% 
  filter(carrier != "UA") %>% 
  filter(carrier != "WN")
## # A tibble: 265,836 x 19
##     year month   day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
##    <int> <int> <int>  <int>   <int>  <dbl> <int>  <int>  <dbl> <chr> <int>
##  1  2013     1     1    542     540   2.00   923    850  33.0  AA     1141
##  2  2013     1     1    544     545  -1.00  1004   1022 -18.0  B6      725
##  3  2013     1     1    554     600  -6.00   812    837 -25.0  DL      461
##  4  2013     1     1    555     600  -5.00   913    854  19.0  B6      507
##  5  2013     1     1    557     600  -3.00   709    723 -14.0  EV     5708
##  6  2013     1     1    557     600  -3.00   838    846 - 8.00 B6       79
##  7  2013     1     1    558     600  -2.00   753    745   8.00 AA      301
##  8  2013     1     1    558     600  -2.00   849    851 - 2.00 B6       49
##  9  2013     1     1    558     600  -2.00   853    856 - 3.00 B6       71
## 10  2013     1     1    559     600  -1.00   941    910  31.0  AA      707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Missing departure or arrival time

## is.na()
## Keep the rows whose dep_time is missing. Missing values must be tested
## with is.na(); a comparison such as dep_time == NA evaluates to NA.

flights %>% 
  filter(is.na(dep_time))
## # A tibble: 8,255 x 19
##     year month   day dep_… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##    <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
##  1  2013     1     1    NA  1630    NA    NA  1815    NA EV     4308 N181…
##  2  2013     1     1    NA  1935    NA    NA  2240    NA AA      791 N3EH…
##  3  2013     1     1    NA  1500    NA    NA  1825    NA AA     1925 N3EV…
##  4  2013     1     1    NA   600    NA    NA   901    NA B6      125 N618…
##  5  2013     1     2    NA  1540    NA    NA  1747    NA EV     4352 N105…
##  6  2013     1     2    NA  1620    NA    NA  1746    NA EV     4406 N139…
##  7  2013     1     2    NA  1355    NA    NA  1459    NA EV     4434 N105…
##  8  2013     1     2    NA  1420    NA    NA  1644    NA EV     4935 N759…
##  9  2013     1     2    NA  1321    NA    NA  1536    NA EV     3849 N135…
## 10  2013     1     2    NA  1545    NA    NA  1910    NA AA      133 <NA> 
## # ... with 8,245 more rows, and 7 more variables: origin <chr>,
## #   dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>
## is.na()
## Keep the rows whose arr_time is missing.

flights %>% 
  filter(is.na(arr_time))
## # A tibble: 8,713 x 19
##     year month   day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
##    <int> <int> <int>  <int>   <int>   <dbl> <int>  <int> <dbl> <chr> <int>
##  1  2013     1     1   2016    1930   46.0     NA   2220    NA EV     4204
##  2  2013     1     1     NA    1630   NA       NA   1815    NA EV     4308
##  3  2013     1     1     NA    1935   NA       NA   2240    NA AA      791
##  4  2013     1     1     NA    1500   NA       NA   1825    NA AA     1925
##  5  2013     1     1     NA     600   NA       NA    901    NA B6      125
##  6  2013     1     2   2041    2045  - 4.00    NA   2359    NA B6      147
##  7  2013     1     2   2145    2129   16.0     NA     33    NA UA     1299
##  8  2013     1     2     NA    1540   NA       NA   1747    NA EV     4352
##  9  2013     1     2     NA    1620   NA       NA   1746    NA EV     4406
## 10  2013     1     2     NA    1355   NA       NA   1459    NA EV     4434
## # ... with 8,703 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## is.na() combined with logical OR
## Keep the rows where dep_time or arr_time (or both) is missing.

flights %>% 
  filter(is.na(dep_time) | is.na(arr_time))
## # A tibble: 8,713 x 19
##     year month   day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
##    <int> <int> <int>  <int>   <int>   <dbl> <int>  <int> <dbl> <chr> <int>
##  1  2013     1     1   2016    1930   46.0     NA   2220    NA EV     4204
##  2  2013     1     1     NA    1630   NA       NA   1815    NA EV     4308
##  3  2013     1     1     NA    1935   NA       NA   2240    NA AA      791
##  4  2013     1     1     NA    1500   NA       NA   1825    NA AA     1925
##  5  2013     1     1     NA     600   NA       NA    901    NA B6      125
##  6  2013     1     2   2041    2045  - 4.00    NA   2359    NA B6      147
##  7  2013     1     2   2145    2129   16.0     NA     33    NA UA     1299
##  8  2013     1     2     NA    1540   NA       NA   1747    NA EV     4352
##  9  2013     1     2     NA    1620   NA       NA   1746    NA EV     4406
## 10  2013     1     2     NA    1355   NA       NA   1459    NA EV     4434
## # ... with 8,703 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## is.na() combined with logical NOT and AND
## Keep the rows that have a dep_time but no arr_time, i.e. flights with a
## recorded departure and a missing arrival time.

flights %>% 
  filter(!is.na(dep_time) & is.na(arr_time))
## # A tibble: 458 x 19
##     year month   day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
##    <int> <int> <int>  <int>   <int>   <dbl> <int>  <int> <dbl> <chr> <int>
##  1  2013     1     1   2016    1930   46.0     NA   2220    NA EV     4204
##  2  2013     1     2   2041    2045 -  4.00    NA   2359    NA B6      147
##  3  2013     1     2   2145    2129   16.0     NA     33    NA UA     1299
##  4  2013     1     9    615     615    0       NA    855    NA 9E     3856
##  5  2013     1     9   2042    2040    2.00    NA   2357    NA B6      677
##  6  2013     1    11   1344    1350 -  6.00    NA   1518    NA EV     4171
##  7  2013     1    13   1907    1634  153       NA   1837    NA EV     4411
##  8  2013     1    13   2239    2159   40.0     NA     30    NA EV     4519
##  9  2013     1    16    837     840 -  3.00    NA   1030    NA MQ     4521
## 10  2013     1    25   1452    1500 -  8.00    NA   1619    NA US     2179
## # ... with 448 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

I don’t think we had hundreds of plane crashes in 2013.

Copyright © 2017 Kirill Müller. Licensed under CC BY-NC 4.0.