Today’s flights, flown by “US”

# Keep only the rows for which all three comparisons are TRUE; the
# comparisons are joined explicitly with the elementwise logical AND `&`.
flights %>%
  filter((month == 6) & (day == 2) & (carrier == "US"))
## # A tibble: 50 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     6     2      515            500        15      649
##  2  2013     6     2      554            600        -6      649
##  3  2013     6     2      613            615        -2      800
##  4  2013     6     2      625            630        -5      810
##  5  2013     6     2      631            629         2      801
##  6  2013     6     2      639            645        -6      906
##  7  2013     6     2      641            645        -4      822
##  8  2013     6     2      643            645        -2      841
##  9  2013     6     2      729            730        -1      856
## 10  2013     6     2      748            755        -7      939
## # ... with 40 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Conditions passed to filter() as separate, comma-separated arguments are
# combined with a logical AND, so a row must satisfy every one of them.
flights %>%
  filter(month == 6, day == 2, carrier == "US")
## # A tibble: 50 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     6     2      515            500        15      649
##  2  2013     6     2      554            600        -6      649
##  3  2013     6     2      613            615        -2      800
##  4  2013     6     2      625            630        -5      810
##  5  2013     6     2      631            629         2      801
##  6  2013     6     2      639            645        -6      906
##  7  2013     6     2      641            645        -4      822
##  8  2013     6     2      643            645        -2      841
##  9  2013     6     2      729            730        -1      856
## 10  2013     6     2      748            755        -7      939
## # ... with 40 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Inconvenient flights

## Logical OR: |

# Keep rows where dep_time is below 600 or at least 2200; `|` is the
# elementwise logical OR. Rows where the condition evaluates to NA
# (missing dep_time) are dropped by filter().
# NOTE(review): dep_time looks like an HHMM integer (e.g. 517 in the
# printed output), i.e. before 6:00 or from 22:00 on -- confirm against
# the dataset documentation.
flights %>% 
  filter(dep_time < 600 | dep_time >= 2200)
## # A tibble: 16,858 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      517            515         2      830
##  2  2013     1     1      533            529         4      850
##  3  2013     1     1      542            540         2      923
##  4  2013     1     1      544            545        -1     1004
##  5  2013     1     1      554            600        -6      812
##  6  2013     1     1      554            558        -4      740
##  7  2013     1     1      555            600        -5      913
##  8  2013     1     1      557            600        -3      709
##  9  2013     1     1      557            600        -3      838
## 10  2013     1     1      558            600        -2      753
## # ... with 16,848 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Flown by an airline other than UA or WN

## Logical NOT and OR: ! |

# The inner `|` is TRUE when carrier is "UA" or "WN"; the outer `!` negates
# that, so only rows whose carrier is neither of the two are kept.
flights %>% 
  filter(!(carrier == "UA" | carrier == "WN"))
## # A tibble: 265,836 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      542            540         2      923
##  2  2013     1     1      544            545        -1     1004
##  3  2013     1     1      554            600        -6      812
##  4  2013     1     1      555            600        -5      913
##  5  2013     1     1      557            600        -3      709
##  6  2013     1     1      557            600        -3      838
##  7  2013     1     1      558            600        -2      753
##  8  2013     1     1      558            600        -2      849
##  9  2013     1     1      558            600        -2      853
## 10  2013     1     1      559            600        -1      941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## Logical NOT and %in%

# `%in%` tests whether carrier is a member of c("UA", "WN"); the leading
# `!` negates the test, keeping rows whose carrier is not in that set.
flights %>% 
  filter(!(carrier %in% c("UA", "WN")))
## # A tibble: 265,836 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      542            540         2      923
##  2  2013     1     1      544            545        -1     1004
##  3  2013     1     1      554            600        -6      812
##  4  2013     1     1      555            600        -5      913
##  5  2013     1     1      557            600        -3      709
##  6  2013     1     1      557            600        -3      838
##  7  2013     1     1      558            600        -2      753
##  8  2013     1     1      558            600        -2      849
##  9  2013     1     1      558            600        -2      853
## 10  2013     1     1      559            600        -1      941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## Logical AND: &

# Two inequality tests joined with `&`: a row is kept only if its carrier
# differs from "UA" and also differs from "WN".
flights %>%
  filter(carrier != "UA" & carrier != "WN")
## # A tibble: 265,836 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      542            540         2      923
##  2  2013     1     1      544            545        -1     1004
##  3  2013     1     1      554            600        -6      812
##  4  2013     1     1      555            600        -5      913
##  5  2013     1     1      557            600        -3      709
##  6  2013     1     1      557            600        -3      838
##  7  2013     1     1      558            600        -2      753
##  8  2013     1     1      558            600        -2      849
##  9  2013     1     1      558            600        -2      853
## 10  2013     1     1      559            600        -1      941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
# Apply the two inequality tests as successive filter() steps; the second
# step only receives the rows that passed the first.
flights %>% 
  filter(carrier != "UA") %>% 
  filter(carrier != "WN")
## # A tibble: 265,836 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1      542            540         2      923
##  2  2013     1     1      544            545        -1     1004
##  3  2013     1     1      554            600        -6      812
##  4  2013     1     1      555            600        -5      913
##  5  2013     1     1      557            600        -3      709
##  6  2013     1     1      557            600        -3      838
##  7  2013     1     1      558            600        -2      753
##  8  2013     1     1      558            600        -2      849
##  9  2013     1     1      558            600        -2      853
## 10  2013     1     1      559            600        -1      941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Missing departure or arrival time

## is.na()

# is.na() is TRUE for missing values, so this keeps only the rows whose
# dep_time is NA.
flights %>% 
  filter(is.na(dep_time))
## # A tibble: 8,255 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1       NA           1630        NA       NA
##  2  2013     1     1       NA           1935        NA       NA
##  3  2013     1     1       NA           1500        NA       NA
##  4  2013     1     1       NA            600        NA       NA
##  5  2013     1     2       NA           1540        NA       NA
##  6  2013     1     2       NA           1620        NA       NA
##  7  2013     1     2       NA           1355        NA       NA
##  8  2013     1     2       NA           1420        NA       NA
##  9  2013     1     2       NA           1321        NA       NA
## 10  2013     1     2       NA           1545        NA       NA
## # ... with 8,245 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## is.na()

# is.na() is TRUE for missing values, so this keeps only the rows whose
# arr_time is NA.
flights %>% 
  filter(is.na(arr_time))
## # A tibble: 8,713 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1     2016           1930        46       NA
##  2  2013     1     1       NA           1630        NA       NA
##  3  2013     1     1       NA           1935        NA       NA
##  4  2013     1     1       NA           1500        NA       NA
##  5  2013     1     1       NA            600        NA       NA
##  6  2013     1     2     2041           2045        -4       NA
##  7  2013     1     2     2145           2129        16       NA
##  8  2013     1     2       NA           1540        NA       NA
##  9  2013     1     2       NA           1620        NA       NA
## 10  2013     1     2       NA           1355        NA       NA
## # ... with 8,703 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## is.na() with logical OR: |

# Keep rows where dep_time is NA, arr_time is NA, or both; the two
# is.na() tests are joined with the elementwise logical OR `|`.
flights %>% 
  filter(is.na(dep_time) | is.na(arr_time))
## # A tibble: 8,713 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1     2016           1930        46       NA
##  2  2013     1     1       NA           1630        NA       NA
##  3  2013     1     1       NA           1935        NA       NA
##  4  2013     1     1       NA           1500        NA       NA
##  5  2013     1     1       NA            600        NA       NA
##  6  2013     1     2     2041           2045        -4       NA
##  7  2013     1     2     2145           2129        16       NA
##  8  2013     1     2       NA           1540        NA       NA
##  9  2013     1     2       NA           1620        NA       NA
## 10  2013     1     2       NA           1355        NA       NA
## # ... with 8,703 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>
## is.na() with logical NOT and AND: ! &

# Keep rows that have a recorded dep_time (`!is.na()`) but a missing
# arr_time, i.e. both tests must be TRUE.
flights %>% 
  filter(!is.na(dep_time) & is.na(arr_time))
## # A tibble: 458 x 19
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1     2016           1930        46       NA
##  2  2013     1     2     2041           2045        -4       NA
##  3  2013     1     2     2145           2129        16       NA
##  4  2013     1     9      615            615         0       NA
##  5  2013     1     9     2042           2040         2       NA
##  6  2013     1    11     1344           1350        -6       NA
##  7  2013     1    13     1907           1634       153       NA
##  8  2013     1    13     2239           2159        40       NA
##  9  2013     1    16      837            840        -3       NA
## 10  2013     1    25     1452           1500        -8       NA
## # ... with 448 more rows, and 12 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

I don’t think we had hundreds of plane crashes in 2013.

Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.