Flight with shortest airtime

# Sort all flights by air_time in ascending order and keep only the first
# row, i.e. the flight with the smallest recorded air_time.
flights %>% 
  arrange(air_time) %>%
  head(1)
## # A tibble: 1 x 19
##    year month   day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##   <int> <int> <int>  <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1  2013     1    16   1355  1315  40.0  1442  1411  31.0 EV     4368 N169…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Flight with longest arrival delay

# Naive attempt: sort ascending by arr_delay and take the last row.
# The row returned has arr_delay = NA (see the printed result), because rows
# with a missing arr_delay end up at the very end of the sorted table.
flights %>% 
  arrange(arr_delay) %>%
  tail(1)
## # A tibble: 1 x 19
##    year month   day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##   <int> <int> <int>  <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1  2013     9    30     NA   840    NA    NA  1020    NA MQ     3531 N839…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

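Why doesn’t this give the result we’re looking for? The sort places flights with a missing arrival delay (NA) at the very end, so the last row is one of those rather than the most delayed flight. Can we use a filter?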

# Drop flights whose arr_delay is missing before sorting, so that the last
# row of the ascending sort is the flight with the largest recorded
# arr_delay.
flights %>% 
  filter(!is.na(arr_delay)) %>%
  arrange(arr_delay) %>%
  tail(1)
## # A tibble: 1 x 19
##    year month   day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##   <int> <int> <int>  <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1  2013     1     9    641   900  1301  1242  1530  1272 HA       51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Or the pattern below?

# Alternative that keeps every row: the first sort key, !is.na(arr_delay),
# is FALSE for missing delays and TRUE otherwise. FALSE sorts before TRUE,
# so rows with a missing arr_delay move to the top, and the remaining rows
# follow in ascending arr_delay order. The last row is then the flight with
# the largest recorded arr_delay (same row as the filter() variant).
flights %>% 
  arrange(!is.na(arr_delay), arr_delay) %>%
  tail(1)
## # A tibble: 1 x 19
##    year month   day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##   <int> <int> <int>  <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1  2013     1     9    641   900  1301  1242  1530  1272 HA       51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Usually it’s easiest to sort in descending order:

# Sort by the negated arr_delay so the largest delay comes first, then keep
# the first row. No explicit NA handling is needed here: the printed result
# is the same flight as in the filter() variant.
flights %>% 
  arrange(-arr_delay) %>%
  head(1)
## # A tibble: 1 x 19
##    year month   day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##   <int> <int> <int>  <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1  2013     1     9    641   900  1301  1242  1530  1272 HA       51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Same query written with desc() instead of arithmetic negation; the printed
# result is identical to the arrange(-arr_delay) version.
flights %>% 
  arrange(desc(arr_delay)) %>%
  head(1)
## # A tibble: 1 x 19
##    year month   day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##   <int> <int> <int>  <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1  2013     1     9    641   900  1301  1242  1530  1272 HA       51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Flight with longest airtime

# Sort by air_time in descending order and keep the first row, i.e. the
# flight with the largest recorded air_time.
flights %>% 
  arrange(desc(air_time)) %>%
  head(1)
## # A tibble: 1 x 19
##    year month   day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##   <int> <int> <int>  <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1  2013     3    17   1337  1335  2.00  1937  1836  61.0 UA       15 N770…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

UA flights with lowest delay

If we filter first, fewer observations need to be sorted.

# Restrict to carrier "UA" first, then sort the remaining rows by arr_delay
# in ascending order (most negative delay, i.e. earliest arrival, first).
# Only the UA rows (58,665 in the printed result) have to be sorted.
flights %>% 
  filter(carrier == "UA") %>%
  arrange(arr_delay)
## # A tibble: 58,665 x 19
##     year month   day dep_… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##    <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
##  1  2013     5     2  1947  1949 -2.00  2209  2324 -75.0 UA      612 N851…
##  2  2013     5     2  1926  1929 -3.00  2157  2310 -73.0 UA     1628 N242…
##  3  2013     5     7  2054  2055 -1.00  2317    28 -71.0 UA      622 N806…
##  4  2013     2    26  1335  1335  0     1819  1929 -70.0 UA       15 N760…
##  5  2013     2    26  1721  1725 -4.00  1936  2046 -70.0 UA      385 N855…
##  6  2013     2    28   702   705 -3.00   924  1034 -70.0 UA      963 N831…
##  7  2013     5    13  1624  1629 -5.00  1831  1941 -70.0 UA      789 N855…
##  8  2013     5     4  1914  1915 -1.00  2107  2216 -69.0 UA     1557 N364…
##  9  2013    12    27   853   856 -3.00  1052  1200 -68.0 UA      452 N430…
## 10  2013     3     1   629   632 -3.00   844   952 -68.0 UA     1702 N775…
## # ... with 58,655 more rows, and 7 more variables: origin <chr>,
## #   dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>
# Same query with the steps swapped: sort every flight by arr_delay, then
# keep the "UA" rows. The printed result matches the filter-first version,
# but here the sort runs over the whole table rather than only the UA rows.
flights %>% 
  arrange(arr_delay) %>%
  filter(carrier == "UA")
## # A tibble: 58,665 x 19
##     year month   day dep_… sche… dep_… arr_… sche… arr_… carr… flig… tail…
##    <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
##  1  2013     5     2  1947  1949 -2.00  2209  2324 -75.0 UA      612 N851…
##  2  2013     5     2  1926  1929 -3.00  2157  2310 -73.0 UA     1628 N242…
##  3  2013     5     7  2054  2055 -1.00  2317    28 -71.0 UA      622 N806…
##  4  2013     2    26  1335  1335  0     1819  1929 -70.0 UA       15 N760…
##  5  2013     2    26  1721  1725 -4.00  1936  2046 -70.0 UA      385 N855…
##  6  2013     2    28   702   705 -3.00   924  1034 -70.0 UA      963 N831…
##  7  2013     5    13  1624  1629 -5.00  1831  1941 -70.0 UA      789 N855…
##  8  2013     5     4  1914  1915 -1.00  2107  2216 -69.0 UA     1557 N364…
##  9  2013    12    27   853   856 -3.00  1052  1200 -68.0 UA      452 N430…
## 10  2013     3     1   629   632 -3.00   844   952 -68.0 UA     1702 N775…
## # ... with 58,655 more rows, and 7 more variables: origin <chr>,
## #   dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## #   time_hour <dttm>

Recovering delay

# Sort flights in ascending order of (dep_delay - arr_delay), the amount of
# delay made up between departure and arrival.
# NOTE(review): ascending order lists the most negative differences first,
# i.e. flights whose arrival delay exceeds their departure delay the most
# (first printed row: dep_delay -2, arr_delay 194). If the intent is to show
# the flights that recovered the most delay, the key probably needs to be
# wrapped in desc() -- confirm against the exercise statement.
flights %>% 
  arrange(dep_delay - arr_delay)
## # A tibble: 336,776 x 19
##     year month   day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
##    <int> <int> <int>  <int>   <int>   <dbl> <int>  <int> <dbl> <chr> <int>
##  1  2013    11     1    658     700 -  2.00  1329   1015   194 VX      399
##  2  2013     4    18    558     600 -  2.00  1149    850   179 AA      707
##  3  2013     8     8   1819    1519  180        5   1820   345 UA      996
##  4  2013     7    10   1916    1900   16.0    137   2240   177 DL     1465
##  5  2013     6    27   1608    1525   43.0   2045   1725   200 MQ     3199
##  6  2013     7    22   1606    1615 -  9.00  2056   1831   145 DL     1619
##  7  2013     7     1    811     800   11.0   1344   1100   164 DL     2395
##  8  2013     7    10   2011    1520  291     2357   1636   441 EV     4580
##  9  2013     7    22   1626    1545   41.0   2051   1740   191 MQ     2793
## 10  2013     4    18    655     700 -  5.00  1213    950   143 AA     2083
## # ... with 336,766 more rows, and 8 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>

Copyright © 2017 Kirill Müller. Licensed under CC BY-NC 4.0.