# Shortest flight: sort ascending by air_time and keep only the first row.
flights %>%
  arrange(air_time) %>%
  head(n = 1)
## # A tibble: 1 x 19
## year month day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 1 16 1355 1315 40.0 1442 1411 31.0 EV 4368 N169…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## # distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Naive attempt at the largest arrival delay: sort ascending by arr_delay
# and keep only the last row.
flights %>%
  arrange(arr_delay) %>%
  tail(n = 1)
## # A tibble: 1 x 19
## year month day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 9 30 NA 840 NA NA 1020 NA MQ 3531 N839…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## # distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
Why doesn’t this give the result we’re looking for? Can we use a filter?
# Drop rows with a missing arr_delay before sorting, so that the last row
# after an ascending sort has the largest recorded arrival delay.
flights %>%
  filter(!is.na(arr_delay)) %>%
  arrange(arr_delay) %>%
  tail(n = 1)
## # A tibble: 1 x 19
## year month day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 1 9 641 900 1301 1242 1530 1272 HA 51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## # distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
Or the pattern below?
# Sort on the logical key !is.na(arr_delay) first (FALSE sorts before TRUE,
# so rows with a missing arr_delay come first), then by arr_delay itself;
# the last row then has the largest recorded arrival delay.
flights %>%
  arrange(!is.na(arr_delay), arr_delay) %>%
  tail(n = 1)
## # A tibble: 1 x 19
## year month day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 1 9 641 900 1301 1242 1530 1272 HA 51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## # distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
Usually it’s easiest to sort in descending order:
# Negating arr_delay turns the ascending sort into a descending one, so the
# largest arrival delay ends up in the first row.
flights %>%
  arrange(-arr_delay) %>%
  head(n = 1)
## # A tibble: 1 x 19
## year month day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 1 9 641 900 1301 1242 1530 1272 HA 51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## # distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# desc() requests a descending sort on arr_delay; the first row is the
# flight with the largest arrival delay.
flights %>%
  arrange(desc(arr_delay)) %>%
  head(n = 1)
## # A tibble: 1 x 19
## year month day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 1 9 641 900 1301 1242 1530 1272 HA 51 N384…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## # distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Longest flight: sort descending by air_time and keep only the first row.
flights %>%
  arrange(desc(air_time)) %>%
  head(n = 1)
## # A tibble: 1 x 19
## year month day dep_t… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 3 17 1337 1335 2.00 1937 1836 61.0 UA 15 N770…
## # ... with 7 more variables: origin <chr>, dest <chr>, air_time <dbl>,
## # distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
If we filter first, fewer observations need to be sorted.
# Restrict to carrier "UA" first, then sort the remaining rows by arr_delay.
flights %>%
  filter(carrier == "UA") %>%
  arrange(arr_delay)
## # A tibble: 58,665 x 19
## year month day dep_… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 5 2 1947 1949 -2.00 2209 2324 -75.0 UA 612 N851…
## 2 2013 5 2 1926 1929 -3.00 2157 2310 -73.0 UA 1628 N242…
## 3 2013 5 7 2054 2055 -1.00 2317 28 -71.0 UA 622 N806…
## 4 2013 2 26 1335 1335 0 1819 1929 -70.0 UA 15 N760…
## 5 2013 2 26 1721 1725 -4.00 1936 2046 -70.0 UA 385 N855…
## 6 2013 2 28 702 705 -3.00 924 1034 -70.0 UA 963 N831…
## 7 2013 5 13 1624 1629 -5.00 1831 1941 -70.0 UA 789 N855…
## 8 2013 5 4 1914 1915 -1.00 2107 2216 -69.0 UA 1557 N364…
## 9 2013 12 27 853 856 -3.00 1052 1200 -68.0 UA 452 N430…
## 10 2013 3 1 629 632 -3.00 844 952 -68.0 UA 1702 N775…
## # ... with 58,655 more rows, and 7 more variables: origin <chr>,
## # dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
# Sort all flights by arr_delay first, then keep only the carrier "UA" rows.
flights %>%
  arrange(arr_delay) %>%
  filter(carrier == "UA")
## # A tibble: 58,665 x 19
## year month day dep_… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 5 2 1947 1949 -2.00 2209 2324 -75.0 UA 612 N851…
## 2 2013 5 2 1926 1929 -3.00 2157 2310 -73.0 UA 1628 N242…
## 3 2013 5 7 2054 2055 -1.00 2317 28 -71.0 UA 622 N806…
## 4 2013 2 26 1335 1335 0 1819 1929 -70.0 UA 15 N760…
## 5 2013 2 26 1721 1725 -4.00 1936 2046 -70.0 UA 385 N855…
## 6 2013 2 28 702 705 -3.00 924 1034 -70.0 UA 963 N831…
## 7 2013 5 13 1624 1629 -5.00 1831 1941 -70.0 UA 789 N855…
## 8 2013 5 4 1914 1915 -1.00 2107 2216 -69.0 UA 1557 N364…
## 9 2013 12 27 853 856 -3.00 1052 1200 -68.0 UA 452 N430…
## 10 2013 3 1 629 632 -3.00 844 952 -68.0 UA 1702 N775…
## # ... with 58,655 more rows, and 7 more variables: origin <chr>,
## # dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
# Sort ascending by the computed expression dep_delay - arr_delay; arrange()
# accepts expressions of columns, not just bare column names.
flights %>%
  arrange(dep_delay - arr_delay)
## # A tibble: 336,776 x 19
## year month day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 11 1 658 700 - 2.00 1329 1015 194 VX 399
## 2 2013 4 18 558 600 - 2.00 1149 850 179 AA 707
## 3 2013 8 8 1819 1519 180 5 1820 345 UA 996
## 4 2013 7 10 1916 1900 16.0 137 2240 177 DL 1465
## 5 2013 6 27 1608 1525 43.0 2045 1725 200 MQ 3199
## 6 2013 7 22 1606 1615 - 9.00 2056 1831 145 DL 1619
## 7 2013 7 1 811 800 11.0 1344 1100 164 DL 2395
## 8 2013 7 10 2011 1520 291 2357 1636 441 EV 4580
## 9 2013 7 22 1626 1545 41.0 2051 1740 191 MQ 2793
## 10 2013 4 18 655 700 - 5.00 1213 950 143 AA 2083
## # ... with 336,766 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
Copyright © 2017 Kirill Müller. Licensed under CC BY-NC 4.0.