# Rows for 13 November with carrier "US", narrowed in two successive
# filter() steps: first by date, then by carrier.
flights %>%
  filter(month == 11, day == 13) %>%
  filter(carrier == "US")
## # A tibble: 66 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 11 13 458 500 -2 632
## 2 2013 11 13 551 600 -9 638
## 3 2013 11 13 553 600 -7 650
## 4 2013 11 13 606 615 -9 741
## 5 2013 11 13 607 615 -8 758
## 6 2013 11 13 622 630 -8 818
## 7 2013 11 13 625 630 -5 824
## 8 2013 11 13 626 630 -4 915
## 9 2013 11 13 631 600 31 739
## 10 2013 11 13 653 700 -7 753
## # ... with 56 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Same selection as a single filter() call: all three conditions are
# combined with the elementwise `&` operator.
flights %>%
  filter(carrier == "US" & month == 11 & day == 13)
## # A tibble: 66 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 11 13 458 500 -2 632
## 2 2013 11 13 551 600 -9 638
## 3 2013 11 13 553 600 -7 650
## 4 2013 11 13 606 615 -9 741
## 5 2013 11 13 607 615 -8 758
## 6 2013 11 13 622 630 -8 818
## 7 2013 11 13 625 630 -5 824
## 8 2013 11 13 626 630 -4 915
## 9 2013 11 13 631 600 31 739
## 10 2013 11 13 653 700 -7 753
## # ... with 56 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Rows whose dep_time is below 600 or above 2200.
# Rows with a missing dep_time evaluate to NA and are dropped by filter().
flights %>%
  filter(dep_time > 2200 | dep_time < 600)
## # A tibble: 16,753 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with 16,743 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Exclude carriers "UA" and "WN": both inequality tests must hold.
flights %>%
  filter(carrier != "WN" & carrier != "UA")
## # A tibble: 265,836 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 544 545 -1 1004
## 3 2013 1 1 554 600 -6 812
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 709
## 6 2013 1 1 557 600 -3 838
## 7 2013 1 1 558 600 -2 753
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 559 600 -1 941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Exclude carriers "UA" and "WN": negate the "is either one" test.
flights %>%
  filter(!(carrier == "WN" | carrier == "UA"))
## # A tibble: 265,836 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 544 545 -1 1004
## 3 2013 1 1 554 600 -6 812
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 709
## 6 2013 1 1 557 600 -3 838
## 7 2013 1 1 558 600 -2 753
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 559 600 -1 941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Exclude carriers "UA" and "WN": negate a set-membership test.
flights %>%
  filter(!(carrier %in% c("WN", "UA")))
## # A tibble: 265,836 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 544 545 -1 1004
## 3 2013 1 1 554 600 -6 812
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 709
## 6 2013 1 1 557 600 -3 838
## 7 2013 1 1 558 600 -2 753
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 559 600 -1 941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Rows whose arr_time is missing; is.na() is required because comparing
# with `== NA` never yields TRUE.
flights %>%
  filter(is.na(arr_time))
## # A tibble: 8,713 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 2016 1930 46 NA
## 2 2013 1 1 NA 1630 NA NA
## 3 2013 1 1 NA 1935 NA NA
## 4 2013 1 1 NA 1500 NA NA
## 5 2013 1 1 NA 600 NA NA
## 6 2013 1 2 2041 2045 -4 NA
## 7 2013 1 2 2145 2129 16 NA
## 8 2013 1 2 NA 1540 NA NA
## 9 2013 1 2 NA 1620 NA NA
## 10 2013 1 2 NA 1355 NA NA
## # ... with 8,703 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Rows where both dep_time and arr_time are missing.
flights %>%
  filter(is.na(dep_time) & is.na(arr_time))
## # A tibble: 8,255 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 NA 1630 NA NA
## 2 2013 1 1 NA 1935 NA NA
## 3 2013 1 1 NA 1500 NA NA
## 4 2013 1 1 NA 600 NA NA
## 5 2013 1 2 NA 1540 NA NA
## 6 2013 1 2 NA 1620 NA NA
## 7 2013 1 2 NA 1355 NA NA
## 8 2013 1 2 NA 1420 NA NA
## 9 2013 1 2 NA 1321 NA NA
## 10 2013 1 2 NA 1545 NA NA
## # ... with 8,245 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Rows with a recorded dep_time but a missing arr_time.
flights %>%
  filter(!is.na(dep_time) & is.na(arr_time))
## # A tibble: 458 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 2016 1930 46 NA
## 2 2013 1 2 2041 2045 -4 NA
## 3 2013 1 2 2145 2129 16 NA
## 4 2013 1 9 615 615 0 NA
## 5 2013 1 9 2042 2040 2 NA
## 6 2013 1 11 1344 1350 -6 NA
## 7 2013 1 13 1907 1634 153 NA
## 8 2013 1 13 2239 2159 40 NA
## 9 2013 1 16 837 840 -3 NA
## 10 2013 1 25 1452 1500 -8 NA
## # ... with 448 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
Filtering before arranging is faster, because arranging a smaller subset needs less work.
# Timing, variant 1: sort every row by arr_delay first, then keep only
# carrier "UA". The verb order is the thing being measured.
system.time(
  flights %>%
    arrange(arr_delay) %>%
    filter(carrier == "UA")
)
## user system elapsed
## 0.207 0.017 0.223
# Timing, variant 2: keep only carrier "UA" first, then sort the
# remaining rows by arr_delay. The verb order is the thing being measured.
system.time(
  flights %>%
    filter(carrier == "UA") %>%
    arrange(arr_delay)
)
## user system elapsed
## 0.039 0.008 0.047
Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.