# Keep rows where month is 6, day is 2 and carrier is "US".
# `==` binds tighter than `&`, so the comparisons need no extra parentheses.
flights %>%
  filter(month == 6 & day == 2 & carrier == "US")
## # A tibble: 50 x 19
## year month day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 6 2 515 500 15.0 649 640 9.00 US 1431
## 2 2013 6 2 554 600 - 6.00 649 655 - 6.00 US 1289
## 3 2013 6 2 613 615 - 2.00 800 808 - 8.00 US 1447
## 4 2013 6 2 625 630 - 5.00 810 835 -25.0 US 1433
## 5 2013 6 2 631 629 2.00 801 818 -17.0 US 1989
## 6 2013 6 2 639 645 - 6.00 906 906 0 US 334
## 7 2013 6 2 641 645 - 4.00 822 834 -12.0 US 1173
## 8 2013 6 2 643 645 - 2.00 841 908 -27.0 US 654
## 9 2013 6 2 729 730 - 1.00 856 833 23.0 US 1821
## 10 2013 6 2 748 755 - 7.00 939 950 -11.0 US 1733
## # ... with 40 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Same selection as with `&`: conditions passed to filter() as separate
# arguments must all hold for a row to be kept.
flights %>%
  filter(
    month == 6,
    day == 2,
    carrier == "US"
  )
## # A tibble: 50 x 19
## year month day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 6 2 515 500 15.0 649 640 9.00 US 1431
## 2 2013 6 2 554 600 - 6.00 649 655 - 6.00 US 1289
## 3 2013 6 2 613 615 - 2.00 800 808 - 8.00 US 1447
## 4 2013 6 2 625 630 - 5.00 810 835 -25.0 US 1433
## 5 2013 6 2 631 629 2.00 801 818 -17.0 US 1989
## 6 2013 6 2 639 645 - 6.00 906 906 0 US 334
## 7 2013 6 2 641 645 - 4.00 822 834 -12.0 US 1173
## 8 2013 6 2 643 645 - 2.00 841 908 -27.0 US 654
## 9 2013 6 2 729 730 - 1.00 856 833 23.0 US 1821
## 10 2013 6 2 748 755 - 7.00 939 950 -11.0 US 1733
## # ... with 40 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical OR: |
# Keep rows whose dep_time is below 600 or at least 2200
# (dep_time prints as integers such as 517; presumably HHMM clock time).
flights %>%
  filter(
    dep_time < 600 |
      dep_time >= 2200
  )
## # A tibble: 16,858 x 19
## year month day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 517 515 2.00 830 819 11.0 UA 1545
## 2 2013 1 1 533 529 4.00 850 830 20.0 UA 1714
## 3 2013 1 1 542 540 2.00 923 850 33.0 AA 1141
## 4 2013 1 1 544 545 -1.00 1004 1022 -18.0 B6 725
## 5 2013 1 1 554 600 -6.00 812 837 -25.0 DL 461
## 6 2013 1 1 554 558 -4.00 740 728 12.0 UA 1696
## 7 2013 1 1 555 600 -5.00 913 854 19.0 B6 507
## 8 2013 1 1 557 600 -3.00 709 723 -14.0 EV 5708
## 9 2013 1 1 557 600 -3.00 838 846 - 8.00 B6 79
## 10 2013 1 1 558 600 -2.00 753 745 8.00 AA 301
## # ... with 16,848 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical NOT and OR: ! |
# Negate an OR: drop every row whose carrier is "UA" or "WN".
flights %>%
  filter(
    !(carrier == "UA" | carrier == "WN")
  )
## # A tibble: 265,836 x 19
## year month day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 542 540 2.00 923 850 33.0 AA 1141
## 2 2013 1 1 544 545 -1.00 1004 1022 -18.0 B6 725
## 3 2013 1 1 554 600 -6.00 812 837 -25.0 DL 461
## 4 2013 1 1 555 600 -5.00 913 854 19.0 B6 507
## 5 2013 1 1 557 600 -3.00 709 723 -14.0 EV 5708
## 6 2013 1 1 557 600 -3.00 838 846 - 8.00 B6 79
## 7 2013 1 1 558 600 -2.00 753 745 8.00 AA 301
## 8 2013 1 1 558 600 -2.00 849 851 - 2.00 B6 49
## 9 2013 1 1 558 600 -2.00 853 856 - 3.00 B6 71
## 10 2013 1 1 559 600 -1.00 941 910 31.0 AA 707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical NOT and %in%
# Drop rows whose carrier is in the given set. `%in%` has higher precedence
# than unary `!`, so this reads as !(carrier %in% c("UA", "WN")).
flights %>%
  filter(!carrier %in% c("UA", "WN"))
## # A tibble: 265,836 x 19
## year month day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 542 540 2.00 923 850 33.0 AA 1141
## 2 2013 1 1 544 545 -1.00 1004 1022 -18.0 B6 725
## 3 2013 1 1 554 600 -6.00 812 837 -25.0 DL 461
## 4 2013 1 1 555 600 -5.00 913 854 19.0 B6 507
## 5 2013 1 1 557 600 -3.00 709 723 -14.0 EV 5708
## 6 2013 1 1 557 600 -3.00 838 846 - 8.00 B6 79
## 7 2013 1 1 558 600 -2.00 753 745 8.00 AA 301
## 8 2013 1 1 558 600 -2.00 849 851 - 2.00 B6 49
## 9 2013 1 1 558 600 -2.00 853 856 - 3.00 B6 71
## 10 2013 1 1 559 600 -1.00 941 910 31.0 AA 707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical AND: &
# Keep rows whose carrier is neither "UA" nor "WN", written as two
# inequality tests joined with `&`.
flights %>%
  filter(
    carrier != "UA" &
      carrier != "WN"
  )
## # A tibble: 265,836 x 19
## year month day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 542 540 2.00 923 850 33.0 AA 1141
## 2 2013 1 1 544 545 -1.00 1004 1022 -18.0 B6 725
## 3 2013 1 1 554 600 -6.00 812 837 -25.0 DL 461
## 4 2013 1 1 555 600 -5.00 913 854 19.0 B6 507
## 5 2013 1 1 557 600 -3.00 709 723 -14.0 EV 5708
## 6 2013 1 1 557 600 -3.00 838 846 - 8.00 B6 79
## 7 2013 1 1 558 600 -2.00 753 745 8.00 AA 301
## 8 2013 1 1 558 600 -2.00 849 851 - 2.00 B6 49
## 9 2013 1 1 558 600 -2.00 853 856 - 3.00 B6 71
## 10 2013 1 1 559 600 -1.00 941 910 31.0 AA 707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Apply the two exclusions as successive filter() steps: the second step
# only sees rows that survived the first.
flights %>%
  filter(carrier != "UA") %>%
  filter(carrier != "WN")
## # A tibble: 265,836 x 19
## year month day dep_t… sched_… dep_d… arr_… sched… arr_d… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 542 540 2.00 923 850 33.0 AA 1141
## 2 2013 1 1 544 545 -1.00 1004 1022 -18.0 B6 725
## 3 2013 1 1 554 600 -6.00 812 837 -25.0 DL 461
## 4 2013 1 1 555 600 -5.00 913 854 19.0 B6 507
## 5 2013 1 1 557 600 -3.00 709 723 -14.0 EV 5708
## 6 2013 1 1 557 600 -3.00 838 846 - 8.00 B6 79
## 7 2013 1 1 558 600 -2.00 753 745 8.00 AA 301
## 8 2013 1 1 558 600 -2.00 849 851 - 2.00 B6 49
## 9 2013 1 1 558 600 -2.00 853 856 - 3.00 B6 71
## 10 2013 1 1 559 600 -1.00 941 910 31.0 AA 707
## # ... with 265,826 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## is.na()
# Keep only rows where dep_time is missing (NA).
flights %>%
  filter(is.na(dep_time))
## # A tibble: 8,255 x 19
## year month day dep_… sche… dep_… arr_… sche… arr_… carr… flig… tail…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int> <chr>
## 1 2013 1 1 NA 1630 NA NA 1815 NA EV 4308 N181…
## 2 2013 1 1 NA 1935 NA NA 2240 NA AA 791 N3EH…
## 3 2013 1 1 NA 1500 NA NA 1825 NA AA 1925 N3EV…
## 4 2013 1 1 NA 600 NA NA 901 NA B6 125 N618…
## 5 2013 1 2 NA 1540 NA NA 1747 NA EV 4352 N105…
## 6 2013 1 2 NA 1620 NA NA 1746 NA EV 4406 N139…
## 7 2013 1 2 NA 1355 NA NA 1459 NA EV 4434 N105…
## 8 2013 1 2 NA 1420 NA NA 1644 NA EV 4935 N759…
## 9 2013 1 2 NA 1321 NA NA 1536 NA EV 3849 N135…
## 10 2013 1 2 NA 1545 NA NA 1910 NA AA 133 <NA>
## # ... with 8,245 more rows, and 7 more variables: origin <chr>,
## # dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
## is.na()
# Keep only rows where arr_time is missing (NA).
flights %>%
  filter(is.na(arr_time))
## # A tibble: 8,713 x 19
## year month day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 2016 1930 46.0 NA 2220 NA EV 4204
## 2 2013 1 1 NA 1630 NA NA 1815 NA EV 4308
## 3 2013 1 1 NA 1935 NA NA 2240 NA AA 791
## 4 2013 1 1 NA 1500 NA NA 1825 NA AA 1925
## 5 2013 1 1 NA 600 NA NA 901 NA B6 125
## 6 2013 1 2 2041 2045 - 4.00 NA 2359 NA B6 147
## 7 2013 1 2 2145 2129 16.0 NA 33 NA UA 1299
## 8 2013 1 2 NA 1540 NA NA 1747 NA EV 4352
## 9 2013 1 2 NA 1620 NA NA 1746 NA EV 4406
## 10 2013 1 2 NA 1355 NA NA 1459 NA EV 4434
## # ... with 8,703 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## is.na()
# Keep rows where dep_time or arr_time (or both) is missing.
flights %>%
  filter(
    is.na(dep_time) |
      is.na(arr_time)
  )
## # A tibble: 8,713 x 19
## year month day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 2016 1930 46.0 NA 2220 NA EV 4204
## 2 2013 1 1 NA 1630 NA NA 1815 NA EV 4308
## 3 2013 1 1 NA 1935 NA NA 2240 NA AA 791
## 4 2013 1 1 NA 1500 NA NA 1825 NA AA 1925
## 5 2013 1 1 NA 600 NA NA 901 NA B6 125
## 6 2013 1 2 2041 2045 - 4.00 NA 2359 NA B6 147
## 7 2013 1 2 2145 2129 16.0 NA 33 NA UA 1299
## 8 2013 1 2 NA 1540 NA NA 1747 NA EV 4352
## 9 2013 1 2 NA 1620 NA NA 1746 NA EV 4406
## 10 2013 1 2 NA 1355 NA NA 1459 NA EV 4434
## # ... with 8,703 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## is.na()
# Keep rows that have a dep_time but no arr_time.
flights %>%
  filter(
    !is.na(dep_time) &
      is.na(arr_time)
  )
## # A tibble: 458 x 19
## year month day dep_t… sched_… dep_de… arr_… sched… arr_… carr… flig…
## <int> <int> <int> <int> <int> <dbl> <int> <int> <dbl> <chr> <int>
## 1 2013 1 1 2016 1930 46.0 NA 2220 NA EV 4204
## 2 2013 1 2 2041 2045 - 4.00 NA 2359 NA B6 147
## 3 2013 1 2 2145 2129 16.0 NA 33 NA UA 1299
## 4 2013 1 9 615 615 0 NA 855 NA 9E 3856
## 5 2013 1 9 2042 2040 2.00 NA 2357 NA B6 677
## 6 2013 1 11 1344 1350 - 6.00 NA 1518 NA EV 4171
## 7 2013 1 13 1907 1634 153 NA 1837 NA EV 4411
## 8 2013 1 13 2239 2159 40.0 NA 30 NA EV 4519
## 9 2013 1 16 837 840 - 3.00 NA 1030 NA MQ 4521
## 10 2013 1 25 1452 1500 - 8.00 NA 1619 NA US 2179
## # ... with 448 more rows, and 8 more variables: tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
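These 458 flights have a recorded departure time but no recorded arrival time. I don’t think we had hundreds of plane crashes in 2013, so the missing arrival times presumably have another explanation (for example, diverted flights or incomplete records).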
Copyright © 2017 Kirill Müller. Licensed under CC BY-NC 4.0.