# Keep rows where all three comparisons hold at once: month is 6, day is 2
# and carrier is "US". The comparisons are joined with the elementwise `&`.
flights %>%
  filter(
    (month == 6) &
      (day == 2) &
      (carrier == "US")
  )
## # A tibble: 50 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 6 2 515 500 15 649
## 2 2013 6 2 554 600 -6 649
## 3 2013 6 2 613 615 -2 800
## 4 2013 6 2 625 630 -5 810
## 5 2013 6 2 631 629 2 801
## 6 2013 6 2 639 645 -6 906
## 7 2013 6 2 641 645 -4 822
## 8 2013 6 2 643 645 -2 841
## 9 2013 6 2 729 730 -1 856
## 10 2013 6 2 748 755 -7 939
## # ... with 40 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Several conditions passed to filter() as separate arguments are combined
# with AND: only rows satisfying every one of them are kept.
flights %>%
  filter(
    month == 6,
    day == 2,
    carrier == "US"
  )
## # A tibble: 50 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 6 2 515 500 15 649
## 2 2013 6 2 554 600 -6 649
## 3 2013 6 2 613 615 -2 800
## 4 2013 6 2 625 630 -5 810
## 5 2013 6 2 631 629 2 801
## 6 2013 6 2 639 645 -6 906
## 7 2013 6 2 641 645 -4 822
## 8 2013 6 2 643 645 -2 841
## 9 2013 6 2 729 730 -1 856
## 10 2013 6 2 748 755 -7 939
## # ... with 40 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical OR: |
# Keep rows whose dep_time is below 600 or at least 2200; satisfying either
# side of `|` is enough. Rows where the condition evaluates to NA are dropped.
flights %>%
  filter(
    dep_time < 600 |
      dep_time >= 2200
  )
## # A tibble: 16,858 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # ... with 16,848 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical NOT and OR: ! |
# Negate an OR: drop rows whose carrier is "UA" or "WN" and keep the rest.
flights %>%
  filter(
    !(carrier == "UA" | carrier == "WN")
  )
## # A tibble: 265,836 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 544 545 -1 1004
## 3 2013 1 1 554 600 -6 812
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 709
## 6 2013 1 1 557 600 -3 838
## 7 2013 1 1 558 600 -2 753
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 559 600 -1 941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical NOT and %in%
# Negate a set-membership test: keep rows whose carrier is not one of the
# listed codes. `%in%` never returns NA, so a missing carrier would count as
# "not in the set" and be kept.
flights %>%
  filter(
    !(carrier %in% c("UA", "WN"))
  )
## # A tibble: 265,836 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 544 545 -1 1004
## 3 2013 1 1 554 600 -6 812
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 709
## 6 2013 1 1 557 600 -3 838
## 7 2013 1 1 558 600 -2 753
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 559 600 -1 941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## Logical AND: &
# Keep rows whose carrier differs from "UA" and also differs from "WN".
flights %>%
  filter(
    carrier != "UA" &
      carrier != "WN"
  )
## # A tibble: 265,836 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 544 545 -1 1004
## 3 2013 1 1 554 600 -6 812
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 709
## 6 2013 1 1 557 600 -3 838
## 7 2013 1 1 558 600 -2 753
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 559 600 -1 941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Two successive filter() steps, one per excluded carrier: the second step
# only sees the rows that survived the first.
flights %>%
  filter(carrier != "UA") %>%
  filter(carrier != "WN")
## # A tibble: 265,836 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 542 540 2 923
## 2 2013 1 1 544 545 -1 1004
## 3 2013 1 1 554 600 -6 812
## 4 2013 1 1 555 600 -5 913
## 5 2013 1 1 557 600 -3 709
## 6 2013 1 1 557 600 -3 838
## 7 2013 1 1 558 600 -2 753
## 8 2013 1 1 558 600 -2 849
## 9 2013 1 1 558 600 -2 853
## 10 2013 1 1 559 600 -1 941
## # ... with 265,826 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## is.na()
# Keep only the rows whose dep_time is missing. Missingness has to be tested
# with is.na(); comparing against NA with `==` yields NA, never TRUE.
flights %>%
  filter(
    is.na(dep_time)
  )
## # A tibble: 8,255 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 NA 1630 NA NA
## 2 2013 1 1 NA 1935 NA NA
## 3 2013 1 1 NA 1500 NA NA
## 4 2013 1 1 NA 600 NA NA
## 5 2013 1 2 NA 1540 NA NA
## 6 2013 1 2 NA 1620 NA NA
## 7 2013 1 2 NA 1355 NA NA
## 8 2013 1 2 NA 1420 NA NA
## 9 2013 1 2 NA 1321 NA NA
## 10 2013 1 2 NA 1545 NA NA
## # ... with 8,245 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## is.na()
# Keep only the rows whose arr_time is missing.
flights %>%
  filter(
    is.na(arr_time)
  )
## # A tibble: 8,713 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 2016 1930 46 NA
## 2 2013 1 1 NA 1630 NA NA
## 3 2013 1 1 NA 1935 NA NA
## 4 2013 1 1 NA 1500 NA NA
## 5 2013 1 1 NA 600 NA NA
## 6 2013 1 2 2041 2045 -4 NA
## 7 2013 1 2 2145 2129 16 NA
## 8 2013 1 2 NA 1540 NA NA
## 9 2013 1 2 NA 1620 NA NA
## 10 2013 1 2 NA 1355 NA NA
## # ... with 8,703 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## is.na()
# Keep rows where at least one of dep_time and arr_time is missing.
flights %>%
  filter(
    is.na(dep_time) |
      is.na(arr_time)
  )
## # A tibble: 8,713 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 2016 1930 46 NA
## 2 2013 1 1 NA 1630 NA NA
## 3 2013 1 1 NA 1935 NA NA
## 4 2013 1 1 NA 1500 NA NA
## 5 2013 1 1 NA 600 NA NA
## 6 2013 1 2 2041 2045 -4 NA
## 7 2013 1 2 2145 2129 16 NA
## 8 2013 1 2 NA 1540 NA NA
## 9 2013 1 2 NA 1620 NA NA
## 10 2013 1 2 NA 1355 NA NA
## # ... with 8,703 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
## is.na()
# Keep rows that have a recorded dep_time but no recorded arr_time.
flights %>%
  filter(
    !is.na(dep_time) &
      is.na(arr_time)
  )
## # A tibble: 458 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 2016 1930 46 NA
## 2 2013 1 2 2041 2045 -4 NA
## 3 2013 1 2 2145 2129 16 NA
## 4 2013 1 9 615 615 0 NA
## 5 2013 1 9 2042 2040 2 NA
## 6 2013 1 11 1344 1350 -6 NA
## 7 2013 1 13 1907 1634 153 NA
## 8 2013 1 13 2239 2159 40 NA
## 9 2013 1 16 837 840 -3 NA
## 10 2013 1 25 1452 1500 -8 NA
## # ... with 448 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
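These 458 flights have a recorded departure time but no recorded arrival time. I don’t think we had hundreds of plane crashes in 2013, so a missing `arr_time` should not be read as a flight that never landed.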
Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.