View all flights that arrived after 10:00 PM. Use an intermediate variable, a nested expression, and the pipe. Which appeals more to you?
# Exercise template: replace each ___ with the filter predicate.
# Approach 1: intermediate variable
flights_after_10 <- filter(flights, ___)
view(flights_after_10)
# Approach 2: nested expression
view(filter(flights, ___))
# Approach 3: pipe
flights %>%
filter(___) %>%
view()
Extend the four solutions to view all "UA"
flights that arrived after 10:00 PM.
# Exercise template: replace each ___ (and the ...) with your own code.
# Approach 1: intermediate variables, one per filtering step
flights_after_10 <- filter(flights, ___)
ua_flights_after_10 <- ...
view(___)
# Approach 2: nested expression, the inner filter() runs first
view(filter(filter(flights, ___), ___))
# Approach 3: pipe, one filter() per condition
flights %>%
filter(___) %>%
filter(___) %>%
view()
Extend the four solutions to view all "UA"
flights that departed before 6:00 PM and arrived after 10:00 PM.
Extend the four solutions to view all "UA"
flights that departed before 6:00 PM and arrived after 10:00 PM and had a delay of more than two hours.
Extend the four solutions to view all "UA"
flights that departed before 6:00 PM and arrived after 10:00 PM and had a delay of more than two hours, originating in one of New York City’s airports.
Extend the four solutions to view all "UA"
flights that departed before 6:00 PM and arrived after 10:00 PM and had a delay of more than two hours, originating in one of New York City’s airports but excluding Honolulu International airport.
Hint: Use dest != "HNL" as the predicate.
Sort the result by distance.
► Solution:
Naming is hard!
# Approach 1: keep the result of every step in its own intermediate variable.
# Each name has to describe all steps applied so far, so the names keep
# growing with every additional condition.

# Arrival time of 22:00 or later (arr_time is compared as an HHMM number).
late_flights <- filter(flights, arr_time >= 2200)

# ... operated by carrier "UA"
late_ua_flights <- filter(late_flights, carrier == "UA")

# ... that departed before 18:00
early_late_ua_flights <- filter(late_ua_flights, dep_time < 1800)

# ... with an arrival delay above 120 minutes
early_late_late_ua_flights <- filter(
  early_late_ua_flights,
  arr_delay > 120
)

# ... whose destination is not "HNL"
early_late_late_ua_flights_not_honolulu <- filter(
  early_late_late_ua_flights,
  dest != "HNL"
)

# Sort the remaining flights by distance (ascending) and open the viewer.
early_late_late_ua_flights_not_honolulu_sorted <- arrange(
  early_late_late_ua_flights_not_honolulu,
  distance
)
view(early_late_late_ua_flights_not_honolulu_sorted)
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 4 10 1740 1716 24 2216 1936
## 2 2013 7 7 1756 1710 46 2230 1932
## 3 2013 8 8 1754 1710 44 2213 1932
## 4 2013 8 9 1734 1710 24 2209 1922
## 5 2013 7 28 1747 1505 162 2220 1802
## 6 2013 8 8 1753 1609 104 2235 1928
## 7 2013 7 7 1734 1540 114 2218 1858
## # … with 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Difficult to read.
# Approach 2: one nested expression, no intermediate variables.
# Evaluation runs from the innermost call outwards, so the code has to be
# read inside-out: the first filter applied is the most deeply nested one,
# and each condition sits far away from the filter() it belongs to.
view(
  arrange(
    filter(
      filter(
        filter(
          filter(
            filter(flights, arr_time >= 2200),
            carrier == "UA"
          ),
          dep_time < 1800
        ),
        arr_delay > 120
      ),
      dest != "HNL"
    ),
    distance
  )
)
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 4 10 1740 1716 24 2216 1936
## 2 2013 7 7 1756 1710 46 2230 1932
## 3 2013 8 8 1754 1710 44 2213 1932
## 4 2013 8 9 1734 1710 24 2209 1922
## 5 2013 7 28 1747 1505 162 2220 1802
## 6 2013 8 8 1753 1609 104 2235 1928
## 7 2013 7 7 1734 1540 114 2218 1858
## # … with 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
# Approach 3: the pipe. Steps read top to bottom in the order they are
# applied, and no intermediate names are needed.
flights %>%
  filter(arr_time >= 2200) %>%  # arrival time of 22:00 or later
  filter(carrier == "UA") %>%   # carrier "UA" only
  filter(dep_time < 1800) %>%   # departed before 18:00
  filter(arr_delay > 120) %>%   # arrival delay above 120 minutes
  filter(dest != "HNL") %>%     # destination other than "HNL"
  arrange(distance) %>%         # sort by distance, ascending
  view()
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 4 10 1740 1716 24 2216 1936
## 2 2013 7 7 1756 1710 46 2230 1932
## 3 2013 8 8 1754 1710 44 2213 1932
## 4 2013 8 9 1734 1710 24 2209 1922
## 5 2013 7 28 1747 1505 162 2220 1802
## 6 2013 8 8 1753 1609 104 2235 1928
## 7 2013 7 7 1734 1540 114 2218 1858
## # … with 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
The original data is never updated! You still need to assign the result of a pipe to a variable:
# Print the original tibble: none of the pipelines above assigned back to
# `flights`, so it still holds the full, unfiltered data.
flights
## # A tibble: 336,776 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 1 1 517 515 2 830 819
## 2 2013 1 1 533 529 4 850 830
## 3 2013 1 1 542 540 2 923 850
## 4 2013 1 1 544 545 -1 1004 1022
## 5 2013 1 1 554 600 -6 812 837
## 6 2013 1 1 554 558 -4 740 728
## 7 2013 1 1 555 600 -5 913 854
## 8 2013 1 1 557 600 -3 709 723
## 9 2013 1 1 557 600 -3 838 846
## 10 2013 1 1 558 600 -2 753 745
## # … with 336,766 more rows, and 11 more variables: arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# A pipe only returns its result; assign it to a name to keep it.
late_late_ua_flights_not_honolulu <- flights %>%
  filter(arr_time >= 2200) %>%  # arrival time of 22:00 or later
  filter(carrier == "UA") %>%   # carrier "UA" only
  filter(dep_time < 1800) %>%   # departed before 18:00
  filter(arr_delay > 120) %>%   # arrival delay above 120 minutes
  filter(dest != "HNL") %>%     # destination other than "HNL"
  arrange(distance)             # sort by distance, ascending

# Print the stored result.
late_late_ua_flights_not_honolulu
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time sched_arr_time
## <int> <int> <int> <int> <int> <dbl> <int> <int>
## 1 2013 4 10 1740 1716 24 2216 1936
## 2 2013 7 7 1756 1710 46 2230 1932
## 3 2013 8 8 1754 1710 44 2213 1932
## 4 2013 8 9 1734 1710 24 2209 1922
## 5 2013 7 28 1747 1505 162 2220 1802
## 6 2013 8 8 1753 1609 104 2235 1928
## 7 2013 7 7 1734 1540 114 2218 1858
## # … with 11 more variables: arr_delay <dbl>, carrier <chr>, flight <int>,
## # tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>,
## # hour <dbl>, minute <dbl>, time_hour <dttm>
Copyright © 2019 Kirill Müller. Licensed under CC BY-NC 4.0.