View all flights that arrived after 10:00 PM. Use an intermediate variable, a nested expression, and the pipe. Which appeals most to you?
# Exercise template: replace each ___ with the missing filter condition.
# Approach 1: intermediate variable -- store the filtered result, then view it.
flights_after_10 <- filter(flights, ___)
view(flights_after_10)
# Approach 2: nested expression -- the filter() call is passed directly to view().
view(filter(flights, ___))
# Approach 3: pipe -- flights is passed through filter() and on to view().
flights %>%
filter(___) %>%
view()
Extend the three solutions to view all "UA" flights that arrived after 10:00 PM.
# Exercise template: replace each ___ (and the ...) with the missing code.
# Approach 1: intermediate variables -- one new variable per filtering step.
flights_after_10 <- filter(flights, ___)
ua_flights_after_10 <- ...
view(___)
# Approach 2: nested expression -- the inner filter() is evaluated first.
view(filter(filter(flights, ___), ___))
# Approach 3: pipe -- one filter() per condition, read from top to bottom.
flights %>%
filter(___) %>%
filter(___) %>%
view()
Extend the three solutions to view all "UA" flights that departed before 6:00 PM and arrived after 10:00 PM.
Extend the three solutions to view all "UA" flights that departed before 6:00 PM and arrived after 10:00 PM and had a delay of more than two hours.
Extend the three solutions to view all "UA" flights that departed before 6:00 PM and arrived after 10:00 PM and had a delay of more than two hours, originating in one of New York City’s airports.
Extend the three solutions to view all "UA" flights that departed before 6:00 PM and arrived after 10:00 PM and had a delay of more than two hours, originating in one of New York City’s airports but excluding flights to Honolulu International Airport.
Hint: Use `dest != "HNL"` as the predicate.
Sort the result by `distance`.
► Solution:
### Intermediate variables
Naming is hard!
# Intermediate variables: every step stores its result under a new name, and
# the next step narrows that result down further.

# Arrived after 10:00 PM. `arr_time` holds the clock time as an HHMM integer,
# so 10:00 PM is 2200. "After" is a strict comparison: an arrival at exactly
# 22:00 does not qualify.
late_flights <-
  filter(flights, arr_time > 2200)

# Keep only flights operated by the carrier "UA".
late_ua_flights <-
  filter(late_flights, carrier == "UA")

# Departed before 6:00 PM (1800 in HHMM form).
early_late_ua_flights <-
  filter(late_ua_flights, dep_time < 1800)

# Delay of more than two hours; the task's "delay" is read here as the
# arrival delay, compared against 120 (minutes).
early_late_late_ua_flights <-
  filter(early_late_ua_flights, arr_delay > 120)

# Exclude flights whose destination is Honolulu ("HNL").
# NOTE(review): there is no filter on `origin` for the "New York City
# airports" requirement -- presumably every row of `flights` already departs
# from NYC; confirm against the dataset documentation.
early_late_late_ua_flights_not_honolulu <-
  filter(early_late_late_ua_flights, dest != "HNL")

# Sort the remaining flights by distance.
early_late_late_ua_flights_not_honolulu_sorted <-
  arrange(
    early_late_late_ua_flights_not_honolulu,
    distance
  )

view(early_late_late_ua_flights_not_honolulu_sorted)
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 4 10 1740 1716 24 2216
## 2 2013 7 7 1756 1710 46 2230
## 3 2013 8 8 1754 1710 44 2213
## 4 2013 8 9 1734 1710 24 2209
## 5 2013 7 28 1747 1505 162 2220
## 6 2013 8 8 1753 1609 104 2235
## 7 2013 7 7 1734 1540 114 2218
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
Nested expression: difficult to read, because the calls have to be read from the inside out.
# Nested expression: the same steps written as calls wrapped inside each
# other. Evaluation runs inside-out: the innermost filter() is applied first,
# view() last.
# NOTE(review): there is no filter on `origin` for the "New York City
# airports" requirement -- presumably every row of `flights` already departs
# from NYC; confirm against the dataset documentation.
view(
  arrange(
    filter(
      filter(
        filter(
          filter(
            filter(
              flights,
              # Arrived after 10:00 PM; strict, so exactly 22:00 is excluded.
              arr_time > 2200
            ),
            # Operated by the carrier "UA".
            carrier == "UA"
          ),
          # Departed before 6:00 PM.
          dep_time < 1800
        ),
        # Arrival delay of more than two hours (120 minutes).
        arr_delay > 120
      ),
      # Not bound for Honolulu.
      dest != "HNL"
    ),
    # Sort key for arrange().
    distance
  )
)
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 4 10 1740 1716 24 2216
## 2 2013 7 7 1756 1710 46 2230
## 3 2013 8 8 1754 1710 44 2213
## 4 2013 8 9 1734 1710 24 2209
## 5 2013 7 28 1747 1505 162 2220
## 6 2013 8 8 1753 1609 104 2235
## 7 2013 7 7 1734 1540 114 2218
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
# Pipe: each line passes its result on to the next step, so the steps read
# from top to bottom in the order in which they are applied.
# NOTE(review): there is no filter on `origin` for the "New York City
# airports" requirement -- presumably every row of `flights` already departs
# from NYC; confirm against the dataset documentation.
flights %>%
  # Arrived after 10:00 PM; strict, so an arrival at exactly 22:00 is excluded.
  filter(arr_time > 2200) %>%
  # Operated by the carrier "UA".
  filter(carrier == "UA") %>%
  # Departed before 6:00 PM.
  filter(dep_time < 1800) %>%
  # Arrival delay of more than two hours (120 minutes).
  filter(arr_delay > 120) %>%
  # Not bound for Honolulu.
  filter(dest != "HNL") %>%
  # Sort by distance, then open the result in the viewer.
  arrange(distance) %>%
  view()
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 4 10 1740 1716 24 2216
## 2 2013 7 7 1756 1710 46 2230
## 3 2013 8 8 1754 1710 44 2213
## 4 2013 8 9 1734 1710 24 2209
## 5 2013 7 28 1747 1505 162 2220
## 6 2013 8 8 1753 1609 104 2235
## 7 2013 7 7 1734 1540 114 2218
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
The original data is never updated! You still need to assign the result of a pipe to a variable:
# Print the original table: it still has all of its rows, because the
# filtering steps return new tables instead of modifying `flights`.
flights
## # A tibble: 336,776 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 517 515 2 830
## 2 2013 1 1 533 529 4 850
## 3 2013 1 1 542 540 2 923
## 4 2013 1 1 544 545 -1 1004
## 5 2013 1 1 554 600 -6 812
## 6 2013 1 1 554 558 -4 740
## 7 2013 1 1 555 600 -5 913
## 8 2013 1 1 557 600 -3 709
## 9 2013 1 1 557 600 -3 838
## 10 2013 1 1 558 600 -2 753
## # … with 336,766 more rows, and 12 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>
# Assign the result of the whole pipe to a variable so it can be reused;
# without the assignment the filtered table is only displayed and then lost.
# NOTE(review): there is no filter on `origin` for the "New York City
# airports" requirement -- presumably every row of `flights` already departs
# from NYC; confirm against the dataset documentation.
late_late_ua_flights_not_honolulu <-
  flights %>%
  # Arrived after 10:00 PM; strict, so an arrival at exactly 22:00 is excluded.
  filter(arr_time > 2200) %>%
  # Operated by the carrier "UA".
  filter(carrier == "UA") %>%
  # Departed before 6:00 PM.
  filter(dep_time < 1800) %>%
  # Arrival delay of more than two hours (120 minutes).
  filter(arr_delay > 120) %>%
  # Not bound for Honolulu.
  filter(dest != "HNL") %>%
  # Sort by distance.
  arrange(distance)

# Print the stored result.
late_late_ua_flights_not_honolulu
## # A tibble: 7 x 19
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 4 10 1740 1716 24 2216
## 2 2013 7 7 1756 1710 46 2230
## 3 2013 8 8 1754 1710 44 2213
## 4 2013 8 9 1734 1710 24 2209
## 5 2013 7 28 1747 1505 162 2220
## 6 2013 8 8 1753 1609 104 2235
## 7 2013 7 7 1734 1540 114 2218
## # … with 12 more variables: sched_arr_time <int>, arr_delay <dbl>,
## # carrier <chr>, flight <int>, tailnum <chr>, origin <chr>, dest <chr>,
## # air_time <dbl>, distance <dbl>, hour <dbl>, minute <dbl>,
## # time_hour <dttm>
Copyright © 2019 Kirill Müller. Licensed under CC BY-NC 4.0.