# Number of distinct carriers per (origin, dest) route, the long way.
# The first summarise() collapses the data to one row per
# (origin, dest, carrier) and drops `carrier` from the grouping, so the
# second summarise() counts the remaining rows -- one per carrier -- within
# each (origin, dest) pair.
flights %>%
  group_by(origin, dest, carrier) %>%
  summarise(n_flights = n()) %>%
  summarise(n_distinct_carriers = n()) %>%
  ungroup() %>%
  arrange(desc(n_distinct_carriers))
## # A tibble: 224 x 3
## origin dest n_distinct_carriers
## <chr> <chr> <int>
## 1 EWR DTW 5
## 2 EWR MSP 5
## 3 JFK LAX 5
## 4 JFK SFO 5
## 5 JFK TPA 5
## 6 LGA ATL 5
## 7 LGA CLE 5
## 8 LGA CLT 5
## 9 EWR ATL 4
## 10 JFK AUS 4
## # ... with 214 more rows
Much shorter:
# Number of distinct carriers per (origin, dest) route using count().
# The first count() yields one row per (origin, dest, carrier); the second
# counts those rows within each (origin, dest) pair.
# The result column is named explicitly: the automatic fallback name for a
# second count (`nn` vs. `n`) differs between dplyr versions, and relying on
# it would make the arrange() below fail where the column is called `n`.
flights %>%
  count(origin, dest, carrier) %>%
  count(origin, dest, name = "nn") %>%
  ungroup() %>%
  arrange(desc(nn))
## # A tibble: 224 x 3
## origin dest nn
## <chr> <chr> <int>
## 1 EWR DTW 5
## 2 EWR MSP 5
## 3 JFK LAX 5
## 4 JFK SFO 5
## 5 JFK TPA 5
## 6 LGA ATL 5
## 7 LGA CLE 5
## 8 LGA CLT 5
## 9 EWR ATL 4
## 10 JFK AUS 4
## # ... with 214 more rows
Alternatively:
# Number of distinct carriers per (origin, dest) route using tally().
# The first tally() counts flights per (origin, dest, carrier) and drops
# `carrier` from the grouping; the second counts the remaining rows per
# (origin, dest).
# `wt = NULL` requests plain row counts, so the `n` column produced by the
# first tally() is not used as a weight (some dplyr versions weight by an
# existing `n` column by default).
# `name = "nn"` pins the output column name: the automatic choice between
# `n` and `nn` depends on the dplyr version, and arrange() below needs `nn`.
flights %>%
  group_by(origin, dest, carrier) %>%
  tally() %>%
  tally(wt = NULL, name = "nn") %>%
  ungroup() %>%
  arrange(desc(nn))
## # A tibble: 224 x 3
## origin dest nn
## <chr> <chr> <int>
## 1 EWR DTW 5
## 2 EWR MSP 5
## 3 JFK LAX 5
## 4 JFK SFO 5
## 5 JFK TPA 5
## 6 LGA ATL 5
## 7 LGA CLE 5
## 8 LGA CLT 5
## 9 EWR ATL 4
## 10 JFK AUS 4
## # ... with 214 more rows
# Share of flights with a missing departure time, per carrier and month.
# mean() of the logical vector is.na(dep_time) is the fraction of TRUE
# values, i.e. the fraction of rows in the group without a recorded
# departure time.
cancelled_flights <- flights %>%
  group_by(carrier, month) %>%
  summarise(share_of_cancelled = mean(is.na(dep_time))) %>%
  ungroup()

# Print the summary table.
cancelled_flights
## # A tibble: 185 x 3
## carrier month share_of_cancelled
## <chr> <int> <dbl>
## 1 9E 1 0.0477
## 2 9E 2 0.0727
## 3 9E 3 0.0695
## 4 9E 4 0.0688
## 5 9E 5 0.0506
## 6 9E 6 0.112
## 7 9E 7 0.0870
## 8 9E 8 0.0536
## 9 9E 9 0.0409
## 10 9E 10 0.0185
## # ... with 175 more rows
# Heat map of the cancellation share: one tile per carrier (x) and month (y),
# coloured by share_of_cancelled. factor(month) makes the month axis discrete.
ggplot(cancelled_flights) +
  geom_raster(
    aes(
      x = carrier,
      y = factor(month),
      fill = share_of_cancelled
    )
  )
Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.