Distinct airlines per relation (origin–destination pair)

# Reduce the data to one row per (origin, dest, carrier) combination, then
# count the rows left for each (origin, dest) pair: that count is the number
# of distinct carriers serving the pair.
flights %>%
  distinct(origin, dest, carrier) %>%
  group_by(origin, dest) %>%
  summarise(n_distinct_carriers = n()) %>%
  # summarise() only drops the innermost grouping variable, so remove the
  # remaining grouping explicitly before sorting.
  ungroup() %>%
  arrange(desc(n_distinct_carriers))
## # A tibble: 224 x 3
##    origin dest  n_distinct_carriers
##    <chr>  <chr>               <int>
##  1 EWR    DTW                     5
##  2 EWR    MSP                     5
##  3 JFK    LAX                     5
##  4 JFK    SFO                     5
##  5 JFK    TPA                     5
##  6 LGA    ATL                     5
##  7 LGA    CLE                     5
##  8 LGA    CLT                     5
##  9 EWR    ATL                     4
## 10 JFK    AUS                     4
## # ... with 214 more rows

Much shorter:

# The first count() yields one row per (origin, dest, carrier) combination.
# The second count() counts those rows per (origin, dest) pair; because a
# column named `n` already exists, the new count is stored in `nn`.
# count() on ungrouped input returns ungrouped output, so no ungroup() is
# needed, and sort = TRUE orders the result by descending count.
flights %>%
  count(origin, dest, carrier) %>%
  count(origin, dest, sort = TRUE)
## # A tibble: 224 x 3
##    origin dest     nn
##    <chr>  <chr> <int>
##  1 EWR    DTW       5
##  2 EWR    MSP       5
##  3 JFK    LAX       5
##  4 JFK    SFO       5
##  5 JFK    TPA       5
##  6 LGA    ATL       5
##  7 LGA    CLE       5
##  8 LGA    CLT       5
##  9 EWR    ATL       4
## 10 JFK    AUS       4
## # ... with 214 more rows

Alternatively:

# Same computation spelled out with explicit grouping and tally().
flights %>%
  group_by(origin, dest, carrier) %>%
  # One row per (origin, dest, carrier); the result stays grouped by
  # (origin, dest).
  tally() %>%
  # wt = NULL requests a plain row count instead of summing a weight column;
  # sort = TRUE orders the result by descending count.
  tally(wt = NULL, sort = TRUE) %>%
  # The result is still grouped by origin at this point.
  ungroup()
## # A tibble: 224 x 3
##    origin dest     nn
##    <chr>  <chr> <int>
##  1 EWR    DTW       5
##  2 EWR    MSP       5
##  3 JFK    LAX       5
##  4 JFK    SFO       5
##  5 JFK    TPA       5
##  6 LGA    ATL       5
##  7 LGA    CLE       5
##  8 LGA    CLT       5
##  9 EWR    ATL       4
## 10 JFK    AUS       4
## # ... with 214 more rows

Share of cancelled flights per month per airline

# Share of flights per carrier and month that have no recorded departure
# time. A missing `dep_time` is used here as the marker for a cancelled
# flight.
cancelled_flights <-
  flights %>%
  mutate(is_cancelled = is.na(dep_time)) %>%
  group_by(carrier, month) %>%
  # The mean of a logical vector is the proportion of TRUE values.
  summarise(share_of_cancelled = mean(is_cancelled)) %>%
  ungroup()

cancelled_flights
## # A tibble: 185 x 3
##    carrier month share_of_cancelled
##    <chr>   <int>              <dbl>
##  1 9E          1             0.0477
##  2 9E          2             0.0727
##  3 9E          3             0.0695
##  4 9E          4             0.0688
##  5 9E          5             0.0506
##  6 9E          6             0.112 
##  7 9E          7             0.0870
##  8 9E          8             0.0536
##  9 9E          9             0.0409
## 10 9E         10             0.0185
## # ... with 175 more rows
# Heat map of the cancellation share: one cell per carrier (x) and month (y).
# `month` is wrapped in factor() so that it is treated as a discrete axis.
ggplot(
  cancelled_flights,
  aes(x = carrier, y = factor(month), fill = share_of_cancelled)
) +
  geom_raster()

Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.