Busiest month

# Distance flown per carrier and month, alongside the carrier's overall
# distance and the fraction of it that each month contributes.
flights %>%
  group_by(carrier, month) %>%
  summarise(distance = sum(distance)) %>%
  # summarise() peels off the innermost grouping (month), so the data is still
  # grouped by carrier here and sum() yields one total per carrier.
  mutate(
    total_distance = sum(distance),
    month_share = distance / total_distance
  ) %>%
  ungroup() %>%
  # Carriers with the largest overall distance come first.
  arrange(desc(total_distance))
## # A tibble: 185 x 5
##    carrier month distance total_distance month_share
##    <chr>   <int>    <dbl>          <dbl>       <dbl>
##  1 UA          1  6777189       89705524      0.0755
##  2 UA          2  6239683       89705524      0.0696
##  3 UA          3  7235740       89705524      0.0807
##  4 UA          4  7580735       89705524      0.0845
##  5 UA          5  7714391       89705524      0.0860
##  6 UA          6  7833622       89705524      0.0873
##  7 UA          7  8008887       89705524      0.0893
##  8 UA          8  8162260       89705524      0.0910
##  9 UA          9  7360730       89705524      0.0821
## 10 UA         10  7734657       89705524      0.0862
## # ... with 175 more rows

Ground time

# Ground time per aircraft: minutes between one flight's arrival and the next
# departure of the same tail number. "Previous flight" is whatever row precedes
# the current one within the tail number, so the result relies on the row
# order of `flights`.
#
# NOTE(review): the printed result that follows in this document was captured
# with the earlier conversion formula (hours + minutes * 60); its row count
# and values may no longer match once this pipeline is re-run.
flights %>%
  # dep_time / arr_time are clock readings stored as HHMM integers. Convert
  # them to minutes since midnight: hours * 60 + minutes.
  mutate(
    dep_min = (dep_time %/% 100) * 60 + dep_time %% 100,
    arr_min = (arr_time %/% 100) * 60 + arr_time %% 100
  ) %>%
  group_by(tailnum) %>%
  # lag() is NA for the first row of each tail number, so ground_min is NA
  # there as well.
  mutate(ground_min = dep_min - lag(arr_min)) %>%
  ungroup() %>%
  # Drops negative gaps and, because filter() discards NA conditions, also the
  # rows without a previous flight or with missing times.
  filter(ground_min >= 0) %>%
  group_by(carrier, tailnum) %>%
  mutate(total_ground_min = sum(ground_min, na.rm = TRUE)) %>%
  ungroup()
## # A tibble: 170,997 x 23
##     year month   day dep_time sched_dep_time dep_delay arr_time
##    <int> <int> <int>    <int>          <int>     <dbl>    <int>
##  1  2013     1     1     1154           1200        -6     1253
##  2  2013     1     1     1155           1200        -5     1312
##  3  2013     1     1     1246           1225        21     1424
##  4  2013     1     1     1255           1200        55     1451
##  5  2013     1     1     1257           1258        -1     1601
##  6  2013     1     1     1257           1300        -3     1454
##  7  2013     1     1     1339           1345        -6     1642
##  8  2013     1     1     1342           1320        22     1617
##  9  2013     1     1     1350           1329        21     1504
## 10  2013     1     1     1355           1356        -1     1646
## # ... with 170,987 more rows, and 16 more variables: sched_arr_time <int>,
## #   arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, dep_min <dbl>, arr_min <dbl>,
## #   ground_min <dbl>, total_ground_min <dbl>

Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.