# Distance flown per carrier and month, together with the carrier's overall
# distance and the fraction of it that each month contributes.
flights %>%
  group_by(carrier, month) %>%
  # summarise() removes the innermost grouping (month); the result is still
  # grouped by carrier, which the next step relies on.
  summarise(distance = sum(distance)) %>%
  # Evaluated per carrier: the total is a group-wide sum, the share is a
  # row-wise ratio against that total.
  mutate(
    total_distance = sum(distance),
    month_share = distance / total_distance
  ) %>%
  ungroup() %>%
  # Carriers with the largest overall distance come first.
  arrange(desc(total_distance))
## # A tibble: 185 x 5
## carrier month distance total_distance month_share
## <chr> <int> <dbl> <dbl> <dbl>
## 1 UA 1 6777189 89705524 0.0755
## 2 UA 2 6239683 89705524 0.0696
## 3 UA 3 7235740 89705524 0.0807
## 4 UA 4 7580735 89705524 0.0845
## 5 UA 5 7714391 89705524 0.0860
## 6 UA 6 7833622 89705524 0.0873
## 7 UA 7 8008887 89705524 0.0893
## 8 UA 8 8162260 89705524 0.0910
## 9 UA 9 7360730 89705524 0.0821
## 10 UA 10 7734657 89705524 0.0862
## # ... with 175 more rows
# Ground time per aircraft: minutes between the arrival of one flight and the
# departure of the next flight recorded for the same tail number, plus the
# per-aircraft total of those gaps.
#
# dep_time / arr_time are clock times encoded as HHMM integers (e.g. 1154 is
# 11:54), so minutes since midnight are hours * 60 + minutes. The earlier
# version computed hours + minutes * 60, which inflated every value.
# NOTE(review): any printed result that follows this pipeline was likely
# produced with the earlier formula -- re-run to refresh it.
flights %>%
  mutate(
    dep_min = dep_time %/% 100 * 60 + dep_time %% 100,
    arr_min = arr_time %/% 100 * 60 + arr_time %% 100
  ) %>%
  group_by(tailnum) %>%
  # lag() looks at the previous row within the same tailnum, so this depends
  # on the row order of `flights`.
  # NOTE(review): only clock minutes are compared; gaps that span midnight or
  # several days are not handled -- confirm this is acceptable.
  mutate(ground_min = dep_min - lag(arr_min)) %>%
  ungroup() %>%
  # Drops negative gaps and rows where the gap is NA (first flight of each
  # aircraft, missing times).
  filter(ground_min >= 0) %>%
  group_by(carrier, tailnum) %>%
  mutate(total_ground_min = sum(ground_min, na.rm = TRUE)) %>%
  ungroup()
## # A tibble: 170,997 x 23
## year month day dep_time sched_dep_time dep_delay arr_time
## <int> <int> <int> <int> <int> <dbl> <int>
## 1 2013 1 1 1154 1200 -6 1253
## 2 2013 1 1 1155 1200 -5 1312
## 3 2013 1 1 1246 1225 21 1424
## 4 2013 1 1 1255 1200 55 1451
## 5 2013 1 1 1257 1258 -1 1601
## 6 2013 1 1 1257 1300 -3 1454
## 7 2013 1 1 1339 1345 -6 1642
## 8 2013 1 1 1342 1320 22 1617
## 9 2013 1 1 1350 1329 21 1504
## 10 2013 1 1 1355 1356 -1 1646
## # ... with 170,987 more rows, and 16 more variables: sched_arr_time <int>,
## # arr_delay <dbl>, carrier <chr>, flight <int>, tailnum <chr>,
## # origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## # minute <dbl>, time_hour <dttm>, dep_min <dbl>, arr_min <dbl>,
## # ground_min <dbl>, total_ground_min <dbl>
Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.