Select first five variables

# Keep five columns of `flights`, naming each one explicitly.
select(flights, year, month, day, dep_time, sched_dep_time)
## # A tibble: 336,776 x 5
##     year month   day dep_time sched_dep_time
##    <int> <int> <int>    <int>          <int>
##  1  2013     1     1      517            515
##  2  2013     1     1      533            529
##  3  2013     1     1      542            540
##  4  2013     1     1      544            545
##  5  2013     1     1      554            600
##  6  2013     1     1      554            558
##  7  2013     1     1      555            600
##  8  2013     1     1      557            600
##  9  2013     1     1      557            600
## 10  2013     1     1      558            600
## # ... with 336,766 more rows
# Same selection, written as a range of column names: every column from
# `year` through `sched_dep_time`, in their order within `flights`.
select(flights, year:sched_dep_time)
## # A tibble: 336,776 x 5
##     year month   day dep_time sched_dep_time
##    <int> <int> <int>    <int>          <int>
##  1  2013     1     1      517            515
##  2  2013     1     1      533            529
##  3  2013     1     1      542            540
##  4  2013     1     1      544            545
##  5  2013     1     1      554            600
##  6  2013     1     1      554            558
##  7  2013     1     1      555            600
##  8  2013     1     1      557            600
##  9  2013     1     1      557            600
## 10  2013     1     1      558            600
## # ... with 336,766 more rows
## Numeric indexes work, too

# Select by column position instead of by name: positions 1 through 5.
select(flights, 1:5)
## # A tibble: 336,776 x 5
##     year month   day dep_time sched_dep_time
##    <int> <int> <int>    <int>          <int>
##  1  2013     1     1      517            515
##  2  2013     1     1      533            529
##  3  2013     1     1      542            540
##  4  2013     1     1      544            545
##  5  2013     1     1      554            600
##  6  2013     1     1      554            558
##  7  2013     1     1      555            600
##  8  2013     1     1      557            600
##  9  2013     1     1      557            600
## 10  2013     1     1      558            600
## # ... with 336,766 more rows

Exclude the date

# Drop the three date columns one by one; every other column is kept.
select(flights, -year, -month, -day)
## # A tibble: 336,776 x 16
##    dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay
##       <int>          <int>     <dbl>    <int>          <int>     <dbl>
##  1      517            515         2      830            819        11
##  2      533            529         4      850            830        20
##  3      542            540         2      923            850        33
##  4      544            545        -1     1004           1022       -18
##  5      554            600        -6      812            837       -25
##  6      554            558        -4      740            728        12
##  7      555            600        -5      913            854        19
##  8      557            600        -3      709            723       -14
##  9      557            600        -3      838            846        -8
## 10      558            600        -2      753            745         8
## # ... with 336,766 more rows, and 10 more variables: carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
# Drop the whole `year` to `day` range at once by negating the range.
select(flights, -(year:day))
## # A tibble: 336,776 x 16
##    dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay
##       <int>          <int>     <dbl>    <int>          <int>     <dbl>
##  1      517            515         2      830            819        11
##  2      533            529         4      850            830        20
##  3      542            540         2      923            850        33
##  4      544            545        -1     1004           1022       -18
##  5      554            600        -6      812            837       -25
##  6      554            558        -4      740            728        12
##  7      555            600        -5      913            854        19
##  8      557            600        -3      709            723       -14
##  9      557            600        -3      838            846        -8
## 10      558            600        -2      753            745         8
## # ... with 336,766 more rows, and 10 more variables: carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>
## Numeric indexes work, too

# Drop columns by position: negate the range covering positions 1 through 3.
select(flights, -(1:3))
## # A tibble: 336,776 x 16
##    dep_time sched_dep_time dep_delay arr_time sched_arr_time arr_delay
##       <int>          <int>     <dbl>    <int>          <int>     <dbl>
##  1      517            515         2      830            819        11
##  2      533            529         4      850            830        20
##  3      542            540         2      923            850        33
##  4      544            545        -1     1004           1022       -18
##  5      554            600        -6      812            837       -25
##  6      554            558        -4      740            728        12
##  7      555            600        -5      913            854        19
##  8      557            600        -3      709            723       -14
##  9      557            600        -3      838            846        -8
## 10      558            600        -2      753            745         8
## # ... with 336,766 more rows, and 10 more variables: carrier <chr>,
## #   flight <int>, tailnum <chr>, origin <chr>, dest <chr>, air_time <dbl>,
## #   distance <dbl>, hour <dbl>, minute <dbl>, time_hour <dttm>

Departure variables

## contains()

# Keep only the columns whose name contains the literal string "dep_".
select(flights, contains("dep_"))
## # A tibble: 336,776 x 3
##    dep_time sched_dep_time dep_delay
##       <int>          <int>     <dbl>
##  1      517            515         2
##  2      533            529         4
##  3      542            540         2
##  4      544            545        -1
##  5      554            600        -6
##  6      554            558        -4
##  7      555            600        -5
##  8      557            600        -3
##  9      557            600        -3
## 10      558            600        -2
## # ... with 336,766 more rows

Move departure variables to end

## everything()

# Reorder columns so the "dep_" columns come last: start from everything
# except the "dep_" columns, then let everything() append the remaining ones.
select(
  flights,
  -contains("dep_"),
  everything(),
  contains("dep_")
)
## # A tibble: 336,776 x 19
##     year month   day arr_time sched_arr_time arr_delay carrier flight
##    <int> <int> <int>    <int>          <int>     <dbl> <chr>    <int>
##  1  2013     1     1      830            819        11 UA        1545
##  2  2013     1     1      850            830        20 UA        1714
##  3  2013     1     1      923            850        33 AA        1141
##  4  2013     1     1     1004           1022       -18 B6         725
##  5  2013     1     1      812            837       -25 DL         461
##  6  2013     1     1      740            728        12 UA        1696
##  7  2013     1     1      913            854        19 B6         507
##  8  2013     1     1      709            723       -14 EV        5708
##  9  2013     1     1      838            846        -8 B6          79
## 10  2013     1     1      753            745         8 AA         301
## # ... with 336,766 more rows, and 11 more variables: tailnum <chr>,
## #   origin <chr>, dest <chr>, air_time <dbl>, distance <dbl>, hour <dbl>,
## #   minute <dbl>, time_hour <dttm>, dep_time <int>, sched_dep_time <int>,
## #   dep_delay <dbl>

Contour plot

## Overriding label names

# Contour plot of arr_time against dep_time, with axis titles overridden on
# the scales rather than in the data.
flights %>%
  ggplot() +
  geom_density2d(aes(dep_time, arr_time)) +
  scale_x_continuous(name = "Departure time") +
  # The y axis needs its own scale: a second scale_x_continuous() would
  # replace the x scale above and leave the y axis with its default label.
  scale_y_continuous(name = "Arrival time")
## Warning: Removed 8713 rows containing non-finite values (stat_density2d).

## Renaming in the data

# Rename both plotted columns in a single rename() call so the axis titles
# are taken directly from the (non-syntactic, backtick-quoted) column names.
flights %>%
  rename(`Departure time` = dep_time, `Arrival time` = arr_time) %>%
  ggplot(aes(`Departure time`, `Arrival time`)) +
  geom_density2d()
## Warning: Removed 8713 rows containing non-finite values (stat_density2d).

# Same contour plot, restricted to rows where arr_time is below 500.
# The filter runs before the rename, so it still uses the original name.
flights %>%
  filter(arr_time < 500) %>%
  rename(`Departure time` = dep_time, `Arrival time` = arr_time) %>%
  ggplot(aes(`Departure time`, `Arrival time`)) +
  geom_density2d()

## Use coord_fixed() to fix the aspect ratio between the axes
# Filtered contour plot drawn with coord_fixed(), so one unit on the x axis
# is the same length as one unit on the y axis.
flights %>%
  filter(arr_time < 500) %>%
  rename(`Departure time` = dep_time, `Arrival time` = arr_time) %>%
  ggplot(aes(`Departure time`, `Arrival time`)) +
  geom_density2d() +
  coord_fixed()

Copyright © 2018 Kirill Müller. Licensed under CC BY-NC 4.0.