Last active
February 25, 2020 20:09
-
-
Save sharlagelfand/d6b1af70675c9d3ae07276c0612b1167 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Basic example: make implicit missing Date/Fruit combinations explicit,
# then replace the resulting NA values with 0 by hand.
library(tidyr)

df <- tribble(
  ~Date,        ~Fruit,   ~Sold,
  "2020-02-01", "Apple",  5,
  "2020-02-01", "Banana", 1,
  "2020-02-02", "Apple",  2
)

# complete() adds a row for each Date/Fruit combination that is absent from
# df; Sold is NA in the added row (see row 4 below).
df_complete <- df %>%
  complete(Date, Fruit)

df_complete
#> # A tibble: 4 x 3
#>   Date       Fruit   Sold
#>   <chr>      <chr>  <dbl>
#> 1 2020-02-01 Apple      5
#> 2 2020-02-01 Banana     1
#> 3 2020-02-02 Apple      2
#> 4 2020-02-02 Banana    NA

# Manual zero-fill: overwrite Sold with 0 in the rows where it is NA.
df_complete[is.na(df_complete$Sold), "Sold"] <- 0

df_complete
#> # A tibble: 4 x 3
#>   Date       Fruit   Sold
#>   <chr>      <chr>  <dbl>
#> 1 2020-02-01 Apple      5
#> 2 2020-02-01 Banana     1
#> 3 2020-02-02 Apple      2
#> 4 2020-02-02 Banana     0
# What if there are more columns to complete? Gets cumbersome.
df_more <- tribble(
  ~Date,        ~Fruit,   ~Sold, ~Ate, ~Garbage,
  "2020-02-01", "Apple",  5,     1,    4,
  "2020-02-01", "Banana", 1,     2,    1,
  "2020-02-02", "Apple",  2,     1,    1
)

df_more_complete <- df_more %>%
  complete(Date, Fruit)

# One manual zero-fill statement per value column -- this repetition is the
# "cumbersome" part being illustrated.
df_more_complete[is.na(df_more_complete$Sold), "Sold"] <- 0
df_more_complete[is.na(df_more_complete$Ate), "Ate"] <- 0
df_more_complete[is.na(df_more_complete$Garbage), "Garbage"] <- 0

df_more_complete
#> # A tibble: 4 x 5
#>   Date       Fruit   Sold   Ate Garbage
#>   <chr>      <chr>  <dbl> <dbl>   <dbl>
#> 1 2020-02-01 Apple      5     1       4
#> 2 2020-02-01 Banana     1     2       1
#> 3 2020-02-02 Apple      2     1       1
#> 4 2020-02-02 Banana     0     0       0
# Or you could just do that all within complete()
# `fill` takes a named list giving, per column, the value to use in the rows
# that complete() adds, so no separate is.na() replacement step is needed.
df_more_complete <- df_more %>%
  complete(Date, Fruit, fill = list(Sold = 0, Ate = 0, Garbage = 0))

df_more_complete
#> # A tibble: 4 x 5
#>   Date       Fruit   Sold   Ate Garbage
#>   <chr>      <chr>  <dbl> <dbl>   <dbl>
#> 1 2020-02-01 Apple      5     1       4
#> 2 2020-02-01 Banana     1     2       1
#> 3 2020-02-02 Apple      2     1       1
#> 4 2020-02-02 Banana     0     0       0
# What if you want to do a pipeline?
library(dplyr, warn.conflicts = FALSE)

# Because the zero-fill happens inside complete(), completing, grouping and
# summarising all stay in a single uninterrupted pipe.
df %>%
  complete(Date, Fruit, fill = list(Sold = 0)) %>%
  group_by(Date) %>%
  summarise(Total = sum(Sold))
#> # A tibble: 2 x 2
#>   Date       Total
#>   <chr>      <dbl>
#> 1 2020-02-01     6
#> 2 2020-02-02     2
# If you fill with is.na() instead, the pipeline has to be broken up:
# the completed data must be saved to an intermediate object, modified in
# place, and only then piped onward.
df_na_filled <- df %>%
  complete(Date, Fruit)

df_na_filled[is.na(df_na_filled$Sold), "Sold"] <- 0

df_na_filled %>%
  group_by(Date) %>%
  summarise(Total = sum(Sold))
#> # A tibble: 2 x 2
#>   Date       Total
#>   <chr>      <dbl>
#> 1 2020-02-01     6
#> 2 2020-02-02     2
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment