sharlagelfand · February 25, 2020 20:09
diff --git a/complete() without fill b/complete() without fill
 library(tidyr)

 # Example sales data: there is no Banana row for 2020-02-02.
 df <- tibble(
   Date = c("2020-02-01", "2020-02-01", "2020-02-02"),
   Fruit = c("Apple", "Banana", "Apple"),
   Sold = c(5, 1, 2)
 )

 # Expand to every Date x Fruit combination; the added row has Sold = NA.
 df_complete <- complete(df, Date, Fruit)

 df_complete
 #> # A tibble: 4 x 3
 #>   Date       Fruit   Sold
 #>   <chr>      <chr>  <dbl>
 #> 1 2020-02-01 Apple      5
 #> 2 2020-02-01 Banana     1
 #> 3 2020-02-02 Apple      2
 #> 4 2020-02-02 Banana    NA

 # Without `fill`, the NA has to be replaced in a separate step.
 df_complete$Sold[is.na(df_complete$Sold)] <- 0

 df_complete
 #> # A tibble: 4 x 3
 #>   Date       Fruit   Sold
 #>   <chr>      <chr>  <dbl>
 #> 1 2020-02-01 Apple      5
 #> 2 2020-02-01 Banana     1
 #> 3 2020-02-02 Apple      2
 #> 4 2020-02-02 Banana     0

 # With more value columns the manual approach gets cumbersome:
 # each column needs its own NA replacement.

 df_more <- tibble(
   Date = c("2020-02-01", "2020-02-01", "2020-02-02"),
   Fruit = c("Apple", "Banana", "Apple"),
   Sold = c(5, 1, 2),
   Ate = c(1, 2, 1),
   Garbage = c(4, 1, 1)
 )

 df_more_complete <- complete(df_more, Date, Fruit)

 # One replacement per value column.
 df_more_complete$Sold[is.na(df_more_complete$Sold)] <- 0
 df_more_complete$Ate[is.na(df_more_complete$Ate)] <- 0
 df_more_complete$Garbage[is.na(df_more_complete$Garbage)] <- 0

 df_more_complete
 #> # A tibble: 4 x 5
 #>   Date       Fruit   Sold   Ate Garbage
 #>   <chr>      <chr>  <dbl> <dbl>   <dbl>
 #> 1 2020-02-01 Apple      5     1       4
 #> 2 2020-02-01 Banana     1     2       1
 #> 3 2020-02-02 Apple      2     1       1
 #> 4 2020-02-02 Banana     0     0       0

 # Alternatively, let complete() do the replacement itself via `fill`.

 df_more_complete <- complete(
   df_more, Date, Fruit,
   fill = list(Sold = 0, Ate = 0, Garbage = 0)
 )

 df_more_complete
 #> # A tibble: 4 x 5
 #>   Date       Fruit   Sold   Ate Garbage
 #>   <chr>      <chr>  <dbl> <dbl>   <dbl>
 #> 1 2020-02-01 Apple      5     1       4
 #> 2 2020-02-01 Banana     1     2       1
 #> 3 2020-02-02 Apple      2     1       1
 #> 4 2020-02-02 Banana     0     0       0

 # Using `fill` also keeps everything in a single, uninterrupted pipeline.
 library(dplyr, warn.conflicts = FALSE)

 df %>%
   complete(Date, Fruit, fill = list(Sold = 0)) %>%
   group_by(Date) %>%
   summarise(Total = sum(Sold))
 #> # A tibble: 2 x 2
 #>   Date       Total
 #>   <chr>      <dbl>
 #> 1 2020-02-01     6
 #> 2 2020-02-02     2
           
 # With the is.na() approach, the pipeline has to be interrupted
 # to replace the NA values before summarising:

 df_na_filled <- complete(df, Date, Fruit)

 df_na_filled$Sold[is.na(df_na_filled$Sold)] <- 0

 df_na_filled %>%
   group_by(Date) %>%
   summarise(Total = sum(Sold))
 #> # A tibble: 2 x 2
 #>   Date       Total
 #>   <chr>      <dbl>
 #> 1 2020-02-01     6
 #> 2 2020-02-02     2
	library(tidyr)

	# Example sales data: there is no Banana row for 2020-02-02.
	df <- tibble(
	  Date = c("2020-02-01", "2020-02-01", "2020-02-02"),
	  Fruit = c("Apple", "Banana", "Apple"),
	  Sold = c(5, 1, 2)
	)

	# Expand to every Date x Fruit combination; the added row has Sold = NA.
	df_complete <- complete(df, Date, Fruit)

	df_complete
	#> # A tibble: 4 x 3
	#>   Date       Fruit   Sold
	#>   <chr>      <chr>  <dbl>
	#> 1 2020-02-01 Apple      5
	#> 2 2020-02-01 Banana     1
	#> 3 2020-02-02 Apple      2
	#> 4 2020-02-02 Banana    NA

	# Without `fill`, the NA has to be replaced in a separate step.
	df_complete$Sold[is.na(df_complete$Sold)] <- 0

	df_complete
	#> # A tibble: 4 x 3
	#>   Date       Fruit   Sold
	#>   <chr>      <chr>  <dbl>
	#> 1 2020-02-01 Apple      5
	#> 2 2020-02-01 Banana     1
	#> 3 2020-02-02 Apple      2
	#> 4 2020-02-02 Banana     0

	# With more value columns the manual approach gets cumbersome:
	# each column needs its own NA replacement.

	df_more <- tibble(
	  Date = c("2020-02-01", "2020-02-01", "2020-02-02"),
	  Fruit = c("Apple", "Banana", "Apple"),
	  Sold = c(5, 1, 2),
	  Ate = c(1, 2, 1),
	  Garbage = c(4, 1, 1)
	)

	df_more_complete <- complete(df_more, Date, Fruit)

	# One replacement per value column.
	df_more_complete$Sold[is.na(df_more_complete$Sold)] <- 0
	df_more_complete$Ate[is.na(df_more_complete$Ate)] <- 0
	df_more_complete$Garbage[is.na(df_more_complete$Garbage)] <- 0

	df_more_complete
	#> # A tibble: 4 x 5
	#>   Date       Fruit   Sold   Ate Garbage
	#>   <chr>      <chr>  <dbl> <dbl>   <dbl>
	#> 1 2020-02-01 Apple      5     1       4
	#> 2 2020-02-01 Banana     1     2       1
	#> 3 2020-02-02 Apple      2     1       1
	#> 4 2020-02-02 Banana     0     0       0

	# Alternatively, let complete() do the replacement itself via `fill`.

	df_more_complete <- complete(
	  df_more, Date, Fruit,
	  fill = list(Sold = 0, Ate = 0, Garbage = 0)
	)

	df_more_complete
	#> # A tibble: 4 x 5
	#>   Date       Fruit   Sold   Ate Garbage
	#>   <chr>      <chr>  <dbl> <dbl>   <dbl>
	#> 1 2020-02-01 Apple      5     1       4
	#> 2 2020-02-01 Banana     1     2       1
	#> 3 2020-02-02 Apple      2     1       1
	#> 4 2020-02-02 Banana     0     0       0

	# Using `fill` also keeps everything in a single, uninterrupted pipeline.
	library(dplyr, warn.conflicts = FALSE)

	df %>%
	  complete(Date, Fruit, fill = list(Sold = 0)) %>%
	  group_by(Date) %>%
	  summarise(Total = sum(Sold))
	#> # A tibble: 2 x 2
	#>   Date       Total
	#>   <chr>      <dbl>
	#> 1 2020-02-01     6
	#> 2 2020-02-02     2

	# With the is.na() approach, the pipeline has to be interrupted
	# to replace the NA values before summarising:

	df_na_filled <- complete(df, Date, Fruit)

	df_na_filled$Sold[is.na(df_na_filled$Sold)] <- 0

	df_na_filled %>%
	  group_by(Date) %>%
	  summarise(Total = sum(Sold))
	#> # A tibble: 2 x 2
	#>   Date       Total
	#>   <chr>      <dbl>
	#> 1 2020-02-01     6
	#> 2 2020-02-02     2