R/plot_date_variable_missingness.R
plot_date_variable_missingness.Rd
Plot Missingness for a Date Variable
plot_date_variable_missingness(df = NULL, var = NULL, start_year = NULL, end_year = NULL, split_year = NULL, facet_by_year = TRUE)
df | A dataframe containing cleaned ETS data, including the date column named by `var`. |
---|---|
var | A character string indicating the name of the date variable to explore missingness within. |
start_year | Numeric, indicating which year to start including data from (inclusive). Defaults to the earliest year found in `var`.
end_year | Numeric, indicating which year to stop including data from (inclusive). Defaults to the latest year found in `var`.
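split_year | The year to use as a splitting point when faceting: years before `split_year` go in the first panel, and `split_year` onwards in the second. By default uses the mean of the years present in the data, rounded down. |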
facet_by_year | Logical, defaults to `TRUE`. Should the plots be faceted into the two year ranges defined by `split_year`? |
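A named list of two plots for exploring missingness: `by_month` (each month's share of annual notifications) and `by_day` (each day of the month's share of monthly notifications).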
## Code plot_date_variable_missingness#> function (df = NULL, var = NULL, start_year = NULL, end_year = NULL, #> split_year = NULL, facet_by_year = TRUE) #> { #> year_facet <- NULL #> notifications <- NULL #> year_strat <- NULL #> nn <- NULL #> df$date <- df[[var]] #> if (is.null(start_year)) { #> start_year <- df$date %>% year %>% min(na.rm = TRUE) #> } #> if (is.null(end_year)) { #> end_year <- df$date %>% year %>% max(na.rm = TRUE) #> } #> df_count <- df %>% filter(year(date) >= start_year, year(date) <= #> end_year) %>% drop_na(date) %>% count(date, .drop = FALSE, #> name = "notifications") %>% mutate(notifications = notifications %>% #> replace_na(0)) #> years_of_data <- df_count$date %>% year() %>% unique() %>% #> as.numeric() #> if (is.null(split_year)) { #> split_year <- years_of_data %>% mean(na.rm = TRUE) %>% #> floor #> } #> df_count <- df_count %>% dplyr::mutate(year_strat = cut(year(date) %>% #> as.integer, breaks = c(min(years_of_data) - 1, split_year, #> max(years_of_data) + 1), labels = c(paste0(min(years_of_data), #> "-", split_year - 1), paste0(split_year, "-", max(years_of_data))), #> right = FALSE)) #> if (facet_by_year) { #> df_count <- df_count %>% group_by(year_strat) #> } #> month_plot <- df_count %>% mutate(date = floor_date(date, #> "month")) %>% count(date, wt = notifications) %>% add_count(year(date), #> wt = n, name = "nn") %>% mutate(n = n/nn) %>% mutate(month = month(date, #> label = TRUE)) %>% ggplot(aes(x = month, y = n)) + geom_violin(draw_quantiles = c(0.25, #> 0.5, 0.75)) + geom_jitter(alpha = 0.2) + scale_y_sqrt(labels = percent) + #> theme_minimal() + labs(x = "Month", y = "Percentage of annual notifications (sqrt)") #> day_plot <- df_count %>% mutate(date = floor_date(date, "day")) %>% #> count(date, wt = notifications) %>% add_count(floor_date(date, #> "month"), wt = n, name = "nn") %>% mutate(n = n/nn) %>% #> mutate(mday = mday(date)) %>% ggplot(aes(x = mday, y = n, #> group = mday)) + geom_violin(draw_quantiles = c(0.25, #> 
0.5, 0.75)) + geom_jitter(alpha = 0.05) + scale_y_sqrt(labels = percent) + #> scale_x_continuous(minor_breaks = NULL, breaks = seq(1, #> 31, 2)) + theme_minimal() + labs(x = "Day of the month", #> y = "Percentage of monthly notifications (sqrt)") #> plots <- list(month_plot, day_plot) #> if (facet_by_year) { #> plots <- plots %>% map(~. + facet_wrap(~year_strat, scales = "free_y")) #> } #> names(plots) <- c("by_month", "by_day") #> return(plots) #> } #> <bytecode: 0x55630a8840a0> #> <environment: namespace:ETSMissing>