Plot Missingness for a Date Variable

plot_date_variable_missingness(df = NULL, var = NULL,
  start_year = NULL, end_year = NULL, split_year = NULL,
  facet_by_year = TRUE)



A dataframe containing cleaned ETS data as produced by tbinenglanddataclean.


A character string indicating the name of the date variable to explore missingness within.


Numeric, indicating which year to start including data from (inclusive)


Numeric, indicating the last year to include data from (inclusive)


The year to use as a splitting point when faceting. By default uses the mean of the years present in the data, rounded down.


Logical, defaults to TRUE. Should the plots be faceted into two year ranges, divided at the splitting year?


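A named list of two plots for exploring missingness: `by_month` (share of annual notifications by month) and `by_day` (share of monthly notifications by day of the month).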


## Code plot_date_variable_missingness
#' Plot Missingness for a Date Variable
#'
#' Counts notifications per date for the chosen date variable, then plots how
#' those counts are distributed across the months of each year and across the
#' days of each month.
#'
#' @param df A dataframe containing cleaned ETS data as produced by
#'   tbinenglanddataclean.
#' @param var A character string giving the name of the date variable in `df`
#'   to explore missingness within.
#' @param start_year Numeric, the first year to include (inclusive). Defaults
#'   to the earliest year found in `var`.
#' @param end_year Numeric, the last year to include (inclusive). Defaults to
#'   the latest year found in `var`.
#' @param split_year Numeric, the year at which the data are split into two
#'   strata for faceting; the second stratum starts at `split_year`. Defaults
#'   to the mean of the years present in the filtered data, rounded down. The
#'   value is not checked against the range of years in the data.
#' @param facet_by_year Logical, defaults to `TRUE`. Should the plots be
#'   faceted by the two year strata?
#'
#' @return A named list of two ggplot objects: `by_month` and `by_day`.
plot_date_variable_missingness <- function(df = NULL, var = NULL,
                                           start_year = NULL,
                                           end_year = NULL,
                                           split_year = NULL,
                                           facet_by_year = TRUE) {
  # Local bindings for column names used via non-standard evaluation
  # (presumably to silence undefined-global notes during package checks).
  notifications <- NULL
  year_strat <- NULL
  nn <- NULL

  # Fail early with a clear message instead of an obscure error further down.
  if (!is.data.frame(df)) {
    stop("`df` must be a data frame.", call. = FALSE)
  }
  if (!is.character(var) || length(var) != 1 || !var %in% names(df)) {
    stop("`var` must be the name of a single column in `df`.", call. = FALSE)
  }

  # Work on a fixed column name so the pipelines below need no tidy-eval.
  df$date <- df[[var]]

  if (is.null(start_year)) {
    start_year <- df$date %>% year() %>% min(na.rm = TRUE)
  }
  if (is.null(end_year)) {
    end_year <- df$date %>% year() %>% max(na.rm = TRUE)
  }

  # Notifications per date; both year bounds are inclusive.
  df_count <- df %>%
    filter(year(date) >= start_year, year(date) <= end_year) %>%
    drop_na(date) %>%
    count(date, .drop = FALSE, name = "notifications") %>%
    mutate(notifications = replace_na(notifications, 0))

  years_of_data <- df_count$date %>%
    year() %>%
    unique() %>%
    as.numeric()

  if (is.null(split_year)) {
    split_year <- years_of_data %>% mean(na.rm = TRUE) %>% floor()
  }

  # Two left-closed strata: [first year, split_year) and
  # [split_year, last year].
  df_count <- df_count %>%
    dplyr::mutate(
      year_strat = cut(
        year(date) %>% as.integer(),
        breaks = c(
          min(years_of_data) - 1,
          split_year,
          max(years_of_data) + 1
        ),
        labels = c(
          paste0(min(years_of_data), "-", split_year - 1),
          paste0(split_year, "-", max(years_of_data))
        ),
        right = FALSE
      )
    )

  # Grouping keeps `year_strat` in the aggregated data so that it is still
  # available to facet_wrap() below.
  if (facet_by_year) {
    df_count <- df_count %>% group_by(year_strat)
  }

  # Each point is one calendar month, expressed as a share of that year's
  # notifications.
  month_plot <- df_count %>%
    mutate(date = floor_date(date, "month")) %>%
    count(date, wt = notifications) %>%
    add_count(year(date), wt = n, name = "nn") %>%
    mutate(n = n / nn) %>%
    mutate(month = month(date, label = TRUE)) %>%
    ggplot(aes(x = month, y = n)) +
    geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
    geom_jitter(alpha = 0.2) +
    scale_y_sqrt(labels = percent) +
    theme_minimal() +
    labs(x = "Month", y = "Percentage of annual notifications (sqrt)")

  # Each point is one calendar day, expressed as a share of that month's
  # notifications.
  day_plot <- df_count %>%
    mutate(date = floor_date(date, "day")) %>%
    count(date, wt = notifications) %>%
    add_count(floor_date(date, "month"), wt = n, name = "nn") %>%
    mutate(n = n / nn) %>%
    mutate(mday = mday(date)) %>%
    ggplot(aes(x = mday, y = n, group = mday)) +
    geom_violin(draw_quantiles = c(0.25, 0.5, 0.75)) +
    geom_jitter(alpha = 0.05) +
    scale_y_sqrt(labels = percent) +
    scale_x_continuous(minor_breaks = NULL, breaks = seq(1, 31, 2)) +
    theme_minimal() +
    labs(
      x = "Day of the month",
      y = "Percentage of monthly notifications (sqrt)"
    )

  plots <- list(by_month = month_plot, by_day = day_plot)

  if (facet_by_year) {
    plots <- plots %>%
      map(~ . + facet_wrap(~year_strat, scales = "free_y"))
  }

  plots
}