This function models missingness within a binary variable using logistic regression, adjusted for a specified list of confounders (only categorical variables are supported). It returns a structured output that can be directly tabulated. P values from Wald and likelihood ratio tests are also returned. Odds ratios are computed and presented with their 95% confidence intervals.
model_variable_missingness(df = NULL, var = NULL, confounders = NULL, confounder_names = NULL, conf_int_sep = ", ")
df | A dataframe containing cleaned ETS data as produced by |
---|---|
var | A character string indicating the name of the variable to explore missingness within. |
confounders | A character vector containing the programmatic names of variables to use as confounders. All variables must be categorical. |
confounder_names | A character vector containing presentation names of variables. If not supplied, defaults to `confounders`. |
conf_int_sep | A character string indicating the separator to use for confidence intervals |
A dataframe containing data and model estimates for the specified variable and confounders.
## Code
# Source of model_variable_missingness (namespace: ETSMissing).

#' Model missingness within a variable using logistic regression
#'
#' Recodes `var` as a two-level factor ("Complete"/"Missing"), fits a
#' binomial GLM of that indicator on all supplied confounders, and returns a
#' table combining per-category missingness counts with odds ratios, Wald
#' p-values and per-confounder likelihood ratio test p-values.
#'
#' @param df A dataframe containing `var` and all `confounders` as columns.
#' @param var A character string giving the name of the variable to explore
#'   missingness within.
#' @param confounders A character vector of column names to adjust for. The
#'   columns need to be factors: the per-category summary renames factor
#'   columns to `Category`, and the join to model terms relies on
#'   `paste0(<confounder>, <level>)` matching the GLM term names.
#' @param confounder_names A character vector of presentation names, one per
#'   confounder. Defaults to `confounders`.
#' @param conf_int_sep A character string passed to `pretty_ci()` as the
#'   separator for confidence intervals.
#'
#' @return A dataframe with columns `Variable`, `Category`, `Missing (N)`,
#'   `Notifications (<rows modelled>)`, `Odds Ratio`, `P value (Wald)` and
#'   `P value (LRT)`. P-value columns are character, with `""` where no value
#'   applies; categories without a model term are labelled "Reference". Only
#'   categories with at least one missing observation appear.
model_variable_missingness <- function(df = NULL, var = NULL,
                                       confounders = NULL,
                                       confounder_names = NULL,
                                       conf_int_sep = ", ") {
  # Bind names used through non-standard evaluation to local NULLs so that
  # static checks do not report them as undefined globals.
  estimate <- NULL
  conf.low <- NULL
  conf.high <- NULL
  p.value <- NULL
  std.error <- NULL
  statistic <- NULL
  key <- NULL
  `Odds Ratio` <- NULL
  Variable <- NULL
  dummy <- NULL
  `P value (LRT)` <- NULL
  `P value (Wald)` <- NULL

  # Fail early with a clear message instead of an obscure error from deep
  # inside the pipeline (all arguments default to NULL).
  if (!is.data.frame(df)) {
    stop("`df` must be a dataframe.", call. = FALSE)
  }
  if (!is.character(var) || length(var) != 1 || is.na(var)) {
    stop("`var` must be a single character string.", call. = FALSE)
  }
  if (!is.character(confounders) || length(confounders) == 0) {
    stop("`confounders` must be a non-empty character vector.",
         call. = FALSE)
  }
  absent <- setdiff(c(var, confounders), colnames(df))
  if (length(absent) > 0) {
    stop("Columns not found in `df`: ", paste(absent, collapse = ", "),
         call. = FALSE)
  }

  if (is.null(confounder_names)) {
    confounder_names <- confounders
  }
  if (length(confounder_names) != length(confounders)) {
    stop("`confounder_names` must have one entry per confounder.",
         call. = FALSE)
  }

  # Keep only the modelled columns, recode the outcome as a missingness
  # indicator, then drop rows with missing confounders (the outcome itself
  # can no longer be NA at this point).
  df <- df %>%
    select_at(.vars = c(var, confounders)) %>%
    mutate_at(
      .vars = var,
      ~ case_when(is.na(.) ~ "Missing", TRUE ~ "Complete") %>%
        factor(levels = c("Complete", "Missing"))
    ) %>%
    drop_na()

  # "Complete" is the first factor level, so the model estimates the odds of
  # the outcome being "Missing".
  model <- glm(
    as.formula(paste0(var, " ~ .")),
    data = df,
    family = binomial(link = "logit")
  )

  # Exponentiate estimates and interval bounds to odds ratios, drop the
  # intercept row, and keep only the formatted columns plus `term`.
  table <- model %>%
    tidy(conf.int = TRUE) %>%
    mutate_at(.vars = c("estimate", "conf.low", "conf.high"), exp) %>%
    slice(-1) %>%
    mutate(
      `Odds Ratio` = pretty_ci(estimate, conf.low, conf.high,
                               sep = conf_int_sep),
      `P value (Wald)` = signif(p.value, 3)
    ) %>%
    select(-estimate, -std.error, -statistic, -p.value, -conf.low,
           -conf.high)

  # Per confounder: count missing observations (n) and all observations (nn)
  # within each category, and build the key used to join to model terms.
  data <- map2_dfr(
    confounders, confounder_names,
    ~ group_by(df, .dots = c(.x, var)) %>%
      count() %>%
      group_by(.dots = .x) %>%
      add_count(wt = n, name = "nn") %>%
      filter_at(.vars = var, all_vars(. == "Missing")) %>%
      select(-contains(var)) %>%
      drop_na() %>%
      ungroup() %>%
      rename_if(is.factor, ~ paste0("Category")) %>%
      mutate_if(is.factor, as.character) %>%
      mutate(Variable = .y) %>%
      mutate(key = paste0(.x, Category)) %>%
      mutate(Missing = pretty_round(n / nn * 100, 1)) %>%
      mutate(Missing = paste0(Missing, "% (", n, ")")) %>%
      select(Variable, Category, `Missing (N)` = Missing,
             Notifications = nn, key)
  )

  # Likelihood ratio test for each confounder: full model against the model
  # refitted without that confounder.
  lik_tests <- tibble(
    Variable = confounder_names,
    `P value (LRT)` = map_dbl(
      confounders,
      ~ anova(model, update(model, paste0(". ~ . - ", .)),
              test = "LRT")$`Pr(>Chi)`[2]
    ) %>%
      signif(3)
  )

  output <- data %>%
    left_join(table, by = c(key = "term")) %>%
    select(-key) %>%
    # Categories with no model term are the reference level.
    mutate(`Odds Ratio` = `Odds Ratio` %>% replace_na("Reference")) %>%
    # Show each variable name on its first row only; the LRT join below then
    # attaches the p-value to that row alone.
    mutate(dummy = Variable) %>%
    group_by(dummy) %>%
    mutate(Variable = c(Variable[1], rep("", n() - 1))) %>%
    ungroup() %>%
    select(-dummy) %>%
    left_join(lik_tests, by = "Variable") %>%
    # Convert to character before filling blanks: replace_na() casts the
    # replacement to the column type, so "" cannot be placed in a numeric
    # column.
    mutate(
      `P value (LRT)` = as.character(`P value (LRT)`) %>% replace_na("")
    ) %>%
    mutate(
      `P value (Wald)` = as.character(`P value (Wald)`) %>% replace_na("")
    )

  # Report the number of rows that entered the model in the column header.
  colnames(output) <- colnames(output) %>%
    str_replace("Notifications", paste0("Notifications (", nrow(df), ")"))

  output
}