Performs data preprocessing required to prepare the input data for the model.
setup_model_input(run_time = NULL, time_scale_numeric = 1)
run_time | Numeric, the run time of the model |
---|---|
time_scale_numeric | Numeric the time scale to use (with 1 being a year, 12 a month etc.). Defaults to 1. |
A named list of data inputs required by the model.
## Code setup_model_input#> function(run_time = NULL, time_scale_numeric = 1) { #> #> ## Set up initial population distribution #> pop_dist <- ModelTBBCGEngland::england_demographics %>% #> dplyr::filter(CoB == "UK born") %>% #> group_by(year) %>% #> mutate(age = 0:(n() - 1)) %>% #> group_by(age) %>% #> summarise(value = mean(proportion_age_by_year)) %>% #> select(age, value) #> #> ## Set up births scaling for time horizon #> t_births <- ModelTBBCGEngland::births %>% #> dplyr::filter(year >= 1931) %>% #> mutate(time = year - 1931) %>% #> mutate(time_n = map(time, ~ tibble(time_n = time_scale_numeric * . + 0:(time_scale_numeric - 1)))) %>% #> unnest() %>% #> mutate(value = births / time_scale_numeric) %>% #> select(time = time_n, value) %>% #> dplyr::filter(time <= run_time * time_scale_numeric) #> #> ## Set up expected lifespan #> exp_life_span <- ModelTBBCGEngland::mortality_rates %>% #> mutate(time = year - 1931, #> value = exp_life_span * time_scale_numeric) %>% #> group_by(time) %>% #> mutate(age = 0:(n() - 1)) %>% #> ungroup %>% #> mutate(time_n = map(time, ~ tibble(time_n = time_scale_numeric * . + 0:(time_scale_numeric - 1)))) %>% #> unnest() %>% #> select(time = time_n, age, value) %>% #> dplyr::filter(time <= run_time * time_scale_numeric) #> #> ## Set up Polymod contacts #> polymod <- ModelTBBCGEngland::contact %>% #> arrange(age_x, age_y) %>% #> group_by(age_x) %>% #> mutate(age2 = 0:(n() - 1)) %>% #> group_by(age_y) %>% #> mutate(age = 0:(n() - 1)) %>% #> ungroup %>% #> mutate_at(.vars = vars(mean, sd), #> ~ . / time_scale_numeric) %>% #> select(age, age2, mean, sd) #> #> ## Mean contacts #> polymod_mean <- polymod %>% #> select(age, age2, value = mean) #> #> ## SD of contacts #> polymod_sd <- polymod %>% #> select(age, age2, value = sd) #> #> ## Calculate mean contacts per person weighted by age distribution of the population #> avg_contacts <- polymod_mean %>% #> count(age, wt = value) %>% #> mutate(n = n * pop_dist$value) %>% #> count(wt = n) %>% #> pull(n) %>% #> round(0) #> #> ## Extact non-UK born pulmonary cases - estimate previous cases in the model #> nonukborn_p_cases <- ModelTBBCGEngland::incidence %>% #> dplyr::filter(ukborn == "Non-UK Born", #> pulmextrapulm == "Pulmonary, with or without EP") %>% #> select(-ukborn, -pulmextrapulm, -type, -policy_change) %>% #> mutate(time = year - 1931) %>% #> arrange(time, age_group) %>% #> mutate(age = as.numeric(age_group) - 1) %>% #> select(time, age, incidence) %>% #> count(time, age, wt = incidence) %>% #> rename(value = n) %>% #> mutate(time_n = map(time, ~ tibble(time_n = time_scale_numeric * . + 0:(time_scale_numeric - 1)))) %>% #> unnest() %>% #> mutate(value = value / time_scale_numeric) %>% #> select(time = time_n, age, value) #> #> ## Estimated future non UK born cases using a poisson regression model adjusting for time and age. #> nonukborn_p_cases <- nonukborn_p_cases %>% #> dplyr::filter(time >= (2010 - 1931)) %>% #> nest() %>% #> mutate(model = map(data, ~ glm(value ~ time + factor(age),, family = poisson, data = .))) %>% #> mutate(new_data = map(data, ~ expand.grid(age = min(.$age):max(.$age), time = c(max(.$time) + 1):(2100 - 1931)))) %>% #> mutate(pred_cases = map2(model, new_data, ~ tibble(value = predict(.x, .y, type = "response")))) %>% #> mutate(pred_data = map2(new_data, pred_cases, ~ bind_cols(.x, .y))) %>% #> mutate(all_data = map(pred_data, ~ bind_rows(nonukborn_p_cases, .))) %>% #> select(all_data) %>% #> unnest(all_data) #> #> #> ## Non UK born cases in 2000 - used to estimate historic non UK born cases #> NUKCases2000 <- nonukborn_p_cases %>% #> dplyr::filter(time == time_scale_numeric * (2000 - 1931)) %>% #> select(-time) #> #> #> ## Distribution UK born cases in 2000 - used to initialise the model #> DistUKCases2000 <- ModelTBBCGEngland::incidence %>% #> dplyr::filter(ukborn == "UK Born") %>% #> dplyr::filter(year == 2000) %>% #> group_by(age_group) %>% #> summarise(value = sum(incidence, na.rm = T)) %>% #> ungroup %>% #> add_count(wt = value) %>% #> mutate(value = value / n) %>% #> mutate(age = as.numeric(age_group) - 1) %>% #> select(age, value) #> #> #> input <- list( #> "pop_dist" = pop_dist, #> "births_input" = t_births, #> "exp_life_span" = exp_life_span, #> "polymod" = polymod_mean, #> "polymod_sd" = polymod_sd, #> "avg_contacts" = avg_contacts, #> "NonUKBornPCases" = nonukborn_p_cases, #> "NUKCases2000" = NUKCases2000, #> "DistUKCases2000" = DistUKCases2000 #> ) #> #> return(input) #> } #> <environment: namespace:ModelTBBCGEngland>## Output setup_model_input(run_time = 80, time_scale_numeric = 1)#> $pop_dist #> # A tibble: 12 x 2 #> age value #> <int> <dbl> #> 1 0 0.0682 #> 2 1 0.0651 #> 3 2 0.0651 #> 4 3 0.0650 #> 5 4 0.0635 #> 6 5 0.0609 #> 7 6 0.0627 #> 8 7 0.0673 #> 9 8 0.0703 #> 10 9 0.0684 #> 11 10 0.232 #> 12 11 0.112 #> #> $births_input #> # A tibble: 81 x 2 #> time value #> <dbl> <dbl> #> 1 0 589751 #> 2 1 573099 #> 3 2 540989 #> 4 3 557686 #> 5 4 559569 #> 6 5 567383 #> 7 6 573382 #> 8 7 583579 #> 9 8 581950 #> 10 9 567710 #> # … with 71 more rows #> #> $exp_life_span #> # A tibble: 972 x 3 #> time age value #> <dbl> <int> <dbl> #> 1 0 0 75.3 #> 2 0 1 777. #> 3 0 2 955. #> 4 0 3 563. #> 5 0 4 793. #> 6 0 5 988. #> 7 0 6 909. #> 8 0 7 635. #> 9 0 8 317. #> 10 0 9 128. #> # … with 962 more rows #> #> $polymod #> # A tibble: 144 x 3 #> age age2 value #> <int> <int> <dbl> #> 1 0 0 700. #> 2 0 1 296. #> 3 0 2 172. #> 4 0 3 110. #> 5 0 4 177. #> 6 0 5 288. #> 7 0 6 325. #> 8 0 7 393. #> 9 0 8 159. #> 10 0 9 99.8 #> # … with 134 more rows #> #> $polymod_sd #> # A tibble: 144 x 3 #> age age2 value #> <int> <int> <dbl> #> 1 0 0 117. #> 2 0 1 32.8 #> 3 0 2 28.5 #> 4 0 3 20.8 #> 5 0 4 34.5 #> 6 0 5 33.7 #> 7 0 6 38.0 #> 8 0 7 68.9 #> 9 0 8 31.6 #> 10 0 9 20.2 #> # … with 134 more rows #> #> $avg_contacts #> [1] 4027 #> #> $NonUKBornPCases #> # A tibble: 1,212 x 3 #> time age value #> <dbl> <dbl> <dbl> #> 1 69 0 15 #> 2 69 1 6 #> 3 69 2 32 #> 4 69 3 92 #> 5 69 4 216 #> 6 69 5 260 #> 7 69 6 245 #> 8 69 7 131 #> 9 69 8 110 #> 10 69 9 84 #> # … with 1,202 more rows #> #> $NUKCases2000 #> # A tibble: 12 x 2 #> age value #> <dbl> <dbl> #> 1 0 15 #> 2 1 6 #> 3 2 32 #> 4 3 92 #> 5 4 216 #> 6 5 260 #> 7 6 245 #> 8 7 131 #> 9 8 110 #> 10 9 84 #> 11 10 330 #> 12 11 154 #> #> $DistUKCases2000 #> # A tibble: 12 x 2 #> age value #> <dbl> <dbl> #> 1 0 0.0416 #> 2 1 0.0327 #> 3 2 0.0416 #> 4 3 0.0621 #> 5 4 0.0738 #> 6 5 0.0643 #> 7 6 0.0815 #> 8 7 0.0549 #> 9 8 0.0460 #> 10 9 0.0582 #> 11 10 0.237 #> 12 11 0.206 #>