From 98122220f0ec4b7fd62f7cd6346a1012af093b8f Mon Sep 17 00:00:00 2001
From: Xiang Li <x.li@imperial.ac.uk>
Date: Mon, 30 Apr 2018 11:54:05 +0100
Subject: [PATCH 1/3] moved burden_templates genration script to jenner; did
 test - scucessful locally

---
 DESCRIPTION                            |   3 +-
 R/create_burden_templates.R            | 522 +++++++++++++++++++++++++
 R/util_assert.R                        |   9 +
 tests/testthat/test-burden-templates.R |  21 +
 4 files changed, 554 insertions(+), 1 deletion(-)
 create mode 100644 R/create_burden_templates.R
 create mode 100644 tests/testthat/test-burden-templates.R

diff --git a/DESCRIPTION b/DESCRIPTION
index e0e0d4e..0fb4781 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -11,7 +11,8 @@ Imports:
     RPostgres,
     vaultr,
     whisker,
-    yaml
+    yaml,
+    dplyr
 RoxygenNote: 6.0.1
 Suggests:
     RSQLite,
diff --git a/R/create_burden_templates.R b/R/create_burden_templates.R
new file mode 100644
index 0000000..dcac91f
--- /dev/null
+++ b/R/create_burden_templates.R
@@ -0,0 +1,522 @@
+
+create_burden_template <- function(files, files_path, assert_files = c("Country_Disease_Table.csv", "model_meta.csv", "model_outcomes.csv"), central_templates_only = FALSE) {
+
+  assert_has_files(files, assert_files)
+  files <- paste0(files_path, files)
+
+  country_disease_table <- read_csv(files[1])
+  model_meta_data <- read_csv(files[2])
+  model_outcomes <- read_csv(files[3])
+
+  names(country_disease_table)[names(country_disease_table) == "Country"] <-
+    "country_name"
+  names(country_disease_table)[names(country_disease_table) == "ISO3.code"] <-
+    "country"
+
+  ## making sure that the final year makes sense:
+  year_end <- 2030
+  model_meta_data$year_max <- apply(cbind(2100, year_end + model_meta_data$burden_max_age), 1, min)
+  ## write templates
+  output_files <- sapply(seq_along(model_meta_data[, 1]),
+                         function(i) write_template_files(model_meta_data[i, ],
+                                                          year_end,
+                                                          country_disease_table,
+                                                          model_outcomes,
+                                                          central_templates_only))
+}
+
+### From Nick's insert_function.R
+# function adds new rows to dataframe with contents of value_insert
+# to the column column_name
+
+insert_dataframe = function(data,column_name,value_insert){
+
+  length_values <- length(value_insert)
+
+  data$column_name <- value_insert[1]
+
+  cf <- "column_name"
+  names(data)[names(data)%in%cf] <- c(column_name)
+
+  data1 <- data
+
+  for(i in 2:length_values){
+    data1[, column_name] <- value_insert[i]
+    data <- rbind(data,data1)
+  }
+
+  return(data)
+
+}
+
+# inserts country name into every line of data frame for each country iso code
+
+country_function <- function(k,country_n,country_ISOcode,country_data){
+
+  country_name_rep <- rep(country_n[k],length(country_data[country_data$country == country_ISOcode[k], ]$country))
+  return(country_name_rep)
+
+}
+
+# outer wrapper function that produces the csv files
+# that selects the models associated with each disease
+
+model_select_function <- function(j){
+
+  model_name <- unique(model_meta_data[model_meta_data$disease == disease_name[j], ]$modelling_group)
+
+  sapply(1:length(model_name),generate_csv,model_name,j)
+
+}
+
+# inner wrapper function that produces the csv files
+# this function has no return
+
+generate_csv <- function(i,model_name,j){
+
+  country_data <- data.frame(disease = disease_name[j])
+
+  age_min <- model_meta_data[model_meta_data$disease == disease_name[j] & model_meta_data$modelling_group == model_name[i], ]$burden_min_age
+  age_max <- model_meta_data[model_meta_data$disease == disease_name[j] & model_meta_data$modelling_group == model_name[i], ]$burden_max_age
+  year_start <- model_meta_data[model_meta_data$disease == disease_name[j] & model_meta_data$modelling_group == model_name[i], ]$year_min
+
+  country_ISOcode <- unique(country_meta_data[country_meta_data$vaccine == disease_name[j], ]$country)
+  country_n <- country_meta_data[country_meta_data$vaccine == disease_name[j], ]$country_name
+
+  country_data$run_id <- ""
+
+  if(disease_name[j] == "HPV"){
+
+    age_values <- c(age_min:age_max)
+    age_column_name<-c("age")
+    year_values <- c(year_start:year_end)
+    year_column_name<-c("year")
+
+    country_data <- insert_dataframe(country_data,year_column_name,year_values)
+
+    country_data <- insert_dataframe(country_data,age_column_name,age_values)
+
+    age_min_k <- age_min + 1
+
+    for(k in age_min_k:age_max){
+
+      country_data1<-HPV_generate_csv(k,j,disease_name,age_min,age_max,year_end)
+
+      country_data <- rbind(country_data,country_data1)
+      rm(country_data1)
+
+      country_data <- country_data[order(country_data$age),]
+    }
+
+  }
+
+  if(disease_name[j] != "HPV"){
+
+    year_till <- year_end + age_max
+    if(year_till > 2100){year_till <- 2100}
+
+    age_values <- c(age_min:age_max)
+    age_column_name<-c("age")
+    year_values <- c(year_start:year_till)
+    year_column_name<-c("year")
+
+    country_data <- insert_dataframe(country_data,year_column_name,year_values)
+
+    country_data <- insert_dataframe(country_data,age_column_name,age_values)
+
+  }
+
+  country_column_name <-c("country")
+
+  country_data <- insert_dataframe(country_data,country_column_name,country_ISOcode)
+
+  b<-lapply(1:length(country_n),country_function,country_n,country_ISOcode,country_data)
+
+  country_data$country_name <- unlist(b)
+
+  outcome_name <- unique(model_outcomes[model_outcomes$disease == "standard" & model_outcomes$model == "standard", ]$outcome)
+
+  if(disease_name[j] == "Hib3" & model_name[i] == "JHU-Tam"){
+    outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
+  }
+
+  if(disease_name[j] == "HepB" & model_name[i] == "IC-Hallett"){
+    outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
+  }
+
+  if(disease_name[j] == "HepB" & model_name[i] == "CDA-Razavi"){
+    outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
+  }
+
+  if(disease_name[j] == "HepB" & model_name[i] == "Li"){
+    outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
+  }
+
+  if(disease_name[j] == "Rubella" & model_name[i] == "PHE-Vynnycky"){
+    outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
+  }
+
+  for(l in seq_along(outcome_name)){
+    country_data[, paste0(outcome_name[l])] <- ""
+  }
+
+  country_data_stochastic <- country_data
+
+  country_data <- country_data[, !(names(country_data) %in% "run_id")]
+
+  title_stochastic <- paste0("stochastic_burden_template_",disease_name[j],"-",model_name[i],".csv")
+  title <- paste0("central_burden_template_",disease_name[j],"-",model_name[i],".csv")
+
+  write.csv(country_data_stochastic, file = title_stochastic, row.names = FALSE)
+  write.csv(country_data, file = title, row.names = FALSE)
+
+  return()
+
+}
+
+# function that generates the triangular form of the data (between 2031 to around 2130)
+# needed for the HPV burden estimate templates
+
+HPV_generate_csv <- function(k,j,disease_name,age_min,age_max,year_end){
+
+  country_data1 <- data.frame(disease = disease_name[j])
+  country_data1$run_id <- ""
+
+  age_values <- c(k:age_max)
+  age_column_name<-c("age")
+  year_values <- c(year_end-age_min+k)
+  year_column_name<-c("year")
+
+  country_data1$year <- year_values
+
+  if(length(age_values)>1){
+    country_data1 <- insert_dataframe(country_data1,age_column_name,age_values)
+  }
+
+  if(length(age_values)==1){
+    country_data1$age <- age_max
+  }
+
+  return(country_data1)
+
+}
+
+### From Nick's country_function
+
+get_countries_for_model <- function(model_meta_line,
+                                    country_disease_table) {
+
+  disease_curr <-  model_meta_line$disease
+  modelling_group_curr <- model_meta_line$modelling_group
+
+  sel <- country_disease_table[, grep(disease_curr,
+                                      names(country_disease_table))] == "Y"
+
+  countries_for_model <- cbind(disease = disease_curr,
+                               modelling_group = modelling_group_curr,
+                               country_disease_table[sel, c("country",
+                                                            "country_name")])
+
+  return(countries_for_model)
+}
+
+make_filled_columns <- function(model_meta_line,
+                                country_disease_table) {
+
+  ## need to generate columns:
+  ## cols_filled = c("disease", "year", "age", "country", "country_name")
+
+  country_df <- get_countries_for_model(model_meta_line,
+                                        country_disease_table)
+
+  ages <- model_meta_line$burden_min_age : model_meta_line$burden_max_age
+  years <- model_meta_line$year_min : model_meta_line$year_max
+
+  out <- expand.grid(age = ages, year = years, country = country_df$country)
+  out <- cbind(disease = model_meta_line$disease, out)
+  out$country_name <-
+    country_df$country_name[match(out$country, country_df$country)]
+  ## NOTE: The order here is very important - upload will not work if
+  ## it is not correct.
+  out[c("disease", "year", "age", "country", "country_name")]
+}
+
+find_model_outcomes <- function(disease_curr,
+                                modelling_group_curr,
+                                model_outcomes) {
+
+  outcomes_curr <- model_outcomes %>%
+    filter(disease == disease_curr & model == modelling_group_curr) %>%
+    select(outcome)
+  if(nrow(outcomes_curr) == 0) { ## use the standard outcomes:
+    outcomes_curr <- model_outcomes %>%
+      filter(disease == "standard" & model == "standard") %>%
+      select(outcome)
+  }
+  return(outcomes_curr)
+}
+
+
+write_template_files <- function(model_meta_line,
+                                 year_end,
+                                 country_disease_table,
+                                 model_outcomes,
+                                 central_templates_only = FALSE) {
+
+  out <- make_filled_columns(model_meta_line,
+                             country_disease_table)
+
+  ## knocking out the bottom triangle post 2030 - needs to be done for
+  ## List, Trivac and HPV:
+  if(model_meta_line$disease == "HPV" |
+     model_meta_line$model_name %in% c("TRIVAC", "LiST")) {
+    out <- knock_out_triangle(out, model_meta_line, year_end)
+  }
+
+  outcomes_curr <- find_model_outcomes(model_meta_line$disease,
+                                       model_meta_line$modelling_group,
+                                       model_outcomes)
+  if (any(duplicated(outcomes_curr$outcome))) {
+    stop("Duplicated model outcomes")
+  }
+
+  tmp <- matrix("", nrow = nrow(out), ncol = nrow(outcomes_curr))
+  colnames(tmp) <- outcomes_curr$outcome
+  out <- cbind(out, tmp)
+
+  expected <- c("disease", "year", "age", "country", "country_name",
+                "cohort_size")
+  if (!identical(names(out)[seq_along(expected)], expected)) {
+    stop("incorrect template headers")
+  }
+
+  ## writing the central template file:
+  filename1 <- sprintf("central_burden_template_%s-%s.csv",
+                      model_meta_line$disease,
+                      model_meta_line$modelling_group)
+
+  write.csv(out, filename1, row.names = FALSE)
+  if ( !central_templates_only ) {
+    ## adapting the file to the stochastic output:
+    out <- cbind(disease = out$disease, run_id = "", out[, -1])
+    filename2 <- sprintf("stochastic_burden_template_%s-%s.csv",
+                        model_meta_line$disease,
+                        model_meta_line$modelling_group)
+
+    write.csv(out, filename2, row.names = FALSE)
+    return(list(filename1, filename2))
+  } else {
+    return(filename1)
+  }
+}
+
+
+knock_out_triangle <- function(out, model_meta_line, year_end) {
+
+  year_end <- 2030
+  year_birth_last <- year_end - model_meta_line$burden_min_age
+  ## this is good for Trivac:
+  out <- out %>%
+    mutate(year_birth = year - age) %>%
+    filter(year_birth <= year_birth_last) %>%
+    select(disease, year, age, country, country_name)
+  return(out)
+}
+
+#### From Nick's check_function.R
+check_central_columns_filled <- function(dat, cols_filled) {
+  if(sum(is.na(match(cols_filled, names(dat)))) != 0)
+    warning("Not all columns that should be filled in are available.")
+
+  if(sum(is.na(dat[, cols_filled])) != 0)
+    warning("Missing entries in some columns that should be filled in.")
+
+}
+
+check_central_disease <- function(dat, disease_curr) {
+  tt = table(dat$disease, useNA = "ifany")
+  if(length(tt) != 1)
+    warning("More or less than a single disease present.")
+  if(names(tt) != disease_curr)
+    warning("disease in file does not match filename.")
+}
+
+check_central_year <- function(dat,
+                               model_meta,
+                               disease_curr,
+                               modelling_group_curr) {
+
+  year_range <- model_meta %>%
+    select(year_min, year_max)
+
+  if(min(dat$year) != year_range$year_min)
+    warning("First year not as given in model_meta file.")
+  if(max(dat$year) != year_range$year_max)
+    warning("Last year not as given in model_meta file.")
+
+  tt = table(dat$year)
+  if(!all(names(tt) == year_range$year_min : year_range$year_max))
+    warning("Not all years in the specified year range appear.")
+
+  if(length(table(tt)) != 1) {
+
+    tmp <- dat %>%
+      group_by(age, year) %>%
+      summarise(npt = n())
+    g <- ggplot(tmp, aes(x = year, y = age, colour = npt)) +
+      geom_point() +
+      ggtitle(sprintf("%s: %s", modelling_group_curr, disease_curr)) +
+      theme_light()
+    print(g)
+
+    warning("There are not the same number of entries for each year.")
+  }
+}
+
+check_central_age <- function(dat,
+                              model_meta,
+                              disease_curr,
+                              modelling_group_curr) {
+
+  age_range <- model_meta %>%
+    select(burden_min_age, burden_max_age)
+
+  if(min(dat$age) != age_range$burden_min_age)
+    warning("First age not as given in model_meta file.")
+  if(max(dat$age) != age_range$burden_max_age) {
+    warning("Last age not as given in model_meta file.")
+  }
+
+  tt = table(dat$age)
+  if(!all(names(tt) == age_range$burden_min_age : age_range$burden_max_age))
+    warning("Not all years in the specified year range appear.")
+
+  if(length(table(tt)) != 1)
+    warning("There are not the same number of entries for each age.")
+  ## this could still be ok - for instance for HPV.
+
+}
+
+check_central_country <- function(dat,
+                                  model_meta,
+                                  country_disease_table,
+                                  countries_excluded) {
+
+  disease_curr <- model_meta$disease
+  n_countries <- model_meta$number_countries
+
+  country_meta_data <- get_countries_for_model(model_meta,
+                                               country_disease_table,
+                                               countries_excluded)
+
+  countries <- country_meta_data %>%
+    filter(disease == disease_curr) %>%
+    select(country, country_name)
+
+  tt <- table(dat$country)
+  if(length(tt) != n_countries)
+    warning(sprintf("Number of countries = %d, should be %d.",
+                    length(tt), n_countries))
+
+  ## to the countries match what's pulled of Montagu?
+  if(sum(is.na(match(names(tt), countries$country))) != 0)
+    warning("There are countries in the .csv file that are not listed on Montagu for this disease and touchstone.")
+  if(sum(is.na(match(countries$country, names(tt)))) != 0)
+    warning("There are countries listed in Montagu for this disease and touchstone that are missing in the .csv.")
+
+  if(length(table(tt)) != 1)
+    warning("The number of rows per country varies between countries.")
+
+  ## checking country names and country ISO codes:
+  ttn <- table(table(dat$country, dat$country_name))
+  ## this should be mostly 0, but have n_country entries of some other number.
+  if(names(ttn)[1] != "0" | ttn[2] != n_countries)
+    warning("The country names and countries don't match up.")
+
+
+}
+################################################################################
+compare_central_stochastic_columns <- function(dat, dat_stochastic) {
+
+  ## comparing columns of the central and stochastic files:
+  mm = match(names(dat_stochastic), names(dat))
+  if(sum(is.na(mm)) != 1) {
+    warning("Columns in the central and stochastic files do not match up.")
+  } else {
+    new_col <- names(dat_stochastic[is.na(mm)])
+    if(new_col != "run_id")
+      warning("Additional column in stochastic file is `%s`, not `run_id`.")
+  }
+
+}
+
+compare_central_stochastic_rows <- function(dat, dat_stochastic, cols_filled) {
+
+  if(nrow(dat) != nrow(dat_stochastic))
+    warning("number of rows in central and stochastic files does not match.")
+
+
+  if(!all(dat_stochastic[, cols_filled] == dat[, cols_filled]))
+    warning("stochastic and central files are not aligned.")
+
+
+}
+
+################################################################################
+check_model_files <- function(model_meta_line,
+                              country_disease_table,
+                              countries_excluded) {
+
+  disease_curr <- model_meta_line$disease
+  modelling_group_curr <- model_meta_line$modelling_group
+
+  ## reading in both central and stochastic burden template files:
+  file_central <- sprintf("central_burden_template_%s-%s.csv",
+                          disease_curr, modelling_group_curr)
+  file_stochastic <- sprintf("stochastic_burden_template_%s-%s.csv",
+                             disease_curr, modelling_group_curr)
+
+  dat <- read.csv(file_central, stringsAsFactors = TRUE)
+  dat_stochastic <- read.csv(file_stochastic, stringsAsFactors = TRUE)
+
+  ## checking the central file:
+  ## columns that should be filled in:
+  cols_filled = c("disease", "year", "age", "country", "country_name")
+  check_central_columns_filled(dat, cols_filled)
+  check_central_disease(dat, disease_curr)
+
+  check_central_year(dat, model_meta_line, disease_curr, modelling_group_curr)
+  check_central_age(dat, model_meta_line, disease_curr, modelling_group_curr)
+
+  check_central_country(dat, model_meta_line, country_disease_table,
+                        countries_excluded)
+
+
+  ## the number of filled columns that are expected but missing:
+  n_col_mis <- sum(is.na(match(cols_filled, names(dat))))
+  if(n_col_mis != 0)
+    warning(sprintf("There are %d pre-filled columns missing among those expected (%s).",
+                    n_col_mis, paste(cols_filled, collapse = ", ")))
+
+  ## number of missing entries in the columns that should be filled:
+  n_mis_entries <- sum(colSums(is.na(dat[, cols_filled])))
+  if(n_mis_entries != 0)
+    warning(sprintf("There are %d entries missing missing among those expected in columns %s.",
+                    n_col_mis, paste(cols_filled, collapse = ", ")))
+
+
+  ## finding the expected outcomes:
+  outcomes_curr <- find_model_outcomes(disease_curr, modelling_group_curr,
+                                       model_outcomes)
+  ## the number of outcomes that are expected but missing:
+  n_mismatched_outcomes <- sum(is.na(match(outcomes_curr$outcome, names(dat))))
+  if(n_mismatched_outcomes != 0)
+    warning(sprintf("There are %d outcomes missing of the expected outcomes (%s).",
+                    n_mismatched_outcomes, paste(outcomes_curr$outcome, collapse = ", ")))
+
+
+  ## are the columns right?
+  compare_central_stochastic_columns(dat, dat_stochastic)
+  compare_central_stochastic_rows(dat, dat_stochastic, cols_filled)
+
+}
diff --git a/R/util_assert.R b/R/util_assert.R
index 51d3a42..171b56d 100644
--- a/R/util_assert.R
+++ b/R/util_assert.R
@@ -5,3 +5,12 @@ assert_has_columns <- function(x, columns, name = deparse(substitute(x))) {
                  name, paste(msg, collapse = ", ")))
   }
 }
+
+assert_has_files <- function(x, files) {
+  msg <- setdiff(files,x)
+  if (length(msg) > 0L) {
+    stop(sprintf("Missing file '%s' from: %s.",
+                 msg, paste(files, collapse = ", ")))
+  }
+}
+
diff --git a/tests/testthat/test-burden-templates.R b/tests/testthat/test-burden-templates.R
new file mode 100644
index 0000000..4090010
--- /dev/null
+++ b/tests/testthat/test-burden-templates.R
@@ -0,0 +1,21 @@
+context("create_burden_templates")
+
+test_that("burden_templates", {
+  files_path <- "jenner-test-data/burden_template_meta_files/"
+  files <- c("Country_Disease_Table.csv", "model_meta.csv", "model_outcomes.csv")
+  output_files <- unlist(create_burden_template(files, files_path, central_templates_only = TRUE))
+
+  test_path <- "jenner-test-data/burden_templates/"
+  test_files <- paste0(test_path, output_files)
+
+  f <- rep(TRUE, length(output_files))
+  v <- rep(FALSE, length(output_files))
+  for(i in seq_along(output_files)) {
+    f1 <- read_csv(output_files[i])
+    f2 <- read_csv(test_files[i])
+    v[i] <- all.equal(f1, f2)
+  }
+  file.remove(output_files)
+  expect_equal(f, v)
+})
+

From 3320c659ce4b40b093d5ffed12803e041cee1e3b Mon Sep 17 00:00:00 2001
From: Xiang Li <x.li@imperial.ac.uk>
Date: Mon, 30 Apr 2018 12:03:23 +0100
Subject: [PATCH 2/3] update NEWS; export create burden templates function

---
 DESCRIPTION                   |  2 +-
 NAMESPACE                     |  1 +
 NEWS.md                       |  3 +++
 R/create_burden_templates.R   | 17 +++++++++++++++--
 man/create_burden_template.Rd | 25 +++++++++++++++++++++++++
 5 files changed, 45 insertions(+), 3 deletions(-)
 create mode 100644 man/create_burden_template.Rd

diff --git a/DESCRIPTION b/DESCRIPTION
index 0fb4781..7080f89 100644
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,6 +1,6 @@
 Package: jenner
 Title: Internal Montagu Helpers
-Version: 0.0.14
+Version: 0.0.15
 Description: Helpers for Montagu.
 License: MIT + file LICENSE
 Author: Rich FitzJohn
diff --git a/NAMESPACE b/NAMESPACE
index 49ba797..aaa4bd4 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -2,6 +2,7 @@
 
 export(admin_set_active_touchstone)
 export(calculate_dalys)
+export(create_burden_template)
 export(create_touchstone)
 export(database_connection)
 export(fix_coverage_fvps)
diff --git a/NEWS.md b/NEWS.md
index a62ea3d..cfb4efe 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -1,3 +1,6 @@
+### jenner 0.0.15
+ * VIMC-1728 Create burden estimate templates in jenner 
+ 
 ### jenner 0.0.14
  * VIMC-1400 dalys calculation
  * VIMC-1607 correct a bug for reporting suspicious campaign data
diff --git a/R/create_burden_templates.R b/R/create_burden_templates.R
index dcac91f..076822c 100644
--- a/R/create_burden_templates.R
+++ b/R/create_burden_templates.R
@@ -1,6 +1,19 @@
-
+##' Creat burden estimate templates
+##' @title Creat burden estimate templates
+##'
+##' @param files filenames, it has to contain the following three files
+##' "Country_Disease_Table.csv", "model_meta.csv" and "model_outcomes.csv"
+##'
+##' @param files_path the directory that contains those files above
+##'
+##' @param assert_files needed meta files
+##' "Country_Disease_Table.csv", "model_meta.csv" and "model_outcomes.csv"
+##'
+##' @param central_templates_only TRUE if you only need central burden templates, usually for open call. But it depend on Tini's input.
+##'  Set to FALSE as default, which outputs both central and stochastic templates
+##'
+##' @export
 create_burden_template <- function(files, files_path, assert_files = c("Country_Disease_Table.csv", "model_meta.csv", "model_outcomes.csv"), central_templates_only = FALSE) {
-
   assert_has_files(files, assert_files)
   files <- paste0(files_path, files)
 
diff --git a/man/create_burden_template.Rd b/man/create_burden_template.Rd
new file mode 100644
index 0000000..4622036
--- /dev/null
+++ b/man/create_burden_template.Rd
@@ -0,0 +1,25 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/create_burden_templates.R
+\name{create_burden_template}
+\alias{create_burden_template}
+\title{Creat burden estimate templates}
+\usage{
+create_burden_template(files, files_path,
+  assert_files = c("Country_Disease_Table.csv", "model_meta.csv",
+  "model_outcomes.csv"), central_templates_only = FALSE)
+}
+\arguments{
+\item{files}{filenames, it has to contain the following three files
+"Country_Disease_Table.csv", "model_meta.csv" and "model_outcomes.csv"}
+
+\item{files_path}{the directory that contains those files above}
+
+\item{assert_files}{needed meta files
+"Country_Disease_Table.csv", "model_meta.csv" and "model_outcomes.csv"}
+
+\item{central_templates_only}{TRUE if you only need central burden templates, usually for open call. But it depend on Tini's input.
+Set to FALSE as default, which outputs both central and stochastic templates}
+}
+\description{
+Creat burden estimate templates
+}

From 94c2c896515fb4c100738c96064d4a543d08f421 Mon Sep 17 00:00:00 2001
From: Xiang Li <x.li@imperial.ac.uk>
Date: Mon, 30 Apr 2018 12:16:23 +0100
Subject: [PATCH 3/3] styling a little bit

---
 R/create_burden_templates.R | 128 ++++++++++++++++++------------------
 1 file changed, 65 insertions(+), 63 deletions(-)

diff --git a/R/create_burden_templates.R b/R/create_burden_templates.R
index 076822c..ef49caf 100644
--- a/R/create_burden_templates.R
+++ b/R/create_burden_templates.R
@@ -42,7 +42,7 @@ create_burden_template <- function(files, files_path, assert_files = c("Country_
 # function adds new rows to dataframe with contents of value_insert
 # to the column column_name
 
-insert_dataframe = function(data,column_name,value_insert){
+insert_dataframe <- function(data,column_name,value_insert) {
 
   length_values <- length(value_insert)
 
@@ -53,7 +53,7 @@ insert_dataframe = function(data,column_name,value_insert){
 
   data1 <- data
 
-  for(i in 2:length_values){
+  for(i in 2:length_values) {
     data1[, column_name] <- value_insert[i]
     data <- rbind(data,data1)
   }
@@ -64,7 +64,7 @@ insert_dataframe = function(data,column_name,value_insert){
 
 # inserts country name into every line of data frame for each country iso code
 
-country_function <- function(k,country_n,country_ISOcode,country_data){
+country_function <- function(k, country_n, country_ISOcode, country_data) {
 
   country_name_rep <- rep(country_n[k],length(country_data[country_data$country == country_ISOcode[k], ]$country))
   return(country_name_rep)
@@ -74,18 +74,18 @@ country_function <- function(k,country_n,country_ISOcode,country_data){
 # outer wrapper function that produces the csv files
 # that selects the models associated with each disease
 
-model_select_function <- function(j){
+model_select_function <- function(j) {
 
   model_name <- unique(model_meta_data[model_meta_data$disease == disease_name[j], ]$modelling_group)
 
-  sapply(1:length(model_name),generate_csv,model_name,j)
+  sapply(1:length(model_name), generate_csv, model_name, j)
 
 }
 
 # inner wrapper function that produces the csv files
 # this function has no return
 
-generate_csv <- function(i,model_name,j){
+generate_csv <- function(i, model_name, j) {
 
   country_data <- data.frame(disease = disease_name[j])
 
@@ -98,7 +98,7 @@ generate_csv <- function(i,model_name,j){
 
   country_data$run_id <- ""
 
-  if(disease_name[j] == "HPV"){
+  if (disease_name[j] == "HPV") {
 
     age_values <- c(age_min:age_max)
     age_column_name<-c("age")
@@ -111,65 +111,67 @@ generate_csv <- function(i,model_name,j){
 
     age_min_k <- age_min + 1
 
-    for(k in age_min_k:age_max){
+    for(k in age_min_k:age_max) {
 
-      country_data1<-HPV_generate_csv(k,j,disease_name,age_min,age_max,year_end)
+      country_data1<-HPV_generate_csv(k, j, disease_name, age_min, age_max, year_end)
 
-      country_data <- rbind(country_data,country_data1)
+      country_data <- rbind(country_data, country_data1)
       rm(country_data1)
 
-      country_data <- country_data[order(country_data$age),]
+      country_data <- country_data[order(country_data$age), ]
     }
 
   }
 
-  if(disease_name[j] != "HPV"){
+  if (disease_name[j] != "HPV") {
 
     year_till <- year_end + age_max
-    if(year_till > 2100){year_till <- 2100}
+    if (year_till > 2100) {
+      year_till <- 2100
+    }
 
     age_values <- c(age_min:age_max)
     age_column_name<-c("age")
     year_values <- c(year_start:year_till)
     year_column_name<-c("year")
 
-    country_data <- insert_dataframe(country_data,year_column_name,year_values)
+    country_data <- insert_dataframe(country_data, year_column_name, year_values)
 
-    country_data <- insert_dataframe(country_data,age_column_name,age_values)
+    country_data <- insert_dataframe(country_data, age_column_name, age_values)
 
   }
 
-  country_column_name <-c("country")
+  country_column_name <- c("country")
 
-  country_data <- insert_dataframe(country_data,country_column_name,country_ISOcode)
+  country_data <- insert_dataframe(country_data, country_column_name, country_ISOcode)
 
-  b<-lapply(1:length(country_n),country_function,country_n,country_ISOcode,country_data)
+  b<-lapply(1:length(country_n), country_function, country_n, country_ISOcode, country_data)
 
   country_data$country_name <- unlist(b)
 
   outcome_name <- unique(model_outcomes[model_outcomes$disease == "standard" & model_outcomes$model == "standard", ]$outcome)
 
-  if(disease_name[j] == "Hib3" & model_name[i] == "JHU-Tam"){
+  if (disease_name[j] == "Hib3" & model_name[i] == "JHU-Tam") {
     outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
   }
 
-  if(disease_name[j] == "HepB" & model_name[i] == "IC-Hallett"){
+  if (disease_name[j] == "HepB" & model_name[i] == "IC-Hallett") {
     outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
   }
 
-  if(disease_name[j] == "HepB" & model_name[i] == "CDA-Razavi"){
+  if (disease_name[j] == "HepB" & model_name[i] == "CDA-Razavi") {
     outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
   }
 
-  if(disease_name[j] == "HepB" & model_name[i] == "Li"){
+  if (disease_name[j] == "HepB" & model_name[i] == "Li") {
     outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
   }
 
-  if(disease_name[j] == "Rubella" & model_name[i] == "PHE-Vynnycky"){
+  if (disease_name[j] == "Rubella" & model_name[i] == "PHE-Vynnycky") {
     outcome_name <- unique(model_outcomes[model_outcomes$disease == disease_name[j] & model_outcomes$model == model_name[i], ]$outcome)
   }
 
-  for(l in seq_along(outcome_name)){
+  for(l in seq_along(outcome_name)) {
     country_data[, paste0(outcome_name[l])] <- ""
   }
 
@@ -190,23 +192,23 @@ generate_csv <- function(i,model_name,j){
 # function that generates the triangular form of the data (between 2031 to around 2130)
 # needed for the HPV burden estimate templates
 
-HPV_generate_csv <- function(k,j,disease_name,age_min,age_max,year_end){
+HPV_generate_csv <- function(k,j,disease_name,age_min,age_max,year_end) {
 
   country_data1 <- data.frame(disease = disease_name[j])
   country_data1$run_id <- ""
 
   age_values <- c(k:age_max)
-  age_column_name<-c("age")
+  age_column_name <- c("age")
   year_values <- c(year_end-age_min+k)
-  year_column_name<-c("year")
+  year_column_name <- c("year")
 
   country_data1$year <- year_values
 
-  if(length(age_values)>1){
+  if (length(age_values) > 1) {
     country_data1 <- insert_dataframe(country_data1,age_column_name,age_values)
   }
 
-  if(length(age_values)==1){
+  if (length(age_values) == 1) {
     country_data1$age <- age_max
   }
 
@@ -261,7 +263,7 @@ find_model_outcomes <- function(disease_curr,
   outcomes_curr <- model_outcomes %>%
     filter(disease == disease_curr & model == modelling_group_curr) %>%
     select(outcome)
-  if(nrow(outcomes_curr) == 0) { ## use the standard outcomes:
+  if (nrow(outcomes_curr) == 0) { ## use the standard outcomes:
     outcomes_curr <- model_outcomes %>%
       filter(disease == "standard" & model == "standard") %>%
       select(outcome)
@@ -281,7 +283,7 @@ write_template_files <- function(model_meta_line,
 
   ## knocking out the bottom triangle post 2030 - needs to be done for
   ## List, Trivac and HPV:
-  if(model_meta_line$disease == "HPV" |
+  if (model_meta_line$disease == "HPV" |
      model_meta_line$model_name %in% c("TRIVAC", "LiST")) {
     out <- knock_out_triangle(out, model_meta_line, year_end)
   }
@@ -305,16 +307,16 @@ write_template_files <- function(model_meta_line,
 
   ## writing the central template file:
   filename1 <- sprintf("central_burden_template_%s-%s.csv",
-                      model_meta_line$disease,
-                      model_meta_line$modelling_group)
+                       model_meta_line$disease,
+                       model_meta_line$modelling_group)
 
   write.csv(out, filename1, row.names = FALSE)
-  if ( !central_templates_only ) {
+  if (!central_templates_only ) {
     ## adapting the file to the stochastic output:
     out <- cbind(disease = out$disease, run_id = "", out[, -1])
     filename2 <- sprintf("stochastic_burden_template_%s-%s.csv",
-                        model_meta_line$disease,
-                        model_meta_line$modelling_group)
+                         model_meta_line$disease,
+                         model_meta_line$modelling_group)
 
     write.csv(out, filename2, row.names = FALSE)
     return(list(filename1, filename2))
@@ -338,19 +340,19 @@ knock_out_triangle <- function(out, model_meta_line, year_end) {
 
 #### From Nick's check_function.R
 check_central_columns_filled <- function(dat, cols_filled) {
-  if(sum(is.na(match(cols_filled, names(dat)))) != 0)
+  if (sum(is.na(match(cols_filled, names(dat)))) != 0)
     warning("Not all columns that should be filled in are available.")
 
-  if(sum(is.na(dat[, cols_filled])) != 0)
+  if (sum(is.na(dat[, cols_filled])) != 0)
     warning("Missing entries in some columns that should be filled in.")
 
 }
 
 check_central_disease <- function(dat, disease_curr) {
   tt = table(dat$disease, useNA = "ifany")
-  if(length(tt) != 1)
+  if (length(tt) != 1)
     warning("More or less than a single disease present.")
-  if(names(tt) != disease_curr)
+  if (names(tt) != disease_curr)
     warning("disease in file does not match filename.")
 }
 
@@ -362,16 +364,16 @@ check_central_year <- function(dat,
   year_range <- model_meta %>%
     select(year_min, year_max)
 
-  if(min(dat$year) != year_range$year_min)
+  if (min(dat$year) != year_range$year_min)
     warning("First year not as given in model_meta file.")
-  if(max(dat$year) != year_range$year_max)
+  if (max(dat$year) != year_range$year_max)
     warning("Last year not as given in model_meta file.")
 
-  tt = table(dat$year)
-  if(!all(names(tt) == year_range$year_min : year_range$year_max))
+  tt <- table(dat$year)
+  if (!all(names(tt) == year_range$year_min : year_range$year_max))
     warning("Not all years in the specified year range appear.")
 
-  if(length(table(tt)) != 1) {
+  if (length(table(tt)) != 1) {
 
     tmp <- dat %>%
       group_by(age, year) %>%
@@ -394,17 +396,17 @@ check_central_age <- function(dat,
   age_range <- model_meta %>%
     select(burden_min_age, burden_max_age)
 
-  if(min(dat$age) != age_range$burden_min_age)
+  if (min(dat$age) != age_range$burden_min_age)
     warning("First age not as given in model_meta file.")
-  if(max(dat$age) != age_range$burden_max_age) {
+  if (max(dat$age) != age_range$burden_max_age) {
     warning("Last age not as given in model_meta file.")
   }
 
   tt = table(dat$age)
-  if(!all(names(tt) == age_range$burden_min_age : age_range$burden_max_age))
+  if (!all(names(tt) == age_range$burden_min_age : age_range$burden_max_age))
     warning("Not all years in the specified year range appear.")
 
-  if(length(table(tt)) != 1)
+  if (length(table(tt)) != 1)
     warning("There are not the same number of entries for each age.")
   ## this could still be ok - for instance for HPV.
 
@@ -427,23 +429,23 @@ check_central_country <- function(dat,
     select(country, country_name)
 
   tt <- table(dat$country)
-  if(length(tt) != n_countries)
+  if (length(tt) != n_countries)
     warning(sprintf("Number of countries = %d, should be %d.",
                     length(tt), n_countries))
 
   ## to the countries match what's pulled of Montagu?
-  if(sum(is.na(match(names(tt), countries$country))) != 0)
+  if (sum(is.na(match(names(tt), countries$country))) != 0)
     warning("There are countries in the .csv file that are not listed on Montagu for this disease and touchstone.")
-  if(sum(is.na(match(countries$country, names(tt)))) != 0)
+  if (sum(is.na(match(countries$country, names(tt)))) != 0)
     warning("There are countries listed in Montagu for this disease and touchstone that are missing in the .csv.")
 
-  if(length(table(tt)) != 1)
+  if (length(table(tt)) != 1)
     warning("The number of rows per country varies between countries.")
 
   ## checking country names and country ISO codes:
   ttn <- table(table(dat$country, dat$country_name))
   ## this should be mostly 0, but have n_country entries of some other number.
-  if(names(ttn)[1] != "0" | ttn[2] != n_countries)
+  if (names(ttn)[1] != "0" | ttn[2] != n_countries)
     warning("The country names and countries don't match up.")
 
 
@@ -453,11 +455,11 @@ compare_central_stochastic_columns <- function(dat, dat_stochastic) {
 
   ## comparing columns of the central and stochastic files:
   mm = match(names(dat_stochastic), names(dat))
-  if(sum(is.na(mm)) != 1) {
+  if (sum(is.na(mm)) != 1) {
     warning("Columns in the central and stochastic files do not match up.")
   } else {
     new_col <- names(dat_stochastic[is.na(mm)])
-    if(new_col != "run_id")
+    if (new_col != "run_id")
       warning("Additional column in stochastic file is `%s`, not `run_id`.")
   }
 
@@ -465,11 +467,11 @@ compare_central_stochastic_columns <- function(dat, dat_stochastic) {
 
 compare_central_stochastic_rows <- function(dat, dat_stochastic, cols_filled) {
 
-  if(nrow(dat) != nrow(dat_stochastic))
+  if (nrow(dat) != nrow(dat_stochastic))
     warning("number of rows in central and stochastic files does not match.")
 
 
-  if(!all(dat_stochastic[, cols_filled] == dat[, cols_filled]))
+  if (!all(dat_stochastic[, cols_filled] == dat[, cols_filled]))
     warning("stochastic and central files are not aligned.")
 
 
@@ -489,8 +491,8 @@ check_model_files <- function(model_meta_line,
   file_stochastic <- sprintf("stochastic_burden_template_%s-%s.csv",
                              disease_curr, modelling_group_curr)
 
-  dat <- read.csv(file_central, stringsAsFactors = TRUE)
-  dat_stochastic <- read.csv(file_stochastic, stringsAsFactors = TRUE)
+  dat <- read_csv(file_central)
+  dat_stochastic <- read_csv(file_stochastic)
 
   ## checking the central file:
   ## columns that should be filled in:
@@ -507,13 +509,13 @@ check_model_files <- function(model_meta_line,
 
   ## the number of filled columns that are expected but missing:
   n_col_mis <- sum(is.na(match(cols_filled, names(dat))))
-  if(n_col_mis != 0)
+  if (n_col_mis != 0)
     warning(sprintf("There are %d pre-filled columns missing among those expected (%s).",
                     n_col_mis, paste(cols_filled, collapse = ", ")))
 
   ## number of missing entries in the columns that should be filled:
   n_mis_entries <- sum(colSums(is.na(dat[, cols_filled])))
-  if(n_mis_entries != 0)
+  if (n_mis_entries != 0)
     warning(sprintf("There are %d entries missing missing among those expected in columns %s.",
                     n_col_mis, paste(cols_filled, collapse = ", ")))
 
@@ -523,7 +525,7 @@ check_model_files <- function(model_meta_line,
                                        model_outcomes)
   ## the number of outcomes that are expected but missing:
   n_mismatched_outcomes <- sum(is.na(match(outcomes_curr$outcome, names(dat))))
-  if(n_mismatched_outcomes != 0)
+  if (n_mismatched_outcomes != 0)
     warning(sprintf("There are %d outcomes missing of the expected outcomes (%s).",
                     n_mismatched_outcomes, paste(outcomes_curr$outcome, collapse = ", ")))