# f_population_data_DZ_age_sex.R 5.68 KiB
#' Population estimates by data zone, sex and age band (2020)
#'
#' Reads a population-estimates CSV once and derives three tables from the
#' rows whose `Year` is 2020. The CSV is expected to contain the columns
#' `Year`, `DataZone`, `Sex`, `AllAges`, and single-year-of-age columns from
#' `Age0` through `Age90plus` laid out contiguously in age order (age bands
#' are summed over column *positions* between a band's first and last column).
#'
#' @param path Path to the population-estimates CSV. Defaults to the location
#'   this function has always read from, so existing calls without arguments
#'   behave as before.
#' @return A named list with three data frames:
#'   * `pop_2020_ages`: long table (`DZ`, `patient_sex`, `age_range`,
#'     `population`) with bands 12-14, 15-19, ..., 70-74, 75+ for rows whose
#'     `Sex` is not "All".
#'   * `pop_2020_all`: `DZ` and `Population` (the `AllAges` column) for rows
#'     whose `Sex` is "All".
#'   * `pop_2020_kids`: same layout as `pop_2020_ages` but with bands starting
#'     at 0-4, 5-9, 10-14.
#'   In the long tables rows are stacked band by band (all rows for the first
#'   band, then all rows for the second, ...), and `Sex` values "f"/"m" are
#'   recoded to "FEMALE"/"MALE".
f_population_data_DZ_age_sex <- function(path = "/Users/awood310/Desktop/R/Vaccinations/data/zones_and_population/pop_age_estimates_2020.csv") {
  # Using 2020 PHS ages
  # https://www.nrscotland.gov.uk/statistics-and-data/statistics/statistics-by-theme/population/population-estimates/2011-based-special-area-population-estimates/small-area-population-estimates/time-series

  # Read the file once; keep strings as character so the Sex recode below
  # works regardless of the R version's stringsAsFactors default.
  raw <- read.csv(path, stringsAsFactors = FALSE)

  # Row-wise sum of every column positioned from `first` to `last` inclusive.
  sum_between <- function(df, first, last) {
    idx <- match(c(first, last), colnames(df))
    if (anyNA(idx)) {
      stop(
        "Column(s) not found in population data: ",
        paste(c(first, last)[is.na(idx)], collapse = ", "),
        call. = FALSE
      )
    }
    rowSums(df[, idx[1]:idx[2], drop = FALSE])
  }

  # Band labels and bounding column names for the given lower bounds; each
  # band ends one year before the next starts, the last closed band ends at
  # 74, and an open-ended "75+" band (Age75..Age90plus) is always appended.
  band_spec <- function(lower) {
    upper <- c(lower[-1] - 1L, 74L)
    data.frame(
      label = c(paste0(lower, "-", upper), "75+"),
      first = paste0("Age", c(lower, 75L)),
      last = c(paste0("Age", upper), "Age90plus"),
      stringsAsFactors = FALSE
    )
  }

  # Long (DZ, patient_sex, age_range, population) table, stacked band by band.
  long_by_band <- function(df, lower) {
    spec <- band_spec(lower)
    counts <- lapply(
      seq_len(nrow(spec)),
      function(i) sum_between(df, spec$first[i], spec$last[i])
    )
    sex <- df$Sex
    sex[sex == "f"] <- "FEMALE"
    sex[sex == "m"] <- "MALE"
    data.frame(
      DZ = rep(df$DataZone, times = nrow(spec)),
      patient_sex = rep(sex, times = nrow(spec)),
      age_range = rep(spec$label, each = nrow(df)),
      population = unlist(counts, use.names = FALSE),
      stringsAsFactors = FALSE
    )
  }

  is_2020 <- raw$Year == "2020"
  by_sex <- raw[is_2020 & raw$Sex != "All", ]

  # Bands starting at age 12 (12-14, then five-year bands).
  pop_2020 <- long_by_band(by_sex, c(12L, seq(15L, 70L, by = 5L)))

  # All-ages total per data zone (rows where Sex is "All").
  pop_2020_all <- raw[is_2020 & raw$Sex == "All", c("DataZone", "AllAges")]
  names(pop_2020_all) <- c("DZ", "Population")

  # Five-year bands from age 0, so children are included.
  pop_2020_kids <- long_by_band(by_sex, seq(0L, 70L, by = 5L))

  list(
    "pop_2020_ages" = pop_2020,
    "pop_2020_all" = pop_2020_all,
    "pop_2020_kids" = pop_2020_kids
  )
}