Model Functions
Overall MI estimates
Est_Overall<-function(out,data=ED_survival,recurrent=TRUE,weight=TRUE,trunc=5){
if(recurrent==TRUE){ ## Recurrent survival
reg_data<- data %>%
#browser()
filter(outcome==out) %>%
filter(event<=trunc)
reg_data2<-lapply(complete_data, function(x) reg_data %>% left_join(x) )
if(weight==TRUE){
model<-lapply(reg_data2,
function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~ firePM_TotalDiary_per10 + gender+Age+Married+edu_group+employ_group + smoke_group + occu_exp_group+ prefire + STUDY_GROUP_CODE+cluster(HAZELWOOD_ID)+strata(event), method="breslow",data=x ,weights = weight )))[,c("coef","robust se")])
#print(model)
} else{
model<-lapply(reg_data2,
function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~ firePM_TotalDiary_per10 + gender+Age+Married+edu_group+employ_group + smoke_group + occu_exp_group+ prefire + STUDY_GROUP_CODE+cluster(HAZELWOOD_ID)+strata(event), method="breslow",data=x )))[,c("coef","robust se")])
}
} else { ## First event survival
reg_data<- data %>%
filter(outcome==out) %>%
filter(event<=1)
reg_data2<-lapply(complete_data, function(x) reg_data %>% left_join(x) )
if(weight==TRUE){
model<-lapply(reg_data2,
function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~ firePM_TotalDiary_per10 + gender+Age+Married+edu_group+employ_group + smoke_group + occu_exp_group + prefire + STUDY_GROUP_CODE, method="breslow",data=x ,weights = weight )))[,c("coef","robust se")])
} else{
model<-lapply(reg_data2,
function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~ firePM_TotalDiary_per10 +gender+Age+Married+edu_group+employ_group + smoke_group + occu_exp_group+ prefire +STUDY_GROUP_CODE, method="breslow",data=x )))[,c("coef","se(coef)")])
}
}
names(model)<-c("mi1","mi2","mi3","mi4","mi5")
result <-cbind(model,as_tibble(model))[,c(1:10)]
result$mi1.se<-result[,2]
result$mi2.se<-result[,4]
result$mi3.se<-result[,6]
result$mi4.se<-result[,8]
result$mi5.se<-result[,10]
result<-result[,c("mi1.coef","mi2.coef","mi3.coef","mi4.coef","mi5.coef","mi1.se","mi2.se","mi3.se","mi4.se","mi5.se")]
result$variable<-row.names(result)
# Combine Beta and Se - all risk factors
#
result<-result%>%
mutate(Beta=(mi1.coef + mi2.coef + mi3.coef + mi4.coef + mi5.coef)/5) %>%
mutate(var_Beta=rowSds(as.matrix(result[,c("mi1.coef","mi2.coef","mi3.coef","mi4.coef","mi5.coef")]))^2) %>%
mutate(Se=((((mi1.se)^2 + (mi2.se)^2 + (mi3.se)^2 + (mi4.se)^2 +(mi5.se)^2)/5)+1.2*var_Beta)^0.5) %>%
mutate(HR=exp(Beta),HR_low=exp(Beta-1.96*Se),HR_up=exp(Beta+1.96*Se)) %>%
mutate(P_value=(1-pnorm(abs(Beta/Se)))*2)
result$outcome<-out
result<-result%>%
filter(variable=="firePM_TotalDiary_per10")
result$ED.no_truc<-nrow(filter(reg_data,status==1))
result$ED.no_full<-data%>%
filter(outcome==out&status==1)%>%
filter(HAZELWOOD_ID%in%adult$HAZELWOOD_ID)%>%nrow()
return(result)
}
Time stratified MI estimates
Est_Tstrat<-function(out,data=ED_survival,recurrent=TRUE,weight=TRUE,trunc=5,cutpoint=913){
if(recurrent==TRUE){ ## Recurrent survival
reg_data<- data %>%
filter(outcome==out) %>%
filter(event<=trunc)
#merge imputed complete survey data
reg_data2<-lapply(complete_data, function(x) reg_data %>% left_join(x) )
# time split data into first 2.5 years and last 2.5 years.
reg_data3 <- lapply(reg_data2, function(x) survSplit(Surv(t_start,t_stop,status) ~ ., data= x, cut=c(cutpoint),episode= "tgroup"))
if(weight==TRUE){
model<-lapply(reg_data3,function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~ firePM_TotalDiary_per10:strata(tgroup) +gender+Age+Married+edu_group+employ_group+smoke_group +occu_exp_group+pre_fire_ED+STUDY_GROUP_CODE+strata(event), method = "breslow", data =x ,weights = weight,id=HAZELWOOD_ID )))[,c("coef","robust se")])
#print(model)
} else{
model<-lapply(reg_data3,
function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~ firePM_TotalDiary_per10:strata(tgroup) +gender+Age+Married+edu_group+employ_group+smoke_group +occu_exp_group+pre_fire_ED+STUDY_GROUP_CODE+strata(event), method = "breslow", data =x,id=HAZELWOOD_ID )))[,c("coef","robust se")])
}
} else { ## First event survival
reg_data<- data %>%
filter(outcome==out) %>%
filter(event<=1)
#merge imputed complete survey data
reg_data2<-lapply(complete_data, function(x) reg_data %>% left_join(x) )
reg_data3 <- lapply(reg_data2, function(x) survSplit(Surv(t_start,t_stop,status) ~ ., data= x, cut=c(cutpoint),episode= "tgroup"))
# time split data into first 2 years and last 3 years.
# reg_data3 <- survSplit(Surv(t_start,t_stop,status) ~ ., data= reg_data2, cut=c(cutpoint),episode= "tgroup")
if(weight==TRUE){
model<-lapply(reg_data3,function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~firePM_TotalDiary_per10:strata(tgroup) +gender+Age+Married+edu_group+employ_group+smoke_group +occu_exp_group+pre_fire_ED+STUDY_GROUP_CODE, method = "breslow", data =x ,weights = weight,id=HAZELWOOD_ID )))[,c("coef","robust se")])
} else{
model<-lapply(reg_data3,function(x) coef(summary(coxph(Surv(t_start,t_stop,status) ~ firePM_TotalDiary_per10:strata(tgroup)+gender+Age+Married+edu_group+employ_group+smoke_group +occu_exp_group+pre_fire_ED+STUDY_GROUP_CODE, method = "breslow", data =x,id=HAZELWOOD_ID )))[,c("coef","se(coef)")])
}
}
names(model)<-c("mi1","mi2","mi3","mi4","mi5")
result <-cbind(model,as_tibble(model))[,c(1:10)]
result$mi1.se<-result[,2]
result$mi2.se<-result[,4]
result$mi3.se<-result[,6]
result$mi4.se<-result[,8]
result$mi5.se<-result[,10]
result<-result[,c("mi1.coef","mi2.coef","mi3.coef","mi4.coef","mi5.coef","mi1.se","mi2.se","mi3.se","mi4.se","mi5.se")]
result$variable<-row.names(result)
# Combine Beta and Se - all risk factors
#
result<-result%>%
mutate(Beta=(mi1.coef + mi2.coef + mi3.coef + mi4.coef + mi5.coef)/5) %>%
mutate(var_Beta=rowSds(as.matrix(result[,c("mi1.coef","mi2.coef","mi3.coef","mi4.coef","mi5.coef")]))^2) %>%
mutate(Se=((((mi1.se)^2 + (mi2.se)^2 + (mi3.se)^2 + (mi4.se)^2 +(mi5.se)^2)/5)+1.2*var_Beta)^0.5) %>%
mutate(HR=exp(Beta),HR_low=exp(Beta-1.96*Se),HR_up=exp(Beta+1.96*Se)) %>%
mutate(P_value=(1-pnorm(abs(Beta/Se)))*2)
result$outcome<-out
# count events by tgroup for each outcome
temp<-reg_data3[[1]] %>%
filter(status==1 & outcome==out) %>%
group_by(tgroup) %>%
dplyr::summarize(Tot_events=sum(status))
# select exposure variables
result<-subset(result, variable=="firePM_TotalDiary_per10:strata(tgroup)tgroup=1" | variable=="firePM_TotalDiary_per10:strata(tgroup)tgroup=2")
result$ED.no_truc<-temp$Tot_events
return(result)
}
Plot overall estimates
Plot_Overall<-function(filename,figname){
#Plot_Overall<-function(filename){
forest<-filename %>%
select(c("outcome","ED.no_full","ED.no_truc","Beta","Se","HR", "HR_low","HR_up","P_value"))
forest<-forest%>%
dplyr::mutate(Index=seq.int(1:length(forest$outcome)))%>%
mutate('HR (95% CI)'=paste0(format(round(HR, digits=2), nsmall=2), " (" ,
format(round( HR_low, digits = 2),nsmall=2)," - " ,
format(round(HR_up, digits = 2),nsmall=2),")")) %>%
mutate( 'p-value' =format(round(P_value, digits=2 ), nsmall=2)) %>%
mutate('p-value'= ifelse(`p-value`=="0.00","<0.001",`p-value`)) %>%
mutate(Outcome=c("All cause","All cardiovascular disease","Ischemic heart disease", "Cardiac arrhythmia","Cerebrovascular disease","Stroke","Atherothrombotic disease","All respiratory","Chronic obstructive pulmonary disease","Pneumonia & acute bronchitis","All mental","Anxiety","Injury"))%>%
mutate(ICDcodes=c("All except Z00-Z99","I00-99;G45-46", "I20-25,49.0", "I47-49","I61-69;G45-46","I60-61,63-64;G45","I20-25,46,61-69,71-74,49.0;G45-46","J00-99","J41-44","J12-18,20-21","F00-99;X60-84;R40-46","F40-44","S00-99;T00-14"))
#main plot in the middle
plot1<- ggplot(forest, aes(x = HR, y = Index,colour = "black") )+
geom_point() +
geom_errorbarh(aes(xmin = HR_low, xmax = HR_up), height = 0.3) +
ylab(NULL) +
ggtitle(paste("\n \n ")) +
xlab(expression(paste("HR (95% CI) per 10 ", group("",mu*g/{m^3}, "")," increase in mean ", PM[2.5]))) +
scale_colour_manual(values = c("black")) +
scale_y_continuous(name = "", breaks=1:13,labels = forest$Outcome, trans="reverse") +
xlim(0.5, 2.0)+
theme(plot.title = element_text(size = 10,face = "bold"),
axis.text.y = element_blank(),
axis.title.x= element_text(size = 11,face = "bold"),
axis.ticks.y = element_blank(),
axis.text.x = element_text(size=11,face = "bold"),
panel.border = element_blank(),
axis.line.x = element_line(colour = "black"),
panel.background = element_blank(),
panel.grid = element_blank(),
plot.margin = margin(0, 0, 0.0, 0, "cm"),
legend.position = "none") +
geom_vline(aes(xintercept = 1), linetype = "dashed")
#base plot for adding text
tab_base <- ggplot(forest, aes(y = Index)) +
ylab(NULL) + xlab("") +
scale_y_continuous(name = "", breaks=1:13,labels = forest$Outcome, trans="reverse") +
theme(plot.title = element_text(hjust = 0.5, size = 11,face="bold"), ## centering title on text
axis.text.x = element_text(color = "white"), ## need text to be printed so it stays aligned with figure but white so it's invisible
axis.line = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
plot.margin = margin(0, 0, 0.4, 0, "cm"),
legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank())
# Predictors
tab1 <- tab_base +
geom_text(aes(x = 0, label = Outcome, y = Index), hjust = 1,nudge_x = 0.05,size=4.5) +
scale_x_continuous(limits = c(-1.5,0.1)) +
ggtitle("\n ED diagnoses \n")
# Group
tab2 <- tab_base +
geom_text(aes(x = 0, label = ICDcodes,colour = "black"),hjust = 1,size=4.5) +
scale_x_continuous(limits = c(-0.5,0.2)) +
scale_colour_manual(values = c("black") ) +
ggtitle("\n ICD codes \n") # Group
tab3 <- tab_base +
geom_text(aes(x = 0, label = ED.no_truc,colour = "black"),hjust = 1,size=4.5) +
scale_x_continuous(limits = c(-0.5,0.2)) +
scale_colour_manual(values = c("black") ) +
ggtitle("\n Number \n of events")
# OR
tab4 <- tab_base +
geom_text(aes(x = 0, label = `HR (95% CI)`, colour = "black"),size=4.5 ) +
scale_colour_manual(values =c("black") ) +
ggtitle("\n HR (95% CI) \n")
# individual p-value
tab5 <- tab_base +
geom_text(aes(x = 0, label = `p-value`, colour = "black"),size=4.5) +
scale_x_continuous(limits = c(-0.1,0.1)) +
scale_colour_manual(values = c("black" )) +
ggtitle("\n p-value \n")
plot <- grid.arrange(tab1, tab2, tab3, plot1, tab4,tab5,
layout_matrix = matrix(c(rep(1,10),
rep(2,13),
rep(3,3),
rep(4,13),
rep(5,5),
rep(6,2)), nrow = 1))
}
#
Plot time stratified estimates
Plot_Tstrat<-function(filename,figname,noutcomes=13){
nb<-noutcomes*2
forest<-filename %>%
select(c("outcome","ED.no_truc","Beta","Se","HR", "HR_low","HR_up","P_value")) %>%
mutate(time=rep(c("First 2.5 yrs","2.5 to 5 yrs"),times=noutcomes)) %>%
mutate(Index=seq.int(1:nb)) %>%
mutate(Index=ifelse(Index%%2==1,Index+0.1,Index-0.1)) %>%
mutate('HR (95% CI)'=paste0(format(round(HR, digits=2), nsmall=2), " (" ,
format(round( HR_low, digits = 2),nsmall=2)," - " ,
format(round(HR_up, digits = 2),nsmall=2),")")) %>%
mutate( 'p-value' =format(round(P_value, digits=2 ), nsmall=2)) %>%
mutate('p-value'= ifelse(`p-value`=="0.00","<0.001",`p-value`)) %>%
group_by(outcome) %>%
mutate(Index_all=mean(Index)) %>%
ungroup() %>%
mutate(outcome = factor(ifelse(time== "2.5 to 5 yrs",NA,as.character(outcome))))
forest$y_loc=forest$Index
#colours <- c("#065F9B","#66933E") # colours from somatic plot
plot1<- ggplot(forest, aes(x = HR, y = Index,colour = time) )+
geom_point() +
geom_errorbarh(aes(xmin = HR_low, xmax = HR_up), height = 0.3) +
ylab(NULL) +
ggtitle(paste("\n \n ")) +
xlab(expression(paste("HR (95% CI) per 10 ", group("",mu*g/{m^3}, "")," increase in mean ", PM[2.5]))) +
scale_colour_manual(values = c("#065F9B","#66933E")) +
scale_y_continuous(name = "", breaks=1:nb,labels = forest$outcome, trans="reverse") +
xlim(0.5, 2.0)+
theme(plot.title = element_text(size = 10,face = "bold"),
axis.text.y = element_blank(),
axis.title.x= element_text(size = 11,face = "bold"),
axis.ticks.y = element_blank(),
axis.text.x = element_text(size=11,face = "bold"),
panel.border = element_blank(),
axis.line.x = element_line(colour = "black"),
panel.background = element_blank(),
panel.grid = element_blank(),
plot.margin = margin(0, 0, 0.0, 0, "cm"),
legend.position = "none") +
geom_vline(aes(xintercept = 1), linetype = "dashed")
#base plot for adding text
tab_base <- ggplot(forest, aes(y = Index)) +
ylab(NULL) + xlab("") +
scale_y_continuous(name = "", limits = c(nb,1), breaks=1:nb,labels = forest$outcome, trans="reverse") +
theme(plot.title = element_text(hjust = 0.5, size = 11,face="bold"), ## centering title on text
axis.text.x = element_text(color = "white"), ## need text to be printed so it stays aligned with figure but white so it's invisible
axis.line = element_blank(),
axis.text.y = element_blank(),
axis.ticks = element_blank(),
axis.title.y = element_blank(),
plot.margin = margin(0, 0, 0.4, 0, "cm"),
legend.position = "none",
panel.background = element_blank(),
panel.grid = element_blank())
# Predictors
tab1 <- tab_base +
geom_text(aes(x = 0, label = outcome, y = Index_all), hjust = 1,nudge_x = 0.05,size=4.5) +
scale_x_continuous(limits = c(-1.5,0.1)) +
ggtitle("\n ED diagnoses \n")
tab2 <- tab_base +
geom_text(aes(x = 0, label = time,colour = time),hjust = 1,size=4.5) +
scale_x_continuous(limits = c(-0.5,0.2)) +
scale_colour_manual(values = c("#065F9B","#66933E") ) +
ggtitle("\n Follow-up \n post mine fire")
tab3 <- tab_base +
geom_text(aes(x = 0, label = ED.no_truc,colour = "black"),hjust = 1,size=4.5) +
scale_x_continuous(limits = c(-0.5,0.2)) +
scale_colour_manual(values = c("black") ) +
ggtitle("\n Number \n of events")
tab4 <- tab_base +
geom_text(aes(x = 0, label = `HR (95% CI)`, colour = time),size=4.5 ) +
scale_colour_manual(values =c("#065F9B","#66933E") ) +
ggtitle("\n HR (95% CI) \n")
# individual p-value
tab5 <- tab_base +
geom_text(aes(x = 0, label = `p-value`, colour = time),size=4.5) +
scale_x_continuous(limits = c(-0.1,0.1)) +
scale_colour_manual(values = c("#065F9B","#66933E")) +
ggtitle("\n p-value \n")
# plot <- grid.arrange(tab1,tab2,tab3, plot1, tab4,tab5,
# layout_matrix = matrix(c(rep(1,9),
# rep(2,6),
# rep(3,4),
# rep(4,16),
# rep(5,7),
# rep(6,3)), nrow = 1))
plot <- grid.arrange(tab1, tab2, tab3, plot1, tab4,tab5,
layout_matrix = matrix(c(rep(1,8),
rep(2,13),
rep(3,3),
rep(4,13),
rep(5,5),
rep(6,2)), nrow = 1))
}