用ggplot2绘制R的生存曲线。

时间:2023-02-03 20:31:35

I've been looking for a solution to plot survival curves using ggplot2. I've found some nice examples, but they do not follow the whole ggplot2 aesthetics (mainly regarding shaded confidence intervals and so on). So finally I've written my own function:

我一直在寻找用ggplot2绘制生存曲线的方法。我找到了一些很好的例子,但是它们并没有遵循整个ggplot2美学(主要是关于阴影的置信区间等等)。最后我写了自己的函数

ggsurvplot<-function(s, conf.int=T, events=T, shape="|", xlab="Time", 
                  ylab="Survival probability", zeroy=F, col=T, linetype=F){

#s: a survfit object.
#conf.int: TRUE or FALSE to plot confidence intervals.
#events: TRUE or FALSE to draw points when censoring events occur
#shape: the shape of these points
#zeroy: Force the y axis to reach 0
#col: TRUE, FALSE or a vector with colours. Colour or B/W
#linetype: TRUE, FALSE or a vector with line types.

require(ggplot2)
require(survival)

if(class(s)!="survfit") stop("Survfit object required")

#Build a data frame with all the data
sdata<-data.frame(time=s$time, surv=s$surv, lower=s$lower, upper=s$upper)
sdata$strata<-rep(names(s$strata), s$strata)

#Create a blank canvas
kmplot<-ggplot(sdata, aes(x=time, y=surv))+
    geom_blank()+
    xlab(xlab)+
    ylab(ylab)+
    theme_bw()

#Set color palette
if(is.logical(col)) ifelse(col,
                         kmplot<-kmplot+scale_colour_brewer(type="qual", palette=6)+scale_fill_brewer(type="qual", palette=6),
                         kmplot<-kmplot+scale_colour_manual(values=rep("black",length(s$strata)))+scale_fill_manual(values=rep("black",length(s$strata)))
                        )
else kmplot<-kmplot+scale_fill_manual(values=col)+scale_colour_manual(values=col)

#Set line types
if(is.logical(linetype)) ifelse(linetype,
                              kmplot<-kmplot+scale_linetype_manual(values=1:length(s$strata)),
                              kmplot<-kmplot+scale_linetype_manual(values=rep(1,  length(s$strata)))
                              )
else kmplot<-kmplot+scale_linetype_manual(values=linetype)

#Force y axis to zero
if(zeroy) {
    kmplot<-kmplot+ylim(0,1)
}

#Confidence intervals
if(conf.int) {  

    #Create a data frame with stepped lines
    n <- nrow(sdata)
    ys <- rep(1:n, each = 2)[-2*n] #duplicate row numbers and remove the last one
    xs <- c(1, rep(2:n, each=2))   #first row 1, and then duplicate row numbers
    scurve.step<-data.frame(time=sdata$time[xs], lower=sdata$lower[ys], upper=sdata$upper[ys],  surv=sdata$surv[ys], strata=sdata$strata[ys])

    kmplot<-kmplot+
      geom_ribbon(data=scurve.step, aes(x=time,ymin=lower, ymax=upper, fill=strata), alpha=0.2)
}

#Events
if(events) {
    kmplot<-kmplot+
      geom_point(aes(x=time, y=surv, col=strata), shape=shape)
}

#Survival stepped line
kmplot<-kmplot+geom_step(data=sdata, aes(x=time, y=surv, col=strata, linetype=strata))

#Return the ggplot2 object
kmplot
}

I wrote a previous version using for loops for each strata, but is was slower. As I'm not a programmer, I look for advice to improve the function. Maybe adding a data table with patients at risk, or a better integration in the ggplot2 framework.

我编写了一个以前的版本,用于每个层次的循环,但速度较慢。由于我不是一个程序员,我寻求建议来改进这个功能。可能在风险中添加一个数据表,或者在ggplot2框架中更好地集成。

Thanks

谢谢

2 个解决方案

#1


6  

You could try the following for something with shaded areas between CIs:

您可以尝试以下的方法,在CIs:

(I'm using the development version here as there's a flaw with the parameter alpha in the production version (doesn't shade upper rectangles correctly for non-default values). Otherwise the functions are identical).

(我在这里使用开发版本,因为在生产版本中,参数alpha存在缺陷(不为非默认值设置正确的上矩形)。否则函数是相同的。

library(devtools)
dev_mode(TRUE) # in case you don't want a permanent install
install_github("survMisc", "dardisco")
library("survMisc", lib.loc="C:/Users/c/R-dev") # or wherever you/devtools has put it
data(kidney, package="KMsurv")
p1 <- autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
               type="fill", survSize=2, palette="Pastel1",
               fillLineSize=0.1, alpha=0.4)$plot
p1 + theme_classic()
dev_mode(FALSE)

giving:

给:

用ggplot2绘制R的生存曲线。

And for a classic plot and table:

对于一个经典的情节和桌子:

autoplot(autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
                  type="CI"))

用ggplot2绘制R的生存曲线。

See ?survMisc::autoplot.survfit and ?survMisc::autoplot.tableAndPlot for more options.

看到了什么? survMisc::autoplot。survfit和? survMisc::autoplot。tableAndPlot更多选项。

#2


0  

I wanted to do the same thing and also got the error from the cartesian error. In addition I wanted to have numbers of censored in my code and numbers of events. So I wrote this little snippet. Still a bit raw but maybe useful for some.

我想做同样的事情,也得到了笛卡尔误差的误差。此外,我还想在我的代码和事件数量上进行审查。我写了这个小片段。还有些生疏,但可能对一些人有用。

ggsurvplot<-function(  
  time, 
  event, 
  event.marker=1, 
  marker,
  tabletitle="tabletitle", 
  xlab="Time(months)", 
  ylab="Disease Specific Survival", 
  ystratalabs=c("High", "Low"),
  pv=TRUE,
  legend=TRUE, 
  n.risk=TRUE,
  n.event=TRUE,
  n.cens=TRUE,
  timeby=24, 
  xmax=120,
  panel="A")

{
  require(ggplot2)
  require(survival)
  require(gridExtra)

  s.fit=survfit(Surv(time, event==event.marker)~marker)
  s.diff=survdiff(Surv(time, event=event.marker)~marker)


  #Build a data frame with all the data
  sdata<-data.frame(time=s.fit$time, 
                    surv=s.fit$surv, 
                    lower=s.fit$lower, 
                    upper=s.fit$upper,
                    n.censor=s.fit$n.censor,
                    n.event=s.fit$n.event,
                    n.risk=s.fit$n.risk)
  sdata$strata<-rep(names(s.fit$strata), s.fit$strata)
  m <- max(nchar(ystratalabs))
  if(xmax<=max(sdata$time)){
    xlims=c(0, round(xmax/timeby, digits=0)*timeby)
  }else{
    xlims=c(0, round((max(sdata$time))/timeby, digits=0)*timeby)
  }
  times <- seq(0, max(xlims), by = timeby)
  subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)
  strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs])
  time = summary(s.fit, time = times, extend = TRUE)$time


  #Buidling the plot basics
  p<-ggplot(data = sdata, aes(colour = strata, group = strata, shape=strata)) + 
                        theme_classic()+
                        geom_step(aes(x = time, y = surv), direction = "hv")+
                        scale_x_continuous(breaks=times)+ 
                        scale_y_continuous(breaks=seq(0,1,by=0.1)) +
                        geom_ribbon(aes(x = time, ymax = upper, ymin = lower, fill = strata), directions = "hv", linetype = 0,alpha = 0.10) + 
                        geom_point(data = subset(sdata, n.censor == 1), aes(x = time, y = surv), shape = 3) + 
                        labs(title=tabletitle)+
                        theme(
                          plot.margin=unit(c(1,0.5,(2.5+length(levels(factor(marker)))*2),2), "lines"),
                          legend.title=element_blank(),
                          legend.background=element_blank(),
                          legend.position=c(0.2,0.2))+
                        scale_colour_discrete(
                          breaks=c(levels(factor(sdata$strata))),
                          labels=ystratalabs) +
                        scale_shape_discrete(
                          breaks=c(levels(factor(sdata$strata))),
                          labels=ystratalabs) +
                        scale_fill_discrete(
                          breaks=c(levels(factor(sdata$strata))),
                          labels=ystratalabs) +
                        xlab(xlab)+
                        ylab(ylab)+
                        coord_cartesian(xlim = xlims, ylim=c(0,1)) 

                        #addping the p-value
                        if (pv==TRUE){
                                pval <- 1 - pchisq(s.diff$chisq, length(s.diff$n) - 1)
                                pvaltxt<-if(pval>=0.001){
                                              paste0("P = ", round(pval, digits=3))
                                          }else{
                                              "P < 0.001"
                                          }
                                          p <- p + annotate("text", x = 0.85 * max(xlims), y = 0.1, label = pvaltxt)
                        }

                        #adding information for tables
                        times <- seq(0, max(xlims), by = timeby)
                        subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)

                        risk.data<-data.frame(strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs]),
                                              time = summary(s.fit, time = times, extend = TRUE)$time[subs],
                                              n.risk = summary(s.fit,times = times,extend = TRUE)$n.risk[subs],
                                              n.cens = summary(s.fit, times=times, extend=TRUE)$n.cens[subs],
                                              n.event=summary(s.fit, times=times, extend=TRUE)$n.event[subs])
                        #adding the risk table 
                        if(n.risk==TRUE){ 
                                p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=-0.15, label="Numbers at risk")
                                for (q in 1:length(levels(factor(marker)))){          
                                    p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.15+(-0.05*q)), label=paste0(ystratalabs[q]))
                                    for(i in ((q-1)*length(times)+1):(q*length(times))){
                                          p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.15+(-0.05*q)), label=paste0(risk.data$n.risk[i]))
                                    }
                                }
                        }
                        #adding the event table 
                        if(n.event==TRUE){ 
                                p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.20+(-0.05*length(levels(factor(marker))))), label="Number of events")
                                for (q in 1:length(levels(factor(marker)))){          
                                    p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(ystratalabs[q]))
                                for(i in ((q-1)*length(times)+1):(q*length(times))){
                                    p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(risk.data$n.event[i]))
                                  }
                                }
                              }
                        #adding the cens table 
                        if(n.event==TRUE){ 
                                p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.25+(-0.05*length(levels(factor(marker)))*2)), label="Number of censored")
                                for (q in 1:length(levels(factor(marker)))){          
                                    p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(ystratalabs[q]))
                                for(i in ((q-1)*length(times)+1):(q*length(times))){
                                    p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(risk.data$n.cens[i]))
                                  }
                                }
                              }

                        #adding panel marker
                              p <- p + annotate("text", cex=10, x= -0.2*max(xlims), y=1.1, label=panel)
                        #drawing the plot with  the tables outside the margins
                              gt <- ggplot_gtable(ggplot_build(p))
                              gt$layout$clip[gt$layout$name=="panel"] <- "off"
                              grid.draw(gt)
}

#1


6  

You could try the following for something with shaded areas between CIs:

您可以尝试以下的方法,在CIs:

(I'm using the development version here as there's a flaw with the parameter alpha in the production version (doesn't shade upper rectangles correctly for non-default values). Otherwise the functions are identical).

(我在这里使用开发版本,因为在生产版本中,参数alpha存在缺陷(不为非默认值设置正确的上矩形)。否则函数是相同的。

library(devtools)
dev_mode(TRUE) # in case you don't want a permanent install
install_github("survMisc", "dardisco")
library("survMisc", lib.loc="C:/Users/c/R-dev") # or wherever you/devtools has put it
data(kidney, package="KMsurv")
p1 <- autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
               type="fill", survSize=2, palette="Pastel1",
               fillLineSize=0.1, alpha=0.4)$plot
p1 + theme_classic()
dev_mode(FALSE)

giving:

给:

用ggplot2绘制R的生存曲线。

And for a classic plot and table:

对于一个经典的情节和桌子:

autoplot(autoplot(survfit(Surv(time, delta) ~ type, data=kidney),
                  type="CI"))

用ggplot2绘制R的生存曲线。

See ?survMisc::autoplot.survfit and ?survMisc::autoplot.tableAndPlot for more options.

看到了什么? survMisc::autoplot。survfit和? survMisc::autoplot。tableAndPlot更多选项。

#2


0  

I wanted to do the same thing and also got the error from the cartesian error. In addition I wanted to have numbers of censored in my code and numbers of events. So I wrote this little snippet. Still a bit raw but maybe useful for some.

我想做同样的事情,也得到了笛卡尔误差的误差。此外,我还想在我的代码和事件数量上进行审查。我写了这个小片段。还有些生疏,但可能对一些人有用。

ggsurvplot<-function(  
  time, 
  event, 
  event.marker=1, 
  marker,
  tabletitle="tabletitle", 
  xlab="Time(months)", 
  ylab="Disease Specific Survival", 
  ystratalabs=c("High", "Low"),
  pv=TRUE,
  legend=TRUE, 
  n.risk=TRUE,
  n.event=TRUE,
  n.cens=TRUE,
  timeby=24, 
  xmax=120,
  panel="A")

{
  require(ggplot2)
  require(survival)
  require(gridExtra)

  s.fit=survfit(Surv(time, event==event.marker)~marker)
  s.diff=survdiff(Surv(time, event=event.marker)~marker)


  #Build a data frame with all the data
  sdata<-data.frame(time=s.fit$time, 
                    surv=s.fit$surv, 
                    lower=s.fit$lower, 
                    upper=s.fit$upper,
                    n.censor=s.fit$n.censor,
                    n.event=s.fit$n.event,
                    n.risk=s.fit$n.risk)
  sdata$strata<-rep(names(s.fit$strata), s.fit$strata)
  m <- max(nchar(ystratalabs))
  if(xmax<=max(sdata$time)){
    xlims=c(0, round(xmax/timeby, digits=0)*timeby)
  }else{
    xlims=c(0, round((max(sdata$time))/timeby, digits=0)*timeby)
  }
  times <- seq(0, max(xlims), by = timeby)
  subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)
  strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs])
  time = summary(s.fit, time = times, extend = TRUE)$time


  #Buidling the plot basics
  p<-ggplot(data = sdata, aes(colour = strata, group = strata, shape=strata)) + 
                        theme_classic()+
                        geom_step(aes(x = time, y = surv), direction = "hv")+
                        scale_x_continuous(breaks=times)+ 
                        scale_y_continuous(breaks=seq(0,1,by=0.1)) +
                        geom_ribbon(aes(x = time, ymax = upper, ymin = lower, fill = strata), directions = "hv", linetype = 0,alpha = 0.10) + 
                        geom_point(data = subset(sdata, n.censor == 1), aes(x = time, y = surv), shape = 3) + 
                        labs(title=tabletitle)+
                        theme(
                          plot.margin=unit(c(1,0.5,(2.5+length(levels(factor(marker)))*2),2), "lines"),
                          legend.title=element_blank(),
                          legend.background=element_blank(),
                          legend.position=c(0.2,0.2))+
                        scale_colour_discrete(
                          breaks=c(levels(factor(sdata$strata))),
                          labels=ystratalabs) +
                        scale_shape_discrete(
                          breaks=c(levels(factor(sdata$strata))),
                          labels=ystratalabs) +
                        scale_fill_discrete(
                          breaks=c(levels(factor(sdata$strata))),
                          labels=ystratalabs) +
                        xlab(xlab)+
                        ylab(ylab)+
                        coord_cartesian(xlim = xlims, ylim=c(0,1)) 

                        #addping the p-value
                        if (pv==TRUE){
                                pval <- 1 - pchisq(s.diff$chisq, length(s.diff$n) - 1)
                                pvaltxt<-if(pval>=0.001){
                                              paste0("P = ", round(pval, digits=3))
                                          }else{
                                              "P < 0.001"
                                          }
                                          p <- p + annotate("text", x = 0.85 * max(xlims), y = 0.1, label = pvaltxt)
                        }

                        #adding information for tables
                        times <- seq(0, max(xlims), by = timeby)
                        subs <- 1:length(summary(s.fit,times=times,extend = TRUE)$strata)

                        risk.data<-data.frame(strata = factor(summary(s.fit,times = times,extend = TRUE)$strata[subs]),
                                              time = summary(s.fit, time = times, extend = TRUE)$time[subs],
                                              n.risk = summary(s.fit,times = times,extend = TRUE)$n.risk[subs],
                                              n.cens = summary(s.fit, times=times, extend=TRUE)$n.cens[subs],
                                              n.event=summary(s.fit, times=times, extend=TRUE)$n.event[subs])
                        #adding the risk table 
                        if(n.risk==TRUE){ 
                                p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=-0.15, label="Numbers at risk")
                                for (q in 1:length(levels(factor(marker)))){          
                                    p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.15+(-0.05*q)), label=paste0(ystratalabs[q]))
                                    for(i in ((q-1)*length(times)+1):(q*length(times))){
                                          p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.15+(-0.05*q)), label=paste0(risk.data$n.risk[i]))
                                    }
                                }
                        }
                        #adding the event table 
                        if(n.event==TRUE){ 
                                p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.20+(-0.05*length(levels(factor(marker))))), label="Number of events")
                                for (q in 1:length(levels(factor(marker)))){          
                                    p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(ystratalabs[q]))
                                for(i in ((q-1)*length(times)+1):(q*length(times))){
                                    p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.20+(-0.05*length(levels(factor(marker))))+(-0.05*q)), label=paste0(risk.data$n.event[i]))
                                  }
                                }
                              }
                        #adding the cens table 
                        if(n.event==TRUE){ 
                                p<- p + annotate("text", cex=3, x=0.5*max(xlims), y=(-0.25+(-0.05*length(levels(factor(marker)))*2)), label="Number of censored")
                                for (q in 1:length(levels(factor(marker)))){          
                                    p<- p + annotate("text", cex=3, x=-0.15*max(xlims),y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(ystratalabs[q]))
                                for(i in ((q-1)*length(times)+1):(q*length(times))){
                                    p <- p + annotate("text", cex=3, x=risk.data$time[i], y=(-0.25+(-0.05*length(levels(factor(marker)))*2)+(-0.05*q)), label=paste0(risk.data$n.cens[i]))
                                  }
                                }
                              }

                        #adding panel marker
                              p <- p + annotate("text", cex=10, x= -0.2*max(xlims), y=1.1, label=panel)
                        #drawing the plot with  the tables outside the margins
                              gt <- ggplot_gtable(ggplot_build(p))
                              gt$layout$clip[gt$layout$name=="panel"] <- "off"
                              grid.draw(gt)
}