Bar plot with error bars in R

Here’s a simple way to make a bar plot with error bars three ways: standard deviation, standard error of the mean, and a 95% confidence interval. The key step is to precalculate the statistics for ggplot2.

n.per.group<-10
alpha<-0.05 # for a (1.00-alpha)=95% confidence interval

# Simulate raw data for an experiment or observational study.
data.raw <- data.frame(
	treatment=rep(c('A','B'), each=n.per.group),
	value=c(rnorm(n.per.group, 2), rnorm(n.per.group, 3))	
	)

# This data frame calculates statistics for each treatment.
data.summary <- data.frame(
	treatment=levels(data.raw$treatment),
	mean=tapply(data.raw$value, data.raw$treatment, mean),
	n=tapply(data.raw$value, data.raw$treatment, length),
	sd=tapply(data.raw$value, data.raw$treatment, sd)
	)

# Precalculate standard error of the mean (SEM)
data.summary$sem <- data.summary$sd/sqrt(data.summary$n)

# Precalculate margin of error for confidence interval
data.summary$me <- qt(1-alpha/2, df=data.summary$n)*data.summary$sem

# Load ggplot2 library
require(ggplot2)

# Use ggplot to draw the bar plot using the precalculated 95% CI.
png('barplot-ci.png') # Write to PNG
ggplot(data.summary, aes(x = treatment, y = mean)) +  
  geom_bar(position = position_dodge(), stat="identity", fill="blue") + 
  geom_errorbar(aes(ymin=mean-me, ymax=mean+me)) +
  ggtitle("Bar plot with 95% confidence intervals") + # plot title
  theme_bw() + # remove grey background (because Tufte said so)
  theme(panel.grid.major = element_blank()) # remove x and y major grid lines (because Tufte said so)
dev.off() # Close PNG

# Plot one standard error (standard error of the mean/SEM)
png('barplot-sem.png')
ggplot(data.summary, aes(x = treatment, y = mean)) +  
  geom_bar(position = position_dodge(), stat="identity", fill="blue") + 
  geom_errorbar(aes(ymin=mean-sem, ymax=mean+sem)) +
  ggtitle("Bar plot with standard error as error bars") + 
  theme_bw() +
  theme(panel.grid.major = element_blank())
dev.off()


# Plot one standard deviation
png('barplot-sd.png')
ggplot(data.summary, aes(x = treatment, y = mean)) +  
  geom_bar(position = position_dodge(), stat="identity", fill="blue") + 
  geom_errorbar(aes(ymin=mean-sd, ymax=mean+sd)) +
  ggtitle("Bar plot with standard deviation as error bars") + 
  theme_bw() + 
  theme(panel.grid.major = element_blank())
dev.off()

The plots:

barplot-ci

barplot-sem

barplot-sd

Tested with R 2.15.2, R 3.0.2, and ggplot2 0.9.3.1. Recently ggplot2 has gone under some changes, and this code won’t work in earlier versions of ggplot2.

About these ads

6 thoughts on “Bar plot with error bars in R

  1. I guess you intended to use

    data.summary$me <- qt(1- alpha /2, df=data.summary$n)*data.summary$sem

    in line 22, right?

    You created the alpha paramenter but didn't use it in the rest of the code

  2. I have an alternative if you want to have more options. Some comments are in spanish, but the principal idea is english. Enjoy!

    if(   !("plotrix" %in% installed.packages()) ){
      install.packages("plotrix")
    }
    library(plotrix)
    
    barplotSD <- function(x , factor, Xlab = '', Ylab=''){
      wmeans<-by(x,factor,mean)
      wsd<-by(x,factor,sd)
      plotCI(barplot(wmeans,col='gray',ylim=c(0,max(wmeans+wsd)), xlab=Xlab , ylab= Ylab),wmeans,wsd,add=TRUE)
      
    }
    
    barplotSD2 <- function(x , factor, Xlab = '', Ylab='' ,  YLIM=NA, COL='steelblue3', cmean = F, MoreSpace= 0,
                           SEP =F, BORDER=T, PLOTAXES= T, IZDA = 1,DCHA=1, LWD = 2){
      # cmean is a new parameter and distinguis two cases:
      # case = T the interval is for mean sd = sd_var / sqrt(n)
      # case = F the interval is for the variable sd = sd_var. It is the normal case, the other is for compare with plotmeanCH
      
      # MoreSpace is for when the graphic is to small and adds a 10^MoreSpace to the picture
      # SEP If we want separation then we can use
        # IZDA more space in x axis in left part
        # DCHA more space in x axis in right part
      # BORDER If we want figure with border or not
      # PLOTAxES  If we want figure with axis or not
      
      wmeans <- by(x,factor,FUN= function(x){
        mean(x, na.rm=T)
        })
      wsd <- by(x,factor,FUN= function(x){
        sd(x, na.rm=T)
      })
      
      if(cmean){
        n2   <- rep(1,length(x))
        n <-
          by(n2,factor,sum)
        n <- 1/sqrt(n)
        wsd <- wsd*n  
      }
      liwB 0
      uiwB <-                        # Pongo las barras del error dependendiendo si es negativo o positivo el valor
        sign(wmeans) < 0
      liw <- rep(NA, length(wmeans))
      uiw <- rep(NA, length(wmeans))
      liw[liwB] <- 0
      liw[!liwB] <- wsd[!liwB]
      uiw[uiwB] <- 0
      uiw[!uiwB] 0) & any(sign(wmeans)<0) ){
          YLIM 0)){
          YLIM <- c(
            0
            ,  
            max(abs(wmeans) + abs(wsd)) + 0.05 * max(abs(wsd) + abs(wmeans))
          )  
        }
        if(all(sign(wmeans)<0)){
          YLIM <- c(
            -max(abs(wmeans) + abs(wsd)) - 0.05 * max( abs(wsd) + abs(wmeans) )
            ,  
            0
          )  
         }
      }
    # 
    
    #   # Now rounds
    #   YLIM[YLIM<0] <- floor(YLIM[YLIM0] 0])
    #   if(MoreSpace != 0){                     # For manual reside y axis
    #     YLIM[YLIM<0] <- floor(YLIM[YLIM0] 0]) +10 ^ MoreSpace
    #   }else{
    #     if(YLIM[2] >= 0 & max(abs(YLIM)) > 2){                      # Automatic reside y axis.
    #       MoreSpace <- 
    #         nchar(as.character(YLIM[2])) -1
    #       
    #       if(MoreSpace != 1  ){
    #         YLIM[YLIM<0] <- floor(YLIM[YLIM0] 0]) +10 ^ (MoreSpace-1)
    #       }
    #       
    #     }else{
    #       if( max(abs(YLIM)) > 2){
    #         MoreSpace <- nchar(as.character(YLIM[2])) -2
    #         
    #         if(MoreSpace != 1){
    #           YLIM[YLIM<0] <- floor(YLIM[YLIM0] 0]) +10 ^ (MoreSpace-1)
    #         }
    #       
    #       }
    #       
    #     }
    #   }
      if(SEP == T){
        plotCI(barplot(wmeans,ylim=YLIM, xlim= c(-0.5*IZDA, length(wmeans)+0.5*DCHA), col= COL, xlab=Xlab , ylab= Ylab,border=BORDER, axes= PLOTAXES),wmeans,wsd,add=TRUE,pch =NA, liw=liw , uiw=uiw , lwd= LWD)  
      }else{
        plotCI(barplot(wmeans,ylim=YLIM, col= COL, xlab=Xlab , ylab= Ylab,border=BORDER, , axes= PLOTAXES),wmeans,wsd,add=TRUE,pch =NA, liw=liw , uiw=uiw )  
        print("IZDA and DCHA parameters do not be used")
      }
      
      
    }
    
    # ############################
    # library(plotrix)
    # data(warpbreaks)
    
    # attach(warpbreaks)
    # 
    # wmeans<-by(breaks,tension,mean)
    # wsd<-by(breaks,tension,sd)
    # ## note that barplot() returns the midpoints of the bars, which plotCI
    # ##  uses as x-coordinates
    # plotCI(barplot(wmeans,col='gray',ylim=c(0,max(wmeans+wsd))),wmeans,wsd,add=TRUE)
    # 
    # ############################
    
    barplotSD2(x= mtcars$mpg, factor= mtcars$cyl, BORDER= F)
    
  3. Those of us who use
    options(stringsAsFactors = FALSE)

    need to explicitly make A and B factors
    treatment=rep(factor(c(‘A’,’B’)), each=n.per.group)
    - Aaron

  4. If you really want to do “dynamite plots” (I like this, LOL) why not simply use barplot() with arrows()?
    e.g.

    mymean <- tapply(data.raw$value, data.raw$treatment, mean)
    mysd <- tapply(data.raw$value, data.raw$treatment, sd)
    bp <- barplot(mymean, ylim = c(0,max(mymean+mysd)+.1), main="Dynamite Plots with SD")
    arrows(bp,mymean,bp,mymean+mysd, angle = 90)
    arrows(bp,mymean,bp,mymean-mysd, angle = 90)

    -didi

Leave a Reply

Fill in your details below or click an icon to log in:

WordPress.com Logo

You are commenting using your WordPress.com account. Log Out / Change )

Twitter picture

You are commenting using your Twitter account. Log Out / Change )

Facebook photo

You are commenting using your Facebook account. Log Out / Change )

Google+ photo

You are commenting using your Google+ account. Log Out / Change )

Connecting to %s