Middleton Lab bio photo

Middleton Lab

University of Missouri - Integrative Anatomy

Email Github

Software

Overview

I have written a few R packages and collected miscellaneous code and data that are of (potential) use. Sources for most of these can also be found on my github page. The links at the right take you to installation instructions, demo code and links to the full HTML manual for each.

Most of these packages can be installed directly via the install_github() function in the devtools package:

require(devtools)
install_github("package_name", "kmiddleton")

Where package_name is the package you want to install. Note that if you are using Windows, you will first need to install Rtools.

abd

The abd package (co-developed with Randall Pruim at Calvin College) contains data sets and example code for the biological statistics textbook The Analysis of Biological Data (Whitlock and Schluter, 2009). The package is available at CRAN (http://cran.r-project.org/web/packages/abd/), or the most recent version can be installed within R using the command:

require(devtools)
install_github("abd", "kmiddleton")

Examples

trellis.par.set(theme=col.abd())   # set color theme
abdData(3)                         # look for data sets in chapter 3
abdData('Finch')                   # look for data sets with 'finch' in name
histogram(~ count, DesertBirds,
          xlab = "Abundance")

Desert bird abundances

xyplot(relative.frequency ~ hemoglobin | group, Hemoglobin,
       type ='h', 
       lwd = 4, 
       layout = c(1, 4))

Hemoglobin

jnt

The jnt package performs the Johnson-Neyman Technique, which can be used to determine regions of insignificant slope differences in ANCOVA analyses.

The package can also be installed directly within R, using the command:

require(devtools)
install_github("jnt", "kmiddleton")
library("jnt")
demo(jnt)

Johnson-Neyman Plot

Zar5

The Zar5 package contains data sets and example code for Biostatistical Analysis (5th Ed.) by Jerrold Zar. The package can also be installed directly within R, using the command:

require(devtools)
install_github("zar5", "kmiddleton")

Examples are then organized by chapter and number:

library("Zar5")
demo(ex12.01)

kmmisc

I also keep a github repository of R examples https://github.com/kmiddleton/rexamples. Some of these functions have been rolled into an R package kmmisc (also available at github: https://github.com/kmiddleton/kmmisc). Installation directions for the latter are available online.

Examples using RStudio’s manipulate() function

RStudio is an excellent integrated development environment for the R statistical language. In addition to providing a friendly environment for R, RStudio has the very outstanding manipulate() function, which can be used, among other things, to change plots dynamically.

Shape of the Binomial Distribution

library(manipulate)

# lattice version
library(lattice)
binom <- function(p = 0.5, n = 18) {
  barchart( dbinom(0:n, n, p) ~ as.factor(0:n), 
                   horizontal = FALSE, 
                   ylab = "Probablility", 
                   xlab = "X Successes",
                   scales = list(axs = "i"))
}

# ggplot2 version
library(ggplot2)
binom <- function(p = 0.5, n = 18) {
  dd <- data.frame(Successes = as.factor(0:n), 
                   Probability = dbinom(0:n, n, p))
  ggplot(dd, aes(Successes, Probability)) + 
    geom_bar(stat = "identity")
}

manipulate( binom(n = n), 
            n = slider(2, 50) )
manipulate( binom(p = p), 
            p = slider(0, 1) )
manipulate( binom(p = p, n = n), 
            p = slider(0, 1), 
            n = slider(2, 50) )

Shape of the Poisson Distribution

library(manipulate)

# lattice version
library(lattice)
poisson_dist <- function (lambda, nmax){
  barplot(dpois(0:nmax, lambda = lambda),
          names.arg = 0:nmax,
          ylim = c(0, 0.4), col = "red",
          xlab = "X Successes", ylab = "Probability",
          main = bquote(mu == .(lambda)), 
          xaxs = "i", yaxs = "i",
          cex.axis = 1.5, cex.lab = 1.5,
          cex.main = 2,
          mgp = c(2.5, 0.8, 0))
}

# ggplot2 version
poisson_dist <- function (lambda, nmax){
  dd <- data.frame(Successes = as.factor(0:nmax),
                   Probability = dpois(0:nmax, lambda = lambda))
  ggplot(dd, aes(Successes, Probability)) + geom_bar(stat="identity")
}

manipulate( poisson_dist(lambda, nmax), 
            lambda = slider(0, 30, step = 1),
            nmax = slider(5, 30, step = 1))

Shape of the χ2 Distribution

library(manipulate)

chisq_dist <- function (df) {
  crit <- qchisq(0.95, df)
  curve(dchisq(x, df), from = 0, to = 40,
        xlab = expression(chi^2), ylab = "Probability",
        main = paste("df =", df, "  Critical value =",
          format(crit, digits = 3)), 
        lwd = 2,
        xaxs = "i", yaxs = "i",
        cex.axis = 1.5, cex.lab = 1.5, cex.main = 1.5,
        mgp = c(2.5, 0.8, 0))
  x <- seq(crit, 40, length = 100)
  y <- dchisq(x, df)
  polygon(c(x[1], x, x[100]), c(0, y, 0),
          col = "red", border = NA)
}
manipulate( chisq_dist(df), df = slider(1, 20) )

Shape of the Normal Distribution

library(manipulate)

normal_dist <- function(mu, s){
  x <- seq(mu - 3 * s, mu + 3 * s, by = 0.1)
  curve(dnorm(x, mu, sd = s),
    n = 201,
    col = "red", lwd = 2,
    xlab = "Y", ylab = "Probability",
    xaxs = "i", yaxs = "i",
    xlim = c(-20, 20), ylim = c(0, 0.45),
    cex.axis = 1.5, cex.lab = 1.5, cex.main = 2,
    mgp = c(2.5, 0.8, 0))
    text(-18, 0.4, bquote(mu == .(mu)), adj = 0)
    text(-18, 0.35, bquote(sigma == .(s)), adj = 0)
}
manipulate(normal_dist(mu, s), mu = slider(-10, 10), s = slider(0.01, 10))

Normal Approximation of the Binomial Distribution

library(manipulate)

normal_approx <- function (p){
  i <- 30
  plot(dbinom(1:i, i, p), type = "h",
    lwd = 5, cex.main = 2, cex.axis = 1.4, cex.lab = 1.4,
    ylab = "Probability", xlab = paste("p =", p),
    ylim = c(0, 0.20), main = paste("n =", i))
  ybar <- i * p
  ysd <- sqrt(i * p * (1-p))
  x <- seq(0, 60, length = 200)
  y <- dnorm(x, mean = ybar, sd = ysd)
  lines(x, y, col = "red", lwd = 3)
}
manipulate( normal_approx(p), p = slider(0.1, 0.9) )

Relationship between the Z and t-distributions

library(manipulate)

Z_from_t <- function (n){
   df <- n - 1
    curve(dnorm(x), from = -4, to = 4,
      lwd = 8, mgp = c(2.5, 0.75, 0),
      cex.main = 2, cex.axis = 1.5, cex.lab = 1.5,
      ylab = "Probability",
      xlab = paste("df =", df),
      main = paste("n =", n))
    legend("topright", c("Z", "t"), col = c("black", "red"), lwd = 2)
    x <- seq(-4, 4, length = 100)
    y <- dt(x, df = df)
    lines(x, y, col = "red", lwd = 6)
  }
manipulate( Z_from_t(n), n = slider(2, 50) )

Interactive Sum-of-Squares Plot

library(manipulate)

ssPlot <- function(X, Y, b){
  n <- length(X)
  SSy <- sum((Y - (X * b + (mean(Y) - b * mean(X))))^2)
  par(cex.lab = 2, cex.main = 2, mgp = c(2.5, 0.5, 0))
  plot(X, Y, type = "n",
      main = paste("b =", sprintf("%.2f", b), "\nSS =", 
                   sprintf("%.2f", SSy)))
  points(mean(X), mean(Y), pch = 1, cex = 4, col = "blue")
  abline(a = mean(Y) - b * mean(X), b = b, col = "blue")
  for (i in 1:n){
   segments(X[i], Y[i], X[i], X[i] * b + (mean(Y) - b * mean(X)), 
            col = "red")
  }
  points(X, Y, pch = 16)
}
n <- 30
X <- rnorm(n, mean = 10)
Y <- 2.4 * X + rnorm(n, mean = 1)
manipulate(ssPlot(X, Y, b), b = slider(-10, 10, step = 0.1))

binning

The binning package is used to analyze mouse wheel running data.

The package can also be installed directly within R, using the command:

require(devtools)
install_github("binning", "kmiddleton")

Earthquake Data

Update: Earthquake data collection ended on 15 June 2012. There are over 58,000 earthquakes in the files below.

Since late March, 2010 (a few days before the Baja California Earthquake), I have been collecting data from the USGS website on earthquakes in California and Nevada. This provides a handy (if increasingly large) data set for visualization and geolocation. Every day at noon, a shell script executes an R script that downloads the latest data, extracts the relevant information (using the XML package), appends it to the old data, and uploads it to the Department of Biology web server.

Download the latest earthquakes file: