Load the data

library(ggplot2)
load("/hpf/largeprojects/MICe/jason/MISS2017/intro-stats/irData.RData")
head(irData)
##   Sex Dose dentategyrus
## 1   M    0     7.139792
## 2   M    3     6.553313
## 3   M    7     6.307816
## 4   F    0     7.339992
## 5   F    3     6.530249
## 6   F    5     6.319855

This is from the initial radiation study described by Elizabeth this morning, with more information to be found here:

Gazdzinski LM, Cormier K, Lu FG, Lerch JP, Wong CS, Nieman BJ. Radiation-induced alterations in mouse brain development characterized by magnetic resonance imaging. Int J Radiat Oncol Biol Phys. 2012 Dec 1;84(5):e631–8.

Based on that data, we will look at the dentate gyrus of the hippocampus to illustrate some issues in statistical modelling.

Plotting

First, a few ways of showing the data:

qplot(Dose, dentategyrus, data=irData, geom="point") + 
  geom_smooth(method="lm") +
  scale_x_continuous(breaks=unique(irData$Dose)) + 
  ylab(expression(Volume ~ (mm^3))) +
  ggtitle("Volume of Dentate Gyrus", subtitle = "Plotted against Irradiation Dose") + 
  theme_classic() 

ggplot(irData, aes(y=dentategyrus, x=as.factor(Dose))) + 
  geom_boxplot() + 
  xlab("Radiation Dose") + 
  ggtitle("Volume of Dentate Gyrus") + 
  ylab(expression(Volume ~ (mm^3))) +
  theme_classic()

library(ggjoy)
ggplot(irData, aes(x=dentategyrus, y=as.factor(Dose))) + 
  ylab("Radiation Dose") +
  xlab(expression(Volume ~ (mm^3))) +
  ggtitle("Volume of Dentate Gyrus") +
  geom_joy()
## Picking joint bandwidth of 0.151

Modelling

Let’s start simple, and reduce it to just a dose of 0 (i.e. sham irradiation) and a dose of 7.

irSimple <- subset(irData, Dose %in% c(0,7))

And look at what the data shows

table(irSimple$Dose, irSimple$Sex)
##    
##     F M
##   0 6 4
##   7 5 5

A simple linear model

summary(lm(dentategyrus ~ Dose, irSimple))
## 
## Call:
## lm(formula = dentategyrus ~ Dose, data = irSimple)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.66907 -0.21547  0.05738  0.20480  0.33295 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.00704    0.08642  81.082  < 2e-16 ***
## Dose        -0.13822    0.01746  -7.917 2.84e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.2733 on 18 degrees of freedom
## Multiple R-squared:  0.7769, Adjusted R-squared:  0.7645 
## F-statistic: 62.68 on 1 and 18 DF,  p-value: 2.842e-07

This is in units of dose, i.e. for every Gy you decrease volume by -0.14.

So, highly significant. Let’s explore

Significance by simulation:

m <- mean(irSimple$dentategyrus[irSimple$Dose==0])
s <- sd(irSimple$dentategyrus[irSimple$Dose==0])

nSims <- 1000
simDraws <- vector(length = nSims)
nSamples <- 10
doseEffect <- -0.13822


for (i in 1:nSims) {
  simDraws[i] <- mean( rnorm(nSamples, mean=m, sd=s))
}
hist(simDraws)
abline(v=m+doseEffect, col="red")

mean(simDraws < (m+doseEffect))
## [1] 0.025
mean(simDraws < (m+(doseEffect*7)))
## [1] 0

By permutations

library(broom)
nSims <- 1000
nSamples <- nrow(irSimple)
doseEffects <- vector(length=nSims)

for (i in 1:nSims) {
  irSimple$doseDraw <- sample(irSimple$Dose, size = nSamples, replace = FALSE)
  l <- lm(dentategyrus ~ doseDraw, irSimple)
  doseEffects[i] <- tidy(l)[2, "statistic"]
}
hist(doseEffects)

mean(doseEffects < -7.917)
## [1] 0

Let’s look at a more subtle effect: sex in mice with sham irradiation:

irSex <- subset(irData, Dose == 0)
table(irSex$Sex)
## 
## F M 
## 6 4
summary(lm(dentategyrus ~ Sex, irSex))
## 
## Call:
## lm(formula = dentategyrus ~ Sex, data = irSex)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.25432 -0.18537  0.00292  0.14126  0.34641 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  6.99358    0.08858   78.95 7.38e-13 ***
## SexM         0.03365    0.14006    0.24    0.816    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.217 on 8 degrees of freedom
## Multiple R-squared:  0.007164,   Adjusted R-squared:  -0.1169 
## F-statistic: 0.05772 on 1 and 8 DF,  p-value: 0.8162

And let’s redo the permutation test

library(broom)
nSims <- 1000
nSamples <- nrow(irSex)
sexEffects <- vector(length=nSims)

for (i in 1:nSims) {
  irSex$sexDraw <- sample(irSex$Sex, size = nSamples, replace = FALSE)
  l <- lm(dentategyrus ~ sexDraw, irSex)
  sexEffects[i] <- tidy(l)[2, "statistic"]
}
hist(sexEffects)
abline(v=0.03365, col="red")

mean(sexEffects > 0.03365)
## [1] 0.483
mean(sexEffects > 0.03365) * 2
## [1] 0.966

More complex models

Back to the whole data

summary(lm(dentategyrus ~ Dose, irData))
## 
## Call:
## lm(formula = dentategyrus ~ Dose, data = irData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.72028 -0.19763 -0.00194  0.19083  0.70942 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.03794    0.08560  82.219  < 2e-16 ***
## Dose        -0.13532    0.01892  -7.151 1.33e-08 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3098 on 39 degrees of freedom
## Multiple R-squared:  0.5673, Adjusted R-squared:  0.5563 
## F-statistic: 51.14 on 1 and 39 DF,  p-value: 1.329e-08

Let’s add sex

qplot(Dose, dentategyrus, data=irData, geom="point", colour=Sex) + 
  geom_smooth(method="lm") +
  scale_x_continuous(breaks=unique(irData$Dose)) + 
  ylab(expression(Volume ~ (mm^3))) +
  ggtitle("Volume of Dentate Gyrus", subtitle = "Plotted against Irradiation Dose") + 
  theme_classic()

summary(lm(dentategyrus ~ Dose * Sex, irData))
## 
## Call:
## lm(formula = dentategyrus ~ Dose * Sex, data = irData)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.64933 -0.16230  0.03966  0.18228  0.62998 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  7.02235    0.11188  62.768  < 2e-16 ***
## Dose        -0.14752    0.02548  -5.789 1.21e-06 ***
## SexM         0.04128    0.17350   0.238    0.813    
## Dose:SexM    0.02295    0.03814   0.602    0.551    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.3095 on 37 degrees of freedom
## Multiple R-squared:  0.5905, Adjusted R-squared:  0.5573 
## F-statistic: 17.78 on 3 and 37 DF,  p-value: 2.601e-07