library(ggplot2)
load("/hpf/largeprojects/MICe/jason/MISS2017/intro-stats/irData.RData")
head(irData)
## Sex Dose dentategyrus
## 1 M 0 7.139792
## 2 M 3 6.553313
## 3 M 7 6.307816
## 4 F 0 7.339992
## 5 F 3 6.530249
## 6 F 5 6.319855
This is from the initial radiation study described by Elizabeth this morning, with more information to be found here:
Gazdzinski LM, Cormier K, Lu FG, Lerch JP, Wong CS, Nieman BJ. Radiation-induced alterations in mouse brain development characterized by magnetic resonance imaging. Int J Radiat Oncol Biol Phys. 2012 Dec 1;84(5):e631–8.
Based on that data, we will look at the dentate gyrus of the hippocampus to illustrate some issues in statistical modelling.
First, a few ways of showing the data:
qplot(Dose, dentategyrus, data=irData, geom="point") +
geom_smooth(method="lm") +
scale_x_continuous(breaks=unique(irData$Dose)) +
ylab(expression(Volume ~ (mm^3))) +
ggtitle("Volume of Dentate Gyrus", subtitle = "Plotted against Irradiation Dose") +
theme_classic()
ggplot(irData, aes(y=dentategyrus, x=as.factor(Dose))) +
geom_boxplot() +
xlab("Radiation Dose") +
ggtitle("Volume of Dentate Gyrus") +
ylab(expression(Volume ~ (mm^3))) +
theme_classic()
library(ggjoy)
ggplot(irData, aes(x=dentategyrus, y=as.factor(Dose))) +
ylab("Radiation Dose") +
xlab(expression(Volume ~ (mm^3))) +
ggtitle("Volume of Dentate Gyrus") +
geom_joy()
## Picking joint bandwidth of 0.151
Let’s start simple, and reduce it to just a dose of 0 (i.e. sham irradiation) and a dose of 7.
irSimple <- subset(irData, Dose %in% c(0,7))
And look at what the data shows
table(irSimple$Dose, irSimple$Sex)
##
## F M
## 0 6 4
## 7 5 5
A simple linear model
summary(lm(dentategyrus ~ Dose, irSimple))
##
## Call:
## lm(formula = dentategyrus ~ Dose, data = irSimple)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.66907 -0.21547 0.05738 0.20480 0.33295
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.00704 0.08642 81.082 < 2e-16 ***
## Dose -0.13822 0.01746 -7.917 2.84e-07 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.2733 on 18 degrees of freedom
## Multiple R-squared: 0.7769, Adjusted R-squared: 0.7645
## F-statistic: 62.68 on 1 and 18 DF, p-value: 2.842e-07
This is in units of dose, i.e. for every Gy you decrease volume by -0.14.
So, highly significant. Let’s explore
Significance by simulation:
m <- mean(irSimple$dentategyrus[irSimple$Dose==0])
s <- sd(irSimple$dentategyrus[irSimple$Dose==0])
nSims <- 1000
simDraws <- vector(length = nSims)
nSamples <- 10
doseEffect <- -0.13822
for (i in 1:nSims) {
simDraws[i] <- mean( rnorm(nSamples, mean=m, sd=s))
}
hist(simDraws)
abline(v=m+doseEffect, col="red")
mean(simDraws < (m+doseEffect))
## [1] 0.025
mean(simDraws < (m+(doseEffect*7)))
## [1] 0
By permutations
library(broom)
nSims <- 1000
nSamples <- nrow(irSimple)
doseEffects <- vector(length=nSims)
for (i in 1:nSims) {
irSimple$doseDraw <- sample(irSimple$Dose, size = nSamples, replace = FALSE)
l <- lm(dentategyrus ~ doseDraw, irSimple)
doseEffects[i] <- tidy(l)[2, "statistic"]
}
hist(doseEffects)
mean(doseEffects < -7.917)
## [1] 0
Let’s look at a more subtle effect: sex in mice with sham irradiation:
irSex <- subset(irData, Dose == 0)
table(irSex$Sex)
##
## F M
## 6 4
summary(lm(dentategyrus ~ Sex, irSex))
##
## Call:
## lm(formula = dentategyrus ~ Sex, data = irSex)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.25432 -0.18537 0.00292 0.14126 0.34641
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 6.99358 0.08858 78.95 7.38e-13 ***
## SexM 0.03365 0.14006 0.24 0.816
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.217 on 8 degrees of freedom
## Multiple R-squared: 0.007164, Adjusted R-squared: -0.1169
## F-statistic: 0.05772 on 1 and 8 DF, p-value: 0.8162
And let’s redo the permutation test
library(broom)
nSims <- 1000
nSamples <- nrow(irSex)
sexEffects <- vector(length=nSims)
for (i in 1:nSims) {
irSex$sexDraw <- sample(irSex$Sex, size = nSamples, replace = FALSE)
l <- lm(dentategyrus ~ sexDraw, irSex)
sexEffects[i] <- tidy(l)[2, "statistic"]
}
hist(sexEffects)
abline(v=0.03365, col="red")
mean(sexEffects > 0.03365)
## [1] 0.483
mean(sexEffects > 0.03365) * 2
## [1] 0.966
Back to the whole data
summary(lm(dentategyrus ~ Dose, irData))
##
## Call:
## lm(formula = dentategyrus ~ Dose, data = irData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.72028 -0.19763 -0.00194 0.19083 0.70942
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.03794 0.08560 82.219 < 2e-16 ***
## Dose -0.13532 0.01892 -7.151 1.33e-08 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3098 on 39 degrees of freedom
## Multiple R-squared: 0.5673, Adjusted R-squared: 0.5563
## F-statistic: 51.14 on 1 and 39 DF, p-value: 1.329e-08
Let’s add sex
qplot(Dose, dentategyrus, data=irData, geom="point", colour=Sex) +
geom_smooth(method="lm") +
scale_x_continuous(breaks=unique(irData$Dose)) +
ylab(expression(Volume ~ (mm^3))) +
ggtitle("Volume of Dentate Gyrus", subtitle = "Plotted against Irradiation Dose") +
theme_classic()
summary(lm(dentategyrus ~ Dose * Sex, irData))
##
## Call:
## lm(formula = dentategyrus ~ Dose * Sex, data = irData)
##
## Residuals:
## Min 1Q Median 3Q Max
## -0.64933 -0.16230 0.03966 0.18228 0.62998
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 7.02235 0.11188 62.768 < 2e-16 ***
## Dose -0.14752 0.02548 -5.789 1.21e-06 ***
## SexM 0.04128 0.17350 0.238 0.813
## Dose:SexM 0.02295 0.03814 0.602 0.551
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3095 on 37 degrees of freedom
## Multiple R-squared: 0.5905, Adjusted R-squared: 0.5573
## F-statistic: 17.78 on 3 and 37 DF, p-value: 2.601e-07