Analysing 100m-dash data in London Olympic game

Reading data

  • From oucomes in London Olympic game
  • 1st round 100m-dash race
  • Result (run time in second)
  • Reaction (reaction time in second)
  • Cate and Cate 2 (see below)

Note: “The reaction time is the time takes for the runner to respond to the start signal and begin leaving the starting blocks”. If an athlete left the blocks sooner than 0.1 second after the start signal, he/she was deemed to have false-started.

Question: Difference among the groups?

Cate
- Finalist
- Semi finalist (Sfinalist)
- Did not qualified in the first round (DNQ)

Cate2
- Qualified in the first round (Q)
- Did not qualified in the first round (DNQ)

Data <- read.csv("FPA_Lec8_ReactionTime.csv", header=T)
head(Data[,-3],10)
##    Lane                  Name Reaction Result      Cate Cate2
## 1     6             Tyson Gay    0.147  10.08  Finalist     Q
## 2     5      Richard Thompson    0.151  10.14  Finalist     Q
## 3     7          Gerald Phiri    0.147  10.16 Sfinalist     Q
## 4     3   Jaysuma Saidy Ndure    0.166  10.28       DNQ   DNQ
## 5     4 Angel David Rodriguez    0.168  10.34       DNQ   DNQ
## 6     2         Jurgen Themen    0.169  10.53       DNQ   DNQ
## 7     5        Isidro Montoya    0.165  10.54       DNQ   DNQ
## 8     1       Yeo Foo Ee Gary    0.144  10.69       DNQ   DNQ
## 9     4         Justin Gatlin    0.200   9.97  Finalist     Q
## 10    6        Derrick Atkins    0.179  10.22 Sfinalist     Q
tail(Data[,-3],10)
##    Lane            Name Reaction Result      Cate Cate2
## 45    7   Jeremy Bascom    0.135  10.31       DNQ   DNQ
## 46    3      Marek Niit    0.158  10.40       DNQ   DNQ
## 47    1    Azneem Ahmed    0.157  10.84       DNQ   DNQ
## 48    8  Dwain Chambers    0.157  10.02 Sfinalist     Q
## 49    5    Jimmy Vicaut    0.196  10.11 Sfinalist     Q
## 50    4  Keston Bledman    0.195  10.13 Sfinalist     Q
## 51    6   Warren Fraser    0.171  10.27       DNQ   DNQ
## 52    7    Miguel L?pez    0.145  10.31       DNQ   DNQ
## 53    1  G?rard Kob?an?    0.186  10.48       DNQ   DNQ
## 54    2 Fabrice Coiffic    0.165  10.59       DNQ   DNQ
attach(Data)

Graphical presentation of basic data

par(mfrow=c(2,2))
#Plot1
plot(Lane, Result)
#Plot2
boxplot(Result~Lane, xlab="Lane")
points(Lane, Result, pch=20,  col=Cate)
#Plot3
plot(Result, Reaction, pch=20, col=Cate)

Graphical presentation with “ggplot2”

library(ggplot2)
library(gridExtra)
plot1 <- qplot(as.character(Lane), Result, xlab="Lane")
plot2 <- qplot(as.character(Lane), Result, geom="boxplot",xlab="Lane")
plot3 <- qplot(Result, Reaction, col=Cate)
plot4 <- qplot(Reaction, facets=Cate~., fill=Cate, data=Data)
grid.arrange(plot1, plot2, plot3, plot4, nrow=2, ncol=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.


T-test (two-side, equal variance)

t.test(Reaction[Cate=="Finalist"],Reaction[Cate=="Sfinalist"], var.equal=T)
## 
##  Two Sample t-test
## 
## data:  Reaction[Cate == "Finalist"] and Reaction[Cate == "Sfinalist"]
## t = 0.84523, df = 22, p-value = 0.4071
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.008721669  0.020721669
## sample estimates:
## mean of x mean of y 
##   0.17025   0.16425
t.test(Reaction[Cate=="Finalist"],Reaction[Cate=="DNQ"], var.equal=T)
## 
##  Two Sample t-test
## 
## data:  Reaction[Cate == "Finalist"] and Reaction[Cate == "DNQ"]
## t = 1.5662, df = 36, p-value = 0.1261
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.002413515  0.018780181
## sample estimates:
## mean of x mean of y 
## 0.1702500 0.1620667
t.test(Reaction[Cate=="Sfinalist"],Reaction[Cate=="DNQ"], var.equal=T)
## 
##  Two Sample t-test
## 
## data:  Reaction[Cate == "Sfinalist"] and Reaction[Cate == "DNQ"]
## t = 0.51585, df = 44, p-value = 0.6085
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.006346699  0.010713365
## sample estimates:
## mean of x mean of y 
## 0.1642500 0.1620667

T-test (two-side, unequal variance = Welch’s test)

t.test(Reaction[Cate=="Finalist"],Reaction[Cate=="Sfinalist"], var.equal=F)
## 
##  Welch Two Sample t-test
## 
## data:  Reaction[Cate == "Finalist"] and Reaction[Cate == "Sfinalist"]
## t = 0.83773, df = 13.779, p-value = 0.4165
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.009384582  0.021384582
## sample estimates:
## mean of x mean of y 
##   0.17025   0.16425
t.test(Reaction[Cate=="Finalist"],Reaction[Cate=="DNQ"], var.equal=F)
## 
##  Welch Two Sample t-test
## 
## data:  Reaction[Cate == "Finalist"] and Reaction[Cate == "DNQ"]
## t = 1.2992, df = 9.0665, p-value = 0.2259
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.00604932  0.02241599
## sample estimates:
## mean of x mean of y 
## 0.1702500 0.1620667
t.test(Reaction[Cate=="Sfinalist"],Reaction[Cate=="DNQ"], var.equal=F)
## 
##  Welch Two Sample t-test
## 
## data:  Reaction[Cate == "Sfinalist"] and Reaction[Cate == "DNQ"]
## t = 0.47178, df = 24.113, p-value = 0.6413
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.007365757  0.011732424
## sample estimates:
## mean of x mean of y 
## 0.1642500 0.1620667

T-test (one-side)

t.test(Reaction[Cate=="Finalist"],Reaction[Cate=="Sfinalist"], var.equal=T, alternative="greater")
## 
##  Two Sample t-test
## 
## data:  Reaction[Cate == "Finalist"] and Reaction[Cate == "Sfinalist"]
## t = 0.84523, df = 22, p-value = 0.2035
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.006189382          Inf
## sample estimates:
## mean of x mean of y 
##   0.17025   0.16425
t.test(Reaction[Cate=="Finalist"],Reaction[Cate=="DNQ"], var.equal=T, alternative="greater")
## 
##  Two Sample t-test
## 
## data:  Reaction[Cate == "Finalist"] and Reaction[Cate == "DNQ"]
## t = 1.5662, df = 36, p-value = 0.06303
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.0006380696           Inf
## sample estimates:
## mean of x mean of y 
## 0.1702500 0.1620667
t.test(Reaction[Cate=="Sfinalist"],Reaction[Cate=="DNQ"], var.equal=T, alternative="greater")
## 
##  Two Sample t-test
## 
## data:  Reaction[Cate == "Sfinalist"] and Reaction[Cate == "DNQ"]
## t = 0.51585, df = 44, p-value = 0.3043
## alternative hypothesis: true difference in means is greater than 0
## 95 percent confidence interval:
##  -0.004928231          Inf
## sample estimates:
## mean of x mean of y 
## 0.1642500 0.1620667

Analysis of variance for “reaction time” (function “aov”)

Res.aov <- aov(Reaction~Cate)
summary(Res.aov)
##             Df   Sum Sq   Mean Sq F value Pr(>F)
## Cate         2 0.000425 0.0002127   1.066  0.352
## Residuals   51 0.010172 0.0001995

Pairwise T-test

Res.pttest <- pairwise.t.test(Reaction, Cate, p.adj="bonf",pool.sd=T)
Res.pttest
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  Reaction and Cate 
## 
##           DNQ  Finalist
## Finalist  0.45 -       
## Sfinalist 1.00 0.99    
## 
## P value adjustment method: bonferroni

Pairwise T-test

Res.pttest <- pairwise.t.test(Reaction, Cate, p.adj="bonf",pool.sd=F)
Res.pttest
## 
##  Pairwise comparisons using t tests with non-pooled SD 
## 
## data:  Reaction and Cate 
## 
##           DNQ  Finalist
## Finalist  0.68 -       
## Sfinalist 1.00 1.00    
## 
## P value adjustment method: bonferroni

AoV and t-test are same??

Res2.aov <- aov(Reaction~Cate2)
summary(Res2.aov)
##             Df   Sum Sq   Mean Sq F value Pr(>F)
## Cate2        1 0.000233 0.0002333   1.171  0.284
## Residuals   52 0.010364 0.0001993
Res2.ttest <- t.test(Reaction[Cate2=="Q"],Reaction[Cate2=="DNQ"])
Res2.ttest
## 
##  Welch Two Sample t-test
## 
## data:  Reaction[Cate2 == "Q"] and Reaction[Cate2 == "DNQ"]
## t = 1.0473, df = 41.434, p-value = 0.301
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003880969  0.012247635
## sample estimates:
## mean of x mean of y 
## 0.1662500 0.1620667
Res2.ttest <- t.test(Reaction[Cate2=="Q"],Reaction[Cate2=="DNQ"], var.equal=T)
Res2.ttest
## 
##  Two Sample t-test
## 
## data:  Reaction[Cate2 == "Q"] and Reaction[Cate2 == "DNQ"]
## t = 1.082, df = 52, p-value = 0.2843
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  -0.003575051  0.011941718
## sample estimates:
## mean of x mean of y 
## 0.1662500 0.1620667

Power and sample size

#Evaluation of power
power.t.test(n=50, delta=2, sd=2, sig.level=0.05, power=NULL)
## 
##      Two-sample t test power calculation 
## 
##               n = 50
##           delta = 2
##              sd = 2
##       sig.level = 0.05
##           power = 0.9986074
##     alternative = two.sided
## 
## NOTE: n is number in *each* group
#Evaluation of necessary sample size
power.t.test(n=NULL, delta=2, sd=2, sig.level=0.05, power=0.8)
## 
##      Two-sample t test power calculation 
## 
##               n = 16.71477
##           delta = 2
##              sd = 2
##       sig.level = 0.05
##           power = 0.8
##     alternative = two.sided
## 
## NOTE: n is number in *each* group

Analysing proportion of horse mackerel (Aji)

Reading data

Data2 <- read.csv("FPA_Lec8_horsemackerel.csv", header=T)
Data2 <- cbind(Data2, Prop=Data2$BD/Data2$SL)
attach(Data2)

Quick visual presentation

par(mfcol=c(2,3))
SP <- as.numeric(Species)
SPname <- levels(Species)
plot(BD~SL, pch=SP, col=SP, xlab="Standard Length (SL)", ylab="Body Depth (BD)")
legend(220, 85, legend=levels(Species), pch=1:4, col=1:4, bty="n") 
boxplot(Prop~Species, ylab="Proportion")

for(i in 1:length(SPname)) hist(Prop[SP==i], xlab="Prop", main=paste(SPname[i]), col="gray")

plot1 <- qplot(SL, BD, col=Species)
plot2 <- qplot(Species, Prop, geom="boxplot")
plot3 <- qplot(Species, Prop, geom="boxplot", col=Species)
plot4 <- qplot(Prop, facets=Species~., fill=Species, data=Data2)
grid.arrange(plot1, plot2, plot3, plot4, nrow=2, ncol=2)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Summary statistics

y <- split(Prop, Species)
y$Kaiwari
##  [1] 0.4893617 0.5074627 0.5056818 0.5080645 0.4916667 0.4556213 0.4528302
##  [8] 0.4962406 0.4714286 0.4771242 0.4838710 0.4887640 0.4842767 0.4838710
## [15] 0.4842767 0.4887640 0.4714286 0.4771242 0.4838710 0.4864865 0.4831461
## [22] 0.4410256 0.4462810 0.4410256 0.4462810 0.4410256 0.4462810 0.4831461
## [29] 0.4864865 0.4410256 0.4462810
length(y$Kaiwari)
## [1] 31
sapply(y, length)
## Kaiwari   Maaji Maruaji    Moro 
##      31      83      15      46
lapply(y, length)
## $Kaiwari
## [1] 31
## 
## $Maaji
## [1] 83
## 
## $Maruaji
## [1] 15
## 
## $Moro
## [1] 46
Ns <- sapply(y, length)
Mean <- sapply(y, mean)
SD <- sapply(y, sd)
data.frame(Nsample=Ns, Mean, SD)
##         Nsample      Mean         SD
## Kaiwari      31 0.4738781 0.02159171
## Maaji        83 0.2477849 0.02045780
## Maruaji      15 0.2273268 0.01283770
## Moro         46 0.1908011 0.01381693

One sample test (H0:mu=0.2)

lapply(y, t.test, mu=0.2)
## $Kaiwari
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = 70.624, df = 30, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0.2
## 95 percent confidence interval:
##  0.4659582 0.4817980
## sample estimates:
## mean of x 
## 0.4738781 
## 
## 
## $Maaji
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = 21.28, df = 82, p-value < 2.2e-16
## alternative hypothesis: true mean is not equal to 0.2
## 95 percent confidence interval:
##  0.2433179 0.2522520
## sample estimates:
## mean of x 
## 0.2477849 
## 
## 
## $Maruaji
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = 8.2442, df = 14, p-value = 9.634e-07
## alternative hypothesis: true mean is not equal to 0.2
## 95 percent confidence interval:
##  0.2202175 0.2344360
## sample estimates:
## mean of x 
## 0.2273268 
## 
## 
## $Moro
## 
##  One Sample t-test
## 
## data:  X[[i]]
## t = -4.5155, df = 45, p-value = 4.527e-05
## alternative hypothesis: true mean is not equal to 0.2
## 95 percent confidence interval:
##  0.1866980 0.1949042
## sample estimates:
## mean of x 
## 0.1908011
res.t.test <- lapply(y, t.test, mu=0.2)

Two sample test

# variance same? 
var.test(y[[2]],y[[3]])
## 
##  F test to compare two variances
## 
## data:  y[[2]] and y[[3]]
## F = 2.5395, num df = 82, denom df = 14, p-value = 0.05378
## alternative hypothesis: true ratio of variances is not equal to 1
## 95 percent confidence interval:
##  0.9838916 5.1560229
## sample estimates:
## ratio of variances 
##           2.539471
# t-test
t.test(y[[2]],y[[3]], var.equal=T)
## 
##  Two Sample t-test
## 
## data:  y[[2]] and y[[3]]
## t = 3.7332, df = 96, p-value = 0.0003205
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.009580294 0.031336083
## sample estimates:
## mean of x mean of y 
## 0.2477849 0.2273268
t.test(y[[2]],y[[3]])
## 
##  Welch Two Sample t-test
## 
## data:  y[[2]] and y[[3]]
## t = 5.1098, df = 28.765, p-value = 1.91e-05
## alternative hypothesis: true difference in means is not equal to 0
## 95 percent confidence interval:
##  0.01226681 0.02864956
## sample estimates:
## mean of x mean of y 
## 0.2477849 0.2273268

Analysis of variance (common variance)

res.lm <- lm(Prop~Species)
summary(res.lm)
## 
## Call:
## lm(formula = Prop ~ Species)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.041118 -0.012597  0.002215  0.011178  0.043030 
## 
## Coefficients:
##                 Estimate Std. Error t value Pr(>|t|)    
## (Intercept)     0.473878   0.003342  141.80   <2e-16 ***
## SpeciesMaaji   -0.226093   0.003917  -57.73   <2e-16 ***
## SpeciesMaruaji -0.246551   0.005852  -42.13   <2e-16 ***
## SpeciesMoro    -0.283077   0.004324  -65.47   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01861 on 171 degrees of freedom
## Multiple R-squared:  0.9653, Adjusted R-squared:  0.9647 
## F-statistic:  1584 on 3 and 171 DF,  p-value: < 2.2e-16
res.lm <- lm(Prop~Species-1)
summary(res.lm)
## 
## Call:
## lm(formula = Prop ~ Species - 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.041118 -0.012597  0.002215  0.011178  0.043030 
## 
## Coefficients:
##                Estimate Std. Error t value Pr(>|t|)    
## SpeciesKaiwari 0.473878   0.003342  141.80   <2e-16 ***
## SpeciesMaaji   0.247785   0.002042  121.32   <2e-16 ***
## SpeciesMaruaji 0.227327   0.004804   47.32   <2e-16 ***
## SpeciesMoro    0.190801   0.002743   69.55   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01861 on 171 degrees of freedom
## Multiple R-squared:  0.9959, Adjusted R-squared:  0.9958 
## F-statistic: 1.048e+04 on 4 and 171 DF,  p-value: < 2.2e-16
res.anova1 <- anova(res.lm)
res.anova1
## Analysis of Variance Table
## 
## Response: Prop
##            Df  Sum Sq Mean Sq F value    Pr(>F)    
## Species     4 14.5072  3.6268   10476 < 2.2e-16 ***
## Residuals 171  0.0592  0.0003                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

Analysis of variance (heterogeneity in variance)

res.bartlett <- bartlett.test(Prop~Species)
res.bartlett
## 
##  Bartlett test of homogeneity of variances
## 
## data:  Prop by Species
## Bartlett's K-squared = 12.515, df = 3, p-value = 0.005813
res.anova2 <- oneway.test(Prop~Species, var.equal = FALSE)
res.anova2
## 
##  One-way analysis of means (not assuming equal variances)
## 
## data:  Prop and Species
## F = 1373.1, num df = 3.00, denom df = 54.97, p-value < 2.2e-16

pairwise t-test

pairwise.t.test(Prop,Species)
## 
##  Pairwise comparisons using t tests with pooled SD 
## 
## data:  Prop and Species 
## 
##         Kaiwari Maaji   Maruaji
## Maaji   < 2e-16 -       -      
## Maruaji < 2e-16 0.00013 -      
## Moro    < 2e-16 < 2e-16 9.8e-10
## 
## P value adjustment method: holm
pairwise.t.test(Prop,Species, pool.sd = FALSE)
## 
##  Pairwise comparisons using t tests with non-pooled SD 
## 
## data:  Prop and Species 
## 
##         Kaiwari Maaji   Maruaji
## Maaji   < 2e-16 -       -      
## Maruaji < 2e-16 1.9e-05 -      
## Moro    < 2e-16 < 2e-16 1.9e-09
## 
## P value adjustment method: holm
pairwise.t.test(Prop,Species, pool.sd = FALSE, p.adj = "bonf")
## 
##  Pairwise comparisons using t tests with non-pooled SD 
## 
## data:  Prop and Species 
## 
##         Kaiwari Maaji   Maruaji
## Maaji   < 2e-16 -       -      
## Maruaji < 2e-16 0.00011 -      
## Moro    < 2e-16 < 2e-16 5.7e-09
## 
## P value adjustment method: bonferroni

Analysis of covariance

Though the linear regression will be handled in the next lecture, but preliminary…

plot(Prop~SL, pch=SP, col=SP, xlab="Standard Length (SL)", ylab="Proportion")
legend(220, 0.45, legend=levels(Species), pch=1:4, col=1:4, bty="n") 

res.lm2 <- lm(Prop~Species + SL - 1)
summary(res.lm2)
## 
## Call:
## lm(formula = Prop ~ Species + SL - 1)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.041877 -0.011140  0.000861  0.012130  0.042925 
## 
## Coefficients:
##                  Estimate Std. Error t value Pr(>|t|)    
## SpeciesKaiwari  4.947e-01  8.710e-03  56.798   <2e-16 ***
## SpeciesMaaji    2.697e-01  8.723e-03  30.922   <2e-16 ***
## SpeciesMaruaji  2.488e-01  9.549e-03  26.054   <2e-16 ***
## SpeciesMoro     2.193e-01  1.135e-02  19.327   <2e-16 ***
## SL             -1.413e-04  5.463e-05  -2.586   0.0106 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.01831 on 170 degrees of freedom
## Multiple R-squared:  0.9961, Adjusted R-squared:  0.996 
## F-statistic:  8660 on 5 and 170 DF,  p-value: < 2.2e-16
res.ancova <- anova(res.lm2)
res.ancova
## Analysis of Variance Table
## 
## Response: Prop
##            Df  Sum Sq Mean Sq   F value  Pr(>F)    
## Species     4 14.5072  3.6268 10823.831 < 2e-16 ***
## SL          1  0.0022  0.0022     6.686 0.01056 *  
## Residuals 170  0.0570  0.0003                      
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1