Traffic.R
- Notes
Code snipits and programs written in R, S or S-PLUS{{#Security:*|Sven}}
- A) unpaired non-parametric and parametric tests
- Wilcoxon and t-test
- T-tests assume) that n is large
- 1) ---------------------------- Load datasets ------------------------------- #
setwd("/home/sven/Desktop/Consulting/Duncan") dataDir <- "Data" dset <- read.table(file.path(dataDir, "Traffic.txt"), header=TRUE, sep="\t") dset2 <- read.table(file.path(dataDir, "TrafficStacked.txt"), header=TRUE, sep="\t") levels(dset2$Type) <- c("Roundabout","Traffic Signal")
names(dset) names(dset2)
library(lattice) attach(dset)
xyplot(I(iTotal/Traffic) ~ IntersectionPair,
data=dset, ylim=c(0,0.0005))
xyplot(I(iTotal.1/Traffic.1) ~ IntersectionPair,
data=dset, ylim=c(0,0.0005))
xyplot(I(iTotal/Traffic - iTotal.1/Traffic.1) ~ IntersectionPair,
data=dset, panel=function(x,y) { panel.abline(h=0) panel.xyplot(x,y) })
dotplot(IntersectionPair ~ I(iTotal/Traffic), data=dset) dotplot(IntersectionPair ~ I(iTotal.1/Traffic.1), data=dset)
- Illustrates that intersection #3 is a real problem
png("Graph1.png") dotplot(IntersectionPair ~ I(iTotal/(4 * 365.25 * Traffic) - iTotal.1/(4 * 365.25 * Traffic.1)), data=dset,
panel=function(x,y, ...){panel.abline(v=0); panel.dotplot(x,y)}, xlab=expression(over("Total Roundabout crashes","Total traffic") - over("Total traffic signal crashes", "Total traffic")), main="Intersection 3 needs investigation")
dev.off() detach(dset) attach(dset2)
- Good illustration of pairing
png("Graph2.png") dotplot(IntersectionPair ~ iTotal, groups=Type,
key = simpleKey(levels(Type), columns=2, space = "bottom"), xlab = "Unadjusted Accident Count ", ylab="Intersection pair", aspect=1, layout = c(1,1), main="Dotplot of counts by site")
dev.off()
- Legs is not really the issue, not enough information
dotplot(IntersectionPair ~ iTotal| factor(Legs), groups=Type, key = simpleKey(levels(Type), columns=2, space = "bottom"), xlab = "Unadjusted Accident Count ", ylab="Intersection pair", aspect=1, layout = c(1,3), main="Pairs 10 and 20 idential, pair 3 needs investigation", between=list(y=0.3))
- Example
dotplot(variety ~ yield | site, data = barley, groups = year, key = simpleKey(levels(barley$year), space = "right"), xlab = "Barley Yield (bushels/acre) ", aspect=0.5, layout = c(1,6), ylab=NULL)
- 2) --------------------- Wilcoxon rank signed test -------------------------- #
attach(dset)
- A) pairwise tests
Total.RAvsTS <- iTotal/Traffic - iTotal.1/Traffic.1 Vehicle.RAvsTS <- iVehicle/Traffic - iVehicle.1/Traffic.1 CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1
wilcox.test(Total.RAvsTS, exact=FALSE) # Significant wilcox.test(Vehicle.RAvsTS, exact=FALSE) # Not Significant wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant
- Normal approximation
t.test(RAvsTS) # Borderline t.test(Vehicle.RAvsTS) # Not Significant t.test(CyclistPed.RAvsTS) # Not Significant
- B) unpaired tests
wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant
- Normal approximation
t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Not Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Not Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant
- Removing intersection 3 makes tests much more significant
wilcox.test(RAvsTS[-3], exact=FALSE) # Significant wilcox.test(Vehicle.RAvsTS[-3], exact=FALSE) # Not Significant wilcox.test(CyclistPed.RAvsTS[-3], exact=FALSE) # Not Significant
- Normal approximation
t.test(RAvsTS[-3]) # Borderline t.test(Vehicle.RAvsTS[-3]) # Not Significant t.test(CyclistPed.RAvsTS[-3]) # Not Significant
- B) unpaired tests
wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant
- Normal approximation
t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Not Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Not Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant
detach(dset)
names(dset2)
attach(dset2) ls(pos=2)
accidentsTotal <- cbind(iTotal, Traffic-iTotal) accidentsCyclistPed <- cbind(iCyclistPed, Traffic-iCyclistPed) accidentsVehicle <- cbind(iVehicle, Traffic-iVehicle)
options(contrasts=c("contr.treatment", "contr.poly"))
- GLMS should include plot and diagnostics
- Need to use cloglog link function
- Total
obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial) summary(obj)
- Diagnostics
plot(obj)
plot(c(3,5), c(0,0.001), xlab="Legs", ylab="Prob", type="n") ld <- seq(3,5, length=101) lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response")) lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response"))
- This shows that numbers of 5 leg intersections are small but influential
points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)])
objSub <- glm(accidentsTotal ~ Type, family=binomial)
summary(objSub)
- Diagnostics
plot(objSub)
plot(c(1,2), c(0,0.001), xlab="Legs", ylab="Prob", type="n") ld <- seq(1,2, length=101) lines(as.numeric(Type), predict(objSub, data.frame(Type=Type), type="response")) points(as.numeric(Type), accidentsTotal[,1]/accidentsTotal[,2])
- This shows that numbers of 5 leg intersections are small but influential
points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)])
anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate
obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial(link=cloglog)) summary(obj)
- Cyclist vs Pedestrian
obj <- glm(accidentsCyclistPed ~ Type + Type%in%Legs, family=binomial) summary(obj)
objSub <- glm(accidentsCyclistPed ~ Type, family=binomial) summary(objSub)
anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate
- Vehicle
obj <- glm(accidentsVehicle ~ Type + Type%in%Legs, family=binomial) summary(obj)
objSub <- glm(accidentsVehicle ~ Type, family=binomial) summary(objSub)
anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate
- The above analysis suggests that (assuming Legs variable is not of interest)
- there is a Type effect in the total and Vehicle accidents, but not in Pedistrians/cyclists
- This suggests that the influence is from vehicles in the totals
accidentsTotal2 <- cbind(iTotal, (365.25*4*Traffic)-iTotal)
obj3 <- glm(accidentsTotal2 ~ Type + Type%in%Legs, family=binomial) summary(obj3)
plot(c(3,5), c(0,0.0000005), xlab="Legs", ylab="Prob", type="n") ld <- seq(3,5, length=101) lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response")) lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response"))
- This shows that numbers of 5 leg intersections are small but influential
points(Legs, accidentsTotal2[,1]/accidentsTotal2[,2], col=rainbow(2)[as.numeric(Type)])
- Poisson model
obj4 <- glm(iTotal ~ Type + Type%in%Legs, family=poisson) summary(obj4)
obj5 <- glm(iTotal ~ Type , family=poisson) summary(obj5)
anova(obj4, obj5, test="Chisq")
- Reweight the trafficlights
iTotal2 <- iTotal.1 * Traffic/Traffic.1
wilcox.test(iTotal - iTotal2) t.test(iTotal - iTotal2)
iTotal3 <- c(iTotal, iTotal2) iTotal3
attach(dset2)
- Poisson model#2
obj4 <- glm(round(iTotal3) ~ Type + Type%in%Legs, family=poisson) summary(obj4)
obj5 <- glm(round(iTotal3) ~ Type , family=poisson) summary(obj5)
- Re-analyse with intersection #3 removed
dsetB <- read.table("Traffic2.txt", header=TRUE, sep="\t") dset2B <- read.table("TrafficStacked2.txt", header=TRUE, sep="\t")
attach(dsetB)
- A) pairwise tests
Total.RAvsTS <- iTotal/Traffic - iTotal.1/Traffic.1 Vehicle.RAvsTS <- iVehicle/Traffic - iVehicle.1/Traffic.1 CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1
wilcox.test(Total.RAvsTS, exact=FALSE) # Highly Significant wilcox.test(Vehicle.RAvsTS, exact=FALSE) # Significant wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant
- Normal approximation
t.test(Total.RAvsTS) # Highly Significant t.test(Vehicle.RAvsTS) # Significant t.test(CyclistPed.RAvsTS) # Not Significant
- B) unpaired tests
wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Borderline wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant
- Normal approximation
t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant
- Examples of calculations on page 258
- Table 259, n=37, x=12, p=0.05/2
n <- 37 x <- 0:n plot(x, pbinom(x,n, prob=0.5), type="l") abline(h=0.05/2) abline(v=12)
n <- 85 x <- 0:n plot(x, pbinom(x,n, prob=0.5), type="l") abline(h=0.1/2) abline(v=34)
- illustrates calculation less than 10%
pbinom(6,21, prob=0.5)*2
- Test is examining lower end of tail (as seen in plots above)
- 5-10% Significance in this example is somewhere between 7 and 8
qbinom(0.1,21, prob=0.5) qbinom(0.05, 21, prob=0.5)
for(i in c(0.1, 0.05, 0.01, 0.002)) {
message("Chisq = ", round(qchisq(1-i, 1),2), " (p=",i,")")
}
- Calculations in excel table
x <- c("Total Crashes"=681,Injury=74, "Non-inj"=601, Fatal=0, Serious=6, Minor=68,
Veh=0,Ped=0,Cycle=0,Veh=7,Ped=1,Cycle=1,Veh=45,Ped=13,Cycle=10, "Total Veh injury"=49, "Total Veh Serious&Fatal Injury" = 7)
y <- c(753,123,620,2,17,104, 2, 0,0,11,5,2,80,17,7,92, 13) n <- x+y chisq <- ((abs(x-y) -1)^2)/n
df <- data.frame(rbind("ROUNDABOUT SITES"=round(x,0),
"TRAFFIC SIGNAL SITES"=round(y,0), "signif (p-value)" = (1 - pchisq(chisq, 1)), "%crash saving at RBTs" = round(100*(y-x)/y), "Total (n)" = x+y, chisq = round(chisq,2) ))
- ind <- c(3:4, 7:12,14:15)
ind <- which(df[3,] > 0.065 | df[3,]==0) df[3, ind] <- "ns" df[4, ind] <- "" df[5, c(ind[-1],5)] <- "" df[6, c(ind[-1],5)] <- "" df
cat(" \t", file="binom.txt") write.table(df, "binom.txt", quote=FALSE, sep="\t", append=TRUE)
- So this is actually 6% significance
- Note the multiplication factor of 1.05 is not rounded
x <- x * 1.05 y <- c(753,133,620,2,18,113, 2, 0,0,11,5,2,89,17,7,102) n <- x+y chisq <- ((abs(x-y) -1)^2)/n
df <- data.frame(rbind(x=round(x,0),
y=round(y,0), "signif (p-value)" = round((1 - pchisq(chisq, 1)),3), "%crash saving at RBTs" = (y-x)/y, chisq = round(chisq,2) ))
- Binom check:
x <- 8 y <- 18 pbinom(x,x+y, prob=0.5)*2 1-pchisq(((abs(x-y) -1)^2)/(x+y),1)
x <- 7 # x always has to be lower... y <- 10 pbinom(x,x+y, prob=0.5)*2 1-pchisq(((abs(x-y) -1)^2)/(x+y),1)