Traffic.R
From Organic Design wiki
{{#Security:*|Sven}}
# Notes # A) unpaired non-parametric and parametric tests # Wilcoxon and t-test # T-tests assume) that n is large # 1) ---------------------------- Load datasets ------------------------------- # setwd("/home/sven/Desktop/Consulting/Duncan") dataDir <- "Data" dset <- read.table(file.path(dataDir, "Traffic.txt"), header=TRUE, sep="\t") dset2 <- read.table(file.path(dataDir, "TrafficStacked.txt"), header=TRUE, sep="\t") levels(dset2$Type) <- c("Roundabout","Traffic Signal") names(dset) names(dset2) library(lattice) attach(dset) xyplot(I(iTotal/Traffic) ~ IntersectionPair, data=dset, ylim=c(0,0.0005)) xyplot(I(iTotal.1/Traffic.1) ~ IntersectionPair, data=dset, ylim=c(0,0.0005)) xyplot(I(iTotal/Traffic - iTotal.1/Traffic.1) ~ IntersectionPair, data=dset, panel=function(x,y) { panel.abline(h=0) panel.xyplot(x,y) }) dotplot(IntersectionPair ~ I(iTotal/Traffic), data=dset) dotplot(IntersectionPair ~ I(iTotal.1/Traffic.1), data=dset) # Illustrates that intersection #3 is a real problem png("Graph1.png") dotplot(IntersectionPair ~ I(iTotal/(4 * 365.25 * Traffic) - iTotal.1/(4 * 365.25 * Traffic.1)), data=dset, panel=function(x,y, ...){panel.abline(v=0); panel.dotplot(x,y)}, xlab=expression(over("Total Roundabout crashes","Total traffic") - over("Total traffic signal crashes", "Total traffic")), main="Intersection 3 needs investigation") dev.off() detach(dset) attach(dset2) # Good illustration of pairing png("Graph2.png") dotplot(IntersectionPair ~ iTotal, groups=Type, key = simpleKey(levels(Type), columns=2, space = "bottom"), xlab = "Unadjusted Accident Count ", ylab="Intersection pair", aspect=1, layout = c(1,1), main="Dotplot of counts by site") dev.off() # Legs is not really the issue, not enough information dotplot(IntersectionPair ~ iTotal| factor(Legs), groups=Type, key = simpleKey(levels(Type), columns=2, space = "bottom"), xlab = "Unadjusted Accident Count ", ylab="Intersection pair", aspect=1, layout = c(1,3), main="Pairs 10 and 20 idential, pair 3 needs investigation", between=list(y=0.3)) # Example dotplot(variety ~ yield | site, data = barley, groups = year, key = simpleKey(levels(barley$year), space = "right"), xlab = "Barley Yield (bushels/acre) ", aspect=0.5, layout = c(1,6), ylab=NULL) # 2) --------------------- Wilcoxon rank signed test -------------------------- # attach(dset) # A) pairwise tests Total.RAvsTS <- iTotal/Traffic - iTotal.1/Traffic.1 Vehicle.RAvsTS <- iVehicle/Traffic - iVehicle.1/Traffic.1 CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1 wilcox.test(Total.RAvsTS, exact=FALSE) # Significant wilcox.test(Vehicle.RAvsTS, exact=FALSE) # Not Significant wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant # Normal approximation t.test(RAvsTS) # Borderline t.test(Vehicle.RAvsTS) # Not Significant t.test(CyclistPed.RAvsTS) # Not Significant # B) unpaired tests wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant # Normal approximation t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Not Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Not Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant # Removing intersection 3 makes tests much more significant wilcox.test(RAvsTS[-3], exact=FALSE) # Significant wilcox.test(Vehicle.RAvsTS[-3], exact=FALSE) # Not Significant wilcox.test(CyclistPed.RAvsTS[-3], exact=FALSE) # Not Significant # Normal approximation t.test(RAvsTS[-3]) # Borderline t.test(Vehicle.RAvsTS[-3]) # Not Significant t.test(CyclistPed.RAvsTS[-3]) # Not Significant # B) unpaired tests wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant # Normal approximation t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Not Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Not Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant detach(dset) names(dset2) attach(dset2) ls(pos=2) accidentsTotal <- cbind(iTotal, Traffic-iTotal) accidentsCyclistPed <- cbind(iCyclistPed, Traffic-iCyclistPed) accidentsVehicle <- cbind(iVehicle, Traffic-iVehicle) options(contrasts=c("contr.treatment", "contr.poly")) # GLMS should include plot and diagnostics # Need to use cloglog link function # Total obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial) summary(obj) # Diagnostics plot(obj) plot(c(3,5), c(0,0.001), xlab="Legs", ylab="Prob", type="n") ld <- seq(3,5, length=101) lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response")) lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response")) # This shows that numbers of 5 leg intersections are small but influential points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)]) objSub <- glm(accidentsTotal ~ Type, family=binomial) summary(objSub) # Diagnostics plot(objSub) plot(c(1,2), c(0,0.001), xlab="Legs", ylab="Prob", type="n") ld <- seq(1,2, length=101) lines(as.numeric(Type), predict(objSub, data.frame(Type=Type), type="response")) points(as.numeric(Type), accidentsTotal[,1]/accidentsTotal[,2]) # This shows that numbers of 5 leg intersections are small but influential points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)]) anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial(link=cloglog)) summary(obj) # Cyclist vs Pedestrian obj <- glm(accidentsCyclistPed ~ Type + Type%in%Legs, family=binomial) summary(obj) objSub <- glm(accidentsCyclistPed ~ Type, family=binomial) summary(objSub) anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate # Vehicle obj <- glm(accidentsVehicle ~ Type + Type%in%Legs, family=binomial) summary(obj) objSub <- glm(accidentsVehicle ~ Type, family=binomial) summary(objSub) anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate # The above analysis suggests that (assuming Legs variable is not of interest) # there is a Type effect in the total and Vehicle accidents, but not in Pedistrians/cyclists # This suggests that the influence is from vehicles in the totals accidentsTotal2 <- cbind(iTotal, (365.25*4*Traffic)-iTotal) obj3 <- glm(accidentsTotal2 ~ Type + Type%in%Legs, family=binomial) summary(obj3) plot(c(3,5), c(0,0.0000005), xlab="Legs", ylab="Prob", type="n") ld <- seq(3,5, length=101) lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response")) lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response")) # This shows that numbers of 5 leg intersections are small but influential points(Legs, accidentsTotal2[,1]/accidentsTotal2[,2], col=rainbow(2)[as.numeric(Type)]) # Poisson model obj4 <- glm(iTotal ~ Type + Type%in%Legs, family=poisson) summary(obj4) obj5 <- glm(iTotal ~ Type , family=poisson) summary(obj5) anova(obj4, obj5, test="Chisq") # Reweight the trafficlights iTotal2 <- iTotal.1 * Traffic/Traffic.1 wilcox.test(iTotal - iTotal2) t.test(iTotal - iTotal2) iTotal3 <- c(iTotal, iTotal2) iTotal3 attach(dset2) # Poisson model#2 obj4 <- glm(round(iTotal3) ~ Type + Type%in%Legs, family=poisson) summary(obj4) obj5 <- glm(round(iTotal3) ~ Type , family=poisson) summary(obj5) # Re-analyse with intersection #3 removed dsetB <- read.table("Traffic2.txt", header=TRUE, sep="\t") dset2B <- read.table("TrafficStacked2.txt", header=TRUE, sep="\t") attach(dsetB) # A) pairwise tests Total.RAvsTS <- iTotal/Traffic - iTotal.1/Traffic.1 Vehicle.RAvsTS <- iVehicle/Traffic - iVehicle.1/Traffic.1 CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1 wilcox.test(Total.RAvsTS, exact=FALSE) # Highly Significant wilcox.test(Vehicle.RAvsTS, exact=FALSE) # Significant wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant # Normal approximation t.test(Total.RAvsTS) # Highly Significant t.test(Vehicle.RAvsTS) # Significant t.test(CyclistPed.RAvsTS) # Not Significant # B) unpaired tests wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Borderline wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant # Normal approximation t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant # Examples of calculations on page 258 # Table 259, n=37, x=12, p=0.05/2 n <- 37 x <- 0:n plot(x, pbinom(x,n, prob=0.5), type="l") abline(h=0.05/2) abline(v=12) n <- 85 x <- 0:n plot(x, pbinom(x,n, prob=0.5), type="l") abline(h=0.1/2) abline(v=34) # illustrates calculation less than 10% pbinom(6,21, prob=0.5)*2 # Test is examining lower end of tail (as seen in plots above) # 5-10% Significance in this example is somewhere between 7 and 8 qbinom(0.1,21, prob=0.5) qbinom(0.05, 21, prob=0.5) for(i in c(0.1, 0.05, 0.01, 0.002)) { message("Chisq = ", round(qchisq(1-i, 1),2), " (p=",i,")") } # Calculations in excel table x <- c("Total Crashes"=681,Injury=74, "Non-inj"=601, Fatal=0, Serious=6, Minor=68, Veh=0,Ped=0,Cycle=0,Veh=7,Ped=1,Cycle=1,Veh=45,Ped=13,Cycle=10, "Total Veh injury"=49, "Total Veh Serious&Fatal Injury" = 7) y <- c(753,123,620,2,17,104, 2, 0,0,11,5,2,80,17,7,92, 13) n <- x+y chisq <- ((abs(x-y) -1)^2)/n df <- data.frame(rbind("ROUNDABOUT SITES"=round(x,0), "TRAFFIC SIGNAL SITES"=round(y,0), "signif (p-value)" = (1 - pchisq(chisq, 1)), "%crash saving at RBTs" = round(100*(y-x)/y), "Total (n)" = x+y, chisq = round(chisq,2) )) #ind <- c(3:4, 7:12,14:15) ind <- which(df[3,] > 0.065 | df[3,]==0) df[3, ind] <- "ns" df[4, ind] <- "" df[5, c(ind[-1],5)] <- "" df[6, c(ind[-1],5)] <- "" df cat(" \t", file="binom.txt") write.table(df, "binom.txt", quote=FALSE, sep="\t", append=TRUE) # So this is actually 6% significance # Note the multiplication factor of 1.05 is not rounded x <- x * 1.05 y <- c(753,133,620,2,18,113, 2, 0,0,11,5,2,89,17,7,102) n <- x+y chisq <- ((abs(x-y) -1)^2)/n df <- data.frame(rbind(x=round(x,0), y=round(y,0), "signif (p-value)" = round((1 - pchisq(chisq, 1)),3), "%crash saving at RBTs" = (y-x)/y, chisq = round(chisq,2) )) # Binom check: x <- 8 y <- 18 pbinom(x,x+y, prob=0.5)*2 1-pchisq(((abs(x-y) -1)^2)/(x+y),1) x <- 7 # x always has to be lower... y <- 10 pbinom(x,x+y, prob=0.5)*2 1-pchisq(((abs(x-y) -1)^2)/(x+y),1)