Difference between revisions of "Traffic.R"

Latest revision as of 23:53, 2 March 2009

Notes

Code snipits and programs written in R, S or S-PLUS{{#Security:*|Sven}}

A) unpaired non-parametric and parametric tests
Wilcoxon and t-test
T-tests assume) that n is large

1) ---------------------------- Load datasets ------------------------------- #

setwd("/home/sven/Desktop/Consulting/Duncan") dataDir <- "Data" dset <- read.table(file.path(dataDir, "Traffic.txt"), header=TRUE, sep="\t") dset2 <- read.table(file.path(dataDir, "TrafficStacked.txt"), header=TRUE, sep="\t") levels(dset2$Type) <- c("Roundabout","Traffic Signal")

names(dset) names(dset2)

library(lattice) attach(dset)

xyplot(I(iTotal/Traffic) ~ IntersectionPair,

      data=dset, ylim=c(0,0.0005))

xyplot(I(iTotal.1/Traffic.1) ~ IntersectionPair,

      data=dset, ylim=c(0,0.0005))

xyplot(I(iTotal/Traffic - iTotal.1/Traffic.1) ~ IntersectionPair,

      data=dset,
      panel=function(x,y) {
        panel.abline(h=0)
        panel.xyplot(x,y)
      })

dotplot(IntersectionPair ~ I(iTotal/Traffic), data=dset) dotplot(IntersectionPair ~ I(iTotal.1/Traffic.1), data=dset)

Illustrates that intersection #3 is a real problem

png("Graph1.png") dotplot(IntersectionPair ~ I(iTotal/(4 * 365.25 * Traffic) - iTotal.1/(4 * 365.25 * Traffic.1)), data=dset,

       panel=function(x,y, ...){panel.abline(v=0); panel.dotplot(x,y)}, xlab=expression(over("Total Roundabout crashes","Total traffic") - over("Total traffic signal crashes", "Total traffic")), main="Intersection 3 needs investigation")

dev.off() detach(dset) attach(dset2)

Good illustration of pairing

png("Graph2.png") dotplot(IntersectionPair ~ iTotal, groups=Type,

           key = simpleKey(levels(Type), columns=2, space = "bottom"),
            xlab = "Unadjusted Accident Count ", ylab="Intersection pair",
            aspect=1, layout = c(1,1), main="Dotplot of counts by site")

dev.off()

Legs is not really the issue, not enough information

dotplot(IntersectionPair ~ iTotal| factor(Legs), groups=Type,
           key = simpleKey(levels(Type), columns=2, space = "bottom"),
            xlab = "Unadjusted Accident Count ", ylab="Intersection pair",
            aspect=1, layout = c(1,3), main="Pairs 10 and 20 idential, pair 3 needs investigation", between=list(y=0.3))

Example

  dotplot(variety ~ yield | site, data = barley, groups = year,
            key = simpleKey(levels(barley$year), space = "right"),
            xlab = "Barley Yield (bushels/acre) ",
            aspect=0.5, layout = c(1,6), ylab=NULL)

2) --------------------- Wilcoxon rank signed test -------------------------- #

attach(dset)

A) pairwise tests

Total.RAvsTS <- iTotal/Traffic - iTotal.1/Traffic.1 Vehicle.RAvsTS <- iVehicle/Traffic - iVehicle.1/Traffic.1 CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1

wilcox.test(Total.RAvsTS, exact=FALSE) # Significant wilcox.test(Vehicle.RAvsTS, exact=FALSE) # Not Significant wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant

Normal approximation

t.test(RAvsTS) # Borderline t.test(Vehicle.RAvsTS) # Not Significant t.test(CyclistPed.RAvsTS) # Not Significant

B) unpaired tests

wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant

Normal approximation

t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Not Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Not Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant

Removing intersection 3 makes tests much more significant

wilcox.test(RAvsTS[-3], exact=FALSE) # Significant wilcox.test(Vehicle.RAvsTS[-3], exact=FALSE) # Not Significant wilcox.test(CyclistPed.RAvsTS[-3], exact=FALSE) # Not Significant

Normal approximation

t.test(RAvsTS[-3]) # Borderline t.test(Vehicle.RAvsTS[-3]) # Not Significant t.test(CyclistPed.RAvsTS[-3]) # Not Significant

B) unpaired tests

wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Not Significant wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant

Normal approximation

t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Not Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Not Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant

detach(dset)

names(dset2)

attach(dset2) ls(pos=2)

accidentsTotal <- cbind(iTotal, Traffic-iTotal) accidentsCyclistPed <- cbind(iCyclistPed, Traffic-iCyclistPed) accidentsVehicle <- cbind(iVehicle, Traffic-iVehicle)

options(contrasts=c("contr.treatment", "contr.poly"))

GLMS should include plot and diagnostics
Need to use cloglog link function
Total

obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial) summary(obj)

Diagnostics

plot(obj)

plot(c(3,5), c(0,0.001), xlab="Legs", ylab="Prob", type="n") ld <- seq(3,5, length=101) lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response")) lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response"))

This shows that numbers of 5 leg intersections are small but influential

points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)])

objSub <- glm(accidentsTotal ~ Type, family=binomial) summary(objSub)

Diagnostics

plot(objSub)

plot(c(1,2), c(0,0.001), xlab="Legs", ylab="Prob", type="n") ld <- seq(1,2, length=101) lines(as.numeric(Type), predict(objSub, data.frame(Type=Type), type="response")) points(as.numeric(Type), accidentsTotal[,1]/accidentsTotal[,2])

This shows that numbers of 5 leg intersections are small but influential

points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)])

anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate

obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial(link=cloglog)) summary(obj)

Cyclist vs Pedestrian

obj <- glm(accidentsCyclistPed ~ Type + Type%in%Legs, family=binomial) summary(obj)

objSub <- glm(accidentsCyclistPed ~ Type, family=binomial) summary(objSub)

anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate

Vehicle

obj <- glm(accidentsVehicle ~ Type + Type%in%Legs, family=binomial) summary(obj)

objSub <- glm(accidentsVehicle ~ Type, family=binomial) summary(objSub)

anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate

The above analysis suggests that (assuming Legs variable is not of interest)
there is a Type effect in the total and Vehicle accidents, but not in Pedistrians/cyclists
This suggests that the influence is from vehicles in the totals

accidentsTotal2 <- cbind(iTotal, (365.25*4*Traffic)-iTotal)

obj3 <- glm(accidentsTotal2 ~ Type + Type%in%Legs, family=binomial) summary(obj3)

plot(c(3,5), c(0,0.0000005), xlab="Legs", ylab="Prob", type="n") ld <- seq(3,5, length=101) lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response")) lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response"))

This shows that numbers of 5 leg intersections are small but influential

points(Legs, accidentsTotal2[,1]/accidentsTotal2[,2], col=rainbow(2)[as.numeric(Type)])

Poisson model

obj4 <- glm(iTotal ~ Type + Type%in%Legs, family=poisson) summary(obj4)

obj5 <- glm(iTotal ~ Type , family=poisson) summary(obj5)

anova(obj4, obj5, test="Chisq")

Reweight the trafficlights

iTotal2 <- iTotal.1 * Traffic/Traffic.1

wilcox.test(iTotal - iTotal2) t.test(iTotal - iTotal2)

iTotal3 <- c(iTotal, iTotal2) iTotal3

attach(dset2)

Poisson model#2

obj4 <- glm(round(iTotal3) ~ Type + Type%in%Legs, family=poisson) summary(obj4)

obj5 <- glm(round(iTotal3) ~ Type , family=poisson) summary(obj5)

Re-analyse with intersection #3 removed

dsetB <- read.table("Traffic2.txt", header=TRUE, sep="\t") dset2B <- read.table("TrafficStacked2.txt", header=TRUE, sep="\t")

attach(dsetB)

A) pairwise tests

Total.RAvsTS <- iTotal/Traffic - iTotal.1/Traffic.1 Vehicle.RAvsTS <- iVehicle/Traffic - iVehicle.1/Traffic.1 CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1

wilcox.test(Total.RAvsTS, exact=FALSE) # Highly Significant wilcox.test(Vehicle.RAvsTS, exact=FALSE) # Significant wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant

Normal approximation

t.test(Total.RAvsTS) # Highly Significant t.test(Vehicle.RAvsTS) # Significant t.test(CyclistPed.RAvsTS) # Not Significant

B) unpaired tests

wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE) # Significant wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE) # Borderline wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE) # Not Significant

Normal approximation

t.test(iTotal/Traffic, iTotal.1/Traffic.1) # Significant t.test(iVehicle/Traffic, iVehicle.1/Traffic.1) # Significant t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1) # Not Significant

Examples of calculations on page 258
Table 259, n=37, x=12, p=0.05/2

n <- 37 x <- 0:n plot(x, pbinom(x,n, prob=0.5), type="l") abline(h=0.05/2) abline(v=12)

n <- 85 x <- 0:n plot(x, pbinom(x,n, prob=0.5), type="l") abline(h=0.1/2) abline(v=34)

illustrates calculation less than 10%

pbinom(6,21, prob=0.5)*2

Test is examining lower end of tail (as seen in plots above)
5-10% Significance in this example is somewhere between 7 and 8

qbinom(0.1,21, prob=0.5) qbinom(0.05, 21, prob=0.5)

for(i in c(0.1, 0.05, 0.01, 0.002)) {

 message("Chisq = ", round(qchisq(1-i, 1),2), " (p=",i,")")

}

Calculations in excel table

x <- c("Total Crashes"=681,Injury=74, "Non-inj"=601, Fatal=0, Serious=6, Minor=68,

      Veh=0,Ped=0,Cycle=0,Veh=7,Ped=1,Cycle=1,Veh=45,Ped=13,Cycle=10, "Total Veh injury"=49, "Total Veh Serious&Fatal Injury" = 7)

y <- c(753,123,620,2,17,104, 2, 0,0,11,5,2,80,17,7,92, 13) n <- x+y chisq <- ((abs(x-y) -1)^2)/n

df <- data.frame(rbind("ROUNDABOUT SITES"=round(x,0),

                      "TRAFFIC SIGNAL SITES"=round(y,0),
                      "signif (p-value)" = (1 - pchisq(chisq, 1)),
                      "%crash saving at RBTs" = round(100*(y-x)/y),
                      "Total (n)" = x+y,
                      chisq = round(chisq,2)
                      ))

ind <- c(3:4, 7:12,14:15)

ind <- which(df[3,] > 0.065 | df[3,]==0) df[3, ind] <- "ns" df[4, ind] <- "" df[5, c(ind[-1],5)] <- "" df[6, c(ind[-1],5)] <- "" df

cat(" \t", file="binom.txt") write.table(df, "binom.txt", quote=FALSE, sep="\t", append=TRUE)

So this is actually 6% significance

Note the multiplication factor of 1.05 is not rounded

x <- x * 1.05 y <- c(753,133,620,2,18,113, 2, 0,0,11,5,2,89,17,7,102) n <- x+y chisq <- ((abs(x-y) -1)^2)/n

df <- data.frame(rbind(x=round(x,0),

                      y=round(y,0),
                      "signif (p-value)" = round((1 - pchisq(chisq, 1)),3),
                      "%crash saving at RBTs" = (y-x)/y,
                      chisq = round(chisq,2)
     ))

Binom check:

x <- 8 y <- 18 pbinom(x,x+y, prob=0.5)*2 1-pchisq(((abs(x-y) -1)^2)/(x+y),1)

x <- 7 # x always has to be lower... y <- 10 pbinom(x,x+y, prob=0.5)*2 1-pchisq(((abs(x-y) -1)^2)/(x+y),1)

@@ Line 1: / Line 1: @@
-{{#Security:*|Sven}}
+# Notes{{R}}{{#Security:*|Sven}}
-<pre>
-# Notes
 # A) unpaired non-parametric and parametric tests
 # Wilcoxon and t-test
@@ Line 322: / Line 320: @@
 pbinom(x,x+y, prob=0.5)*2
 -pchisq(((abs(x-y) -1)^2)/(x+y),1)
-</pre>
-[[Category:R]]

Difference between revisions of "Traffic.R"

Latest revision as of 23:53, 2 March 2009

Navigation menu

Views

Personal tools

Navigation

Search

Navigation

Blogs

Recent activity

Site map

Tools