Traffic.R

From Organic Design wiki
Revision as of 23:48, 2 March 2009 by Sven (talk | contribs) (New page: {{#Security:*|Sven}} <pre> # Notes # A...)
(diff) ← Older revision | Latest revision (diff) | Newer revision → (diff)

{{#Security:*|Sven}}

# Notes
# A) unpaired non-parametric and parametric tests
# Wilcoxon and t-test
# T-tests assume) that n is large

# 1) ---------------------------- Load datasets ------------------------------- #
setwd("/home/sven/Desktop/Consulting/Duncan")
dataDir <- "Data"
dset  <- read.table(file.path(dataDir, "Traffic.txt"), header=TRUE, sep="\t")
dset2 <- read.table(file.path(dataDir, "TrafficStacked.txt"), header=TRUE, sep="\t")
levels(dset2$Type) <- c("Roundabout","Traffic Signal")

names(dset)
names(dset2)

library(lattice)
attach(dset)

xyplot(I(iTotal/Traffic) ~ IntersectionPair,
       data=dset, ylim=c(0,0.0005))
xyplot(I(iTotal.1/Traffic.1) ~ IntersectionPair,
       data=dset, ylim=c(0,0.0005))
xyplot(I(iTotal/Traffic - iTotal.1/Traffic.1) ~ IntersectionPair,
       data=dset,
       panel=function(x,y) {
         panel.abline(h=0)
         panel.xyplot(x,y)
       })

dotplot(IntersectionPair ~ I(iTotal/Traffic), data=dset)
dotplot(IntersectionPair ~ I(iTotal.1/Traffic.1), data=dset)
# Illustrates that intersection #3 is a real problem
png("Graph1.png")
dotplot(IntersectionPair ~ I(iTotal/(4 * 365.25 * Traffic) - iTotal.1/(4 * 365.25 * Traffic.1)), data=dset,
        panel=function(x,y, ...){panel.abline(v=0); panel.dotplot(x,y)}, xlab=expression(over("Total Roundabout crashes","Total traffic") - over("Total traffic signal crashes", "Total traffic")), main="Intersection 3 needs investigation")
dev.off()
detach(dset)
attach(dset2)

# Good illustration of pairing
png("Graph2.png")
dotplot(IntersectionPair ~ iTotal, groups=Type,
            key = simpleKey(levels(Type), columns=2, space = "bottom"),
             xlab = "Unadjusted Accident Count ", ylab="Intersection pair",
             aspect=1, layout = c(1,1), main="Dotplot of counts by site")
dev.off()
# Legs is not really the issue, not enough information
 dotplot(IntersectionPair ~ iTotal| factor(Legs), groups=Type,
            key = simpleKey(levels(Type), columns=2, space = "bottom"),
             xlab = "Unadjusted Accident Count ", ylab="Intersection pair",
             aspect=1, layout = c(1,3), main="Pairs 10 and 20 idential, pair 3 needs investigation", between=list(y=0.3))

# Example
   dotplot(variety ~ yield | site, data = barley, groups = year,
             key = simpleKey(levels(barley$year), space = "right"),
             xlab = "Barley Yield (bushels/acre) ",
             aspect=0.5, layout = c(1,6), ylab=NULL)

# 2) --------------------- Wilcoxon rank signed test -------------------------- #
attach(dset)
# A) pairwise tests
Total.RAvsTS      <- iTotal/Traffic - iTotal.1/Traffic.1
Vehicle.RAvsTS    <- iVehicle/Traffic - iVehicle.1/Traffic.1
CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1

wilcox.test(Total.RAvsTS, exact=FALSE)            # Significant
wilcox.test(Vehicle.RAvsTS, exact=FALSE)    # Not Significant
wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant

# Normal approximation
t.test(RAvsTS)                              # Borderline
t.test(Vehicle.RAvsTS)                      # Not Significant
t.test(CyclistPed.RAvsTS)                   # Not Significant

# B) unpaired tests
wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE)            # Not Significant
wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE)        # Not Significant
wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE)  # Not Significant

# Normal approximation
t.test(iTotal/Traffic, iTotal.1/Traffic.1)                              # Not Significant
t.test(iVehicle/Traffic, iVehicle.1/Traffic.1)                          # Not Significant
t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1)                    # Not Significant

# Removing intersection 3 makes tests much more significant
wilcox.test(RAvsTS[-3], exact=FALSE)            # Significant
wilcox.test(Vehicle.RAvsTS[-3], exact=FALSE)    # Not Significant
wilcox.test(CyclistPed.RAvsTS[-3], exact=FALSE) # Not Significant

# Normal approximation
t.test(RAvsTS[-3])                              # Borderline
t.test(Vehicle.RAvsTS[-3])                      # Not Significant
t.test(CyclistPed.RAvsTS[-3])                   # Not Significant

# B) unpaired tests
wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE)            # Not Significant
wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE)        # Not Significant
wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE)  # Not Significant

# Normal approximation
t.test(iTotal/Traffic, iTotal.1/Traffic.1)                              # Not Significant
t.test(iVehicle/Traffic, iVehicle.1/Traffic.1)                          # Not Significant
t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1)                    # Not Significant

detach(dset)

names(dset2)

attach(dset2)
ls(pos=2)

accidentsTotal <- cbind(iTotal, Traffic-iTotal)
accidentsCyclistPed <- cbind(iCyclistPed, Traffic-iCyclistPed)
accidentsVehicle <- cbind(iVehicle, Traffic-iVehicle)

options(contrasts=c("contr.treatment", "contr.poly"))

# GLMS should include plot and diagnostics
# Need to use cloglog link function
# Total
obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial)
summary(obj)
# Diagnostics
plot(obj)

plot(c(3,5), c(0,0.001), xlab="Legs", ylab="Prob", type="n")
ld <- seq(3,5, length=101)
lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response"))
lines(ld, predict(obj, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response"))

# This shows that numbers of 5 leg intersections are small but influential
points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)])


objSub <- glm(accidentsTotal ~ Type, family=binomial)
summary(objSub)

# Diagnostics
plot(objSub)

plot(c(1,2), c(0,0.001), xlab="Legs", ylab="Prob", type="n")
ld <- seq(1,2, length=101)
lines(as.numeric(Type), predict(objSub, data.frame(Type=Type), type="response"))
points(as.numeric(Type), accidentsTotal[,1]/accidentsTotal[,2])

# This shows that numbers of 5 leg intersections are small but influential
points(Legs, accidentsTotal[,1]/accidentsTotal[,2], col=rainbow(2)[as.numeric(Type)])


anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate

obj <- glm(accidentsTotal ~ Type + Type%in%Legs, family=binomial(link=cloglog))
summary(obj)

# Cyclist vs Pedestrian
obj <- glm(accidentsCyclistPed ~ Type + Type%in%Legs, family=binomial)
summary(obj)

objSub <- glm(accidentsCyclistPed ~ Type, family=binomial)
summary(objSub)

anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate

# Vehicle
obj <- glm(accidentsVehicle ~ Type + Type%in%Legs, family=binomial)
summary(obj)

objSub <- glm(accidentsVehicle ~ Type, family=binomial)
summary(objSub)

anova(obj, objSub, test = "Chisq") # suggests sub model is inadequate

# The above analysis suggests that (assuming Legs variable is not of interest)
# there is a Type effect in the total and Vehicle accidents, but not in Pedistrians/cyclists
# This suggests that the influence is from vehicles in the totals

accidentsTotal2 <- cbind(iTotal, (365.25*4*Traffic)-iTotal)

obj3 <- glm(accidentsTotal2 ~ Type + Type%in%Legs, family=binomial)
summary(obj3)

plot(c(3,5), c(0,0.0000005), xlab="Legs", ylab="Prob", type="n")
ld <- seq(3,5, length=101)
lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Round about", length(ld)), levels=levels(Type))), type="response"))
lines(ld, predict(obj3, data.frame(Legs=ld, Type=factor(rep("Traffic Signal", length(ld)), levels=levels(Type))), type="response"))

# This shows that numbers of 5 leg intersections are small but influential
points(Legs, accidentsTotal2[,1]/accidentsTotal2[,2], col=rainbow(2)[as.numeric(Type)])

# Poisson model
obj4 <- glm(iTotal ~ Type + Type%in%Legs, family=poisson)
summary(obj4)

obj5 <- glm(iTotal ~ Type , family=poisson)
summary(obj5)

anova(obj4, obj5, test="Chisq")


# Reweight the trafficlights
iTotal2 <- iTotal.1 * Traffic/Traffic.1

wilcox.test(iTotal - iTotal2)
t.test(iTotal - iTotal2)

iTotal3 <- c(iTotal, iTotal2)
iTotal3

attach(dset2)
# Poisson model#2
obj4 <- glm(round(iTotal3) ~ Type + Type%in%Legs, family=poisson)
summary(obj4)

obj5 <- glm(round(iTotal3) ~ Type , family=poisson)
summary(obj5)


# Re-analyse with intersection #3 removed
dsetB  <- read.table("Traffic2.txt", header=TRUE, sep="\t")
dset2B <- read.table("TrafficStacked2.txt", header=TRUE, sep="\t")

attach(dsetB)
# A) pairwise tests
Total.RAvsTS      <- iTotal/Traffic - iTotal.1/Traffic.1
Vehicle.RAvsTS    <- iVehicle/Traffic - iVehicle.1/Traffic.1
CyclistPed.RAvsTS <- iCyclistPed/Traffic - iCyclistPed.1/Traffic.1

wilcox.test(Total.RAvsTS, exact=FALSE)            # Highly Significant
wilcox.test(Vehicle.RAvsTS, exact=FALSE)    # Significant
wilcox.test(CyclistPed.RAvsTS, exact=FALSE) # Not Significant

# Normal approximation
t.test(Total.RAvsTS)                              #  Highly Significant
t.test(Vehicle.RAvsTS)                      # Significant
t.test(CyclistPed.RAvsTS)                   # Not Significant

# B) unpaired tests
wilcox.test(iTotal/Traffic, iTotal.1/Traffic.1, exact=FALSE)            # Significant
wilcox.test(iVehicle/Traffic, iVehicle.1/Traffic.1, exact=FALSE)        # Borderline
wilcox.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1, exact=FALSE)  # Not Significant

# Normal approximation
t.test(iTotal/Traffic, iTotal.1/Traffic.1)                              # Significant
t.test(iVehicle/Traffic, iVehicle.1/Traffic.1)                          # Significant
t.test(iCyclistPed/Traffic, iCyclistPed.1/Traffic.1)                    # Not Significant

# Examples of calculations on page 258
# Table 259, n=37, x=12, p=0.05/2
n <- 37
x <- 0:n
plot(x, pbinom(x,n, prob=0.5), type="l")
abline(h=0.05/2)
abline(v=12)

n <- 85
x <- 0:n
plot(x, pbinom(x,n, prob=0.5), type="l")
abline(h=0.1/2)
abline(v=34)

# illustrates calculation less than 10%
pbinom(6,21, prob=0.5)*2
# Test is examining lower end of tail (as seen in plots above)
# 5-10% Significance in this example is somewhere between 7 and 8
qbinom(0.1,21, prob=0.5) 
qbinom(0.05, 21, prob=0.5)

for(i in c(0.1, 0.05, 0.01, 0.002)) {
  message("Chisq = ", round(qchisq(1-i, 1),2), " (p=",i,")")
}

# Calculations in excel table
x <- c("Total Crashes"=681,Injury=74, "Non-inj"=601, Fatal=0, Serious=6, Minor=68,
       Veh=0,Ped=0,Cycle=0,Veh=7,Ped=1,Cycle=1,Veh=45,Ped=13,Cycle=10, "Total Veh injury"=49, "Total Veh Serious&Fatal Injury" = 7)
y <- c(753,123,620,2,17,104, 2, 0,0,11,5,2,80,17,7,92, 13)
n <- x+y
chisq <- ((abs(x-y) -1)^2)/n

df <- data.frame(rbind("ROUNDABOUT SITES"=round(x,0),
                       "TRAFFIC SIGNAL SITES"=round(y,0),
                       "signif (p-value)" = (1 - pchisq(chisq, 1)),
                       "%crash saving at RBTs" = round(100*(y-x)/y),
                       "Total (n)" = x+y,
                       chisq = round(chisq,2)
                       ))
#ind <- c(3:4, 7:12,14:15)
ind <- which(df[3,] > 0.065 | df[3,]==0)
df[3, ind] <- "ns"
df[4, ind] <- ""
df[5, c(ind[-1],5)] <- ""
df[6, c(ind[-1],5)] <- ""
df

cat(" \t", file="binom.txt")
write.table(df, "binom.txt", quote=FALSE, sep="\t", append=TRUE)

# So this is actually 6% significance

# Note the multiplication factor of 1.05 is not rounded
x <- x * 1.05
y <- c(753,133,620,2,18,113, 2, 0,0,11,5,2,89,17,7,102)
n <- x+y
chisq <- ((abs(x-y) -1)^2)/n

df <- data.frame(rbind(x=round(x,0),
                       y=round(y,0),
                       "signif (p-value)" = round((1 - pchisq(chisq, 1)),3),
                       "%crash saving at RBTs" = (y-x)/y,
                       chisq = round(chisq,2)
      ))

# Binom check:
x <- 8
y <- 18
pbinom(x,x+y, prob=0.5)*2
1-pchisq(((abs(x-y) -1)^2)/(x+y),1)

x <- 7 # x always has to be lower...
y <- 10
pbinom(x,x+y, prob=0.5)*2
1-pchisq(((abs(x-y) -1)^2)/(x+y),1)