library(ggplot2)
library(plyr)
library(dplyr)
library(plotly)
library(lubridate)
library(highcharter)
library(reshape2)
library(tidyr)
library(gridExtra)
# Load the raw 911 call records, keeping text columns as character vectors.
cal <- read.csv(
  file = "911.csv",
  header = TRUE,
  stringsAsFactors = FALSE
)

# Show the number of rows and columns that were read.
dim(cal)
## [1] 205580 9
# Parse the timestamp as wall-clock time in a fixed zone. as.Date() on a
# POSIXct value converts in UTC by default, so parsing in the session's local
# zone could push evening calls onto the following calendar day (and with it
# the derived month and weekday).
cal$timeStamp <- as.POSIXct(cal$timeStamp, tz = "UTC")

# Calendar features derived from the timestamp. `month` is stored once, as an
# ordered factor of abbreviated month names.
cal$date <- as.Date(cal$timeStamp)
cal$year <- year(cal$timeStamp)
cal$month <- month(cal$timeStamp, label = TRUE, abbr = TRUE)
cal$day <- day(cal$timeStamp)
cal$hour <- hour(cal$timeStamp)

# Split the call title at the colon into a broad type and a subtype.
cal <- separate(cal, title, c("type", "subtype"), sep = ":")

# The free-text description is not used below, so drop it directly. The
# dataset's own `e` column is left untouched.
cal$desc <- NULL

glimpse(cal)
## Observations: 205,580
## Variables: 14
## $ lat <dbl> 40.29788, 40.25806, 40.12118, 40.11615, 40.25149, 40...
## $ lng <dbl> -75.58129, -75.26468, -75.35198, -75.34351, -75.6033...
## $ zip <int> 19525, 19446, 19401, 19401, NA, 19446, 19044, 19426,...
## $ type <chr> "EMS", "EMS", "Fire", "EMS", "EMS", "EMS", "EMS", "E...
## $ subtype <chr> " BACK PAINS/INJURY", " DIABETIC EMERGENCY", " GAS-O...
## $ timeStamp <dttm> 2015-12-10 17:10:52, 2015-12-10 17:29:21, 2015-12-1...
## $ twp <chr> "NEW HANOVER", "HATFIELD TOWNSHIP", "NORRISTOWN", "N...
## $ addr <chr> "REINDEER CT & DEAD END", "BRIAR PATH & WHITEMARSH L...
## $ e <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1...
## $ date <date> 2015-12-10, 2015-12-10, 2015-12-10, 2015-12-10, 201...
## $ year <dbl> 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015, 2015...
## $ month <ord> Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, Dec, De...
## $ day <int> 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, 10, ...
## $ hour <int> 17, 17, 14, 16, 16, 15, 16, 16, 16, 17, 17, 16, 17, ...
# Number of calls received in each hour of the day.
temp <- cal %>%
  group_by(hour) %>%
  summarise(count = n())

# Chart of call volume by hour. The title font is set through the CSS
# `fontFamily` property.
highchart() %>%
  hc_title(
    text = "TimeSeries of 911 Calls spread over a day",
    style = list(color = "red", fontFamily = "Verdana")
  ) %>%
  hc_xAxis(categories = temp$hour) %>%
  hc_add_series(name = "count of calls", data = temp$count) %>%
  hc_add_theme(hc_theme_google())
Calls start to rise at 6 AM and peak at 5 PM. This is when most offices close and people head home, traffic is heavy, and parties and functions typically begin. There is a slight dip at 1 PM, which is usually lunchtime. The fewest calls are received at 4 AM.
# Number of calls for each call type.
temp <- cal %>%
  group_by(type) %>%
  summarise(count = n())

# Column chart of call volume by type. The title font is set through the CSS
# `fontFamily` property, and the series is named so the legend is meaningful.
highchart() %>%
  hc_title(
    text = "Type of Calls made",
    style = list(color = "green", fontFamily = "Verdana")
  ) %>%
  hc_xAxis(categories = temp$type) %>%
  hc_add_series(type = "column", name = "count of calls", data = temp$count)
EMS calls are the most common type of 911 call, followed by Traffic. Let's analyse what types of EMS calls are made and at which part of the day they are made.
# Call counts for every combination of call type and hour of the day.
temp <- cal %>%
  group_by(type, hour) %>%
  summarise(count = n())

# One panel per hour, each comparing the call types on its own axis scales.
ggplot(temp, aes(x = type, y = count, fill = type)) +
  geom_bar(stat = "identity") +
  facet_wrap(~hour, scales = "free") +
  labs(
    x = "Type",
    y = "count",
    title = "Type of calls made through the day",
    subtitle = "Analysis of calls every hour by type"
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.4),
    plot.title = element_text(
      size = 14, color = "red", face = "bold", hjust = 0.5
    ),
    plot.subtitle = element_text(
      size = 10, color = "black", face = "italic", hjust = 0.5
    ),
    legend.position = "none"
  )
Observations:
# Replace the day-of-month column with the full weekday name of each call.
cal$day <- wday(cal$date, label = TRUE, abbr = FALSE)

# Call counts for every combination of call type and weekday, largest first.
temp <- cal %>%
  group_by(type, day) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# One panel per call type, each showing the weekday profile on its own scales.
ggplot(temp, aes(x = day, y = count, fill = day)) +
  geom_bar(stat = "identity") +
  facet_wrap(~type, scales = "free") +
  labs(
    x = "Day of the week",
    y = "count",
    title = "Day of the week and type "
  ) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.5),
    plot.title = element_text(
      color = "blue", size = 15, face = "bold", hjust = 0.5
    ),
    legend.position = "none"
  )
Observations:
Friday receives the most calls for EMS and Traffic, while Sunday receives few Traffic calls but comparatively more EMS and Fire calls. This is plausible, since Sunday is a day off: people tend to stay at home and may host a party or celebration, which increases the chance of EMS- or fire-related mishaps.
Compared with EMS and Fire, Traffic calls fall drastically on weekends (Saturday and Sunday).
# Call counts for every combination of month and call type.
temp <- cal %>%
  group_by(month, type) %>%
  summarise(count = n())

# Left panel: monthly totals, stacked by call type.
p1 <- ggplot(temp, aes(month, count, fill = type)) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.9),
    plot.title = element_text(color = "red", size = 15, hjust = 0.5)
  ) +
  labs(x = "Month", y = "Count") +
  ggtitle("911 Calls made each month")

# Right panel: the same counts split into one facet per call type. The legend
# is hidden with the documented lowercase "none", as elsewhere in this script.
p2 <- ggplot(temp, aes(month, count, fill = month)) +
  geom_bar(stat = "identity") +
  facet_wrap(~type) +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.9),
    legend.position = "none",
    plot.title = element_text(color = "red", size = 15, hjust = 0.5)
  ) +
  labs(x = "Month", y = "Count") +
  ggtitle("911 Calls made each month-by category")

# Show both panels side by side under a shared heading.
grid.arrange(p1, p2, nrow = 1, ncol = 2, top = "911 Call analysis")
January and December saw the most calls, whereas from June to November call volume remained low compared with other months. From February to May there are gradual ups and downs.
# Call counts per EMS subtype, largest first.
temp <- cal %>%
  filter(type == "EMS") %>%
  group_by(subtype) %>%
  summarise(count = n()) %>%
  arrange(-count)

# Horizontal bar chart of EMS subtypes. `subtype` is a character column, so
# the row order of `temp` does not affect the plot; reorder() sorts the bars
# by count so the most frequent subtype sits at the top after the flip.
ggplot(temp, aes(reorder(subtype, count), count, fill = subtype)) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(angle = 90, vjust = 0.9),
    legend.position = "none",
    plot.title = element_text(color = "red", size = 15, hjust = 0.5)
  ) +
  labs(x = "Type", y = "Count") +
  ggtitle("EMS calls made through 911") +
  coord_flip()
Cardiac emergency, vehicle accident, respiratory emergency, and fall victim are the most frequent EMS calls made through 911.
# EMS call counts for every combination of subtype and hour, largest first.
temp <- cal %>%
  filter(type == "EMS") %>%
  group_by(subtype, hour) %>%
  summarise(count = n()) %>%
  arrange(-count)

# Stacked bars of EMS calls per hour. Each bar is drawn around its integer
# hour, so the axis limits are padded by half a unit; with limits of exactly
# 0-23 the bars for hours 0 and 23 reach past the limits and are dropped.
ggplot(temp, aes(hour, count, fill = subtype)) +
  geom_bar(stat = "identity") +
  labs(x = "Time in Hour", y = "Count", fill = "EMS Type") +
  ggtitle("EMS Call pattern through the day") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(
      color = "red", size = 30, face = "bold", hjust = 0.5
    )
  ) +
  scale_x_continuous(limits = c(-0.5, 23.5), breaks = seq(0, 23, 1))
Interestingly, the calls seem to follow an almost normal (bell-shaped) distribution from 6 AM until midnight. EMS calls peak at 10 AM. Although 911 calls overall peak at 5 PM, most EMS calls are made around 10 AM rather than 5 PM.
# Call counts per Traffic subtype, largest first.
temp <- cal %>%
  filter(type == "Traffic") %>%
  group_by(subtype) %>%
  summarise(count = n()) %>%
  arrange(-count)

# Horizontal bar chart of Traffic subtypes. `subtype` is a character column,
# so the row order of `temp` does not affect the plot; reorder() sorts the
# bars by count so the most frequent subtype sits at the top after the flip.
ggplot(temp, aes(reorder(subtype, count), count, fill = subtype)) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 0.5),
    legend.position = "none",
    plot.title = element_text(color = "red", size = 15, hjust = 0.5)
  ) +
  labs(x = "Type", y = "Count") +
  ggtitle("Traffic calls made through 911") +
  coord_flip()
Vehicle accidents, disabled vehicles, and road obstructions dominate calls in the Traffic category. Let's analyse at which part of the day most of these calls are made. My guess is that, since traffic peaks at the start and end of office hours, calls should peak around 9 AM and 5 PM.
# Traffic call counts for every combination of subtype and hour, largest first.
temp <- cal %>%
  filter(type == "Traffic") %>%
  group_by(subtype, hour) %>%
  summarise(count = n()) %>%
  arrange(-count)

# Stacked bars of Traffic calls per hour. Each bar is drawn around its integer
# hour, so the axis limits are padded by half a unit; with limits of exactly
# 0-23 the bars for hours 0 and 23 reach past the limits and are dropped.
ggplot(temp, aes(hour, count, fill = subtype)) +
  geom_bar(stat = "identity") +
  labs(x = "Time in Hour", y = "Count", fill = "Traffic Type") +
  ggtitle("Traffic Call pattern through the day") +
  theme(
    legend.position = "bottom",
    plot.title = element_text(
      color = "red", size = 15, face = "bold", hjust = 0.5
    )
  ) +
  scale_x_continuous(limits = c(-0.5, 23.5), breaks = seq(0, 23, 1))
As guessed, traffic calls peak in the late afternoon, from 3 PM to 5 PM. This graph also roughly follows a normal (bell-shaped) distribution.
# Call counts per Fire subtype, largest first.
temp <- cal %>%
  filter(type == "Fire") %>%
  group_by(subtype) %>%
  summarise(count = n()) %>%
  arrange(-count)

# Bar chart of Fire subtypes. `subtype` is a character column, so the row
# order of `temp` does not affect the plot; reorder() on the negated count
# places the most frequent subtype first, left to right.
ggplot(temp, aes(reorder(subtype, -count), count, fill = subtype)) +
  geom_bar(stat = "identity") +
  theme(
    axis.text.x = element_text(angle = 90),
    plot.title = element_text(
      hjust = 0.5, size = 15, color = "red", face = "bold"
    ),
    legend.position = "none"
  ) +
  labs(x = "Subtype", y = "Count") +
  ggtitle("Fire calls made through 911")
Fire alarms dominate the calls made under Fire, followed by vehicle accidents. The vehicle accident subtype appears in all three call types.
# Call counts per township, largest first.
temp <- cal %>%
  group_by(twp) %>%
  summarise(count = n()) %>%
  arrange(desc(count))

# Bar chart of townships. The factor levels follow the sorted row order of
# `temp`, so the bars appear from most to fewest calls.
ggplot(temp, aes(x = factor(twp, levels = twp), y = count, fill = twp)) +
  geom_bar(stat = "identity") +
  labs(x = "City", y = "Count", title = "City registering most calls") +
  theme(
    legend.position = "none",
    plot.title = element_text(
      color = "red", size = 15, face = "bold", hjust = 0.5
    ),
    axis.text.x = element_text(angle = 90, vjust = -0.4, hjust = 0.5)
  )
Lower Merion registers the most calls. There are a few records with an unknown township, which account for 74 calls. Let us see what types of calls are made in Lower Merion. As we have already seen, EMS calls will probably dominate here, since EMS-related calls are the most frequent irrespective of the time of day and day of the week.
# Call counts per call type for the LOWER MERION township only.
temp <- cal %>%
  filter(twp == "LOWER MERION") %>%
  group_by(type) %>%
  summarise(count = n())

# Bar chart comparing the call types within LOWER MERION.
ggplot(temp, aes(x = type, y = count, fill = type)) +
  geom_bar(stat = "identity") +
  theme(
    legend.position = "none",
    plot.title = element_text(
      color = "red", size = 15, face = "bold", hjust = 0.5
    ),
    axis.text.x = element_text(angle = 90, vjust = -0.4)
  ) +
  labs(x = "Type", y = "Count", title = "Type of calls made at LOWER MERION")