Step 1: Load the DOHMH Childcare Center Inspections dataset (updated as of November 25, 2022) from NYC OpenData.
Step 2: Because names are entered inconsistently, the child care records contain many duplicates: the same child care center can appear under different names in separate records because of typos. To calculate the number of violations for each child care center more accurately, we standardized the legal names by converting them to lowercase, removing punctuation and spaces, and stripping substrings that can cause inconsistencies (such as "llc", "inc", "school", "center", and "ctr"), so that records with the same legal name are merged.
# Collapse spelling variants of each center's legal name into a single key,
# then count how many records fall under each key.
children_center <- raw_data %>%
  janitor::clean_names() %>%
  mutate(
    # Lowercase first so the literal patterns below match regardless of case.
    legal_name = tolower(legal_name),
    # Turn every run of punctuation/spaces into one space, then drop spaces.
    legal_name = gsub(" ", "", gsub('[[:punct:] ]+', " ", legal_name)),
    # Delete each token in turn. The order is preserved because an earlier
    # deletion can change what a later pattern matches.
    # NOTE(review): "th" and "i" are removed wherever they occur, not only as
    # whole words -- confirm this is intended.
    legal_name = Reduce(
      function(nm, token) gsub(token, "", nm),
      c("llc", "inc", "th", "school", "i", "center", "ctr"),
      legal_name
    )
  ) %>%
  select(legal_name) %>%
  group_by(legal_name) %>%
  summarise(n_obs = n())
# Count, per ZIP code, the records that have both a ZIP code and a
# violation category recorded.
total_obs <- raw_data %>%
  janitor::clean_names() %>%
  # Rows missing either field are excluded from the counts.
  drop_na(zip_code, violation_category) %>%
  select(zip_code, violation_category) %>%
  group_by(zip_code) %>%
  summarise(n_obs = n())
# Choropleth map: polygons from zip_map_crs filled by n_obs via bite_pal,
# with a popup per polygon and a legend for the same palette.
leaflet::leaflet(data = zip_map_crs) %>%
  addProviderTiles(provider = "CartoDB.Positron") %>%
  addPolygons(
    # Outline styling.
    color = "#BDBDC3",
    weight = 1,
    # Fill colour is driven by the n_obs column through the palette function.
    fillColor = ~bite_pal(n_obs),
    fillOpacity = 0.8,
    popup = label_popup,
    # Emphasise the polygon under the cursor and raise it above neighbours.
    highlightOptions = highlightOptions(
      color = "black",
      weight = 2,
      bringToFront = TRUE
    )
  ) %>%
  addLegend(
    position = "bottomleft",
    pal = bite_pal,
    values = ~n_obs,
    title = "Number of violation incidents by zip code",
    opacity = 1
  )