library(tidyverse)
library(gt)
library(leaflet)
<- "EPSG:4326"
googlecrs
<- "/home/ajackson/Dropbox/Rprojects/ERD/Data/FEMA_HousingAssist/"
path <- "/home/ajackson/Dropbox/Rprojects/Curated_Data_Files/Zip_Yr_Pop/"
datapath
# Read CSV, all as character.
# Commented out lines because I don't want to do that more than once
# df <- read_csv(paste0(path, "IndividualsAndHouseholdsProgramValidRegistrations.csv"),
# col_types=stringr::str_dup("c", 71))
#
# nanoparquet::write_parquet(df, paste0(path, "IndividualsAndHouseholdsProgramValidRegistrations.parquet"))
#
# df <- df %>%
# mutate(declarationDate=date(ymd_hms(declarationDate)))
# Let's read in the disaster declaration summaries as well
<- read_csv(paste0(path, "DisasterDeclarationsSummaries.csv"),
summaries col_types=stringr::str_dup("c", 28))
FEMA flood assistance data
FEMA
Supporting Activism
Analyze the flood assistance data from FEMA
FEMA dataset
Looking at the OpenFEMA Dataset: Individuals and Households Program - Valid Registrations - v1
This is about 25 million records, csv format, 9.4 Gb. I’m only interested in data pertaining to flooding, so the first step will be to pull records that are related to flooding. Data is from 2002 up to when I downloaded the data, March 2025.
There is also a much smaller dataset we will load that has disaster summaries that will let us attach names to the various disasters.
Determine how to pull out flood records
# df_test <- df %>% slice_sample(n=50000) # Grab 50,000 records
# saveRDS(df_test, paste0(path, "df_test.rds"))
<- readRDS(paste0(path, "df_test.rds"))
df_test
%>%
df_test mutate(waterLevel=as.numeric(waterLevel)) %>%
ggplot(aes(x=waterLevel)) +
geom_histogram() +
labs(
title="Water Level distribution"
)
%>%
df_test ggplot(aes(x=highWaterLocation)) +
geom_bar() +
coord_flip() +
labs(
title="High Water Location"
)
%>%
df_test ggplot(aes(x=floodDamage)) +
geom_bar() +
coord_flip() +
labs(
title="Flood Damage Flag"
)
%>%
df_test mutate(floodDamageAmount=as.numeric(floodDamageAmount)) %>%
ggplot(aes(x=floodDamageAmount)) +
geom_histogram() +
labs(
title="Flood Damage Amount"
)
# make a table
<- tribble(~Variable, ~Flood)
Damages
<- df_test %>%
Damages_value mutate(waterLevel=as.numeric(waterLevel)) %>%
filter(waterLevel>0) %>%
count()
1,]$Variable <- "Water Level"
Damages[1,]$Flood <- Damages_value$n[1]
Damages[
<- df_test %>%
Damages_value filter(!is.na(highWaterLocation)) %>%
count()
2,]$Variable <- "High Water Location"
Damages[2,]$Flood <- Damages_value$n[1]
Damages[
<- df_test %>%
Damages_value filter(floodDamage=="1") %>%
count()
3,]$Variable <- "Flood Damage Flag"
Damages[3,]$Flood <- Damages_value$n[1]
Damages[
<- df_test %>%
Damages_value mutate(floodDamageAmount=as.numeric(floodDamageAmount)) %>%
filter(floodDamageAmount>0) %>%
count()
4,]$Variable <- "Flood Damage Value"
Damages[4,]$Flood <- Damages_value$n[1]
Damages[
%>%
Damages gt()
Variable | Flood |
---|---|
Water Level | 6718 |
High Water Location | 6868 |
Flood Damage Flag | 5633 |
Flood Damage Value | 5468 |
# Look at combinations
<- df_test %>%
foo filter(!is.na(highWaterLocation))
%>%
df_test mutate(floodDamageAmount=as.numeric(floodDamageAmount)) %>%
filter(floodDamageAmount>0) %>%
filter(floodDamage=="0")
# A tibble: 0 × 71
# ℹ 71 variables: incidentType <chr>, declarationDate <date>,
# disasterNumber <chr>, county <chr>, damagedStateAbbreviation <chr>,
# damagedCity <chr>, damagedZipCode <chr>, applicantAge <chr>,
# householdComposition <chr>, occupantsUnderTwo <chr>, occupants2to5 <chr>,
# occupants6to18 <chr>, occupants19to64 <chr>, occupants65andOver <chr>,
# grossIncome <chr>, ownRent <chr>, primaryResidence <chr>,
# residenceType <chr>, homeOwnersInsurance <chr>, floodInsurance <chr>, …
# Nothing from above. But there can be flagged flood damage with a $0 amount.
Lets look at some simple statistics
Some abbreviations:
Abbrev | Full Name |
---|---|
IHP | Individual Housing Program |
HA | Housing Assistance |
ONA | Other Needs Assistance |
TSA | Transitional Sheltering Assistance |
# top 10 states for number of applications
%>%
df2 group_by(damagedStateAbbreviation) %>%
summarize(Applications=n(),
Accepted=sum(ihpEligible|haEligible|onaEligible|tsaEligible)) %>%
ungroup() %>%
mutate(Rejected=Applications-Accepted) %>%
arrange(desc(Applications)) %>%
rename(State=damagedStateAbbreviation) %>%
head(10) %>%
mutate(State = fct_reorder(State, Applications)) %>%
select(-Applications) %>%
pivot_longer(!State, names_to = "Status", values_to = "Applications") %>%
ggplot(aes(y=Applications, x=State, fill=Status)) +
geom_col() +
scale_y_continuous(labels = scales::label_comma())+
scale_fill_manual(values =c('green','red')) +
coord_flip() +
labs(title="Number of Flood Disaster Assistance Applications by State",
subtitle="From 2002 until March 2025, Top Ten")
# top 10 zipcodes for number of applications
%>%
df2 group_by(damagedZipCode) %>%
summarize(Applications=n(),
Accepted=sum(ihpEligible|haEligible|onaEligible|tsaEligible),
State=first(damagedStateAbbreviation)) %>%
ungroup() %>%
mutate(Rejected=Applications-Accepted) %>%
arrange(desc(Applications)) %>%
rename(Zip=damagedZipCode) %>%
head(10) %>%
mutate(Zip=paste(Zip, '-', State)) %>%
mutate(Zip = fct_reorder(Zip, Applications)) %>%
select(-Applications, -State) %>%
pivot_longer(!Zip, names_to = "Status", values_to = "Applications") %>%
ggplot(aes(y=Applications, x=Zip, fill=Status)) +
geom_col() +
scale_y_continuous(labels = scales::label_comma())+
scale_fill_manual(values =c('green','red')) +
coord_flip() +
labs(title="Number of Flood Disaster Assistance Applications by Zip",
subtitle="From 2002 until March 2025, Top Ten")
# top 10 zipcodes for number of disasters
%>%
df2 group_by(damagedZipCode) %>%
summarize(Applications=n(),
Floods=as.integer(n_distinct(declarationDate)),
State=first(damagedStateAbbreviation)
%>%
) ungroup() %>%
arrange(desc(Floods), desc(Applications)) %>%
rename(Zip=damagedZipCode) %>%
head(15) %>%
mutate(Zip=paste(Zip, '-', State)) %>%
mutate(Zip = fct_reorder(Zip, Applications)) %>%
select(-State) %>%
mutate(Floods=fct(as.character(Floods))) %>%
ggplot(aes(y=Applications, x=Zip, fill=Floods)) +
geom_col() +
scale_y_continuous(labels = scales::label_comma())+
scale_fill_manual(values =c('#F0E442','#0072B2', "green"), name="# Floods") +
coord_flip() +
labs(title="Number of Flood Disaster Assistance Applications by Zip",
subtitle="From 2002 until March 2025, top fifteen by number of floods")
# Distribution of # floods by zipcode
%>%
df2 group_by(damagedZipCode) %>%
summarize(Applications=n(),
Floods=n_distinct(declarationDate),
State=first(damagedStateAbbreviation)
%>%
) ungroup() %>%
rename(Zip=damagedZipCode) %>%
ggplot(aes(x=Floods)) +
geom_histogram(bins=13) +
labs(title="Number of Flood Events by Zip Code",
subtitle="From 2002 until March 2025",
x="Number of Flood Events",
y="Number of Zip Codes")
Let’s make a map of number of flood events, for # > 4
<- df2 %>%
foo group_by(damagedZipCode) %>%
summarize(Applications=n(),
Floods=n_distinct(declarationDate),
State=first(damagedStateAbbreviation)
%>%
) ungroup() %>%
filter(Floods>4) %>%
filter(State!="PR") %>% # Drop Puerto Rico
rename(Zip=damagedZipCode)
# Get zipcode polygons
<- readRDS(paste0(datapath, "ZCTA_geometry.rds"))
ZCTA_geom
<- left_join(foo, ZCTA_geom, by=join_by(Zip==GEOID)) %>%
foo2 select(-NAME, -variable, -estimate, -moe) %>%
::st_as_sf() %>%
sf::st_transform(crs=googlecrs)
sf
# make map
<- colorNumeric("YlOrBr",
pal c(min(foo2$Floods, na.rm=TRUE),
max(foo2$Floods, na.rm=TRUE)),
na.color = "transparent")
<- leaflet(data=foo2) %>% addTiles() %>%
map addPolygons(data=foo2,
weight=2,
color="black",
fillColor = pal(foo2$Floods),
popup = paste(
"<div class='leaflet-popup-scrolled' style='max-width:150px;max-height:200px'>",
"Zip code:", foo2$Zip, "<br>",
"Applications:", foo2$Applications, "<br>",
"# of floods:", foo2$Floods,
"</div>"
))
map