# B17017_001 Total households - poverty status
# B17017_002 Total households with income below poverty level
# B01001_001 Total population
# ACS variable codes to request. The vector names are used as column prefixes
# in the wide output (estimate columns end in "E", margins of error in "M").
acs_vars <- c(
  Pop = "B01001_001",        # Total population
  Households = "B17017_001", # Households
  Poverty = "B17017_002"     # Households in poverty
)
# Download the 5-year ACS estimates for every ZCTA, one request per year, and
# stack the yearly tables into a single data frame tagged with a `year` column.
# The yearly results are collected in a preallocated list and bound once at
# the end, rather than growing Census_data with rbind() on every iteration.
acs_years <- 2011:2023
acs_by_year <- vector("list", length(acs_years))
for (i in seq_along(acs_years)) {
  Year <- acs_years[i]
  print(paste("Year:", Year))
  foo <- get_acs(
    geography = "zcta",
    survey = "acs5",
    variables = acs_vars,
    year = Year,
    output = "wide",
    cache_table = TRUE,
    geometry = FALSE
  )
  # Record which survey year each row came from
  foo <- foo %>%
    mutate(year = Year)
  acs_by_year[[i]] <- foo
}
Census_data <- dplyr::bind_rows(acs_by_year)
#> [1] "Year: 2011"
#> [1] "Year: 2012"
#> [1] "Year: 2013"
#> [1] "Year: 2014"
#> [1] "Year: 2015"
#> [1] "Year: 2016"
#> [1] "Year: 2017"
#> [1] "Year: 2018"
#> [1] "Year: 2019"
#> [1] "Year: 2020"
#> [1] "Year: 2021"
#> [1] "Year: 2022"
#> [1] "Year: 2023"
# Do a little cleanup.
# GEOID sometimes has the state code and sometimes not, so drop it and derive
# the ZCTA from NAME instead. Because the selection starts with a negation,
# every other column (including the margin-of-error "M" columns) is kept; the
# named entries only rename.
Census_data <- Census_data %>%
  select(
    -GEOID,
    ZCTA = NAME,
    Year = year,
    Pop = PopE,
    Household = HouseholdsE,
    Poverty = PovertyE
  ) %>%
  # Strip the "ZCTA5 " label so only the ZCTA code remains
  mutate(ZCTA = stringr::str_remove(ZCTA, "ZCTA5 "))
# New Mexico has several ZCTAs that report NA households and poverty in 2018.
# These NAs will be a continuing issue unless they are handled now.
# Approach: for each affected ZCTA, compute its average ratio of households
# (and households in poverty) to population across the years that do have
# data, then estimate the missing values as ratio * population. The same is
# done for the margin-of-error columns using PopM.

# Rows with a missing household count
Bad_zctas <- Census_data %>%
  filter(is.na(Household))

# Every year of data for the ZCTAs that have at least one such row
New_Mexico <- Census_data %>%
  filter(ZCTA %in% Bad_zctas$ZCTA)

# Per-ZCTA mean ratios; na.rm = TRUE skips the years with missing values
foobar <- New_Mexico %>%
  group_by(ZCTA) %>%
  summarise(
    House_pct = mean(Household / Pop, na.rm = TRUE),
    HouseM_pct = mean(HouseholdsM / PopM, na.rm = TRUE),
    Poverty_pct = mean(Poverty / Pop, na.rm = TRUE),
    PovertyM_pct = mean(PovertyM / PopM, na.rm = TRUE)
  )

# Replace the values in the bad rows with the ratio-based estimates, then drop
# the helper ratio columns so the columns match Census_data again
Bad_zctas <- Bad_zctas %>%
  left_join(foobar, by = "ZCTA") %>%
  mutate(
    Household = House_pct * Pop,
    HouseholdsM = HouseM_pct * PopM,
    Poverty = Poverty_pct * Pop,
    PovertyM = PovertyM_pct * PopM
  ) %>%
  select(-House_pct, -HouseM_pct, -Poverty_pct, -PovertyM_pct)

# rows_patch() only fills values that are NA in Census_data, so existing
# non-missing values in the matched rows are left untouched
Census_data <- rows_patch(Census_data, Bad_zctas, by = c("ZCTA", "Year"))
# Write the combined, cleaned table to an RDS file under `path`
# (`path` is defined elsewhere and is concatenated as-is with the file name)
saveRDS(
  object = Census_data,
  file = paste0(path, "Pop_House_Poverty_by_ZCTA_year.rds")
)