# B17017_001 Total households - poverty status
# B17017_002 Total households with income below poverty level
# B01001_001 Total population
# ACS variable codes to request, keyed by the short name used for each one.
# With get_acs(output = "wide") these names come back as column prefixes with
# an E (estimate) or M (margin of error) suffix, e.g. PopE / PopM.
acs_vars <- c(
  Pop        = "B01001_001", # Total population
  Households = "B17017_001", # Households
  Poverty    = "B17017_002"  # Households in poverty
)
# Download the ACS 5-year estimates for every ZCTA, one request per year, and
# stack the yearly tables into Census_data. A `year` column records which
# release each row came from.
#
# Each year's table is stored in a preallocated list and combined once after
# the loop, so the accumulated data is not re-copied on every iteration.
# A progress line is printed for each year as it is requested.
acs_years <- 2011:2023
yearly_tables <- vector("list", length(acs_years))
for (i in seq_along(acs_years)) {
  Year <- acs_years[i]
  print(paste("Year:", Year))
  foo <- get_acs(
    geography = "zcta",
    survey = "acs5",
    variables = acs_vars,
    year = Year,
    output = "wide",
    cache_table = TRUE,
    geometry = FALSE
  ) %>%
    mutate(year = Year)
  yearly_tables[[i]] <- foo
}
Census_data <- bind_rows(yearly_tables)
# Tidy up the combined table.
# GEOID is inconsistent (sometimes it carries the state code, sometimes not),
# so it is dropped and the ZCTA code is recovered from NAME instead.
# The estimate columns lose their "E" suffix; the margin-of-error columns
# (PopM, HouseholdsM, PovertyM) are left as they are.
Census_data <- Census_data %>%
  select(-GEOID) %>%
  rename(
    ZCTA = NAME,
    Year = year,
    Pop = PopE,
    Household = HouseholdsE,
    Poverty = PovertyE
  )
# Strip the "ZCTA5 " label so only the code itself remains
Census_data <- Census_data %>%
  mutate(ZCTA = stringr::str_remove(ZCTA, "ZCTA5 "))
# New Mexico has several ZCTAs that report NA households and poverty in 2018.
# Those NAs would keep causing trouble downstream, so they are filled in here.
#
# Approach: for every ZCTA with at least one missing Household value, compute
# its average ratio of households (and households in poverty) to population
# over the rows that do have data, then multiply the population of each
# missing row by that ratio. The margins of error are estimated the same way
# from PopM.
#
# NOTE(review): the filter below is not restricted to New Mexico or to 2018;
# it picks up any row whose Household is NA. Rows where only Poverty is NA are
# not picked up.

# Rows that need to be filled
Bad_zctas <- filter(Census_data, is.na(Household))

# All rows (every year) for the ZCTAs that have something to fill
New_Mexico <- filter(Census_data, ZCTA %in% Bad_zctas$ZCTA)

# Average ratios per ZCTA; na.rm = TRUE skips the rows with missing values
foobar <- summarise(
  group_by(New_Mexico, ZCTA),
  House_pct = mean(Household / Pop, na.rm = TRUE),
  HouseM_pct = mean(HouseholdsM / PopM, na.rm = TRUE),
  Poverty_pct = mean(Poverty / Pop, na.rm = TRUE),
  PovertyM_pct = mean(PovertyM / PopM, na.rm = TRUE)
)

# Attach the ratios, derive the replacement values, then drop the ratios
Bad_zctas <- Bad_zctas %>%
  left_join(foobar, by = "ZCTA") %>%
  mutate(
    Household = House_pct * Pop,
    HouseholdsM = HouseM_pct * PopM,
    Poverty = Poverty_pct * Pop,
    PovertyM = PovertyM_pct * PopM
  ) %>%
  select(-c(House_pct, HouseM_pct, Poverty_pct, PovertyM_pct))

# rows_patch() only writes into cells of Census_data that are currently NA,
# matching rows on ZCTA and Year
Census_data <- rows_patch(
  Census_data,
  Bad_zctas,
  by = c("ZCTA", "Year")
)
# Write the cleaned table to disk as an RDS file.
# `path` is defined elsewhere; paste0() adds no separator, so it presumably
# already ends with one (verify where `path` is set).
saveRDS(
  object = Census_data,
  file = paste0(path, "Pop_House_Poverty_by_ZCTA_year.rds")
)