Census Population data by ZCTA and Year

FEMA

Supporting Activism

Build a nice reusable dataset of census data by ZCTA and year

Author

Alan Jackson

Published

April 1, 2025

Basic census data by ZCTA and Year

This file has Population, Households, and Households in Poverty, each with its margin of error, by ZCTA for the years 2011-2023.

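I had to impute a small number of missing household and poverty values for New Mexico ZCTAs in 2018; each one is estimated from that ZCTA's population and its average ratio to population across the years that do have data.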

Figure out which fields to grab

# ACS variable IDs to request. The vector names become the column prefixes in
# the wide output (PopE/PopM, HouseholdsE/HouseholdsM, PovertyE/PovertyM).
#
#   B01001_001  Total population
#   B17017_001  Total households - poverty status
#   B17017_002  Total households with income below poverty level

acs_vars <- c(
  Pop        = "B01001_001",
  Households = "B17017_001",
  Poverty    = "B17017_002"
)

# Download the 5-year ACS estimates for every ZCTA, one request per year.
#
# Each year's table is collected in a list and the list is stacked once at the
# end. Growing Census_data with rbind() inside the loop re-copies the whole
# accumulated table on every iteration.

acs_years <- 2011:2023

yearly_tables <- lapply(acs_years, function(yr) {
  print(paste("Year:", yr))  # progress marker, one line per request
  get_acs(geography = "zcta",
          survey = "acs5",
          variables = acs_vars,
          year = yr,
          output = "wide",
          cache_table = TRUE,
          geometry = FALSE) %>%
    mutate(year = yr)  # tag every row with the year it was requested for
})

# One stacked table with a `year` column identifying the source request
Census_data <- dplyr::bind_rows(yearly_tables)

[1] "Year: 2011"

[1] "Year: 2012"

[1] "Year: 2013"

[1] "Year: 2014"

[1] "Year: 2015"

[1] "Year: 2016"

[1] "Year: 2017"

[1] "Year: 2018"

[1] "Year: 2019"

[1] "Year: 2020"

[1] "Year: 2021"

[1] "Year: 2022"

[1] "Year: 2023"

#   Tidy up the combined table

#     GEOID is inconsistent (sometimes it carries a state code, sometimes not),
#     so drop it and take the ZCTA from NAME instead, stripping the "ZCTA5 "
#     prefix. The estimate columns lose their "E" suffix; the margin-of-error
#     columns (PopM, HouseholdsM, PovertyM) keep their names.

Census_data <- Census_data %>% 
  select(-GEOID) %>% 
  rename(ZCTA=NAME,
         Year=year,
         Pop=PopE,
         Household=HouseholdsE,
         Poverty=PovertyE) %>% 
  mutate(ZCTA=stringr::str_remove(ZCTA, "ZCTA5 "))
  
#   New Mexico has several ZCTAs that report NA households and poverty in 2018.
#   Fill those gaps here so the NAs do not keep causing trouble downstream.

#   Method: for each affected ZCTA, average the ratio of each measure to Pop
#   (and of each margin of error to PopM) over the years that have data, then
#   multiply the affected row's Pop / PopM by those average ratios.

#   NOTE(review): rows are selected on NA Household only; a row with NA Poverty
#   but a valid Household would not be patched - confirm none exist.

#   Rows that need values
Bad_zctas <- Census_data %>% 
  filter(is.na(Household))

#   Every year of data for the ZCTAs that have at least one such row
zcta_history <- Census_data %>% 
  filter(ZCTA %in% Bad_zctas$ZCTA)

#   Per-ZCTA average ratios; na.rm=TRUE skips the years with missing values
zcta_ratios <- zcta_history %>% 
  group_by(ZCTA) %>% 
  summarise(House_pct=mean(Household/Pop, na.rm=TRUE),
            HouseM_pct=mean(HouseholdsM/PopM, na.rm=TRUE),
            Poverty_pct=mean(Poverty/Pop, na.rm=TRUE),
            PovertyM_pct=mean(PovertyM/PopM, na.rm=TRUE))

#   Turn the ratios into estimated values, then drop the helper columns
Bad_zctas <- Bad_zctas %>% 
  left_join(zcta_ratios, by="ZCTA") %>% 
  mutate(Household=House_pct*Pop,
         HouseholdsM=HouseM_pct*PopM,
         Poverty=Poverty_pct*Pop,
         PovertyM=PovertyM_pct*PopM) %>% 
  select(-ends_with("_pct"))

#   rows_patch() fills only the cells that are NA in Census_data
Census_data <- Census_data %>% 
  rows_patch(Bad_zctas, by=c("ZCTA", "Year"))
  
#   Write the finished table to disk so other analyses can reuse it

saveRDS(object = Census_data,
        file = paste0(path, "Pop_House_Poverty_by_ZCTA_year.rds"))

Quick look at the data

# Show every row for one sample ZCTA as a gt table
gt::gt(filter(Census_data, ZCTA == "77008"))

ZCTA	Pop	PopM	Household	HouseholdsM	Poverty	PovertyM	Year
77008	29776	1322	14227	476	1265	284	2011
77008	29424	1321	14368	466	1274	299	2012
77008	30807	1413	14742	455	1248	309	2013
77008	31418	1321	15081	462	1179	258	2014
77008	31868	1188	15283	373	1149	309	2015
77008	32588	1346	15671	370	1035	270	2016
77008	34491	1303	16116	385	1033	247	2017
77008	34184	1150	16262	400	1139	242	2018
77008	34895	1274	16337	506	1207	289	2019
77008	36248	2124	17326	925	1567	336	2020
77008	37097	2073	17500	854	1467	351	2021
77008	38496	2053	18349	850	1412	379	2022
77008	40155	2001	19513	840	1732	442	2023