library(tidyverse)
library(lubridate)
library(stringr)
library(tmap)
# Numeric CRS code (presumably EPSG 4326, i.e. WGS 84 lat/long -- confirm).
# It is not referenced again in this section.
googlecrs <- 4326

# Directory holding the cleaned Houston permit extracts.
path <- "/home/ajackson/Dropbox/Rprojects/Curated_Data_Files/Houston_Permits/"

# Three cleaned permit extracts; the file names indicate the period covered.
df <- readRDS(paste0(path, "Clean_Final_2022-9Sep2024.rds"))
old <- readRDS(paste0(path, "Clean_Final_2017_to_Mar_2022.rds"))
older <- readRDS(paste0(path, "Clean_Final_1989_2019.rds"))
# Combine the three files; solar, battery, and generator records are
# extracted from the combined table further down.
# Each extract is reduced to the same six columns. The oldest file names its
# date and address columns differently (Date, Output), so they are renamed.
df2 <- rbind(
  older %>%
    select(Permit_date = Date, Zipcode, Lat, Lon, Comments,
           Address = Output),
  old %>%
    select(Permit_date, Zipcode, Lat, Lon, Comments,
           Address = Cleaned_address),
  df %>%
    select(Permit_date, Zipcode, Lat, Lon, Comments,
           Address = Cleaned_address)
) %>%
  # Drop rows that are identical in every column ...
  distinct() %>%
  # ... then keep only the first row for each Comments/Address pair.
  filter(!duplicated(cbind(Comments, Address))) %>%
  mutate(Permit_date = as_date(Permit_date)) %>%
  ungroup()
# Named events used to annotate the time-series plots below.
# `Date` is drawn as a dashed vertical line and `nudge` is the y position
# at which the event label is placed.
Outages <- tribble(
  ~Event,    ~Date,         ~nudge,
  "Harvey",  "2017-Aug-27", 125,
  "Freeze",  "2021-Feb-14", 150,
  "Derecho", "2024-May-16", 175,
  "Beryl",   "2024-July-8", 200
) %>%
  mutate(Date = lubridate::ymd(Date))
Houston Solar Panels 2024 version
Create various datasets
Having bought solar panels myself a few years ago, and realizing that the city permit database could be used to find most installations, I decided that it would be interesting to look at the recent history and a few other facets of residential solar panel installations.
The first step is to download the structural permit data as a CSV file from the city open data site and clean it up. Details may be found on my github site, or in the data section of this site. The code for cleaning up the older data is in the file Clean_old_City_permits.Rmd on that same github site.
Grabbing the correct records
As far as I can tell, Solar Panels are designated as such in the Description field, and nothing else. So a simple filter on “Solar” should suffice to capture all the installation permits. In more recent records, batteries are also flagged with tags like BATTERY, BATTERIES, LITHIUM, LI-ION, or POWERWALL. Additionally, since about 2017 but more consistently in recent years, the number of panels has been noted, so I can use that as a way to estimate added capacity.
Generators use words like GENERATOR in the comments, and about half the time also say whether the fuel is diesel or natural gas. Sometimes the power rating is also given, in kilowatts. For some reason, residential generators do not seem to appear in the permit database, even though they are required to get a permit. That is a mystery.
And there are 2 installations of solar roof tiles.
# Filter for various items - Solar really didn't begin until 2014

# Solar panels: permits dated 2014 or later whose comment text contains
# "SOLAR " (note the trailing space in the pattern).
Solar <- df2 %>%
  filter(Permit_date > ymd("2013-12-31")) %>%
  filter(str_detect(Comments, "SOLAR "))
# Gas and diesel generators, but exclude cell towers
# (comments mentioning CELL or TELECOM are dropped).
Generator <- df2 %>%
  filter(Permit_date > ymd("2013-12-31")) %>%
  filter(str_detect(Comments, "GAS GEN|GENERAT")) %>%
  filter(!str_detect(Comments, "CELL|TELECOM")) %>%
  ungroup()
# Try to decide if it is a residential or commercial generator
# Assume > 10 kW is commercial
# NOTE(review): this step only derives the KW and Fuel columns; no
# residential/commercial flag is actually computed here.
Generator <- Generator %>%
  # Grab the rating token that precedes "KW " / "EKW " in the comment ...
  mutate(KW = str_extract(Comments, "[\\d,]* ?E?KW ")) %>%
  # ... keep just its digits and thousands separators ...
  mutate(KW = str_extract(KW, "[\\d,]+")) %>%
  # ... strip every comma (not only the first) so "1,000,000" also parses ...
  mutate(KW = str_remove_all(KW, ",")) %>%
  # ... and convert; comments without a rating end up as NA.
  mutate(KW = as.integer(KW)) %>%
  # Fuel type: DIESEL is tested before GAS; anything else is "UNK".
  mutate(Fuel = case_when(
    str_detect(Comments, "DIESEL") ~ "Diesel",
    str_detect(Comments, "GAS") ~ "Gas",
    .default = "UNK"
  )) %>%
  mutate(Fuel = as_factor(Fuel))
# Car chargers: permits dated 2014 or later whose comments mention
# CHARGING, CHARGER, or CAR CHAR.
Charger <- df2 %>%
  filter(Permit_date > ymd("2013-12-31")) %>%
  filter(str_detect(Comments, "CHARGING|CHARGER|CAR CHAR"))
# Batteries: permits dated 2014 or later whose comments contain any of the
# battery-related keywords in the pattern below.
Battery <- df2 %>%
  filter(Permit_date > ymd("2013-12-31")) %>%
  filter(str_detect(Comments, "BATTER|LITHIUM|LI-ION|POWERWALL|ENERGY STOR"))
Time series
So let’s look at the growth in stuff over time. We’ll consolidate to monthly numbers to make the plot look decent.
Looks like a reasonable linear fit for solar, commercial generators basically flat.
Hmmm… for batteries, it looks like there was a big jump not long after Valentine’s Day of 2021. Why would that be?
# Solar installations
# Monthly permit counts with a linear trend line; dashed verticals and
# labels mark the events listed in `Outages`.
Solar %>%
  filter(Permit_date < ymd("2024-09-01")) %>% # don't want a fractional month
  mutate(By_Month = floor_date(Permit_date, unit = "month")) %>%
  count(By_Month, name = "MonthlyTotal") %>%
  ggplot(aes(x = By_Month, y = MonthlyTotal)) +
  geom_point() +
  geom_smooth(method = lm) +
  geom_vline(data = Outages, aes(xintercept = Date), linetype = "dashed") +
  geom_text(data = Outages, aes(x = Date, y = nudge, label = Event)) +
  labs(
    title = "Residential Solar Panel Permits in Houston",
    subtitle = "Monthly totals",
    x = "Year",
    y = "Month Total"
  )
# Battery installations
# Same layout as the solar plot; the event labels are placed at nudge/7
# because the monthly counts here are much smaller.
Battery %>%
  filter(Permit_date < ymd("2024-09-01")) %>% # don't want a fractional month
  mutate(By_Month = floor_date(Permit_date, unit = "month")) %>%
  count(By_Month, name = "MonthlyTotal") %>%
  ggplot(aes(x = By_Month, y = MonthlyTotal)) +
  geom_point() +
  geom_smooth(method = lm) +
  geom_vline(data = Outages, aes(xintercept = Date), linetype = "dashed") +
  geom_text(data = Outages, aes(x = Date, y = nudge / 7, label = Event)) +
  labs(
    title = "Residential Battery Permits in Houston",
    subtitle = "Monthly totals",
    x = "Year",
    y = "Month Total"
  )
# Generator installations
# Monthly permit counts with a linear trend line; event labels are placed
# at nudge/7 to suit the smaller y range.
Generator %>%
  filter(Permit_date < ymd("2024-09-01")) %>% # don't want a fractional month
  mutate(By_Month = floor_date(Permit_date, unit = "month")) %>%
  count(By_Month, name = "MonthlyTotal") %>%
  ggplot(aes(x = By_Month, y = MonthlyTotal)) +
  geom_point() +
  geom_smooth(method = lm) +
  geom_vline(data = Outages, aes(xintercept = Date), linetype = "dashed") +
  geom_text(data = Outages, aes(x = Date, y = nudge / 7, label = Event)) +
  labs(
    title = "Commercial Generator Permits in Houston",
    subtitle = "Monthly totals",
    x = "Year",
    y = "Month Total"
  )
# Generator installations by fuel type
# One facet row per Fuel level; count() returns an ungrouped table, so no
# grouping is left behind.
Generator %>%
  filter(Permit_date < ymd("2024-09-01")) %>% # don't want a fractional month
  mutate(By_Month = floor_date(Permit_date, unit = "month")) %>%
  count(By_Month, Fuel, name = "MonthlyTotal") %>%
  ggplot(aes(x = By_Month, y = MonthlyTotal)) +
  geom_point() +
  facet_grid(rows = vars(Fuel)) +
  labs(
    title = "Commercial Generator Permits in Houston, by Fuel Type",
    subtitle = "Monthly totals",
    x = "Year",
    y = "Month Total"
  )
# Generator kW ratings
# Only permits whose comment carried a parseable rating are shown.
Generator %>%
  filter(!is.na(KW)) %>%
  ggplot(aes(x = KW)) +
  geom_histogram(bins = 30) + # 30 is the ggplot2 default, stated explicitly
  labs(title = "Commercial Generator Installations, Kilowatt sizes")
Look at amount of power capacity
We will use the number of panels (where noted) to get an idea of how much kW capacity is being added. While capacity per panel varies, 300 watts is not a bad average number.
# Panel counts: take the first parenthesised integer in the comment text,
# e.g. "(24)", as the number of panels. Keep only permits whose comments
# contain "RESID" and whose count is between 1 and 99.
panels <- Solar %>%
  mutate(
    Num_panels = as.numeric(
      str_extract(str_extract(Comments, "\\(\\d+\\)"), "\\d+")
    )
  ) %>%
  filter(
    str_detect(Comments, "RESID"),
    Num_panels < 100,
    Num_panels > 0
  )

# Distribution of panels per installation
panels %>%
  ggplot() +
  geom_histogram(aes(x = Num_panels))
# About 300 watts per panel
# 300 W * panels / 1000 gives kilowatts per installation, summed by month.
panels %>%
  filter(Permit_date > ymd("2020-1-1")) %>%
  mutate(Kilowatts = 300 * Num_panels / 1000) %>%
  mutate(By_Month = floor_date(Permit_date, unit = "month")) %>%
  group_by(By_Month) %>%
  summarise(MonthlyTotal = sum(Kilowatts)) %>%
  ggplot(aes(x = By_Month, y = MonthlyTotal)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Monthly Residential Solar Panel Kilowatts in Houston",
    subtitle = "For installations where the number of panels was noted",
    x = "Year",
    y = "Month Total"
  )
# Assume average 30 panels per install, at 300 watts per, so 9 kwatts per
# Monthly permit count times 9 kW, for permits dated 2021 onward.
Solar %>%
  filter(year(Permit_date) > 2020) %>%
  mutate(By_Month = floor_date(Permit_date, unit = "month")) %>%
  count(By_Month, name = "MonthlyTotal") %>%
  mutate(Kwatts = MonthlyTotal * 9) %>%
  ggplot(aes(x = By_Month, y = Kwatts)) +
  geom_point() +
  geom_smooth() +
  labs(
    title = "Residential Solar Panel Total Estimated kW added in Houston",
    subtitle = "Assuming 9 kW per install on average",
    x = "Year",
    y = "Month Total"
  )
Let’s look at the distribution by Zip code
Surprisingly, the Houston Arrow does not appear on this map. Note that 77048 is dominated by a property of apartment homes that were built with a large solar array.
# Top 20 zipcodes
# Count solar permits per zip code, keep the 20 largest, and render the
# result as a gt table.
Solar %>%
  group_by(Zipcode) %>%
  summarise(n = n()) %>%
  dplyr::arrange(desc(n)) %>%
  dplyr::slice(1:20) %>%
  gt::gt() %>%
  gt::tab_header(
    title = "Solar Panel Installations by Zip Code",
    subtitle = "2014 - present"
  ) %>%
  gt::cols_label(
    Zipcode = "Zip Code",
    n = "Number of Installations"
  )
Solar Panel Installations by Zip Code | |
---|---|
2014 - present | |
Zip Code | Number of Installations |
77048 | 307 |
77072 | 260 |
77045 | 259 |
77008 | 246 |
77009 | 230 |
77099 | 228 |
77047 | 223 |
77075 | 203 |
77088 | 200 |
77034 | 185 |
77035 | 185 |
77016 | 181 |
77007 | 176 |
77085 | 175 |
77025 | 173 |
77096 | 173 |
77053 | 168 |
77489 | 164 |
77021 | 162 |
77077 | 157 |
# now let's make a map

# Zip-code polygons (presumably City of Houston, going by the file name).
# ZIP_CODE is copied into a character `Zipcode` column so it can be joined
# to the permit data, and the table is converted to an sf object.
zippath <- "/home/ajackson/Dropbox/Rprojects/Curated_Data_Files/Zipcodes/"
zips <- readRDS(paste0(zippath, "COH_Zip_Polys.rds")) %>%
  mutate(Zipcode = as.character(ZIP_CODE)) %>%
  sf::st_as_sf()
# Number of permits per zip
# `Label` holds the hover text ("<count> permits <zip>"); `Num` is then
# replaced by a width-50 bin so the map can be coloured by class.
solar_zip <- Solar %>%
  group_by(Zipcode) %>%
  summarise(Num = n()) %>%
  mutate(Label = paste(as.character(Num), "permits", Zipcode)) %>%
  mutate(Num = cut_width(Num, 50))

# Attach polygons; right_join keeps every zip code that has permits.
solar_zip <- right_join(zips, solar_zip, by = "Zipcode")
# Create a factored palette function
# Maps each permit-count bin in solar_zip$Num to a shade of blue.
pal <- leaflet::colorFactor(
  palette = "Blues",
  domain = solar_zip$Num
)

# Interactive map: zip polygons filled by permit-count bin, with the
# count/zip label shown on hover and a legend in the bottom-left corner.
leaflet::leaflet(solar_zip) %>%
  leaflet::setView(lng = -95.362306, lat = 29.756931, zoom = 12) %>%
  leaflet::addTiles() %>%
  leaflet::addPolygons(
    weight = 1,
    fillColor = ~pal((Num)),
    fillOpacity = 0.5,
    label = ~Label
  ) %>%
  leaflet::addLegend(
    "bottomleft",
    pal = pal,
    values = ~Num,
    title = "Num Permits",
    opacity = 1
  )
Let’s combine solar panels with assessments
Interestingly, it is clear that solar panels are not just for the wealthy. The median value of homes with panels is pretty close to the median value of homes in general. I used the 2021 HCAD total market value for the value for each home.
There are some differences, however. It appears that a house worth over $1,000,000 is roughly twice as likely to have solar panels as a house worth less than that.
# Property values file (Values_2021.rds); the site address column is
# renamed to `Address` so it can be joined to the permit data.
HCADpath <- "/home/ajackson/Dropbox/Rprojects/Curated_Data_Files/HCAD/"
HCAD <- readRDS(paste0(HCADpath, "Values_2021.rds")) %>%
  rename(Address = site_addr_1)
# Solar permits matched to property records by exact address string.
# Rows whose state_class contains "X" are dropped
# (presumably exempt properties -- confirm against the HCAD code list).
foo <- inner_join(Solar, HCAD, by = "Address") %>%
  mutate(Value = as.numeric(tot_mkt_val)) %>%
  filter(!stringr::str_detect(state_class, "X")) %>%
  select(
    Address, Zipcode, Permit_date, Comments, new_own_dt, Value, state_class
  )
# Market-value distribution for properties with solar permits
# (values of $2,000,000 or more are left out).
foo %>%
  filter(Value < 2000000) %>%
  ggplot(aes(x = Value)) +
  geom_histogram(bins = 32) +
  scale_x_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Property Market Value",
    subtitle = "For Properties with Solar Panels",
    x = "Market Value",
    y = "Number of Properties"
  )
# Market-value distribution for all properties, using the same state_class
# and value filters as the solar subset.
HCAD %>%
  mutate(Value = as.numeric(tot_mkt_val)) %>%
  filter(!stringr::str_detect(state_class, "X")) %>%
  filter(Value < 2000000) %>%
  ggplot(aes(x = Value)) +
  geom_histogram(bins = 32) +
  scale_x_continuous(labels = scales::dollar_format()) +
  labs(
    title = "Property Market Value",
    subtitle = "For All Properties",
    x = "Market Value",
    y = "Number of Properties"
  )
# Number of properties (all, not just solar) in each $100,000 value bin.
foo2 <- HCAD %>%
  mutate(Value = as.numeric(tot_mkt_val)) %>%
  filter(!stringr::str_detect(state_class, "X")) %>%
  filter(Value < 2000000) %>%
  mutate(Bins = cut_width(Value, 100000)) %>%
  group_by(Bins) %>%
  summarize(n = n())
# Share of properties with a solar permit, per $100,000 value bin:
# solar count (nsolar) divided by the all-property count (n) from foo2.
foo %>%
  filter(Value < 2000000) %>%
  mutate(Bins = cut_width(Value, 100000)) %>%
  group_by(Bins) %>%
  summarize(nsolar = n()) %>%
  left_join(foo2, by = "Bins") %>%
  mutate(Pct = nsolar / n) %>%
  # Numeric x position for each bin. This assumes exactly 21 occupied bins,
  # ordered and centred on 0, 100k, ..., 2M -- TODO confirm; it errors if the
  # number of rows differs.
  mutate(Value = seq(0000, 2050000, 100000)) %>%
  ggplot(aes(x = Value, y = Pct)) +
  # Pct is already computed, so draw the bars directly.
  geom_col() +
  scale_x_continuous(labels = scales::dollar_format()) +
  scale_y_continuous(labels = scales::percent) +
  labs(
    title = "Property Market Value",
    subtitle = "For All Properties",
    x = "Market Value",
    y = "Percent of Properties"
  )
Car charging
One final, somewhat related topic: electric car chargers. These are all commercial; I don’t think residential chargers (220 volt) require a permit. It looks like construction of charging stations is beginning to take off.
# Monthly count of car-charger permits.
Charger %>%
  mutate(By_Month = floor_date(Permit_date, unit = "month")) %>%
  count(By_Month, name = "MonthlyTotal") %>%
  ggplot(aes(x = By_Month, y = MonthlyTotal)) +
  geom_point() +
  labs(
    title = "Electric Car Charger Permits in Houston",
    subtitle = "Monthly totals",
    x = "Year",
    y = "Month Total"
  )