library(tidyverse)
<- "EPSG:4326"
googlecrs <- "EPSG:32615"
localUTM
<- "/home/ajackson/Dropbox/Rprojects/TexasDioceseCreationCare/Data/" path
Read and Analyze subsidence data
Setup
Data scraped (with some difficulty) from https://www.arcgis.com/home/webmap/viewer.html?webmap=1e3c97ed53e2476bb842ceccd6a90514&extent=-96.3605,29.2149,-94.1042,30.3246
Found from the website for the Harris-Galveston Subsidence District.
Scraping process
Install clipit to increase clipboard buffer size
Open table
Open information screen
Find table in info screen
Select all the data with
ctl-C
Dump into file with
xsel -b > foo8
Shorten extremely long line with
cat foo8 | sed 's/<tr>/\r<tr>/g' > foo9.html
Read into editor and add
<html> <body> <table>
after eliminating un-needed stuff.
Read and parse the data
# Import the data
<- rvest::read_html(paste0(path, "SubsidenceDataHarris.html")) %>%
df ::html_table()
rvest
<- do.call(rbind, df)
df
<- df %>%
df rename(Station=X1, Operator=X2, Latitude=X3, Longitude=X4, Start_Year=X5, End_Year=X6,
Years_Monitoring=X7, Total_Vertical_Displacement_cm=X8, Subsidence_Rate_cmperyr=X9,
POR_Plot=X10, RateLabel=X11, FID=X12
)
# Some records are recorded at stations at the same location, which screws up kriging.
# We will find those records and replace them with one record at that spot and average
# the variable values.
<- df %>%
df mutate(index=paste0(as.character(Latitude), as.character(Longitude))) %>%
group_by(index) %>%
summarise(
Station=first(Station),
Operator=first(Operator),
Subsidence_Rate_cmperyr=mean(Subsidence_Rate_cmperyr),
Total_Vertical_Displacement_cm=mean(Total_Vertical_Displacement_cm),
Start_Year=min(Start_Year),
End_Year=max(End_Year),
Years_Monitoring=max(Years_Monitoring),
NumInAvg=n(),
Latitude=first(Latitude),
Longitude=first(Longitude)) %>%
select(-index)
<- sf::st_as_sf(df, coords=c("Longitude", "Latitude"), crs=googlecrs, agr = "identity")
df_sf <- sf::st_transform(df_sf, crs=localUTM)
df_xy
# saveRDS(df_xy, paste0(path, "Subsidence_xy.rds"))
<- sf::st_bbox(df_sf)
bbox
# Expand box by 20% to give a little extra room
<- (bbox[["xmax"]]-bbox[["xmin"]])*0.1
Dx <- (bbox[["ymax"]]-bbox[["ymin"]])*0.1
Dy "xmin"] <- bbox["xmin"] - Dx
bbox["xmax"] <- bbox["xmax"] + Dx
bbox["ymin"] <- bbox["ymin"] - Dy
bbox["ymax"] <- bbox["ymax"] + Dy
bbox[
<- c(bbox["xmin"], bbox["ymin"], bbox["xmax"], bbox["ymax"]) bb
Plot the data
<- basemapR::base_map(bbox, basemap="mapnik", increase_zoom=2)
Base_basemapR
# This is the best looking one.
%>%
df_sf ggplot() +
+
Base_basemapR geom_sf()
Grid and contour
Though really what I care most about is the grid, since that will yield the subsidence values at each of the church locations
# First we create a box to build the grid inside of - in XY coordinates, not
# lat long.
= bbox %>%
bbox_xy ::st_as_sfc() %>%
sf::st_transform(crs = localUTM) %>%
sf::st_bbox()
sf
# sfc POINT file
<- sf::st_make_grid(x=bbox_xy,
grd_sf what="corners",
cellsize=2000,
crs=localUTM
)# Data frame
<- expand.grid(
grd_df X = seq(from = bbox_xy["xmin"], to = bbox_xy["xmax"]+2000, by = 2000),
Y = seq(from = bbox_xy["ymin"], to = bbox_xy["ymax"]+2000, by = 2000) # 1000 m resolution
)
Inverse distance weighting
# Create interpolator and interpolate to grid
<- gstat::gstat(
fit_IDW formula = Subsidence_Rate_cmperyr ~ 1,
data = df_xy,
#nmax = 10, nmin = 3, # can also limit the reach with these numbers
set = list(idp = 2.5) # inverse distance power
)
# Use predict to apply the model fit to the grid (using the data frame grid
# version)
# We set debug.level to turn off annoying output
<- predict(fit_IDW, grd_sf, debug.level=0)
interp_IDW
# Convert to a stars object so we can use the contouring in stars
<- stars::st_rasterize(interp_IDW %>% dplyr::select(Subsidence_Rate_cmperyr=var1.pred, geometry))
interp_IDW_stars
# Quick sanity check, and can use to adjust the distance power Looks pretty
# good - most input points look close to the output grid points, with some
# notable exceptions. The red point to the north is possibly bad data. Easier
# to judge from areal displays.
ggplot() +
# geom_stars is a good way to display a grid
::geom_stars(data=interp_IDW_stars) +
starsgeom_sf(data=df_xy, aes(color=Subsidence_Rate_cmperyr), size=5) +
geom_sf(data=df_xy, color="Black", size=1) +
# This is for the raster color fill
scale_fill_gradientn(colors=rainbow(5), limits=c(-2,1)) +
# This is for the original data points
scale_color_gradientn(colors=rainbow(5), limits=c(-2,1)) +
labs(title="Inverse Distance Weighting, Power = 2.5")
Search for best IDW power
Even optimizing, the result is kind of sucky.
# Do a leave-one-out analysis for a variety of weighting powers
<- NULL
Validate for (power in (2:8)*0.5) {
<- gstat::gstat(formula = Subsidence_Rate_cmperyr ~ 1, data = df_xy, set = list(idp = power))
my_fit <- sf::st_as_sf(gstat::gstat.cv(my_fit, debug.level=0, verbose=FALSE)) %>%
foo rename(Observed=observed, Predicted=var1.pred ) %>%
mutate(power=power)
<- bind_rows(Validate, foo)
Validate
}
%>%
Validate ggplot(aes(x=Observed, y=Predicted)) +
geom_point() +
geom_smooth(method='lm') +
geom_abline(intercept=0, slope=1, color="red", linetype="dashed") +
facet_wrap(vars(power)) +
labs(title="Leave-one-out Validation",
x="Observed (inches)",
y="Predicted (inches)")
# Root mean square error
<- Validate %>%
RMS group_by(power) %>%
summarise(RMS=sqrt(sum((Predicted-Observed)^2) / n()))
%>%
RMS ggplot(aes(x=power, y=RMS)) +
geom_point() +
geom_smooth() +
labs(title="RMS Error vs. Inverse Distance Weighting Power")
Kriging
First the drift.
I don’t like the looks of the fit - I think that there is enough data surrounding the bowl near zero that there is no drift. This is a flat plane with a bowl in the middle.
# Let's look at 1st order fit
<- bind_cols(sf::st_drop_geometry(df_xy),
df_xy_df ::st_coordinates(df_xy)) # make a tibble
sf
# Define the 1st order polynomial equation
.1 <- as.formula(Subsidence_Rate_cmperyr ~ X + Y)
f
# Run the regression model
.1 <- lm( f.1, data=df_xy_df)
lm
# Use predict to apply the model fit to the grid This re-attaches X-Y
# coordinates to the predicted values so they can be made into a grid
.1 <- sf::st_as_sf(bind_cols(grd_df,
Poly_fitdata.frame(var1.pred=predict(lm.1, grd_df))),
crs=localUTM,
coords=c("X", "Y"))
# Convert to a stars object
.1 <- stars::st_rasterize(Poly_fit.1 %>%
Poly_fit_star::select(Subsidence_Rate_cmperyr=var1.pred, geometry))
dplyr
ggplot() +
::geom_stars(data=Poly_fit_star.1) +
starsscale_fill_gradientn(colors=rainbow(5)) +
geom_sf(data=df_xy, aes(color=Subsidence_Rate_cmperyr), size=5) +
scale_color_gradientn(colors=rainbow(5), limits=c(-2,1)) +
scale_fill_gradientn(colors=rainbow(5), limits=c(-2,1)) +
geom_sf(data=df_xy, color="Black", size=1)+
labs(title="Linear trend from subsidence data")
Kriging
I pick the Spherical model
# Let's look at the variogram
<- gstat::variogram(Subsidence_Rate_cmperyr ~ 1,
var.smpl data=df_xy)
# Compute the variogram model by passing the nugget, sill and range values
# to fit.variogram() via the vgm() function.
# models are "Sph", "Exp", "Mat", and "Gau"
<- NULL
Plot_data for (model in c("Sph", "Exp", "Mat", "Gau")) {
<- gstat::fit.variogram(var.smpl, fit.ranges = TRUE, fit.sills = TRUE,
foo debug.level = 0,
::vgm(psill=NA, model=model, range=NA, nugget=0))
gstat<- bind_rows(Plot_data, gstat::variogramLine(foo, maxdist = max(var.smpl$dist)) %>%
Plot_data mutate(Model=model))
}
ggplot(var.smpl, aes(x = dist, y = gamma)) +
geom_point() +
geom_text(aes(hjust="left", label=np), nudge_x=200) +
geom_line(data=Plot_data, aes(x=dist, y=gamma, color=Model)) +
labs(title="Variogram for detrended data",
subtitle="Numbers represent number of points in the distance bin")
Do Kriging
# We have our model, so now let's use it
<- gstat::fit.variogram(var.smpl, fit.ranges = TRUE, fit.sills = TRUE,
dat.fit debug.level = 0,
::vgm(psill=NA, model="Gau", range=NA, nugget=0))
gstat
# saveRDS(dat.fit, paste0(path, "gstat_variogram_Subsidence.rds"))
<- gstat::krige(Subsidence_Rate_cmperyr ~ 1,
Krigged_grid
df_xy,
grd_sf,debug.level=1,
dat.fit)
[using ordinary kriging]
<- stars::st_rasterize(Krigged_grid %>% dplyr::select(Subsidence_Rate_cmperyr=var1.pred, geometry))
Krigged_star
# Residuals
ggplot() +
::geom_stars(data=Krigged_star) +
starsscale_fill_gradientn(colors=rainbow(5)) +
geom_sf(data=df_xy, color="Black", size=1) +
labs(title="Residuals from Kriging")
Contours
<- seq(from = -1, to = 0.2, by = 0.2)
brks # Create contour lines
<- stars::st_contour(Krigged_star, contour_lines=TRUE, breaks=brks)
Contours_Krig # Plot to see what it all looks like
ggplot() +
::geom_stars(data=Krigged_star) +
starsscale_fill_gradientn(colors=rainbow(5), limits=c(-1,0.2)) +
geom_sf(data=df_xy, aes(color=Subsidence_Rate_cmperyr), size=5) +
scale_color_gradientn(colors=rainbow(5), limits=c(-1,0.2)) +
geom_sf(data=df_xy, color="Black", size=1) +
geom_sf(data=Contours_Krig, color="black")
Final publication quality map
# Now lets transform the contours to lat-long space and plot on the basemap
<- sf::st_transform(Contours_Krig, crs=googlecrs)
Contours_LL <- sf::st_transform(Krigged_star, crs=googlecrs)
Krig_star_LL
# saveRDS(Contours_LL, paste0(path, "Contours_LL_Subsidence.rds"))
# saveRDS(Krig_star_LL, paste0(path, "Star_Grid_LL_Subsidence.rds"))
ggplot() +
# this is the best looking basemap
+
Base_basemapR # Gridded data
::geom_stars(data=Krig_star_LL, alpha=0.4) +
stars# Add points
geom_sf(data=df_sf) +
# Create filled density "contours"
geom_sf(data=Contours_LL, color="black") +
scale_fill_viridis_c(direction=-1, alpha=0.4, name="Subsidence\nRate (cm/yr)") +
# Add a scale bar
::annotation_scale(data=df_sf, aes(unit_category="imperial", style="ticks"),
ggspatiallocation="br", width_hint=0.2, bar_cols=1) +
# Add CI annotation at specified window coordinates
annotate("text", label="C.I. = 0.2 cm per year subsidence
Data sourced from the Harris-Galveston Subsidence District
2022 Annual Groundwater Report
Map a product of Alan Jackson",
x=-Inf,
y=-Inf,
hjust="inward", vjust="inward", size = 2) +
coord_sf(crs=googlecrs) + # required
# Add title
labs(title="Subsidence Rate",
subtitle="Points represent the measurement locations",
x="Longitude", y="Latitude")
# ggsave(paste0(path, "Subsidence_map.png"),dpi=400)