Chapter 4 Simulations with systematicsurvey
library(ggplot2)
library(INLA)
library(raster)
library(rgeos)
library(rgdal)
library(systematicsurvey)
# can install it using
## devtools::install_github("Craig44/systematicsurvey")
set.seed(123)
## Assume a square domain
= c(0,100)
survey_xlim = c(0,100)
survey_ylim # simualted population size
= 5e5
N ## spatial distribution
= c(4,1)
beta1 = c(1,4)
beta2
= diff(survey_ylim) * diff(survey_xlim) # Area of survey
survey_area ## create a polygon
= Polygon(cbind(x = c(survey_xlim[1],survey_xlim[1],survey_xlim[2],survey_xlim[2]), y = c(survey_ylim[1],survey_ylim[2],survey_ylim[2],survey_ylim[1])))
s_poly = Polygons(list(s_poly),1)
s_polys = SpatialPolygons(list(s_polys)) ## add projection coordinater here if you are using
sp_survey_poly = rgeos::gArea(sp_survey_poly)
survey_area = sp_survey_poly
survey_polygon
## sampling unit
= 0.5
quad_width = 0.5
quad_height = 10 ## spacings between quadrant/transect midpoints
quad_x_spacing = 10 ## spacings between quadrant/transect midpoints
quad_y_spacing = quad_width * quad_height
sampling_unit_area = quad_width ## will define how many boxlets there are, should be less than quadrant size
boxlet_diameter = survey_area / sampling_unit_area ## sampling fraction
kappa ## variables that define the toal number of sampling units
= max(survey_xlim) / quad_x_spacing # r
n_col = max(survey_ylim) / quad_y_spacing # r
n_row = n_row * n_col
n_strata = quad_y_spacing / quad_height# l
n_l = quad_x_spacing / quad_width # k
n_k = n_l * n_row * n_col * n_k
n_quads = rep(1:n_l, n_k)
within_rows = sort(rep(1:n_k, n_l))
within_cols ## lower and upper limits for discrete non-overlapping quadrats
= seq(0,max(survey_xlim), by = quad_width)
quad_x = seq(0,max(survey_ylim), by = quad_height)
quad_y
# create a raster layer with all projection spaces
<- raster(resolution = quad_width, xmn = survey_xlim[1], xmx = survey_xlim[2], ymn = survey_ylim[1], ymx = survey_ylim[2])
proj_raster <- rasterToPolygons(proj_raster, dissolve=F)
proj_poly
## simulate spatial distribution
= rbeta(N, beta1[1], beta1[2]) * max(survey_xlim)
x_coord = rbeta(N, beta2[1], beta2[2]) * max(survey_ylim)
y_coord ## rasterize
<- rasterize(x = cbind(x_coord, y_coord), y = proj_raster, field = 1, fun = "sum", na.rm = T)
pop_raster <- rasterToPoints(pop_raster, spatial = TRUE)
pop_raster_pts # Then to a 'conventional' dataframe
<- data.frame(pop_raster_pts)
pop_df ggplot() +
geom_tile(data = pop_df , aes(x = x, y = y, fill = layer)) +
ggtitle("Population") +
labs(x = "", y = "", fill = "Abundance")
Now simulate a two-dimensional systematic survey with a single PSU.
= sample(1:length(within_cols), size = 1)
PSU # convert cols and rows to survey units
= within_cols[PSU] * quad_width
within_x = within_rows[PSU] * quad_height
within_y
= seq(from = within_x, to = survey_xlim[2], by = quad_x_spacing)
upper_x1 = seq(from = within_y, to = survey_ylim[2], by = quad_y_spacing)
upper_y1 = rep(upper_x1, length(upper_y1))
upper_x = sort(rep(upper_y1, length(upper_x1)))
upper_y = upper_y - quad_height / 2
sample_y_coord = upper_x - quad_width / 2
sample_x_coord ## data frame
= data.frame(x_mid = sample_x_coord, y_mid = sample_y_coord)
Data $y_i = NA
Data= 1;
counter ## population in sampling units
for(i in 1:length(upper_x)) {
$y_i[i] = sum(x_coord >= (upper_x[i] - quad_width) & x_coord <= upper_x[i]
Data& y_coord >= (upper_y[i] - quad_height) & y_coord <= upper_y[i])
}$area = quad_height * quad_height
Data## convert to densities
$d_i = Data$y_i / Data$area
Data
## convert it to spatial data frame
coordinates( Data ) <- ~ x_mid + y_mid
## can also add projection of data if working lat longs etc
# proj4string(Data) <- CRS("+init=epsg:4326")
## calculate sample raster
<- rasterize(x = cbind(sample_x_coord, sample_y_coord), y = proj_raster, field = 1, fun = "sum", na.rm = T)
sample_raster ## calculate polygons
<- rasterToPolygons(sample_raster, dissolve=TRUE) samp_poly
4.1 SRS estimator
= kappa^2 * var(Data@data$d_i) / (nrow(Data@data))
srs_var_hat ## standard error
sqrt(srs_var_hat)
## [1] 397008.6
4.3 Boxlet estimator
The first thing we need to do is calculate the fine lattice within \(\mathcal{D}\) (these lattice points correspond to the centroids of the so-called boxlets). This is done using the ?BoxletEstimatorSamplingFrame
function. This function will return a list containing a fine lattice for all the boxlets. The other function that will calculate the boxlet variance is ?BoxletEstimator
. These are applied in the following code chunk.
## build lattice that we will evaluate the variance over
## for large survey areas can take a few seconds to a minute
= BoxletEstimatorSamplingFrame(survey_polygon, quad_width = quad_width,
boxlet_frame_single quad_height = quad_height, quad_x_spacing = quad_x_spacing,
quad_y_spacing = quad_y_spacing, boxlet_per_sample_width = 1,
boxlet_per_sample_height = 1, trace = F)
## use the
= BoxletEstimator(spatial_df = Data, survey_polygon = survey_polygon,
boxlet_estimator_single quad_width = quad_width, quad_height = quad_height,
quad_x_spacing = quad_x_spacing, quad_y_spacing = quad_y_spacing,
boxlet_sampling_frame = boxlet_frame_single)
names(boxlet_estimator_single)
## [1] "var_total_boxlet" "boxlet_df" "N_hat" "gam_N_hat"
## standard error
sqrt(boxlet_estimator_single$var_total_boxlet)
## [1] 84470.52
4.4 Geostatistical estimator
Build a mesh INLA mesh
= inla.mesh.2d(max.edge = c(3,20), n =15, cutoff = 6,
mesh loc.domain = SpatialPoints( data.frame(x = c(0,0,100,100),y= c(0,100,100,0))))
## don't give data points to the inla.mesh.2d function
## the triangulation mesh calculation will want to put all the vertices
## at observation locations due to the systematice shape
plot(mesh)
points(Data, col = "red", pch = 16)
Identify which cells to get the model to predict for
= apply(gContains(samp_poly,proj_poly, byid = TRUE), 1, any)
proj_indicator ## visualise
<- c("Domain" = "gold", "Samples" = "blue", "Projection" = "black")
colors = fortify(proj_poly) pop_outline
## Regions defined for each Polygons
= fortify(samp_poly) sample_outline
## Regions defined for each Polygons
= data.frame(x = c(survey_xlim[1],survey_xlim[2], survey_xlim[2], survey_xlim[1],survey_xlim[1]),
domain_area y = c(survey_ylim[1],survey_ylim[1],survey_ylim[2],survey_ylim[2],survey_ylim[1]))
ggplot() +
coord_fixed() +
geom_path(aes(x = x, y = y, col="Domain"), data = domain_area,
size=1) +
geom_path(aes(x = long, y = lat, group = group, col="Projection"), data = pop_outline,
size=1) +
geom_path(aes(x = long, y = lat, group = group, col="Samples"), data = sample_outline,
size=1) +
labs(x = "x", y = "y", color = "")+
theme_void() +
theme(legend.position="bottom", legend.text = element_text( size = 16)) +
scale_color_manual(values = colors[1:3])
Fit the model
## create a projection data frame
<- as.data.frame(rasterToPoints(proj_raster, spatial = F))
proj_df $area = quad_width * quad_height
proj_df$y_i = 0 ## dummy variable
proj_df
= tryCatch(
fit_poisson expr = SpatialModelEstimator(spatial_df = Data, formula = y_i ~ 1, mesh = mesh,
extrapolation_grid = proj_df, family = 0, link = 0, bias_correct = T,
convergence = list(grad_tol = 0.001)),
error = function(e) {e}
)