::install_github("LukeCe/spflow") devtools
Spatial Econometrics Interaction Modelling
::p_load(tmap, sf, spdep, sp, Matrix, spflow, reshape2, knitr, tidyverse) pacman
Data Preparation
Before we can calibrate Spatial Econometric Interaction models by using spflow package, three data sets are required. They are:
a spatial weights,
a tibble data.frame consists of the origins, destination, flows and distances between the origins and destination, and
a tibble data.frame consists of the explanatory variables.
Building the Geographical Area
For the purpose of this study, URA Master Planning 2019 Planning Subzone GIS data will be used.
In the code chunk below, MPS-2019 shapefile will be imported into R environment as a sf tibble data.frame called mpsz.
# mpsz <- st_read(dsn = "data/geospatial", layer = "MPSZ-2019") %>%
# st_transform(crs = 3414)
In this study, our analysis will be focused on planning subzone with bus stop. In view of this, the code chunk below will be used to
# mpsz$'BUSSTOP_COUNT' <- lengths(
# st_intersects(mpsz, busstop)
# )
# mpsz_busstop <- mpsz %>%
# filter(BUSSTOP_COUNT > 0)
# mpsz_busstop
Preparing Spatial Weights
# centroids <- suppressWarnings({
# st_point_on_surface(st_geometry(mpsz_busstop))
# })
#
# mpsz_nb <- list(
# "by_contiguity" = poly2nb(mpsz_busstop),
# "by_distance" = dnearneigh(centroids, d1 = 0, d2 = 5000),
# "by_knn" = knn2nb(knearneigh(centroids, 3))
# )
# mpsz_nb
#write_rds(mpsz_nb, "data/rds/mpsz_nb.rds")
# odbus6_9 <- read_rds("data/rds/odbus6_9.rds")
# busstop_mpsz <- st_intersection(busstop, mpsz) %>%
# select(BUS_STOP_N, SUBZONE_C) %>%
# st_drop_geometry()
# od_data <- left_join(odbus6_9 , busstop_mpsz,
# by = c("ORIGIN_PT_CODE" = "BUS_STOP_N")) %>%
# rename(ORIGIN_BS = ORIGIN_PT_CODE,
# ORIGIN_SZ = SUBZONE_C,
# DESTIN_BS = DESTINATION_PT_CODE)
# duplicate <- od_data %>%
# group_by_all() %>%
# filter(n() > 1) %>%
# ungroup()
# od_data <- unique(od_data)
# od_data <- left_join(od_data , busstop_mpsz,
# by = c("DESTIN_BS" = "BUS_STOP_N"))
# duplicate <- od_data %>%
# group_by_all() %>%
# filter(n() > 1) %>%
# ungroup()
# od_data <- unique(od_data)
# od_data <- od_data %>%
# rename(DESTIN_SZ = SUBZONE_C) %>%
# drop_na() %>%
# group_by(ORIGIN_SZ, DESTIN_SZ) %>%
# summarise(MORNING_PEAK = sum(TRIPS))
# mpsz_sp <- as(mpsz, "Spatial")
# mpsz_sp
# dist <- spDists(mpsz_sp,
# longlat = FALSE)
# head(dist, n=c(10, 10))
# sz_names <- mpsz$SUBZONE_C
# colnames(dist) <- paste0(sz_names)
# rownames(dist) <- paste0(sz_names)
# distPair <- melt(dist) %>%
# rename(dist = value)
# head(distPair, 10)
# distPair %>%
# filter(dist > 0) %>%
# summary()
# distPair$dist <- ifelse(distPair$dist == 0,
# 50, distPair$dist)
# distPair %>%
# summary()
# distPair <- distPair %>%
# rename(orig = Var1,
# dest = Var2)
# write_rds(distPair, "data/rds/distPair.rds")
# mpsz_var
# write_rds(mpsz_var, "data/rds/mpsz_var.rds")
<- read_rds("data/rds/mpsz_nb.rds")
mpsz_nb <- read_rds("data/rds/mpsz_flow.rds")
mpsz_flow <- read_rds("data/rds/mpsz_var.rds") mpsz_var
Creating spflow_network-class
object
spflow_network-class
is an S4 class that contains all information on a spatial network which is composed by a set of nodes that are linked by some neighborhood relation. It can be created by using https://lukece.github.io/spflow/reference/spflow_network.html of spflow package.
For our model, we choose the contiguity based neighborhood structure.
<- spflow_network(
mpsz_net id_net = "sg",
node_neighborhood =
nb2mat(mpsz_nb$by_contiguity),
node_data = mpsz_var,
node_key_column = "SZ_CODE")
mpsz_net
Spatial network nodes with id: sg
--------------------------------------------------
Number of nodes: 313
Average number of links per node: 6.077
Density of the neighborhood matrix: 1.94% (non-zero connections)
Data on nodes:
SZ_NAME SZ_CODE BUSSTOP_COUNT AGE7_12 AGE13_24 AGE25_64
1 INSTITUTION HILL RVSZ05 2 330 360 2260
2 ROBERTSON QUAY SRSZ01 10 320 350 2200
3 FORT CANNING MUSZ02 6 0 10 30
4 MARINA EAST (MP) MPSZ05 2 0 0 0
5 SENTOSA SISZ01 1 200 260 1440
6 CITY TERMINALS BMSZ17 10 0 0 0
--- --- --- --- --- --- ---
308 NEE SOON YSSZ07 12 90 140 590
309 UPPER THOMSON BSSZ01 47 1590 3660 15980
310 SHANGRI-LA AMSZ05 12 810 1920 9650
311 TOWNSVILLE AMSZ04 9 980 2000 11320
312 MARYMOUNT BSSZ02 25 1610 4060 16860
313 TUAS VIEW EXTENSION TSSZ06 11 0 0 0
SCHOOL_COUNT BUSINESS_COUNT RETAILS_COUNT FINSERV_COUNT ENTERTN_COUNT
1 1 6 26 3 0
2 0 4 207 18 6
3 0 7 17 0 3
4 0 0 0 0 0
5 0 1 84 29 2
6 0 11 14 4 0
--- --- --- --- --- ---
308 0 0 7 0 0
309 3 21 305 30 0
310 3 0 53 9 0
311 1 0 83 11 0
312 3 19 135 8 0
313 0 53 3 1 0
FB_COUNT LR_COUNT COORD_X COORD_Y
1 4 3 103.84 1.29
2 38 11 103.84 1.29
3 4 7 103.85 1.29
4 0 0 103.88 1.29
5 38 20 103.83 1.25
6 15 0 103.85 1.26
--- --- --- --- ---
308 0 0 103.81 1.4
309 5 11 103.83 1.36
310 0 0 103.84 1.37
311 1 1 103.85 1.36
312 3 11 103.84 1.35
313 0 0 103.61 1.26
<- spflow_network_pair(
mpsz_net_pairs id_orig_net = "sg",
id_dest_net = "sg",
pair_data = mpsz_flow,
orig_key_column = "ORIGIN_SZ",
dest_key_column = "DESTIN_SZ")
mpsz_net_pairs
Spatial network pair with id: sg_sg
--------------------------------------------------
Origin network id: sg (with 313 nodes)
Destination network id: sg (with 313 nodes)
Number of pairs: 97969
Completeness of pairs: 100.00% (97969/97969)
Data on node-pairs:
DESTIN_SZ ORIGIN_SZ DISTANCE TRIPS
1 RVSZ05 RVSZ05 0 67
314 SRSZ01 RVSZ05 305.74 251
627 MUSZ02 RVSZ05 951.83 0
940 MPSZ05 RVSZ05 5254.07 0
1253 SISZ01 RVSZ05 4975 0
1566 BMSZ17 RVSZ05 3176.16 0
--- --- --- --- ---
96404 YSSZ07 TSSZ06 26972.97 0
96717 BSSZ01 TSSZ06 25582.48 0
97030 AMSZ05 TSSZ06 26714.79 0
97343 AMSZ04 TSSZ06 27572.74 0
97656 BSSZ02 TSSZ06 26681.7 0
97969 TSSZ06 TSSZ06 0 270
<- spflow_network_multi(mpsz_net, mpsz_net_pairs)
mpsz_multi_net mpsz_multi_net
Collection of spatial network nodes and pairs
--------------------------------------------------
Contains 1 spatial network nodes
With id : sg
Contains 1 spatial network pairs
With id : sg_sg
Availability of origin-destination pair information:
ID_ORIG_NET ID_DEST_NET ID_NET_PAIR COMPLETENESS C_PAIRS C_ORIG C_DEST
sg sg sg_sg 100.00% 97969/97969 313/313 313/313
<- log(1 + TRIPS) ~
cor_formula +
BUSSTOP_COUNT +
AGE7_12 +
AGE13_24 +
AGE25_64 +
SCHOOL_COUNT +
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT P_(log(DISTANCE + 1)) #P_ is only applicable if you are using distance or cost (impedance)
<- pair_cor(
cor_mat
mpsz_multi_net,spflow_formula = cor_formula,
add_lags_x = FALSE) #lag is the distance neighborhood. We are including in this case.
colnames(cor_mat) <- paste0(
substr(
colnames(cor_mat),1,3),"...")
cor_image(cor_mat)
<- spflow(
base_model spflow_formula = log(1 + TRIPS) ~
O_(BUSSTOP_COUNT +
+
AGE25_64) D_(SCHOOL_COUNT +
+
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT) P_(log(DISTANCE + 1)),
spflow_networks = mpsz_multi_net)
base_model
--------------------------------------------------
Spatial interaction model estimated by: MLE
Spatial correlation structure: SDM (model_9)
Dependent variable: log(1 + TRIPS)
--------------------------------------------------
Coefficients:
est sd t.stat p.val
rho_d 0.680 0.004 192.553 0.000
rho_o 0.678 0.004 187.734 0.000
rho_w -0.396 0.006 -65.592 0.000
(Intercept) 0.410 0.065 6.267 0.000
(Intra) 1.312 0.081 16.262 0.000
D_SCHOOL_COUNT 0.017 0.002 7.885 0.000
D_SCHOOL_COUNT.lag1 0.002 0.004 0.552 0.581
D_BUSINESS_COUNT 0.000 0.000 3.015 0.003
D_BUSINESS_COUNT.lag1 0.000 0.000 -0.249 0.804
D_RETAILS_COUNT 0.000 0.000 -0.306 0.759
D_RETAILS_COUNT.lag1 0.000 0.000 0.152 0.880
D_FINSERV_COUNT 0.002 0.000 6.787 0.000
D_FINSERV_COUNT.lag1 -0.002 0.001 -3.767 0.000
O_BUSSTOP_COUNT 0.002 0.000 6.807 0.000
O_BUSSTOP_COUNT.lag1 -0.001 0.000 -2.364 0.018
O_AGE25_64 0.000 0.000 7.336 0.000
O_AGE25_64.lag1 0.000 0.000 -2.797 0.005
P_log(DISTANCE + 1) -0.050 0.007 -6.794 0.000
--------------------------------------------------
R2_corr: 0.6942948
Observations: 97969
Model coherence: Validated
<- par(mfrow = c(1, 3),
old_par mar = c(2,2,2,2))
spflow_moran_plots((base_model))
par(old_par)
<- pair_cor(base_model)
corr_residual colnames(corr_residual) <- substr(colnames(corr_residual), 1, 3)
cor_image(corr_residual)
<- log(1 + TRIPS) ~
spflow_formula O_(BUSSTOP_COUNT +
+
AGE25_64) D_(SCHOOL_COUNT +
+
BUSINESS_COUNT +
RETAILS_COUNT +
FINSERV_COUNT) P_(log(DISTANCE + 1))
<- spflow_control(
model_control estimation_method = "mle",
model = "model_1")
<- spflow(
mle_model1
spflow_formula,spflow_networks = mpsz_multi_net,
estimation_control = model_control)
mle_model1
--------------------------------------------------
Spatial interaction model estimated by: OLS
Spatial correlation structure: SLX (model_1)
Dependent variable: log(1 + TRIPS)
--------------------------------------------------
Coefficients:
est sd t.stat p.val
(Intercept) 11.384 0.069 164.255 0.000
(Intra) -6.006 0.112 -53.393 0.000
D_SCHOOL_COUNT 0.093 0.003 28.599 0.000
D_SCHOOL_COUNT.lag1 0.255 0.006 44.905 0.000
D_BUSINESS_COUNT 0.001 0.000 10.036 0.000
D_BUSINESS_COUNT.lag1 0.003 0.000 18.274 0.000
D_RETAILS_COUNT 0.000 0.000 -1.940 0.052
D_RETAILS_COUNT.lag1 0.000 0.000 -2.581 0.010
D_FINSERV_COUNT 0.005 0.000 10.979 0.000
D_FINSERV_COUNT.lag1 -0.016 0.001 -17.134 0.000
O_BUSSTOP_COUNT 0.014 0.001 25.865 0.000
O_BUSSTOP_COUNT.lag1 0.015 0.001 21.728 0.000
O_AGE25_64 0.000 0.000 14.479 0.000
O_AGE25_64.lag1 0.000 0.000 14.452 0.000
P_log(DISTANCE + 1) -1.281 0.008 -165.327 0.000
--------------------------------------------------
R2_corr: 0.2831458
Observations: 97969
Model coherence: Validated