Last active
December 31, 2024 17:00
-
-
Save JosiahParry/4856c7b6c036f4b978d27f005338fe61 to your computer and use it in GitHub Desktop.
Read an ArcGIS SWM File as spdep listw
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Original function | |
read_swm <- function(file) { | |
con <- file(file, "rb") | |
header <- NULL | |
while(TRUE) { | |
r <- readBin(con, "raw", size=1L, n=1, endian="little") | |
if (r == charToRaw("\n")) break | |
else header <- c(header, r) | |
} | |
cheader <- rawToChar(header) | |
if (nchar(cheader) >= 8) { | |
new <- substr(cheader, 1, 8) == "VERSION@" | |
} else { | |
new <- FALSE | |
} | |
fixed <- FALSE | |
if (new) { | |
lcheader <- strsplit(cheader, ";") | |
lcheader1 <- sapply(lcheader, function(x) strsplit(x, "@")) | |
ffound <- match("FIXEDWEIGHTS", sapply(lcheader1, "[", 1)) | |
if (!is.na(ffound)) fixed <- toupper(lcheader1[[ffound]][2]) == "TRUE" | |
} | |
n <- readBin(con, "integer", size=4L, n=1, endian="little") | |
row_stand <- readBin(con, "integer", size=4L, n=1, endian="little") | |
neighbours <- vector(mode="list", length=n) | |
weights <- vector(mode="list", length=n) | |
reg_id <- integer(n) | |
for (i in 1:n) { | |
reg_id[i] <- readBin(con, "integer", size=4L, n=1, endian="little") | |
# problem: neighbours are members of reg_id | |
ni <- readBin(con, "integer", size=4L, n=1, endian="little") | |
if (ni > 0L) { | |
neighbours[[i]] <- readBin(con, "integer", size=4L, n=ni, endian="little") | |
if (fixed) { | |
weights[[i]] <- rep(readBin(con, "numeric", size=8L, n=1, | |
endian="little"), ni) | |
} else { | |
weights[[i]] <- readBin(con, "numeric", size=8L, n=ni, endian="little") | |
} | |
} | |
sw <- readBin(con, "numeric", size=8L, n=1, endian="little") | |
} | |
for (i in 1:n) { | |
if (length(neighbours[[i]] > 0)) { | |
neighbours[[i]] <- match(neighbours[[i]], reg_id) | |
o <- order(neighbours[[i]]) | |
neighbours[[i]] <- neighbours[[i]][o] | |
weights[[i]] <- weights[[i]][o] | |
} else { | |
neighbours[[i]] <- 0L | |
} | |
} | |
res <- list(neighbours=neighbours, weights=weights) | |
res | |
} | |
# read_swm(file) | |
# C-based version thanks ChatGPT | |
r"(SEXP read_swm(SEXP file) { | |
// Open file in binary mode | |
const char *filename = CHAR(STRING_ELT(file, 0)); | |
FILE *con = fopen(filename, "rb"); | |
if (!con) { | |
error("Cannot open file"); | |
return R_NilValue; | |
} | |
// Read header line by line | |
char c; | |
char header[1024]; | |
int header_len = 0; | |
while ((c = fgetc(con)) != EOF) { | |
if (c == '\n') break; | |
header[header_len++] = c; | |
} | |
header[header_len] = '\0'; | |
int new_format = 0; | |
int fixed = 0; | |
if (strncmp(header, "VERSION@", 8) == 0) { | |
new_format = 1; | |
// Check for FIXEDWEIGHTS in header | |
char *found = strstr(header, "FIXEDWEIGHTS@"); | |
if (found && strncasecmp(found + 13, "TRUE", 4) == 0) { | |
fixed = 1; | |
} | |
} | |
// Read the number of regions and row standardization flag | |
int n, row_stand; | |
fread(&n, sizeof(int), 1, con); | |
fread(&row_stand, sizeof(int), 1, con); | |
// Prepare output R lists | |
SEXP neighbours = PROTECT(allocVector(VECSXP, n)); | |
SEXP weights = PROTECT(allocVector(VECSXP, n)); | |
SEXP reg_id = PROTECT(allocVector(INTSXP, n)); | |
// Read data for each region | |
for (int i = 0; i < n; i++) { | |
int region_id; | |
fread(®ion_id, sizeof(int), 1, con); | |
INTEGER(reg_id)[i] = region_id; | |
int ni; | |
fread(&ni, sizeof(int), 1, con); | |
if (ni > 0) { | |
// Read neighbors | |
SEXP nb = PROTECT(allocVector(INTSXP, ni)); | |
fread(INTEGER(nb), sizeof(int), ni, con); | |
// Read weights | |
SEXP w = PROTECT(allocVector(REALSXP, ni)); | |
if (fixed) { | |
double fixed_weight; | |
fread(&fixed_weight, sizeof(double), 1, con); | |
for (int j = 0; j < ni; j++) { | |
REAL(w)[j] = fixed_weight; | |
} | |
} else { | |
fread(REAL(w), sizeof(double), ni, con); | |
} | |
SET_VECTOR_ELT(neighbours, i, nb); | |
SET_VECTOR_ELT(weights, i, w); | |
UNPROTECT(2); // nb, w | |
} else { | |
SET_VECTOR_ELT(neighbours, i, R_NilValue); | |
SET_VECTOR_ELT(weights, i, R_NilValue); | |
} | |
double sw; | |
fread(&sw, sizeof(double), 1, con); // skip row sum | |
} | |
fclose(con); | |
// Create a list containing neighbours, weights, and region_id | |
SEXP res = PROTECT(allocVector(VECSXP, 3)); | |
SET_VECTOR_ELT(res, 0, neighbours); // First element: neighbours | |
SET_VECTOR_ELT(res, 1, weights); // Second element: weights | |
SET_VECTOR_ELT(res, 2, reg_id); // Third element: region_id | |
UNPROTECT(4); // neighbours, weights, reg_id, res | |
return res; | |
} | |
)" -> .x | |
callme::compile(.x) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment