List files in, and stream a (non-spatial) table from, a tarball on CRAN.
(We need the development version of gdalraster for the full recursive directory / `/vsitar/` listing capability, but reading from remote files or archives is available in many GDAL versions and in the existing GDAL-backed packages on CRAN.)
library(gdalraster) ## for listing dirs recursively we need gh:USDAForestService/gdalraster for now
#> GDAL 3.10.0dev-449d5f09b7, released 2024/08/26, GEOS 3.12.2, PROJ 9.4.1
## base URL of the CRAN source-package directory
cransrc <- "https://cran.r-project.org/src/contrib"
## list all R packages .tar.gz
r_pkgs <- vsi_read_dir(glue::glue("/vsicurl/{cransrc}"), recursive = FALSE)
## pick the arrow source tarball; the /vsitar/ path below must be a single
## string, so fail early unless exactly one entry matches
arrow_tarball <- grep("^arrow_", r_pkgs, value = TRUE)
stopifnot(length(arrow_tarball) == 1L)
## list files in the inst/ dir of the arrow package
## (/vsitar/ chained onto /vsicurl/ reads the remote archive in place)
src <- glue::glue("/vsitar//vsicurl/{cransrc}/{arrow_tarball}")
inst_files <- grep("/inst/", vsi_read_dir(src, recursive = TRUE), value = TRUE)
## and find that one Parquet file (not spatial)
parq_file <- grep("parquet$", inst_files, value = TRUE)[1]
## `[1]` on an empty match gives NA, which glue would paste in as "NA"
stopifnot(!is.na(parq_file))
parq_file
#> [1] "arrow/inst/v0.7.1.parquet"
## full data source name: archive path plus the file's path inside it
dsn <- glue::glue("{src}/{parq_file}")
## now this of course works in all the GDAL packages
## terra: proxy = TRUE returns a SpatVectorProxy rather than reading the rows
terra::vect(dsn, proxy = TRUE)
#> class : SpatVectorProxy
#> geometry : none
#> dimensions : 10, 11 (geometries, attributes)
#> extent : NaN, NaN, NaN, NaN (xmin, xmax, ymin, ymax)
#> source : v0.7.1.parquet
#> coord. ref. :
#> names : carat cut color clarity depth table price x y z
#> type : <num> <chr> <chr> <chr> <num> <num> <int> <num> <num> <num>
#> __index_level_0__
#> <int>
## sf alternative (not run here)
#sf::read_sf(dsn)
## gdalraster: open the layer and fetch the first feature as a named list
lyr <- new(gdalraster::GDALVector, dsn)
lyr$getNextFeature()
#> $FID
#> integer64
#> [1] 0
#>
#> $carat
#> [1] 0.23
#>
#> $cut
#> [1] "Ideal"
#>
#> $color
#> [1] "E"
#>
#> $clarity
#> [1] "SI2"
#>
#> $depth
#> [1] 61.5
#>
#> $table
#> [1] 55
#>
#> $price
#> integer64
#> [1] 326
#>
#> $x
#> [1] 3.95
#>
#> $y
#> [1] 3.98
#>
#> $z
#> [1] 2.43
#>
#> $`__index_level_0__`
#> integer64
#> [1] 0
## release the dataset handle now that we are done with the layer
lyr$close()
## vapour: read the attribute fields of the first 5 features
vapour::vapour_read_fields(dsn, limit_n = 5)
#> $carat
#> [1] 0.23 0.21 0.23 0.29 0.31
#>
#> $cut
#> [1] "Ideal" "Premium" "Good" "Premium" "Good"
#>
#> $color
#> [1] "E" "E" "E" "I" "J"
#>
#> $clarity
#> [1] "SI2" "SI1" "VS1" "VS2" "SI2"
#>
#> $depth
#> [1] 61.5 59.8 56.9 62.4 63.3
#>
#> $table
#> [1] 55 61 65 58 58
#>
#> $price
#> [1] 326 326 327 334 335
#>
#> $x
#> [1] 3.95 3.89 4.05 4.20 4.34
#>
#> $y
#> [1] 3.98 3.84 4.07 4.23 4.35
#>
#> $z
#> [1] 2.43 2.31 2.31 2.63 2.75
#>
#> $`__index_level_0__`
#> [1] 0 1 2 3 4
Created on 2024-08-29 with reprex v2.1.0
Some extra complexity (mostly taken from `download.packages`) will save CRAN some resources by letting `available.packages()` download the list of packages from a mirror and cache it for the duration of the session:

The package is downloaded from the mirror as well.