Last active
November 30, 2021 12:54
-
-
Save wrathematics/4821828b52fcbe2a49d0ea8f5d9378d5 to your computer and use it in GitHub Desktop.
Use the hdfmat package to split an HDF5 matrix generated by armadillo into row chunks
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// h5c++ generate.cpp -o generate | |
#include <string> | |
#define ARMA_USE_HDF5 | |
#include <armadillo> | |
static inline void gen(const int m, const int n, std::string fname) | |
{ | |
arma::fmat x(m, n); | |
for (int j=0; j<n; j++) | |
{ | |
arma::fvec col = arma::linspace<arma::fvec>(j*m + 1, j*m + m, m); | |
x.col(j) = col; | |
} | |
std::cout << x << std::endl; | |
x.save(arma::hdf5_name(fname, "mydata")); | |
} | |
int main() | |
{ | |
const int m = 10; | |
const int n = 5; | |
const std::string fname = "/tmp/test/test_mat.h5"; | |
gen(m, n, fname); | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// h5c++ read.cpp -o read | |
#include <string> | |
#define ARMA_USE_HDF5 | |
#include <armadillo> | |
int main() | |
{ | |
const std::string fname = "/tmp/test/test_mat1.h5"; | |
arma::fmat x; | |
x.load(arma::hdf5_name(fname, "mydata")); | |
std::cout << x << std::endl; | |
return 0; | |
} |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Attach hdfmat, silencing any messages it emits while loading.
suppressMessages(library(hdfmat))

# Input location: <storage_path>/<fname_preface><post>, dataset <varname>.
storage_path <- "/tmp/test"
fname_preface <- "test_mat"
post <- ".h5"
varname <- "mydata"

# Number of chunk files to produce.
nchunks <- 3

# ------------------------------------------------------------------------------
# Open the input dataset; the second entry of its dimensions is used as the
# column count that gets divided among the chunks.
h <- hdfmat_open(
  file.path(storage_path, paste0(fname_preface, post)),
  varname
)
ncols <- h$dim()[2]
# Split n items into k chunks whose sizes differ by at most one.
#
# Arguments:
#   n  total number of items
#   k  number of chunks; must satisfy 1 <= k <= n
#
# Returns a length-k vector of chunk sizes that sums to n. Every chunk gets
# n %/% k items and the first n %% k chunks get one extra.
#
# Stops with an error if k is smaller than 1 or larger than n, since neither
# case can produce k non-empty chunks.
split_n_by_k = function(n, k){
  if (k < 1)
    stop("cannot split into k=", k, " chunks: k must be at least 1")
  if (k > n)
    stop("cannot split n=", n, " items into k=", k, " non-empty chunks")

  ret = rep(n %/% k, k)
  r = n %% k
  # Spread the remainder over the leading chunks. The r > 0 guard matters
  # because 1:0 would index c(1, 0) rather than nothing.
  if (r > 0)
    ret[1:r] = ret[1:r] + 1
  ret
}
# Work out how many columns each chunk receives, then derive every chunk's
# first and last column index up front.
chunk_lens <- split_n_by_k(ncols, nchunks)
stops <- cumsum(chunk_lens)
starts <- stops - chunk_lens + 1

# For each chunk: read its column range from the input, write it to a file
# named <fname_preface><chunk><post>, and close that file before moving on.
for (chunk in 1:nchunks){
  x <- h$read(col_start=starts[chunk], col_stop=stops[chunk])

  fname_chunk <- file.path(storage_path, paste0(fname_preface, chunk, post))
  h_chunk <- hdfmat(file=fname_chunk, varname, nrow(x), ncol(x), type="float")
  h_chunk$fill(x)
  h_chunk$close()

  # Run the garbage collector between chunks without printing its report.
  invisible(gc())
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Workflow is