Last active
August 21, 2016 14:42
-
-
Save abicky/58ea79b01d9e394d5076 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Copyright 2015- Takeshi Arabiki
# License: GNU GPL version 2 or later <http://gnu.org/licenses/gpl.html>
library(Rcpp) | |
Sys.setenv(PKG_LIBS = "-lpcrecpp") | |
sourceCpp(code = ' | |
#include <Rcpp.h> // Rcpp 0.11.3 | |
#include <pcrecpp.h> // pcrecpp 8.36 | |
// Escapes `pattern` with pcrecpp::RE::QuoteMeta and returns the escaped text,
// so that it can be embedded in a larger regular expression as a literal.
// [[Rcpp::export]]
Rcpp::String regexquote(Rcpp::String pattern) {
  const pcrecpp::StringPiece raw_text(pattern.get_cstring());
  const std::string escaped = pcrecpp::RE::QuoteMeta(raw_text);
  return escaped;
}
class Regex { | |
public: | |
pcrecpp::RE re; | |
Regex(Rcpp::String pattern) : re("(" + std::string(pattern) + ")", pcrecpp::UTF8()) { | |
if (!re.error().empty()) { | |
Rcpp::stop("Invalid regular expression: " + re.error()); | |
} | |
} | |
~Regex() {} | |
Rcpp::List scan(Rcpp::String str) { | |
std::vector<std::vector<std::string> > all_matches; | |
pcrecpp::StringPiece input(str.get_cstring()); | |
int n = re.NumberOfCapturingGroups(); | |
pcrecpp::Arg** args = new pcrecpp::Arg*[n]; | |
pcrecpp::Arg* match_ptrs = new pcrecpp::Arg[n]; | |
std::string* matches = new std::string[n]; | |
for (int i = 0; i < n; ++i) { | |
match_ptrs[i] = &matches[i]; | |
args[i] = &match_ptrs[i]; | |
} | |
int consumed; | |
while (re.DoMatch(input, pcrecpp::RE::UNANCHORED, &consumed, args, n)) { | |
all_matches.push_back(std::vector<std::string>(matches, matches + n)); | |
input.remove_prefix(consumed); | |
} | |
delete[] args; | |
delete[] match_ptrs; | |
delete[] matches; | |
return Rcpp::wrap(all_matches); | |
} | |
}; | |
// Rcpp module "Regex": registers the Regex class under the name "Regex"
// with a constructor taking one Rcpp::String and the method "scan".
RCPP_MODULE(Regex) {
  typedef Rcpp::class_<Regex> RegexBinding;

  RegexBinding("Regex")
      .constructor<Rcpp::String>()
      .method("scan", &Regex::scan);
}
') |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment