Created
March 24, 2020 22:19
-
-
Save parthsarthiprasad/48e8e22de534df24bd46fd8bea9c4971 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
inline proc findAll(needle: string, region: range(?) = 1:byteIndex..): [] | |
{ | |
/* | |
find all occurrences of needle in string. | |
return type :- integer array containing starting indices of matched pattern. (1-based indexing) | |
empty array if no index found | |
Algorithm :- boyle bad character shift array with boyle moore horspool algorithm. | |
preprocessing phase is O(m+k) time and | |
O(k) space complexity | |
where m = length of pattern , | |
n = length of string | |
k = numchar = 256 | |
worst-case performance O(mn), | |
average time is O(n) | |
best case = sub-linear | |
*/ | |
use List; | |
var numchar = 256 : int; | |
var res:list(byteIndex); | |
var rlow:byteIndex; | |
var rhigh:byteIndex; | |
//handle region | |
if(!(region.hasLowBound())) then rlow = 1; else rlow=region.low; | |
if(!(region.hasHighBound())) then rhigh = this.len; else rhigh=region.high; | |
var nlen = needle.len; | |
//handle corner cases | |
if((nlen>(rhigh-rlow+1)) || (nlen==0) || (this.len==0)){ | |
var x = res.toArray(); | |
var finres:[0..res.size-1] int; | |
finres = x.reindex(0..#x.size):int; | |
return finres; | |
} | |
//preprocess the needle | |
//preprocess BoyleMoore bad character shift. | |
//preBmBs(needle ,rlow, rhigh, bmbc); | |
// boyle moore bad character array to store the preprocess of the input needle | |
var bmbc:[1..numchar] int; | |
for i in 1..numchar | |
{ | |
bmbc[i]=nlen; | |
} | |
for i in 1..(nlen-1) | |
{ | |
bmbc[needle[i]]=nlen-i+1; | |
} | |
//find all occurrences using BMH search | |
//var i = rlow; | |
var j = rlow; | |
while(j<=rhigh-nlen){ | |
var c = this[j+nlen-1] : byteIndex; | |
if(needle[nlen]==c && !(needle == this[j..j+nlen])){ | |
res.append(j); | |
} | |
j += bmbc[c]; | |
} | |
var x = res.toArray(); | |
var finres: [0..res.size-1] int; | |
finres = x.reindex(0..#x.size):int; | |
return finres; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment