Last active
June 9, 2016 20:17
-
-
Save julienschmidt/7178960 to your computer and use it in GitHub Desktop.
Find all sub-sequence permutations of a given word in a dictionary
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bytes" | |
"fmt" | |
"io/ioutil" | |
"strings" | |
) | |
var index64 = [64]int{ | |
0, 1, 48, 2, 57, 49, 28, 3, | |
61, 58, 50, 42, 38, 29, 17, 4, | |
62, 55, 59, 36, 53, 51, 43, 22, | |
45, 39, 33, 30, 24, 18, 12, 5, | |
63, 47, 56, 27, 60, 41, 37, 16, | |
54, 35, 52, 21, 44, 32, 23, 11, | |
46, 26, 40, 15, 34, 20, 31, 10, | |
25, 14, 19, 9, 13, 8, 7, 6, | |
} | |
func readDict(path string, word string) (map[string]bool, error) { | |
buf, err := ioutil.ReadFile(path) | |
if err != nil { | |
return nil, err | |
} | |
// Either just use this: | |
// (much shorter but also slower) | |
/* | |
dict := make(map[string]bool) | |
b := bytes.Split(buf, []byte("\n")) | |
for i := range b { | |
dict[string(bytes.ToLower(b[i]))] = true | |
} | |
return dict, nil | |
*/ | |
// Or the following: | |
var first [256]bool | |
for i := 0; i < len(word); i++ { | |
first[word[i]] = true | |
} | |
for i, upper := 0, strings.ToUpper(word); i < len(word); i++ { | |
first[upper[i]] = true | |
} | |
indexBitmap := make([]uint64, (len(buf)+63)/64) | |
count := 0 | |
for i := 0; i < len(buf); i++ { | |
// Find the next line break | |
pos := bytes.IndexByte(buf[i:], '\n') | |
if pos <= len(word) { | |
if pos < 0 { | |
break | |
} | |
if first[buf[i]] { | |
count++ | |
} | |
} | |
i += pos | |
// Save pos in bitmap | |
indexBitmap[i/64] |= 1 << (uint(i) % 64) | |
} | |
dict := make(map[string]bool, count) | |
offset := 0 | |
for i, bm := range indexBitmap { | |
for bm > 0 { | |
// Get the least significant set bit (=1) | |
lsb := bm & -bm | |
// Remove the LSB from the bitmap | |
bm ^= lsb | |
// Get index from LSB with 64bit DeBruijn multiplication table | |
// This could be done even faster using Assembly: | |
// http://en.wikipedia.org/wiki/Find_first_set | |
index := index64[(lsb*0x03f79d71b4cb0a89)>>58] + i*64 | |
if index - offset <= len(word) && first[buf[offset]] { | |
dict[string(bytes.ToLower(buf[offset:index]))] = true | |
} | |
offset = index + 1 | |
} | |
} | |
return dict, nil | |
} | |
func permSubseq(dict map[string]bool, prefix, str string) (p []string) { | |
var charPermuted [256]bool | |
for i := 0; i < len(str); i++ { | |
if !charPermuted[str[i]] { | |
nStr := prefix + str[i:i+1] | |
if dict[nStr] { | |
p = append(p, nStr) | |
} | |
if len(str) > 1 { | |
p = append(p, permSubseq(dict, nStr, str[:i]+str[i+1:])...) | |
} | |
charPermuted[str[i]] = true | |
} | |
} | |
return p | |
} | |
func main() { | |
str := "racecardss" | |
dict, err := readDict("/usr/share/dict/words", str) | |
if err != nil { | |
return | |
} | |
p := permSubseq(dict, "", str) | |
fmt.Println(p) | |
fmt.Println(len(p)) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
58 index := index64[(lsb_0x03f79d71b4cb0a89)>>58] + i_64
This is brilliant. though I am wondering where does those magic numbers (0x03f79d71b4cb0a89, 58) come from?