Last active
July 11, 2024 20:27
-
-
Save NathanBaulch/cc26755dc89685fa209bf958e484c60d to your computer and use it in GitHub Desktop.
Optimized solution to APS #038 unique words with unique letters
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package main | |
import ( | |
"bufio" | |
"fmt" | |
"io" | |
"math" | |
"os" | |
"runtime" | |
"sort" | |
"sync" | |
"time" | |
) | |
func main() { | |
var words [][]byte | |
for _, path := range os.Args[1:] { | |
if file, err := os.Open(path); err != nil { | |
panic(err) | |
} else if b, err := io.ReadAll(file); err != nil { | |
panic(err) | |
} else { | |
_ = file.Close() | |
for i := 0; i < len(b); { | |
if advance, token, err := bufio.ScanLines(b[i:], true); err != nil { | |
panic(err) | |
} else { | |
words = append(words, token) | |
i += advance | |
} | |
} | |
} | |
} | |
println(fmt.Sprint(len(words)), "starting words") | |
now := time.Now() | |
for i := 0; i < len(words); i++ { | |
if len(words[i]) != 5 { | |
words[i] = words[len(words)-1] | |
words = words[:len(words)-1] | |
i-- | |
} | |
} | |
println(fmt.Sprint(len(words)), "5-letter words") | |
for i := 0; i < len(words); i++ { | |
for j := 0; j < 4; j++ { | |
if bytesContains(words[i][j+1:], words[i][j]) { | |
words[i] = words[len(words)-1] | |
words = words[:len(words)-1] | |
i-- | |
break | |
} | |
} | |
} | |
println(fmt.Sprint(len(words)), "words with unique characters") | |
// special ordering to quickly fail words with common letters | |
var common []byte | |
remaining := make([][]byte, len(words)) | |
copy(remaining, words) | |
for len(remaining) > 0 { | |
stats := make([]int, 26) | |
for _, w := range remaining { | |
for _, c := range w { | |
stats[c-'a']++ | |
} | |
} | |
var maxS int | |
var maxC byte | |
for c, s := range stats { | |
if s > maxS { | |
maxS = s | |
maxC = byte(c) | |
} | |
} | |
common = append(common, maxC+'a') | |
for i := 0; i < len(remaining); i++ { | |
if bytesContains(remaining[i], maxC+'a') { | |
remaining[i] = remaining[len(remaining)-1] | |
remaining = remaining[:len(remaining)-1] | |
i-- | |
} | |
} | |
} | |
sort.Slice(words, func(i, j int) bool { | |
for _, c := range common { | |
if bytesContains(words[i], c) { | |
if bytesContains(words[j], c) { | |
break | |
} | |
return true | |
} else if bytesContains(words[j], c) { | |
return false | |
} | |
} | |
return string(words[i]) < string(words[j]) | |
}) | |
anagrams := map[int][][]byte{} | |
for i := 0; i < len(words); i++ { | |
for j := i + 1; j < len(words)-1; j++ { | |
match := true | |
for _, c := range words[i] { | |
if match = bytesContains(words[j], c); !match { | |
break | |
} | |
} | |
if match { | |
anagrams[i] = append(anagrams[i], words[j]) | |
words = append(words[:j], words[j+1:]...) | |
j-- | |
} | |
} | |
} | |
println(fmt.Sprint(len(words)), "words excluding anagrams") | |
idx := make([]int32, len(words)) | |
for i, word := range words { | |
for _, c := range word { | |
idx[i] |= 1 << (c - 'a') | |
} | |
} | |
// utilize all available CPU cores | |
workers := runtime.GOMAXPROCS(0) | |
var wg sync.WaitGroup | |
wg.Add(workers) | |
results := make([][][]int, workers) | |
ch := make(chan int) | |
for i := 0; i < workers; i++ { | |
ii := i | |
go func() { | |
defer wg.Done() | |
for i0 := range ch { | |
for i1 := i0 + 1; i1 < len(words); i1++ { | |
if idx[i0]&idx[i1] == 0 { | |
x := idx[i0] | idx[i1] | |
for i2 := i1 + 1; i2 < len(words); i2++ { | |
if x&idx[i2] == 0 { | |
x := x | idx[i2] | |
for i3 := i2 + 1; i3 < len(words); i3++ { | |
if x&idx[i3] == 0 { | |
x := x | idx[i3] | |
for i4 := i3 + 1; i4 < len(words); i4++ { | |
if x&idx[i4] == 0 { | |
results[ii] = append(results[ii], []int{i0, i1, i2, i3, i4}) | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
} | |
}() | |
} | |
// distribute workers across workload for more accurate progress output | |
chunk := int(math.Ceil(float64(len(words)) / float64(workers))) | |
for i := 0; i < chunk; i++ { | |
for j := i; j < len(words); j += chunk { | |
ch <- j | |
} | |
} | |
close(ch) | |
wg.Wait() | |
// expand anagrams | |
var solns [][][]byte | |
for _, r := range results { | |
for _, s := range r { | |
from := len(solns) | |
soln := make([][]byte, 5) | |
for i, w := range s { | |
soln[i] = words[w] | |
} | |
solns = append(solns, soln) | |
to := len(solns) | |
for i, w := range s { | |
if a, ok := anagrams[w]; ok { | |
for j := range a { | |
for _, s := range solns[from:to] { | |
soln = make([][]byte, 5) | |
copy(soln, s) | |
soln[i] = a[j] | |
solns = append(solns, soln) | |
} | |
} | |
} | |
to = len(solns) | |
} | |
} | |
} | |
println(fmt.Sprint(len(solns)), "solutions in ", time.Now().Sub(now).String()) | |
for _, s := range solns { | |
for _, w := range s { | |
print(string(w) + " ") | |
} | |
println() | |
} | |
/* | |
> go run . 'f:\words_alpha.txt' | |
370099 starting words | |
15918 5-letter words | |
10173 words with unique characters | |
5976 words excluding anagrams | |
831 solutions in 15.7091592s | |
... | |
*/ | |
} | |
func bytesContains(s []byte, v byte) bool { | |
for _, vs := range s { | |
if v == vs { | |
return true | |
} | |
} | |
return false | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This gist does not seem to have a license declared in an obvious way, or at all. A license should be added (as this project seems to be intended as open source) so that users can run, modify, study, distribute, and use the code.
https://choosealicense.com/no-permission/