Created
May 19, 2020 00:30
Revisions
-
bored-engineer created this gist
May 19, 2020 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,234 @@ package main import ( "os" "fmt" "strings" "regexp/syntax" "unicode/utf8" ) // Maximum number of repeats if unterminated (ex: *, +) var MaxRepeats = 10 // syntax.OpNoMatch func opNoMatch(pattern *syntax.Regexp) []string { return nil } // syntax.OpEmptyMatch, syntax.OpBeginLine, syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText func opEmpty(pattern *syntax.Regexp) []string { return []string{""} } // syntax.OpLiteral func opLiteral(pattern *syntax.Regexp) []string { // Literal is returned as-is, just 1 value return []string{string(pattern.Rune)} } // syntax.OpCharClass func opCharClass(pattern *syntax.Regexp) []string { // pattern.Rune is already de-dupped var lits []string for idx := 0; idx < len(pattern.Rune); idx += 2 { for r := pattern.Rune[idx]; r <= pattern.Rune[idx+1]; r++ { lits = append(lits, string(r)) } } return lits } // syntax.OpAnyCharNotNL func opAnyCharNotNL(pattern *syntax.Regexp) []string { lits := make([]string, utf8.MaxRune - 1) idx := 0 for r := 0; r < utf8.MaxRune; idx++ { if rune(r) != rune('\n') { lits[idx] = string(rune(idx)) idx++ } } return lits } // syntax.OpAnyChar func opAnyChar(pattern *syntax.Regexp) []string { lits := make([]string, utf8.MaxRune) for idx := 0; idx < int(utf8.MaxRune); idx++ { lits[idx] = string(rune(idx)) } return lits } // syntax.OpCapture func opCapture(pattern *syntax.Regexp) ([]string, error) { return expand(pattern.Sub[0]) } // syntax.OpStar func opStar(pattern *syntax.Regexp) ([]string, error) { pattern.Min = 0 pattern.Max = MaxRepeats return opRepeat(pattern) } // syntax.OpPlus func opPlus(pattern *syntax.Regexp) ([]string, error) { pattern.Min = 1 pattern.Max = MaxRepeats return opRepeat(pattern) } // syntax.OpQuest func opQuest(pattern *syntax.Regexp) ([]string, error) { lits, err := expand(pattern.Sub[0]) if err != nil { return nil, err } return append(lits, ""), nil } // syntax.OpRepeat func opRepeat(pattern *syntax.Regexp) ([]string, error) { lits, err := expand(pattern.Sub[0]) if err != nil { return nil, err } if pattern.Max == -1 { pattern.Max = MaxRepeats } rlits := make([]string, len(lits) * (pattern.Max - pattern.Min)) idx := 0 for repeat := pattern.Min; repeat < pattern.Max; repeat++ { if repeat == 0 { rlits[idx] = "" idx++ rlits = rlits[:len(rlits) - len(lits) + 1] continue } for _, lit := range lits { rlits[idx] = strings.Repeat(lit, repeat) idx++ } } return rlits, nil } // syntax.OpConcat func opConcat(pattern *syntax.Regexp) ([]string, error) { // Ensure unique values uniq := make(map[string]struct{}) // recursively concat var recur func(prefixes []string, patterns []*syntax.Regexp) error recur = func(prefixes []string, patterns []*syntax.Regexp) error { lits, err := expand(patterns[0]) if err != nil { return err } swap := make([]string, len(lits) * len(prefixes)) idx := 0 for _, prefix := range prefixes { for _, lit := range lits { swap[idx] = prefix + lit idx++ } } if len(patterns) <= 1 { for _, lit := range swap { uniq[lit] = struct{}{} } return nil } else { return recur(swap, patterns[1:]) } } if err := recur([]string{""}, pattern.Sub); err != nil { return nil, err } lits := make([]string, len(uniq)) idx := 0 for lit := range uniq { lits[idx] = lit idx++ } return lits, nil } // syntax.OpAlternate func opAlternate(pattern *syntax.Regexp) ([]string, error) { // Ensure unique values uniq := make(map[string]struct{}) for _, sub := range pattern.Sub { // Each alternate stands on it's own lits, err := expand(sub) if err != nil { return nil, err } for _, lit := range lits { uniq[lit] = struct{}{} } } lits := make([]string, len(uniq)) idx := 0 for lit := range uniq { lits[idx] = lit idx++ } return lits, nil } // expand recursively expands the possible values for a regular expression func expand(pattern *syntax.Regexp) ([]string, error) { switch pattern.Op { case syntax.OpNoMatch: return opNoMatch(pattern), nil case syntax.OpEmptyMatch, syntax.OpBeginLine, syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText: return opEmpty(pattern), nil case syntax.OpLiteral: return opLiteral(pattern), nil case syntax.OpCharClass: return opCharClass(pattern), nil case syntax.OpAnyCharNotNL: return opAnyCharNotNL(pattern), nil case syntax.OpAnyChar: return opAnyChar(pattern), nil case syntax.OpCapture: return opCapture(pattern) case syntax.OpStar: return opStar(pattern) case syntax.OpPlus: return opPlus(pattern) case syntax.OpQuest: return opQuest(pattern) case syntax.OpRepeat: return opRepeat(pattern) case syntax.OpConcat: return opConcat(pattern) case syntax.OpAlternate: return opAlternate(pattern) default: return nil, fmt.Errorf("unhandled pattern operation %s", pattern.Op) } } // Entry Point func main() { if len(os.Args) < 2 { fmt.Printf("usage: %s <regexp>", os.Args[0]) } exp, err := syntax.Parse(os.Args[1], syntax.POSIX) if err != nil { fmt.Fprintf(os.Stderr, "failed to parse regexp: %s", err) os.Exit(1) } // Operate on the simplified expression as it's faster matches, err := expand(exp.Simplify()) if err != nil { fmt.Fprintf(os.Stderr, "failed to expand regexp: %s", err) os.Exit(1) } for _, match := range matches { fmt.Println(match) } }