Skip to content

Instantly share code, notes, and snippets.

@bored-engineer
Created May 19, 2020 00:30

Revisions

  1. bored-engineer created this gist May 19, 2020.
    234 changes: 234 additions & 0 deletions exrex.go
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,234 @@
    package main

    import (
    "os"
    "fmt"
    "strings"
    "regexp/syntax"
    "unicode/utf8"
    )

    // Maximum number of repeats if unterminated (ex: *, +)
    var MaxRepeats = 10

    // syntax.OpNoMatch
    func opNoMatch(pattern *syntax.Regexp) []string {
    return nil
    }

    // syntax.OpEmptyMatch, syntax.OpBeginLine, syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText
    func opEmpty(pattern *syntax.Regexp) []string {
    return []string{""}
    }

    // syntax.OpLiteral
    func opLiteral(pattern *syntax.Regexp) []string {
    // Literal is returned as-is, just 1 value
    return []string{string(pattern.Rune)}
    }

    // syntax.OpCharClass
    func opCharClass(pattern *syntax.Regexp) []string {
    // pattern.Rune is already de-dupped
    var lits []string
    for idx := 0; idx < len(pattern.Rune); idx += 2 {
    for r := pattern.Rune[idx]; r <= pattern.Rune[idx+1]; r++ {
    lits = append(lits, string(r))
    }
    }
    return lits
    }

    // syntax.OpAnyCharNotNL
    func opAnyCharNotNL(pattern *syntax.Regexp) []string {
    lits := make([]string, utf8.MaxRune - 1)
    idx := 0
    for r := 0; r < utf8.MaxRune; idx++ {
    if rune(r) != rune('\n') {
    lits[idx] = string(rune(idx))
    idx++
    }
    }
    return lits
    }

    // syntax.OpAnyChar
    func opAnyChar(pattern *syntax.Regexp) []string {
    lits := make([]string, utf8.MaxRune)
    for idx := 0; idx < int(utf8.MaxRune); idx++ {
    lits[idx] = string(rune(idx))
    }
    return lits
    }

    // syntax.OpCapture
    func opCapture(pattern *syntax.Regexp) ([]string, error) {
    return expand(pattern.Sub[0])
    }

    // syntax.OpStar
    func opStar(pattern *syntax.Regexp) ([]string, error) {
    pattern.Min = 0
    pattern.Max = MaxRepeats
    return opRepeat(pattern)
    }

    // syntax.OpPlus
    func opPlus(pattern *syntax.Regexp) ([]string, error) {
    pattern.Min = 1
    pattern.Max = MaxRepeats
    return opRepeat(pattern)
    }

    // syntax.OpQuest
    func opQuest(pattern *syntax.Regexp) ([]string, error) {
    lits, err := expand(pattern.Sub[0])
    if err != nil {
    return nil, err
    }
    return append(lits, ""), nil
    }

    // syntax.OpRepeat
    func opRepeat(pattern *syntax.Regexp) ([]string, error) {
    lits, err := expand(pattern.Sub[0])
    if err != nil {
    return nil, err
    }
    if pattern.Max == -1 {
    pattern.Max = MaxRepeats
    }
    rlits := make([]string, len(lits) * (pattern.Max - pattern.Min))
    idx := 0
    for repeat := pattern.Min; repeat < pattern.Max; repeat++ {
    if repeat == 0 {
    rlits[idx] = ""
    idx++
    rlits = rlits[:len(rlits) - len(lits) + 1]
    continue
    }
    for _, lit := range lits {
    rlits[idx] = strings.Repeat(lit, repeat)
    idx++
    }
    }
    return rlits, nil
    }

    // syntax.OpConcat
    func opConcat(pattern *syntax.Regexp) ([]string, error) {
    // Ensure unique values
    uniq := make(map[string]struct{})
    // recursively concat
    var recur func(prefixes []string, patterns []*syntax.Regexp) error
    recur = func(prefixes []string, patterns []*syntax.Regexp) error {
    lits, err := expand(patterns[0])
    if err != nil {
    return err
    }
    swap := make([]string, len(lits) * len(prefixes))
    idx := 0
    for _, prefix := range prefixes {
    for _, lit := range lits {
    swap[idx] = prefix + lit
    idx++
    }
    }
    if len(patterns) <= 1 {
    for _, lit := range swap {
    uniq[lit] = struct{}{}
    }
    return nil
    } else {
    return recur(swap, patterns[1:])
    }
    }
    if err := recur([]string{""}, pattern.Sub); err != nil {
    return nil, err
    }
    lits := make([]string, len(uniq))
    idx := 0
    for lit := range uniq {
    lits[idx] = lit
    idx++
    }
    return lits, nil
    }

    // syntax.OpAlternate
    func opAlternate(pattern *syntax.Regexp) ([]string, error) {
    // Ensure unique values
    uniq := make(map[string]struct{})
    for _, sub := range pattern.Sub {
    // Each alternate stands on it's own
    lits, err := expand(sub)
    if err != nil {
    return nil, err
    }
    for _, lit := range lits {
    uniq[lit] = struct{}{}
    }
    }
    lits := make([]string, len(uniq))
    idx := 0
    for lit := range uniq {
    lits[idx] = lit
    idx++
    }
    return lits, nil
    }

    // expand recursively expands the possible values for a regular expression
    func expand(pattern *syntax.Regexp) ([]string, error) {
    switch pattern.Op {
    case syntax.OpNoMatch:
    return opNoMatch(pattern), nil
    case syntax.OpEmptyMatch, syntax.OpBeginLine, syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText:
    return opEmpty(pattern), nil
    case syntax.OpLiteral:
    return opLiteral(pattern), nil
    case syntax.OpCharClass:
    return opCharClass(pattern), nil
    case syntax.OpAnyCharNotNL:
    return opAnyCharNotNL(pattern), nil
    case syntax.OpAnyChar:
    return opAnyChar(pattern), nil
    case syntax.OpCapture:
    return opCapture(pattern)
    case syntax.OpStar:
    return opStar(pattern)
    case syntax.OpPlus:
    return opPlus(pattern)
    case syntax.OpQuest:
    return opQuest(pattern)
    case syntax.OpRepeat:
    return opRepeat(pattern)
    case syntax.OpConcat:
    return opConcat(pattern)
    case syntax.OpAlternate:
    return opAlternate(pattern)
    default:
    return nil, fmt.Errorf("unhandled pattern operation %s", pattern.Op)
    }
    }

    // Entry Point
    func main() {
    if len(os.Args) < 2 {
    fmt.Printf("usage: %s <regexp>", os.Args[0])
    }
    exp, err := syntax.Parse(os.Args[1], syntax.POSIX)
    if err != nil {
    fmt.Fprintf(os.Stderr, "failed to parse regexp: %s", err)
    os.Exit(1)
    }
    // Operate on the simplified expression as it's faster
    matches, err := expand(exp.Simplify())
    if err != nil {
    fmt.Fprintf(os.Stderr, "failed to expand regexp: %s", err)
    os.Exit(1)
    }
    for _, match := range matches {
    fmt.Println(match)
    }
    }