Created
November 30, 2023 08:55
-
-
Save maciekmm/7877ca04371ab04839525d9c4e2610eb to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package rere | |
import ( | |
"errors" | |
"fmt" | |
"regexp/syntax" | |
"strings" | |
"golang.org/x/exp/rand" | |
) | |
var ran = rand.New(rand.NewSource(0)) | |
func Generate(expr string, max int) (string, error) { | |
reg, err := syntax.Parse(expr, syntax.Perl) | |
if err != nil { | |
return "", err | |
} | |
reg = reg.Simplify() | |
return generate(reg, max) | |
} | |
func generate(reg *syntax.Regexp, max int) (string, error) { | |
switch reg.Op { | |
case syntax.OpBeginLine, syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText, syntax.OpNoWordBoundary: | |
return "", nil | |
case syntax.OpWordBoundary: | |
return " ", nil | |
case syntax.OpEmptyMatch: // matches empty string | |
return "", nil | |
case syntax.OpLiteral: // matches Runes sequence | |
return string(reg.Rune), nil | |
case syntax.OpCharClass: // matches Runes interpreted as range pair list | |
from, to := reg.Rune[0], reg.Rune[1] | |
return string(runeBetween(from, to)), nil | |
case syntax.OpAnyCharNotNL, syntax.OpAnyChar: // matches any character except newline | |
return string(runeBetween('a', 'z')), nil | |
case syntax.OpCapture: | |
return generate(reg.Sub[0], max) | |
case syntax.OpStar: // matches 0 or more instances of subexp | |
return repeatSub(reg.Sub[0], 0, max) | |
case syntax.OpPlus: // matches 1 or more instances of subexp | |
return repeatSub(reg.Sub[0], 1, max) | |
case syntax.OpQuest: // matches 0 or 1 instances of subexp | |
return repeatSub(reg.Sub[0], 0, 1) | |
case syntax.OpRepeat: // matches min to max instances of subexp | |
maxVal := reg.Max | |
if maxVal == -1 { | |
maxVal = max | |
} | |
return repeatSub(reg.Sub[0], reg.Min, maxVal) | |
case syntax.OpConcat: // matches concatenation of subexps | |
var builder strings.Builder | |
for _, sub := range reg.Sub { | |
gen, err := generate(sub, max) | |
if err != nil { | |
return "", err | |
} | |
builder.WriteString(gen) | |
} | |
return builder.String(), nil | |
case syntax.OpAlternate: // matches alternation of subexps | |
sub := reg.Sub[ran.Intn(len(reg.Sub))] | |
return generate(sub, max) | |
default: | |
fmt.Printf("op: %s, Rune: %s", reg.Op, string(reg.Rune)) | |
return "", errors.New("not implemented") | |
} | |
} | |
func repeatSub(reg *syntax.Regexp, min, max int) (string, error) { | |
ret, err := generate(reg, max) | |
if err != nil { | |
return "", err | |
} | |
howMany := min + rand.Intn(max-min+1) | |
return strings.Repeat(ret, howMany), nil | |
} | |
func runeBetween(from, to rune) rune { | |
return rune(int32(from) + ran.Int31n(int32(to-from+1))) | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment