Created
April 7, 2024 18:14
-
-
Save makenowjust/0651fe23a694a22e3de92df4f2cbdd68 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// An implementation of Glushkov's construction in Scala. | |
// | |
// See https://en.wikipedia.org/wiki/Glushkov%27s_construction_algorithm. | |
/** Nfa represents an non-deterministic finite state automaton. | |
* | |
* @tparam A | |
* an alphabet type | |
* @tparam Q | |
* a state type | |
* | |
* @param init | |
* an initial state | |
* @param acceptSet | |
* a set of accepting states | |
* @param transition | |
* a transition mapping | |
*/ | |
final case class Nfa[A, Q]( | |
init: Q, | |
acceptSet: Set[Q], | |
transition: Map[(Q, A), Set[Q]] | |
) | |
/** Indexed is a pair of a value and an index. | |
* | |
* @param value | |
* a value | |
* @param index | |
* an index | |
*/ | |
final case class Indexed[A](value: A, index: Int) | |
/** Re represents a regular expression. | |
* | |
* @tparam A | |
* an alphabet type | |
*/ | |
enum Re[A]: | |
/** Lit is a literal. | |
* | |
* @param value | |
* a literal value | |
*/ | |
case Lit(value: A) | |
/** Cat is a concatenetion of regular expressions. | |
* | |
* @param left | |
* a left regular expression | |
* @param right | |
* a right regular expression | |
*/ | |
case Cat(left: Re[A], right: Re[A]) | |
/** Alt is a alternation of regular expressions. | |
* | |
* @param left | |
* a left regular expression | |
* @param right | |
* a right regular expression | |
*/ | |
case Alt(left: Re[A], right: Re[A]) | |
/** Star is a Kleene repetition of a regular expression. | |
* | |
* @param re | |
* a regular expression | |
*/ | |
case Star(re: Re[A]) | |
/** Returns a new regular expression replaced literals with indexed ones. | |
* | |
* @return | |
* the linearized regular expression | |
*/ | |
def linearize: Re[Indexed[A]] = | |
var index = 0 | |
def loop(node: Re[A]): Re[Indexed[A]] = node match | |
case Lit(value) => | |
index += 1 | |
Lit(Indexed(value, index)) | |
case Cat(left, right) => | |
Cat(loop(left), loop(right)) | |
case Alt(left, right) => | |
Alt(loop(left), loop(right)) | |
case Star(re) => | |
Star(loop(re)) | |
loop(this) | |
/** Checks whether this contains the epsilon string. | |
* | |
* @return | |
* true if this contains the epsilon string, or false otherwise | |
*/ | |
def containsEps: Boolean = this match | |
case Lit(value) => false | |
case Cat(left, right) => left.containsEps && right.containsEps | |
case Alt(left, right) => left.containsEps || right.containsEps | |
case Star(node) => true | |
/** Returns the set containing the first literals. | |
* | |
* @return | |
* the set containing the first literals | |
*/ | |
def firstSet: Set[A] = this match | |
case Lit(value) => Set(value) | |
case Cat(left, right) => | |
if left.containsEps then left.firstSet ++ right.firstSet | |
else left.firstSet | |
case Alt(left, right) => left.firstSet ++ right.firstSet | |
case Star(re) => re.firstSet | |
/** Returns the set containing the last literals. | |
* | |
* @return | |
* the set containing the last literals | |
*/ | |
def lastSet: Set[A] = this match | |
case Lit(value) => Set(value) | |
case Cat(left, right) => | |
if right.containsEps then left.lastSet ++ right.lastSet | |
else right.lastSet | |
case Alt(left, right) => left.lastSet ++ right.lastSet | |
case Star(re) => re.lastSet | |
/** Returns the set containing the 2-length factors. | |
* | |
* @return | |
* the set containing the 2-length factors | |
*/ | |
def factorSet: Set[(A, A)] = this match | |
case Lit(_) => Set.empty | |
case Cat(left, right) => | |
left.factorSet ++ | |
right.factorSet ++ | |
left.lastSet.flatMap(a => right.firstSet.map(b => (a, b))) | |
case Alt(left, right) => left.factorSet ++ right.factorSet | |
case Star(re) => | |
re.factorSet ++ | |
re.lastSet.flatMap(a => re.firstSet.map(b => (a, b))) | |
/** Returns the NFA corresponding to this by using Glushkov's construction. | |
* | |
* @return | |
* the NFA corresponding to this | |
*/ | |
def toNfa: Nfa[A, Option[Indexed[A]]] = | |
type Q = Option[Indexed[A]] | |
val linearized = linearize | |
val init: Q = None | |
val acceptSet: Set[Q] = | |
linearized.lastSet | |
.map(Some(_)) ++ Option.when(linearized.containsEps)(None).toSet | |
val transitionEdges: Set[(Q, A, Q)] = | |
linearized.firstSet.map(q => (None, q.value, Some(q))) ++ | |
linearized.factorSet.map((p, q) => (Some(p), q.value, Some(q))) | |
val transition = | |
transitionEdges.groupMap((p, a, _) => (p, a))((_, _, q) => q) | |
Nfa(init, acceptSet, transition) | |
@main | |
def main(): Unit = | |
val r = Re.Alt( | |
Re.Star(Re.Cat(Re.Lit('a'), Re.Star(Re.Cat(Re.Lit('a'), Re.Lit('b'))))), | |
Re.Star(Re.Cat(Re.Lit('b'), Re.Lit('a'))) | |
) | |
println(r.toNfa) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment