Skip to content

Instantly share code, notes, and snippets.

@makenowjust
Created April 7, 2024 18:14
Show Gist options
  • Save makenowjust/0651fe23a694a22e3de92df4f2cbdd68 to your computer and use it in GitHub Desktop.
Save makenowjust/0651fe23a694a22e3de92df4f2cbdd68 to your computer and use it in GitHub Desktop.
// An implementation of Glushkov's construction in Scala.
//
// See https://en.wikipedia.org/wiki/Glushkov%27s_construction_algorithm.
/** Nfa represents an non-deterministic finite state automaton.
*
* @tparam A
* an alphabet type
* @tparam Q
* a state type
*
* @param init
* an initial state
* @param acceptSet
* a set of accepting states
* @param transition
* a transition mapping
*/
final case class Nfa[A, Q](
init: Q,
acceptSet: Set[Q],
transition: Map[(Q, A), Set[Q]]
)
/** Indexed is a pair of a value and an index.
*
* @param value
* a value
* @param index
* an index
*/
final case class Indexed[A](value: A, index: Int)
/** Re represents a regular expression.
*
* @tparam A
* an alphabet type
*/
enum Re[A]:
/** Lit is a literal.
*
* @param value
* a literal value
*/
case Lit(value: A)
/** Cat is a concatenetion of regular expressions.
*
* @param left
* a left regular expression
* @param right
* a right regular expression
*/
case Cat(left: Re[A], right: Re[A])
/** Alt is a alternation of regular expressions.
*
* @param left
* a left regular expression
* @param right
* a right regular expression
*/
case Alt(left: Re[A], right: Re[A])
/** Star is a Kleene repetition of a regular expression.
*
* @param re
* a regular expression
*/
case Star(re: Re[A])
/** Returns a new regular expression replaced literals with indexed ones.
*
* @return
* the linearized regular expression
*/
def linearize: Re[Indexed[A]] =
var index = 0
def loop(node: Re[A]): Re[Indexed[A]] = node match
case Lit(value) =>
index += 1
Lit(Indexed(value, index))
case Cat(left, right) =>
Cat(loop(left), loop(right))
case Alt(left, right) =>
Alt(loop(left), loop(right))
case Star(re) =>
Star(loop(re))
loop(this)
/** Checks whether this contains the epsilon string.
*
* @return
* true if this contains the epsilon string, or false otherwise
*/
def containsEps: Boolean = this match
case Lit(value) => false
case Cat(left, right) => left.containsEps && right.containsEps
case Alt(left, right) => left.containsEps || right.containsEps
case Star(node) => true
/** Returns the set containing the first literals.
*
* @return
* the set containing the first literals
*/
def firstSet: Set[A] = this match
case Lit(value) => Set(value)
case Cat(left, right) =>
if left.containsEps then left.firstSet ++ right.firstSet
else left.firstSet
case Alt(left, right) => left.firstSet ++ right.firstSet
case Star(re) => re.firstSet
/** Returns the set containing the last literals.
*
* @return
* the set containing the last literals
*/
def lastSet: Set[A] = this match
case Lit(value) => Set(value)
case Cat(left, right) =>
if right.containsEps then left.lastSet ++ right.lastSet
else right.lastSet
case Alt(left, right) => left.lastSet ++ right.lastSet
case Star(re) => re.lastSet
/** Returns the set containing the 2-length factors.
*
* @return
* the set containing the 2-length factors
*/
def factorSet: Set[(A, A)] = this match
case Lit(_) => Set.empty
case Cat(left, right) =>
left.factorSet ++
right.factorSet ++
left.lastSet.flatMap(a => right.firstSet.map(b => (a, b)))
case Alt(left, right) => left.factorSet ++ right.factorSet
case Star(re) =>
re.factorSet ++
re.lastSet.flatMap(a => re.firstSet.map(b => (a, b)))
/** Returns the NFA corresponding to this by using Glushkov's construction.
*
* @return
* the NFA corresponding to this
*/
def toNfa: Nfa[A, Option[Indexed[A]]] =
type Q = Option[Indexed[A]]
val linearized = linearize
val init: Q = None
val acceptSet: Set[Q] =
linearized.lastSet
.map(Some(_)) ++ Option.when(linearized.containsEps)(None).toSet
val transitionEdges: Set[(Q, A, Q)] =
linearized.firstSet.map(q => (None, q.value, Some(q))) ++
linearized.factorSet.map((p, q) => (Some(p), q.value, Some(q)))
val transition =
transitionEdges.groupMap((p, a, _) => (p, a))((_, _, q) => q)
Nfa(init, acceptSet, transition)
@main
def main(): Unit =
val r = Re.Alt(
Re.Star(Re.Cat(Re.Lit('a'), Re.Star(Re.Cat(Re.Lit('a'), Re.Lit('b'))))),
Re.Star(Re.Cat(Re.Lit('b'), Re.Lit('a')))
)
println(r.toNfa)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment