Skip to content

Instantly share code, notes, and snippets.

@matklad
Forked from axefrog/0.suffixtree.cs
Created May 7, 2012 05:34

Revisions

  1. matklad revised this gist May 7, 2012. 1 changed file with 590 additions and 471 deletions.
    1,061 changes: 590 additions & 471 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -6,482 +6,601 @@

    namespace SuffixTreeAlgorithm
    {
    public class SuffixTree
    {
    static void Main()
    public class SuffixTree
    {
    /// <summary>
    /// Initializes an empty tree for <paramref name="word"/>. No suffixes are inserted here;
    /// call Build (or use the static Create helper) to populate the tree.
    /// </summary>
    /// <param name="word">The text whose suffixes the tree will index.</param>
    public SuffixTree(string word)
    {
        // The constructor must not call SuffixTree.Create: Create constructs a SuffixTree
        // itself, so calling it from here recursed until the stack overflowed.
        Word = word;
        RootNode = new Node(this);
        ActiveNode = RootNode;
        // Tracing is off by default; to turn it on again use: Message = TreeUpdate;
    }

    // Terminator character; null until Build assigns it.
    public char? CanonizationChar { get; set; }
    // The indexed text. Build may append the canonization character to it.
    public string Word { get; private set; }
    // Inner loop variable of Build: index where the suffix currently being inserted starts.
    private int CurrentSuffixStartIndex { get; set; }
    // Outer loop variable of Build: index of the character currently being appended.
    private int CurrentSuffixEndIndex { get; set; }
    // Node produced by the most recent edge split; Build resets it to null at the start of each outer iteration.
    private Node LastCreatedNodeInCurrentIteration { get; set; }
    // Incremented by Build when AddNextSuffix reports an implicit insertion, decremented (while positive) otherwise.
    private int UnresolvedSuffixes { get; set; }
    // Root of the tree, created in the constructor.
    public Node RootNode { get; private set; }
    // ActiveNode / ActiveEdge / DistanceIntoActiveEdge together form the "active point" the insert methods move around.
    private Node ActiveNode { get; set; }
    private Edge ActiveEdge { get; set; }
    private int DistanceIntoActiveEdge { get; set; }
    // Word[CurrentSuffixEndIndex], cached by Build once per outer iteration.
    private char LastCharacterOfCurrentSuffix { get; set; }
    // Counters handed out (post-incremented) by the Node and Edge constructors.
    private int NextNodeNumber { get; set; }
    private int NextEdgeNumber { get; set; }

    /// <summary>
    /// Console entry point: runs <see cref="Test"/> and then blocks until a key is pressed
    /// (presumably so the console window does not close before the output can be read).
    /// </summary>
    public static void Main()
    {
        SuffixTree.Test();
        // ReadKey(false) is the documented equivalent of the parameterless ReadKey():
        // the pressed key is still echoed to the console.
        Console.ReadKey(false);
    }

    private void TreeUpdate(string f, object[] args){
    /// <summary>
    /// Randomized self-check: generates up to 1000 random 20-character words over {a, b} and
    /// compares the distinct-substring count obtained from the suffix tree with the brute-force
    /// count. Stops at the first mismatch after printing the word and both counts, and always
    /// finishes by printing "Test ended".
    /// </summary>
    public static void Test()
    {
        const int rounds = 1000;
        const int wordLength = 20;
        var random = new Random();

        var round = 0;
        var mismatchFound = false;
        while (!mismatchFound && round < rounds)
        {
            // One random draw per character, in order.
            var letters = new char[wordLength];
            for (var position = 0; position < letters.Length; position++)
            {
                letters[position] = random.Next(2) == 0 ? 'a' : 'b';
            }
            var word = new string(letters);

            var fromTree = CountSubstringsWithSuffixTree(word);
            var fromBruteForce = CountSubstringsWithBruteForce(word);
            if (fromTree != fromBruteForce)
            {
                Console.WriteLine("Error");
                Console.WriteLine(word);
                Console.WriteLine(fromTree);
                Console.WriteLine(fromBruteForce);
                mismatchFound = true;
            }
            round++;
        }

        Console.WriteLine("Test ended");
    }

    /// <summary>
    /// Counts substrings of <paramref name="s"/> by building a suffix tree and summing the
    /// lengths of all its edges.
    /// </summary>
    /// <param name="s">The word to analyse.</param>
    /// <returns>The summed edge length minus <c>s.Length + 1</c>.</returns>
    public static int CountSubstringsWithSuffixTree(string s)
    {
        Node root = Create(s).RootNode;
        int totalEdgeLength = CountEdgesLength(root);
        // NOTE(review): the correction term presumably removes the s.Length + 1 substrings
        // that end in the terminator character appended by Build - confirm.
        int correction = s.Length + 1;
        return totalEdgeLength - correction;
    }

    /// <summary>
    /// Recursively sums <c>Edge.Length</c> over every edge reachable from <paramref name="node"/>.
    /// </summary>
    /// <param name="node">Root of the subtree to measure.</param>
    /// <returns>Total length of all edges in the subtree.</returns>
    private static int CountEdgesLength(Node node)
    {
        int total = 0;
        foreach (Edge child in node.Edges.Values)
        {
            total += child.Length;
            // Edges without a tail node are leaves and contribute nothing further.
            total += child.Tail == null ? 0 : CountEdgesLength(child.Tail);
        }
        return total;
    }

    /// <summary>
    /// Counts the distinct non-empty substrings of <paramref name="s"/> by enumerating every
    /// (start, length) pair and collecting the results in a set.
    /// </summary>
    /// <param name="s">The word to analyse; must not be null.</param>
    /// <returns>The number of distinct non-empty substrings (0 for the empty string).</returns>
    public static int CountSubstringsWithBruteForce(string s)
    {
        var seen = new HashSet<string>();
        for (int start = 0; start < s.Length; start++)
        {
            for (int length = 1; length <= s.Length - start; length++)
            {
                seen.Add(s.Substring(start, length));
            }
        }
        // HashSet<T> exposes Count directly; the LINQ Count() extension is not needed.
        return seen.Count;
    }

    /// <summary>
    /// Trace sink: writes a composite-format message and its arguments to the console.
    /// Its signature matches the Message event's delegate type.
    /// </summary>
    /// <param name="f">Composite format string.</param>
    /// <param name="args">Values substituted into <paramref name="f"/>.</param>
    private static void TreeUpdate(string f, object[] args)
    {
    Console.WriteLine(f, args);
    }
    // Terminator character; null until Build assigns it.
    public char? CanonizationChar { get; set; }
    // The indexed text. Build may append the canonization character to it.
    public string Word { get; private set; }
    // Inner loop variable of Build: index where the suffix currently being inserted starts.
    private int CurrentSuffixStartIndex { get; set; }
    // Outer loop variable of Build: index of the character currently being appended.
    private int CurrentSuffixEndIndex { get; set; }
    // Node produced by the most recent edge split; Build resets it to null at the start of each outer iteration.
    private Node LastCreatedNodeInCurrentIteration { get; set; }
    // Incremented by Build when AddNextSuffix reports an implicit insertion, decremented (while positive) otherwise.
    private int UnresolvedSuffixes { get; set; }
    // Root of the tree, created in the constructor.
    public Node RootNode { get; private set; }
    // ActiveNode / ActiveEdge / DistanceIntoActiveEdge together form the "active point" the insert methods move around.
    private Node ActiveNode { get; set; }
    private Edge ActiveEdge { get; set; }
    private int DistanceIntoActiveEdge { get; set; }
    // Word[CurrentSuffixEndIndex], cached by Build once per outer iteration.
    private char LastCharacterOfCurrentSuffix { get; set; }
    // Counters handed out (post-incremented) by the Node and Edge constructors.
    private int NextNodeNumber { get; set; }
    private int NextEdgeNumber { get; set; }

    /// <summary>
    /// Initializes an empty tree for <paramref name="word"/>: creates the root node, places the
    /// active point on it and routes trace messages to TreeUpdate.
    /// </summary>
    /// <param name="word">The text whose suffixes the tree will index.</param>
    public SuffixTree(string word)
    {
        var root = new Node(this);
        Word = word;
        RootNode = root;
        ActiveNode = root;
        Message = TreeUpdate;
    }

    /// <summary>Raised by <see cref="TriggerChanged"/>; the argument is this tree.</summary>
    public event Action<SuffixTree> Changed;

    /// <summary>Notifies <see cref="Changed"/> subscribers, if there are any.</summary>
    private void TriggerChanged()
    {
        // Snapshot the delegate so the null test and the call use the same subscriber list.
        var subscribers = Changed;
        if (subscribers == null)
        {
            return;
        }
        subscribers(this);
    }

    /// <summary>Raised by <see cref="SendMessage"/> with a composite format string and its arguments.</summary>
    public event Action<string, object[]> Message;

    /// <summary>Forwards a trace message to <see cref="Message"/> subscribers, if there are any.</summary>
    /// <param name="format">Composite format string.</param>
    /// <param name="args">Values for the placeholders in <paramref name="format"/>.</param>
    private void SendMessage(string format, params object[] args)
    {
        // Snapshot the delegate so the null test and the call use the same subscriber list.
        var listeners = Message;
        if (listeners == null)
        {
            return;
        }
        listeners(format, args);
    }

    /// <summary>Convenience factory: constructs a tree for <paramref name="word"/> and builds it.</summary>
    /// <param name="word">The text to index.</param>
    /// <param name="canonizationChar">Terminator character passed on to Build.</param>
    /// <returns>The built tree.</returns>
    public static SuffixTree Create(string word, char canonizationChar = '$')
    {
        var built = new SuffixTree(word);
        built.Build(canonizationChar);
        return built;
    }

    public void Build(char canonizationChar)
    {
    var n = Word.IndexOf(Word[Word.Length - 1]);
    var mustCanonize = n < Word.Length - 1;
    if(mustCanonize)
    {
    CanonizationChar = canonizationChar;
    Word = string.Concat(Word, canonizationChar);
    }

    for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++)
    {
    SendMessage("=== ITERATION {0} ===", CurrentSuffixEndIndex);
    LastCreatedNodeInCurrentIteration = null;
    LastCharacterOfCurrentSuffix = Word[CurrentSuffixEndIndex];

    for(CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++)
    {
    var wasImplicitlyAdded = !AddNextSuffix();

    /// <summary>Raised by <see cref="TriggerChanged"/>; the argument is this tree.</summary>
    public event Action<SuffixTree> Changed;

    /// <summary>Invokes the <see cref="Changed"/> subscribers when at least one is attached.</summary>
    private void TriggerChanged()
    {
        // Work on a local copy so the subscriber list cannot change between test and call.
        var snapshot = Changed;
        if (snapshot == null)
        {
            return;
        }
        snapshot.Invoke(this);
    }

    /// <summary>Raised by <see cref="SendMessage"/> with a composite format string and its arguments.</summary>
    public event Action<string, object[]> Message;

    /// <summary>Invokes the <see cref="Message"/> subscribers when at least one is attached.</summary>
    /// <param name="format">Composite format string.</param>
    /// <param name="args">Values for the placeholders in <paramref name="format"/>.</param>
    private void SendMessage(string format, params object[] args)
    {
        // Work on a local copy so the subscriber list cannot change between test and call.
        var snapshot = Message;
        if (snapshot == null)
        {
            return;
        }
        snapshot.Invoke(format, args);
    }

    /// <summary>Builds and returns a suffix tree for <paramref name="word"/>.</summary>
    /// <param name="word">The text to index.</param>
    /// <param name="canonizationChar">Terminator character handed to Build.</param>
    /// <returns>The fully built tree.</returns>
    public static SuffixTree Create(string word, char canonizationChar = '$')
    {
        SuffixTree result = new SuffixTree(word);
        result.Build(canonizationChar);
        return result;
    }

    public void Build(char canonizationChar)
    {
    CanonizationChar = canonizationChar;
    Word = string.Concat(Word, canonizationChar);
    for (CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++)
    {
    SendMessage("=== ITERATION {0} ===", CurrentSuffixEndIndex);
    LastCreatedNodeInCurrentIteration = null;
    LastCharacterOfCurrentSuffix = Word[CurrentSuffixEndIndex];

    for (CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes;
    CurrentSuffixStartIndex <= CurrentSuffixEndIndex;
    CurrentSuffixStartIndex++)
    {
    bool wasImplicitlyAdded = !AddNextSuffix();
    SendMessage("\n{0}", RenderTree());
    if(wasImplicitlyAdded)
    {
    UnresolvedSuffixes++;
    break;
    }
    if(UnresolvedSuffixes > 0)
    UnresolvedSuffixes--;
    }
    }
    }

    /// <summary>
    /// Inserts the suffix that starts at CurrentSuffixStartIndex and ends at CurrentSuffixEndIndex,
    /// starting from the current active point.
    /// </summary>
    /// <returns>
    /// true when a new edge was created; false when the character was already present in the
    /// tree (Build treats false as "implicitly added").
    /// </returns>
    /// <exception cref="Exception">The active point lies at or beyond the end of the active edge.</exception>
    private bool AddNextSuffix()
    {
        // Human-readable form of the suffix, used only by the trace message below.
        var prefixLength = CurrentSuffixEndIndex - CurrentSuffixStartIndex;
        var suffix = string.Concat(Word.Substring(CurrentSuffixStartIndex, prefixLength), "{", Word[CurrentSuffixEndIndex], "}");
        SendMessage("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix, CurrentSuffixStartIndex, CurrentSuffixEndIndex);
        SendMessage(" => ActiveNode: {0}", ActiveNode);
        SendMessage(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString());
        SendMessage(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge);
        SendMessage(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes);

        if (ActiveEdge != null)
        {
            // Sanity check: the active point must always sit strictly inside the active edge.
            if (DistanceIntoActiveEdge >= ActiveEdge.Length)
            {
                throw new Exception("BOUNDARY EXCEEDED");
            }
            return AddCurrentSuffixToActiveEdge();
        }

        if (GetExistingEdgeAndSetAsActive())
        {
            return false;
        }

        ActiveNode.AddNewEdge();
        TriggerChanged();

        UpdateActivePointAfterAddingNewEdge();
        return true;
    }

    /// <summary>
    /// Looks for an edge leaving the active node whose key is the current character. When one
    /// exists it becomes the active edge with a distance of 1 (then normalized).
    /// </summary>
    /// <returns>true when such an edge exists; otherwise false.</returns>
    private bool GetExistingEdgeAndSetAsActive()
    {
        Edge match;
        if (!ActiveNode.Edges.TryGetValue(LastCharacterOfCurrentSuffix, out match))
        {
            SendMessage("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix);
            return false;
        }

        SendMessage("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix);
        ActiveEdge = match;
        DistanceIntoActiveEdge = 1;
        TriggerChanged();

        NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);
        SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
        SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
        SendMessage(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes);
        return true;
    }

    /// <summary>
    /// Handles an insertion while the active point is inside an edge: either advances along the
    /// edge (character already present) or splits the edge and hangs a new leaf edge off the split node.
    /// </summary>
    /// <returns>true when the edge was split and a new edge added; false when the character matched.</returns>
    private bool AddCurrentSuffixToActiveEdge()
    {
        var edgeCharacter = Word[ActiveEdge.StartIndex + DistanceIntoActiveEdge];
        if (edgeCharacter != LastCharacterOfCurrentSuffix)
        {
            // Mismatch: branch off at the active point.
            SplitActiveEdge();
            ActiveEdge.Tail.AddNewEdge();
            TriggerChanged();

            UpdateActivePointAfterAddingNewEdge();
            return true;
        }

        SendMessage("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix);
        DistanceIntoActiveEdge++;
        TriggerChanged();

        SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
        NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);
        return false;
    }

    /// <summary>
    /// Moves the active point after a new edge has been added. When the active node is not the
    /// root, the linked node (or the root) is followed; at the root the distance is shortened by
    /// one and the active edge re-selected from the next suffix's first character.
    /// </summary>
    private void UpdateActivePointAfterAddingNewEdge()
    {
        if (!ReferenceEquals(ActiveNode, RootNode))
        {
            UpdateActivePointToLinkedNodeOrRoot();
            return;
        }

        if (DistanceIntoActiveEdge <= 0)
        {
            return;
        }

        SendMessage("New edge has been added and the active node is root. The active edge will now be updated.");
        DistanceIntoActiveEdge--;
        SendMessage(" => DistanceIntoActiveEdge decremented to: {0}", DistanceIntoActiveEdge);
        if (DistanceIntoActiveEdge == 0)
        {
            ActiveEdge = null;
        }
        else
        {
            ActiveEdge = ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
        }
        SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
        TriggerChanged();

        NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(CurrentSuffixStartIndex + 1);
    }

    /// <summary>
    /// While the active distance reaches or exceeds the active edge's length, hops to the edge's
    /// tail node (or the root when the edge has none) and selects the follow-up edge.
    /// </summary>
    /// <param name="firstIndexOfOriginalActiveEdge">
    /// Index into Word of the first character of the path being walked; characters at
    /// increasing offsets from it pick the next edge at each hop.
    /// </param>
    private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge)
    {
        var consumed = 0;
        while (ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
        {
            SendMessage("Active point is at or beyond edge boundary and will be moved until it falls inside an edge boundary");
            DistanceIntoActiveEdge -= ActiveEdge.Length;
            ActiveNode = ActiveEdge.Tail ?? RootNode;
            if (DistanceIntoActiveEdge != 0)
            {
                // Still some distance left: continue on the edge selected by the next path character.
                consumed += ActiveEdge.Length;
                var nextCharacter = Word[firstIndexOfOriginalActiveEdge + consumed];
                ActiveEdge = ActiveNode.Edges[nextCharacter];
            }
            else
            {
                // Landed exactly on a node.
                ActiveEdge = null;
            }
            TriggerChanged();
        }
    }

    /// <summary>
    /// Splits the active edge at the active point, makes the upper half the new active edge and
    /// links the node created by the previous split in this iteration (if any) to the new node.
    /// </summary>
    private void SplitActiveEdge()
    {
        var splitIndex = ActiveEdge.StartIndex + DistanceIntoActiveEdge;
        ActiveEdge = ActiveEdge.SplitAtIndex(splitIndex);
        SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
        TriggerChanged();

        var previousNode = LastCreatedNodeInCurrentIteration;
        if (previousNode != null)
        {
            previousNode.LinkedNode = ActiveEdge.Tail;
            SendMessage(" => Connected {0} to {1}", previousNode, ActiveEdge.Tail);
            TriggerChanged();
        }

        // Remember the new node so the next split in this iteration can link to it.
        LastCreatedNodeInCurrentIteration = ActiveEdge.Tail;
    }

    /// <summary>
    /// Moves the active node to its linked node, or to the root when it has none, then
    /// re-selects the active edge from the new node and normalizes the active point.
    /// </summary>
    private void UpdateActivePointToLinkedNodeOrRoot()
    {
        SendMessage("The linked node for active node {0} is {1}", ActiveNode, ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString());
        if (ActiveNode.LinkedNode == null)
        {
            ActiveNode = RootNode;
            SendMessage(" => ActiveNode is now ROOT", ActiveNode);
        }
        else
        {
            ActiveNode = ActiveNode.LinkedNode;
            SendMessage(" => ActiveNode is now: {0}", ActiveNode);
        }
        TriggerChanged();

        if (ActiveEdge == null)
        {
            return;
        }

        // Pick the edge of the new active node that begins with the same character as the old edge.
        var originalStart = ActiveEdge.StartIndex;
        ActiveEdge = ActiveNode.Edges[Word[originalStart]];
        TriggerChanged();
        NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(originalStart);
    }

    /// <summary>Renders the tree, starting at the root node, as text.</summary>
    /// <returns>The text produced by the root node's RenderTree.</returns>
    public string RenderTree()
    {
        var buffer = new StringWriter();
        RootNode.RenderTree(buffer, string.Empty);
        return buffer.ToString();
    }

    /// <summary>
    /// Produces a Graphviz "digraph" description of the tree: one node statement per allocated
    /// node number (the active node filled cyan, the others light grey) followed by the edges
    /// emitted by the root node.
    /// </summary>
    /// <returns>The DOT source text.</returns>
    public string WriteDotGraph()
    {
        var dot = new StringBuilder();
        dot.AppendLine("digraph {");
        dot.AppendLine("rankdir = LR;");
        dot.AppendLine("edge [arrowsize=0.5,fontsize=11];");

        var nodeNumber = 0;
        while (nodeNumber < NextNodeNumber)
        {
            var fill = ActiveNode.NodeNumber == nodeNumber ? "cyan" : "lightgrey";
            dot.AppendFormat("node{0} [label=\"{0}\",style=filled,fillcolor={1},shape=circle,width=.1,height=.1,fontsize=11,margin=0.01];", nodeNumber, fill);
            dot.AppendLine();
            nodeNumber++;
        }

        RootNode.WriteDotGraph(dot);
        dot.AppendLine("}");
        return dot.ToString();
    }

    /// <summary>Collects the substrings represented by the tree into a set.</summary>
    /// <returns>A new set filled by the recursive overload, starting at the root with an empty prefix.</returns>
    public HashSet<string> ExtractAllSubstrings()
    {
        var collected = new HashSet<string>();
        ExtractAllSubstrings(string.Empty, collected, RootNode);
        return collected;
    }

    /// <summary>
    /// Adds to <paramref name="set"/> every prefix of every edge label below
    /// <paramref name="node"/>, each prepended with <paramref name="str"/>, and recurses into child nodes.
    /// </summary>
    /// <param name="str">Text spelled out on the path from the root to <paramref name="node"/>.</param>
    /// <param name="set">Accumulator for the substrings.</param>
    /// <param name="node">Node whose outgoing edges are processed.</param>
    private void ExtractAllSubstrings(string str, HashSet<string> set, Node node)
    {
        foreach (var child in node.Edges.Values)
        {
            var label = child.StringWithoutCanonizationChar;

            // Open-ended edges are assumed to end in the canonization character, which is skipped.
            var usableLength = child.Length;
            if (!child.EndIndex.HasValue && CanonizationChar.HasValue)
            {
                usableLength = child.Length - 1;
            }

            for (var taken = 1; taken <= usableLength; taken++)
            {
                set.Add(str + label.Substring(0, taken));
            }

            if (child.Tail != null)
            {
                ExtractAllSubstrings(str + child.StringWithoutCanonizationChar, set, child.Tail);
            }
        }
    }

    /// <summary>Lists the path strings of the tree, each cut off after at most <paramref name="maxLength"/> characters.</summary>
    /// <param name="maxLength">Maximum length of each returned string; defaults to the length of Word.</param>
    /// <returns>A new list filled by the recursive overload.</returns>
    public List<string> ExtractSubstringsForIndexing(int? maxLength = null)
    {
        var results = new List<string>();
        var limit = maxLength.HasValue ? maxLength.Value : Word.Length;
        ExtractSubstringsForIndexing("", results, limit, RootNode);
        return results;
    }

    /// <summary>
    /// Walks the edges below <paramref name="node"/>, extending <paramref name="str"/> with up to
    /// <paramref name="len"/> characters of each edge. A string is added to <paramref name="list"/>
    /// when the budget is used up or the edge has no tail node; otherwise the walk continues below the edge.
    /// </summary>
    /// <param name="str">Text accumulated so far.</param>
    /// <param name="list">Accumulator for finished strings.</param>
    /// <param name="len">Remaining character budget.</param>
    /// <param name="node">Node whose outgoing edges are processed.</param>
    private void ExtractSubstringsForIndexing(string str, List<string> list, int len, Node node)
    {
        foreach (var child in node.Edges.Values)
        {
            var take = Math.Min(len, child.Length);
            var extended = str + Word.Substring(child.StartIndex, take);

            var canDescend = len > child.Length && child.Tail != null;
            if (!canDescend)
            {
                list.Add(extended);
                continue;
            }

            ExtractSubstringsForIndexing(extended, list, len - child.Length, child.Tail);
        }
    }

    /// <summary>
    /// A labelled edge of the suffix tree. The label is the slice of the tree's Word from
    /// <see cref="StartIndex"/> to <see cref="EndIndex"/>; a null EndIndex marks an open-ended edge.
    /// </summary>
    public class Edge
    {
        private readonly SuffixTree _tree;

        /// <summary>Creates an open-ended edge that starts at the tree's current end index.</summary>
        /// <param name="tree">Owning tree; also supplies the edge number.</param>
        /// <param name="head">Node the edge leaves from.</param>
        public Edge(SuffixTree tree, Node head)
        {
            _tree = tree;
            Head = head;
            StartIndex = tree.CurrentSuffixEndIndex;
            EdgeNumber = _tree.NextEdgeNumber++;
        }

        public Node Head { get; private set; }
        public Node Tail { get; private set; }
        public int StartIndex { get; private set; }
        public int? EndIndex { get; set; }
        public int EdgeNumber { get; private set; }

        /// <summary>Number of characters on the edge; open-ended edges run to the end of Word.</summary>
        public int Length { get { return (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1; } }

        /// <summary>
        /// Last index of Word covered by the label as currently visible: EndIndex when set,
        /// otherwise the tree's current end index, clamped to the last valid index of Word.
        /// The clamp matters once Build has finished: its loop leaves CurrentSuffixEndIndex equal
        /// to Word.Length, which previously made Substring throw for every leaf edge.
        /// </summary>
        private int VisibleEndIndex
        {
            get
            {
                if (EndIndex.HasValue)
                {
                    return EndIndex.Value;
                }
                int lastValid = _tree.Word.Length - 1;
                return _tree.CurrentSuffixEndIndex < lastValid ? _tree.CurrentSuffixEndIndex : lastValid;
            }
        }

        /// <summary>
        /// Splits this edge in front of <paramref name="index"/>: a new edge takes over the part
        /// before the index and ends in a new node, and this edge becomes that node's child
        /// starting at <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Index into Word of the first character that stays on this edge.</param>
        /// <returns>The newly created upper edge.</returns>
        public Edge SplitAtIndex(int index)
        {
            _tree.SendMessage("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]);
            var newEdge = new Edge(_tree, Head);
            var newNode = new Node(_tree);
            newEdge.Tail = newNode;
            newEdge.StartIndex = StartIndex;
            newEdge.EndIndex = index - 1;
            Head = newNode;
            StartIndex = index;
            newNode.Edges.Add(_tree.Word[StartIndex], this);
            // Replace this edge with the new upper edge in the original head's dictionary.
            newEdge.Head.Edges[_tree.Word[newEdge.StartIndex]] = newEdge;
            _tree.SendMessage(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode, this);
            return newEdge;
        }

        /// <summary>Returns the label followed by "(start,end)", with "#" standing for an open end.</summary>
        public override string ToString()
        {
            return string.Concat(String, "(",
                StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")");
        }

        /// <summary>
        /// The label; for open-ended edges the visible end is moved back by one position when the
        /// tree has a canonization character.
        /// </summary>
        public string StringWithoutCanonizationChar
        {
            get
            {
                int end;
                if (EndIndex.HasValue)
                {
                    end = EndIndex.Value;
                }
                else
                {
                    end = _tree.CurrentSuffixEndIndex - (_tree.CanonizationChar.HasValue ? 1 : 0);
                    // Same clamp as VisibleEndIndex: never read past the end of Word.
                    int lastValid = _tree.Word.Length - 1;
                    if (end > lastValid)
                    {
                        end = lastValid;
                    }
                }
                return _tree.Word.Substring(StartIndex, end - StartIndex + 1);
            }
        }

        /// <summary>The label as currently visible (see <see cref="VisibleEndIndex"/>).</summary>
        public string String
        {
            get { return _tree.Word.Substring(StartIndex, VisibleEndIndex - StartIndex + 1); }
        }

        /// <summary>Writes the label and, when the edge has a tail node, a connecting line and that node's subtree.</summary>
        /// <param name="writer">Destination.</param>
        /// <param name="prefix">Indentation carried down to the subtree.</param>
        /// <param name="maxEdgeLength">Longest sibling label; used to align the tail nodes.</param>
        public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength)
        {
            var strEdge = String;
            writer.Write(strEdge);
            if (Tail == null)
            {
                writer.WriteLine();
            }
            else
            {
                var line = new string(RenderChars.HorizontalLine, maxEdgeLength - strEdge.Length + 1);
                writer.Write(line);
                Tail.RenderTree(writer, string.Concat(prefix, new string(' ', strEdge.Length + line.Length)));
            }
        }

        /// <summary>Appends the DOT statements for this edge (and, recursively, its tail node) to <paramref name="sb"/>.</summary>
        /// <param name="sb">Destination buffer.</param>
        public void WriteDotGraph(StringBuilder sb)
        {
            // Leaf edges need an explicit point-shaped target node.
            if (Tail == null)
                sb.AppendFormat("leaf{0} [label=\"\",shape=point]", EdgeNumber).AppendLine();
            string label, weight, color;
            if (_tree.ActiveEdge != null && ReferenceEquals(this, _tree.ActiveEdge))
            {
                // The active edge is highlighted; the label also shows how far the active point is into it.
                if (_tree.ActiveEdge.Length == 0)
                    label = "";
                else if (_tree.DistanceIntoActiveEdge > Length)
                    label = "<<FONT COLOR=\"red\" SIZE=\"11\"><B>" + String + "</B> (" + _tree.DistanceIntoActiveEdge + ")</FONT>>";
                else if (_tree.DistanceIntoActiveEdge == Length)
                    label = "<<FONT COLOR=\"red\" SIZE=\"11\">" + String + "</FONT>>";
                else if (_tree.DistanceIntoActiveEdge > 0)
                    label = "<<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\"><TR><TD><FONT COLOR=\"blue\"><B>" + String.Substring(0, _tree.DistanceIntoActiveEdge) + "</B></FONT></TD><TD COLOR=\"black\">" + String.Substring(_tree.DistanceIntoActiveEdge) + "</TD></TR></TABLE>>";
                else
                    label = "\"" + String + "\"";
                color = "blue";
                weight = "5";
            }
            else
            {
                label = "\"" + String + "\"";
                color = "black";
                weight = "3";
            }
            var tail = Tail == null ? "leaf" + EdgeNumber : "node" + Tail.NodeNumber;
            sb.AppendFormat("node{0} -> {1} [label={2},weight={3},color={4},size=11]", Head.NodeNumber, tail, label, weight, color).AppendLine();
            if (Tail != null)
                Tail.WriteDotGraph(sb);
        }
    }

    /// <summary>
    /// A node of the suffix tree: a numbered vertex with outgoing edges keyed by a character
    /// and an optional link to another node.
    /// </summary>
    public class Node
    {
        private readonly SuffixTree _tree;

        /// <summary>Creates a node without edges and takes the next node number from <paramref name="tree"/>.</summary>
        public Node(SuffixTree tree)
        {
            _tree = tree;
            Edges = new Dictionary<char, Edge>();
            NodeNumber = _tree.NextNodeNumber;
            _tree.NextNodeNumber = NodeNumber + 1;
        }

        public Dictionary<char, Edge> Edges { get; private set; }
        public Node LinkedNode { get; set; }
        public int NodeNumber { get; private set; }

        /// <summary>Adds a new edge leaving this node, keyed by the character at the tree's current end index.</summary>
        public void AddNewEdge()
        {
            _tree.SendMessage("Adding new edge to {0}", this);
            var created = new Edge(_tree, this);
            var key = _tree.Word[_tree.CurrentSuffixEndIndex];
            Edges.Add(key, created);
            _tree.SendMessage(" => {0} --> {1}", this, created);
        }

        /// <summary>Writes this node's zero-padded number and its edges (ordered by first character) with box-drawing connectors.</summary>
        /// <param name="writer">Destination.</param>
        /// <param name="prefix">Indentation written in front of every edge after the first.</param>
        public void RenderTree(TextWriter writer, string prefix)
        {
            // Pad the number to the width of the highest number handed out so far.
            var digits = _tree.NextNodeNumber.ToString().Length;
            var nodeLabel = "(" + NodeNumber.ToString(new string('0', digits)) + ")";
            writer.Write(nodeLabel);

            var ordered = Edges.Values.OrderBy(e => _tree.Word[e.StartIndex]).ToArray();
            if (ordered.Length == 0)
            {
                return;
            }

            var padded = prefix + new string(' ', nodeLabel.Length);
            var widest = ordered.Max(e => (e.EndIndex ?? _tree.CurrentSuffixEndIndex) - e.StartIndex + 1);
            var lastPosition = ordered.Length - 1;
            for (var position = 0; position <= lastPosition; position++)
            {
                var isLast = position == lastPosition;
                char connector;
                if (position == 0)
                {
                    // The first edge continues on the node's own line.
                    connector = isLast ? RenderChars.HorizontalLine : RenderChars.TJunctionDown;
                }
                else
                {
                    writer.Write(padded);
                    connector = isLast ? RenderChars.CornerRight : RenderChars.TJunctionRight;
                }
                // A vertical line is carried down only while more siblings follow.
                var extender = isLast ? ' ' : RenderChars.VerticalLine;

                writer.Write(string.Concat(connector, RenderChars.HorizontalLine));
                ordered[position].RenderTree(writer, string.Concat(padded, extender, ' '), widest);
            }
        }

        /// <summary>Returns "node #" followed by the node number.</summary>
        public override string ToString()
        {
            return "node #" + NodeNumber;
        }

        /// <summary>Appends the DOT statements for the link to <see cref="LinkedNode"/> (if any) and for every outgoing edge.</summary>
        /// <param name="sb">Destination buffer.</param>
        public void WriteDotGraph(StringBuilder sb)
        {
            if (LinkedNode != null)
            {
                sb.AppendFormat("node{0} -> node{1} [label=\"\",weight=.01,style=dotted]", NodeNumber, LinkedNode.NodeNumber);
                sb.AppendLine();
            }
            foreach (var outgoing in Edges.Values)
            {
                outgoing.WriteDotGraph(sb);
            }
        }
    }

    /// <summary>Box-drawing characters used by the RenderTree methods to draw the tree as text.</summary>
    public static class RenderChars
    {
    // Connector for the first of several edges.
    public const char TJunctionDown = '┬';
    // Connector for a single edge, and filler between an edge label and its tail node.
    public const char HorizontalLine = '─';
    // Continuation line carried down while more sibling edges follow.
    public const char VerticalLine = '│';
    // Connector for a middle edge.
    public const char TJunctionRight = '├';
    // Connector for the last of several edges.
    public const char CornerRight = '└';
    }
    }
    if (wasImplicitlyAdded)
    {
    UnresolvedSuffixes++;
    break;
    }
    if (UnresolvedSuffixes > 0)
    UnresolvedSuffixes--;
    }
    }
    }

    /// <summary>
    /// Inserts the suffix that starts at CurrentSuffixStartIndex and ends at CurrentSuffixEndIndex,
    /// starting from the current active point.
    /// </summary>
    /// <returns>
    /// true when a new edge was created; false when the character was already present in the
    /// tree (Build treats false as "implicitly added").
    /// </returns>
    /// <exception cref="Exception">The active point lies at or beyond the end of the active edge.</exception>
    private bool AddNextSuffix()
    {
    // Human-readable form of the suffix; only used by the trace message below.
    string suffix =
    string.Concat(Word.Substring(CurrentSuffixStartIndex, CurrentSuffixEndIndex - CurrentSuffixStartIndex),
    "{", Word[CurrentSuffixEndIndex], "}");
    SendMessage("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix,
    CurrentSuffixStartIndex, CurrentSuffixEndIndex);
    SendMessage(" => ActiveNode: {0}", ActiveNode);
    SendMessage(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString());
    SendMessage(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge);
    SendMessage(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes);
    // Sanity check: the active point must sit strictly inside the active edge.
    if (ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    throw new Exception("BOUNDARY EXCEEDED");

    if (ActiveEdge != null)
    return AddCurrentSuffixToActiveEdge();
    // No active edge, so the active point is exactly on ActiveNode: link the node created
    // earlier in this iteration (if any) to it.
    if (LastCreatedNodeInCurrentIteration != null)
    {
    LastCreatedNodeInCurrentIteration.LinkedNode = ActiveNode;
    SendMessage(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveNode);
    TriggerChanged();
    }

    // An edge for the current character already exists: nothing to add explicitly.
    if (GetExistingEdgeAndSetAsActive())
    return false;

    ActiveNode.AddNewEdge();
    TriggerChanged();

    UpdateActivePointAfterAddingNewEdge();
    return true;
    }

    /// <summary>
    /// Looks for an edge leaving the active node whose key is the current character. When one
    /// exists it becomes the active edge with a distance of 1, after which the active point is normalized.
    /// </summary>
    /// <returns>true when such an edge exists; otherwise false.</returns>
    private bool GetExistingEdgeAndSetAsActive()
    {
    Edge edge;
    if (ActiveNode.Edges.TryGetValue(LastCharacterOfCurrentSuffix, out edge))
    {
    SendMessage("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode,
    LastCharacterOfCurrentSuffix);
    ActiveEdge = edge;
    DistanceIntoActiveEdge = 1;
    TriggerChanged();

    // A one-character edge is consumed completely by the step above; normalization then moves to its tail.
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);
    SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
    SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    SendMessage(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes);

    return true;
    }
    SendMessage("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix);
    return false;
    }

    /// <summary>
    /// Handles an insertion while the active point is inside an edge: either advances along the
    /// edge (character already present) or splits the edge and hangs a new edge off the split node.
    /// </summary>
    /// <returns>true when the edge was split and a new edge added; false when the character matched.</returns>
    private bool AddCurrentSuffixToActiveEdge()
    {
    char nextCharacterOnEdge = Word[ActiveEdge.StartIndex + DistanceIntoActiveEdge];
    if (nextCharacterOnEdge == LastCharacterOfCurrentSuffix)
    {
    SendMessage("The next character on the current edge is '{0}' (suffix added implicitly)",
    LastCharacterOfCurrentSuffix);
    DistanceIntoActiveEdge++;
    TriggerChanged();

    SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);
    return false;
    }

    // Mismatch: branch off at the active point. After the split, ActiveEdge is the upper half
    // and its Tail is the newly created node.
    SplitActiveEdge();
    ActiveEdge.Tail.AddNewEdge();
    TriggerChanged();

    UpdateActivePointAfterAddingNewEdge();

    return true;
    }

    /// <summary>
    /// Moves the active point after a new edge has been added. When the active node is not the
    /// root, the linked node (or the root) is followed; at the root the distance is shortened by
    /// one and the active edge re-selected from the next suffix's first character.
    /// </summary>
    private void UpdateActivePointAfterAddingNewEdge()
    {
    if (ReferenceEquals(ActiveNode, RootNode))
    {
    if (DistanceIntoActiveEdge > 0)
    {
    SendMessage(
    "New edge has been added and the active node is root. The active edge will now be updated.");
    DistanceIntoActiveEdge--;
    SendMessage(" => DistanceIntoActiveEdge decremented to: {0}", DistanceIntoActiveEdge);
    // The next suffix starts one character later, so that character selects the edge.
    ActiveEdge = DistanceIntoActiveEdge == 0
    ? null
    : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
    TriggerChanged();

    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(CurrentSuffixStartIndex + 1);
    }
    }
    else
    UpdateActivePointToLinkedNodeOrRoot();
    }

    /// <summary>
    /// While the active distance reaches or exceeds the active edge's length, hops to the edge's
    /// tail node (or the root when the edge has none) and selects the follow-up edge.
    /// </summary>
    /// <param name="firstIndexOfOriginalActiveEdge">
    /// Index into Word of the first character of the path being walked; characters at
    /// increasing offsets from it pick the next edge at each hop.
    /// </param>
    private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge)
    {
    // Number of characters consumed by the edges hopped over so far.
    int walkDistance = 0;
    while (ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    {
    SendMessage(
    "Active point is at or beyond edge boundary and will be moved until it falls inside an edge boundary");
    DistanceIntoActiveEdge -= ActiveEdge.Length;
    ActiveNode = ActiveEdge.Tail ?? RootNode;
    if (DistanceIntoActiveEdge == 0)
    // Landed exactly on a node.
    ActiveEdge = null;
    else
    {
    // ActiveEdge still refers to the edge just left, so its length is what was consumed.
    walkDistance += ActiveEdge.Length;
    char c = Word[firstIndexOfOriginalActiveEdge + walkDistance];
    ActiveEdge = ActiveNode.Edges[c];
    }
    TriggerChanged();
    }
    }

    /// <summary>
    /// Splits the active edge at the active point, makes the upper half the new active edge and
    /// links the node created by the previous split in this iteration (if any) to the new node.
    /// </summary>
    private void SplitActiveEdge()
    {
        ActiveEdge = ActiveEdge.SplitAtIndex(ActiveEdge.StartIndex + DistanceIntoActiveEdge);
        SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
        TriggerChanged();
        if (LastCreatedNodeInCurrentIteration != null)
        {
            LastCreatedNodeInCurrentIteration.LinkedNode = ActiveEdge.Tail;
            SendMessage(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveEdge.Tail);
            TriggerChanged();
        }

        // Remember the new node so the next split in this iteration can link to it.
        // (This assignment used to appear twice in a row; the duplicate had no effect.)
        LastCreatedNodeInCurrentIteration = ActiveEdge.Tail;
    }



    /// <summary>
    /// Moves the active node to its linked node, or to the root when it has none, then
    /// re-selects the active edge from the new node and normalizes the active point.
    /// </summary>
    private void UpdateActivePointToLinkedNodeOrRoot()
    {
    SendMessage("The linked node for active node {0} is {1}", ActiveNode,
    ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString());
    if (ActiveNode.LinkedNode != null)
    {
    ActiveNode = ActiveNode.LinkedNode;
    SendMessage(" => ActiveNode is now: {0}", ActiveNode);
    }
    else
    {
    ActiveNode = RootNode;
    SendMessage(" => ActiveNode is now ROOT", ActiveNode);
    }
    TriggerChanged();

    if (ActiveEdge != null)
    {
    // Pick the edge of the new active node that begins with the same character as the old edge.
    int firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex;
    ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]];
    TriggerChanged();
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge);
    }
    }

    /// <summary>Renders the whole tree as text by delegating to the root node.</summary>
    /// <returns>Everything the root node's RenderTree wrote.</returns>
    public string RenderTree()
    {
        StringWriter output = new StringWriter();
        Node start = RootNode;
        start.RenderTree(output, string.Empty);
        return output.ToString();
    }

    /// <summary>
    /// Produces a Graphviz "digraph" description of the tree: one node statement per allocated
    /// node number (the active node filled cyan, the others light grey) followed by the edges
    /// emitted by the root node.
    /// </summary>
    /// <returns>The DOT source text.</returns>
    public string WriteDotGraph()
    {
        const string nodeFormat =
            "node{0} [label=\"{0}\",style=filled,fillcolor={1},shape=circle,width=.1,height=.1,fontsize=11,margin=0.01];";

        var graph = new StringBuilder();
        graph.AppendLine("digraph {");
        graph.AppendLine("rankdir = LR;");
        graph.AppendLine("edge [arrowsize=0.5,fontsize=11];");

        for (int number = 0; number < NextNodeNumber; number++)
        {
            bool isActive = ActiveNode.NodeNumber == number;
            graph.AppendFormat(nodeFormat, number, isActive ? "cyan" : "lightgrey").AppendLine();
        }

        RootNode.WriteDotGraph(graph);
        graph.AppendLine("}");
        return graph.ToString();
    }

    /// <summary>Gathers the substrings represented by the tree.</summary>
    /// <returns>A new set populated by the recursive overload, starting at the root with an empty prefix.</returns>
    public HashSet<string> ExtractAllSubstrings()
    {
        HashSet<string> found = new HashSet<string>();
        ExtractAllSubstrings(string.Empty, found, RootNode);
        return found;
    }

    /// <summary>
    /// Adds to <paramref name="set"/> every prefix of every edge label below
    /// <paramref name="node"/>, each prepended with <paramref name="str"/>, and recurses into child nodes.
    /// </summary>
    /// <param name="str">Text spelled out on the path from the root to <paramref name="node"/>.</param>
    /// <param name="set">Accumulator for the substrings.</param>
    /// <param name="node">Node whose outgoing edges are processed.</param>
    private void ExtractAllSubstrings(string str, HashSet<string> set, Node node)
    {
        foreach (Edge outgoing in node.Edges.Values)
        {
            string label = outgoing.StringWithoutCanonizationChar;

            // Open-ended edges are assumed to carry a trailing canonization character, which is skipped.
            bool hasTrailingTerminator = !outgoing.EndIndex.HasValue && CanonizationChar.HasValue;
            int usable = hasTrailingTerminator ? outgoing.Length - 1 : outgoing.Length;

            int taken = 0;
            while (taken < usable)
            {
                taken++;
                set.Add(str + label.Substring(0, taken));
            }

            if (outgoing.Tail == null)
            {
                continue;
            }
            ExtractAllSubstrings(str + outgoing.StringWithoutCanonizationChar, set, outgoing.Tail);
        }
    }

    /// <summary>Lists the path strings of the tree, each cut off after at most <paramref name="maxLength"/> characters.</summary>
    /// <param name="maxLength">Maximum length of each returned string; when null, the length of Word is used.</param>
    /// <returns>A new list populated by the recursive overload.</returns>
    public List<string> ExtractSubstringsForIndexing(int? maxLength = null)
    {
        List<string> output = new List<string>();
        int budget = maxLength.HasValue ? maxLength.Value : Word.Length;
        ExtractSubstringsForIndexing(string.Empty, output, budget, RootNode);
        return output;
    }

    /// <summary>
    /// Walks the edges below <paramref name="node"/>, extending <paramref name="str"/> with up to
    /// <paramref name="len"/> characters of each edge. A string is added to <paramref name="list"/>
    /// when the budget is used up or the edge has no tail node; otherwise the walk continues below the edge.
    /// </summary>
    /// <param name="str">Text accumulated so far.</param>
    /// <param name="list">Accumulator for finished strings.</param>
    /// <param name="len">Remaining character budget.</param>
    /// <param name="node">Node whose outgoing edges are processed.</param>
    private void ExtractSubstringsForIndexing(string str, List<string> list, int len, Node node)
    {
        foreach (Edge outgoing in node.Edges.Values)
        {
            int edgeLength = outgoing.Length;
            int take = len < edgeLength ? len : edgeLength;
            string grown = str + Word.Substring(outgoing.StartIndex, take);

            if (len <= edgeLength || outgoing.Tail == null)
            {
                // Budget exhausted on this edge, or nothing below it.
                list.Add(grown);
            }
            else
            {
                ExtractSubstringsForIndexing(grown, list, len - edgeLength, outgoing.Tail);
            }
        }
    }

    #region Nested type: Edge

    /// <summary>
    /// A labelled edge of the suffix tree. The label is the slice of the tree's Word from
    /// <see cref="StartIndex"/> to <see cref="EndIndex"/>; a null EndIndex marks an open-ended edge.
    /// </summary>
    public class Edge
    {
        private readonly SuffixTree _tree;

        /// <summary>Creates an open-ended edge that starts at the tree's current end index.</summary>
        /// <param name="tree">Owning tree; also supplies the edge number.</param>
        /// <param name="head">Node the edge leaves from.</param>
        public Edge(SuffixTree tree, Node head)
        {
            _tree = tree;
            Head = head;
            StartIndex = tree.CurrentSuffixEndIndex;
            EdgeNumber = _tree.NextEdgeNumber++;
        }

        public Node Head { get; private set; }
        public Node Tail { get; private set; }
        public int StartIndex { get; private set; }
        public int? EndIndex { get; set; }
        public int EdgeNumber { get; private set; }

        /// <summary>Number of characters on the edge; open-ended edges run to the end of Word.</summary>
        public int Length
        {
            get { return (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1; }
        }

        /// <summary>
        /// Last index of Word covered by the label as currently visible: EndIndex when set,
        /// otherwise the tree's current end index, clamped to the last valid index of Word.
        /// The clamp matters once Build has finished: its loop leaves CurrentSuffixEndIndex equal
        /// to Word.Length, which previously made Substring throw for every leaf edge.
        /// </summary>
        private int VisibleEndIndex
        {
            get
            {
                if (EndIndex.HasValue)
                {
                    return EndIndex.Value;
                }
                int lastValid = _tree.Word.Length - 1;
                return _tree.CurrentSuffixEndIndex < lastValid ? _tree.CurrentSuffixEndIndex : lastValid;
            }
        }

        /// <summary>
        /// The label; for open-ended edges the visible end is moved back by one position when the
        /// tree has a canonization character.
        /// </summary>
        public string StringWithoutCanonizationChar
        {
            get
            {
                int end;
                if (EndIndex.HasValue)
                {
                    end = EndIndex.Value;
                }
                else
                {
                    end = _tree.CurrentSuffixEndIndex - (_tree.CanonizationChar.HasValue ? 1 : 0);
                    // Same clamp as VisibleEndIndex: never read past the end of Word.
                    int lastValid = _tree.Word.Length - 1;
                    if (end > lastValid)
                    {
                        end = lastValid;
                    }
                }
                return _tree.Word.Substring(StartIndex, end - StartIndex + 1);
            }
        }

        /// <summary>The label as currently visible (see <see cref="VisibleEndIndex"/>).</summary>
        public string String
        {
            get { return _tree.Word.Substring(StartIndex, VisibleEndIndex - StartIndex + 1); }
        }

        /// <summary>
        /// Splits this edge in front of <paramref name="index"/>: a new edge takes over the part
        /// before the index and ends in a new node, and this edge becomes that node's child
        /// starting at <paramref name="index"/>.
        /// </summary>
        /// <param name="index">Index into Word of the first character that stays on this edge.</param>
        /// <returns>The newly created upper edge.</returns>
        public Edge SplitAtIndex(int index)
        {
            _tree.SendMessage("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]);
            var newEdge = new Edge(_tree, Head);
            var newNode = new Node(_tree);
            newEdge.Tail = newNode;
            newEdge.StartIndex = StartIndex;
            newEdge.EndIndex = index - 1;
            Head = newNode;
            StartIndex = index;
            newNode.Edges.Add(_tree.Word[StartIndex], this);
            // Replace this edge with the new upper edge in the original head's dictionary.
            newEdge.Head.Edges[_tree.Word[newEdge.StartIndex]] = newEdge;
            _tree.SendMessage(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode,
                this);
            return newEdge;
        }

        /// <summary>Returns the label followed by "(start,end)", with "#" standing for an open end.</summary>
        public override string ToString()
        {
            return
                string.Concat(
                    String,
                    "(",
                    StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")");
        }

        /// <summary>Writes the label and, when the edge has a tail node, a connecting line and that node's subtree.</summary>
        /// <param name="writer">Destination.</param>
        /// <param name="prefix">Indentation carried down to the subtree.</param>
        /// <param name="maxEdgeLength">Longest sibling label; used to align the tail nodes.</param>
        public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength)
        {
            string strEdge = String;
            writer.Write(strEdge);
            if (Tail == null)
            {
                writer.WriteLine();
            }
            else
            {
                var line = new string(RenderChars.HorizontalLine, maxEdgeLength - strEdge.Length + 1);
                writer.Write(line);
                Tail.RenderTree(writer, string.Concat(prefix, new string(' ', strEdge.Length + line.Length)));
            }
        }

        /// <summary>Appends the DOT statements for this edge (and, recursively, its tail node) to <paramref name="sb"/>.</summary>
        /// <param name="sb">Destination buffer.</param>
        public void WriteDotGraph(StringBuilder sb)
        {
            // Leaf edges need an explicit point-shaped target node.
            if (Tail == null)
                sb.AppendFormat("leaf{0} [label=\"\",shape=point]", EdgeNumber).AppendLine();
            string label, weight, color;
            if (_tree.ActiveEdge != null && ReferenceEquals(this, _tree.ActiveEdge))
            {
                // The active edge is highlighted; the label also shows how far the active point is into it.
                if (_tree.ActiveEdge.Length == 0)
                    label = "";
                else if (_tree.DistanceIntoActiveEdge > Length)
                    label = "<<FONT COLOR=\"red\" SIZE=\"11\"><B>" + String + "</B> (" +
                            _tree.DistanceIntoActiveEdge + ")</FONT>>";
                else if (_tree.DistanceIntoActiveEdge == Length)
                    label = "<<FONT COLOR=\"red\" SIZE=\"11\">" + String + "</FONT>>";
                else if (_tree.DistanceIntoActiveEdge > 0)
                    label =
                        "<<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\"><TR><TD><FONT COLOR=\"blue\"><B>" +
                        String.Substring(0, _tree.DistanceIntoActiveEdge) + "</B></FONT></TD><TD COLOR=\"black\">" +
                        String.Substring(_tree.DistanceIntoActiveEdge) + "</TD></TR></TABLE>>";
                else
                    label = "\"" + String + "\"";
                color = "blue";
                weight = "5";
            }
            else
            {
                label = "\"" + String + "\"";
                color = "black";
                weight = "3";
            }
            string tail = Tail == null ? "leaf" + EdgeNumber : "node" + Tail.NodeNumber;
            sb.AppendFormat("node{0} -> {1} [label={2},weight={3},color={4},size=11]", Head.NodeNumber, tail, label,
                weight, color).AppendLine();
            if (Tail != null)
                Tail.WriteDotGraph(sb);
        }
    }

    #endregion

    #region Nested type: Node

    public class Node
    {
    private readonly SuffixTree _tree;

    /// <summary>Creates a node without edges and takes the next node number from <paramref name="tree"/>.</summary>
    /// <param name="tree">Owning tree; its node counter is advanced by one.</param>
    public Node(SuffixTree tree)
    {
        _tree = tree;
        Edges = new Dictionary<char, Edge>();
        // Read the counter, then advance it (same effect as a post-increment).
        NodeNumber = _tree.NextNodeNumber;
        _tree.NextNodeNumber = NodeNumber + 1;
    }

    public Dictionary<char, Edge> Edges { get; private set; }
    public Node LinkedNode { get; set; }
    public int NodeNumber { get; private set; }

    public void AddNewEdge()
    {
    _tree.SendMessage("Adding new edge to {0}", this);
    var edge = new Edge(_tree, this);
    Edges.Add(_tree.Word[_tree.CurrentSuffixEndIndex], edge);
    _tree.SendMessage(" => {0} --> {1}", this, edge);
    }

    public void RenderTree(TextWriter writer, string prefix)
    {
    string strNode = string.Concat("(",
    NodeNumber.ToString(new string('0',
    _tree.NextNodeNumber.ToString().Length)),
    ")");
    writer.Write(strNode);
    Edge[] edges = Edges.Select(kvp => kvp.Value).OrderBy(e => _tree.Word[e.StartIndex]).ToArray();
    if (edges.Any())
    {
    string prefixWithNodePadding = prefix + new string(' ', strNode.Length);
    int maxEdgeLength = edges.Max(e => (e.EndIndex ?? _tree.CurrentSuffixEndIndex) - e.StartIndex + 1);
    for (int i = 0; i < edges.Length; i++)
    {
    char connector, extender = ' ';
    if (i == 0)
    {
    if (edges.Length > 1)
    {
    connector = RenderChars.TJunctionDown;
    extender = RenderChars.VerticalLine;
    }
    else
    connector = RenderChars.HorizontalLine;
    }
    else
    {
    writer.Write(prefixWithNodePadding);
    if (i == edges.Length - 1)
    connector = RenderChars.CornerRight;
    else
    {
    connector = RenderChars.TJunctionRight;
    extender = RenderChars.VerticalLine;
    }
    }
    writer.Write(string.Concat(connector, RenderChars.HorizontalLine));
    string newPrefix = string.Concat(prefixWithNodePadding, extender, ' ');
    edges[i].RenderTree(writer, newPrefix, maxEdgeLength);
    }
    }
    }

    public override string ToString()
    {
    return string.Concat("node #", NodeNumber);
    }

    public void WriteDotGraph(StringBuilder sb)
    {
    if (LinkedNode != null)
    sb.AppendFormat("node{0} -> node{1} [label=\"\",weight=.01,style=dotted]", NodeNumber,
    LinkedNode.NodeNumber).AppendLine();
    foreach (Edge edge in Edges.Values)
    edge.WriteDotGraph(sb);
    }
    }

    #endregion

    #region Nested type: RenderChars

    /// <summary>
    /// Box-drawing characters used as connectors when the tree is rendered as text.
    /// Written as Unicode escapes so the values do not depend on the source file's encoding.
    /// </summary>
    public static class RenderChars
    {
        /// <summary>U+2500: horizontal run; also the connector for a node's only edge.</summary>
        public const char HorizontalLine = '\u2500';

        /// <summary>U+2502: vertical bar carried down while further sibling edges follow.</summary>
        public const char VerticalLine = '\u2502';

        /// <summary>U+252C: connector for the first edge of a node that has several edges.</summary>
        public const char TJunctionDown = '\u252C';

        /// <summary>U+251C: connector for an edge that is neither the first nor the last.</summary>
        public const char TJunctionRight = '\u251C';

        /// <summary>U+2514: connector for the last edge of a node that has several edges.</summary>
        public const char CornerRight = '\u2514';
    }

    #endregion
    }
    }
  2. matklad revised this gist May 7, 2012. 1 changed file with 11 additions and 0 deletions.
    11 changes: 11 additions & 0 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -8,6 +8,15 @@ namespace SuffixTreeAlgorithm
    {
    public class SuffixTree
    {
    static void Main()
    {
    SuffixTree.Create("aabaaa");
    Console.ReadKey();
    }

    private void TreeUpdate(string f, object[] args){
    Console.WriteLine(f, args);
    }
    public char? CanonizationChar { get; set; }
    public string Word { get; private set; }
    private int CurrentSuffixStartIndex { get; set; }
    @@ -27,6 +36,7 @@ public SuffixTree(string word)
    Word = word;
    RootNode = new Node(this);
    ActiveNode = RootNode;
    Message = TreeUpdate;
    }

    public event Action<SuffixTree> Changed;
    @@ -71,6 +81,7 @@ public void Build(char canonizationChar)
    for(CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++)
    {
    var wasImplicitlyAdded = !AddNextSuffix();
    SendMessage("\n{0}", RenderTree());
    if(wasImplicitlyAdded)
    {
    UnresolvedSuffixes++;
  3. @axefrog axefrog revised this gist May 6, 2012. 1 changed file with 149 additions and 29 deletions.
    178 changes: 149 additions & 29 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -2,11 +2,13 @@
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;
    using System.Text;

    namespace SuffixTreeAlgorithm
    {
    public class SuffixTree
    {
    public char? CanonizationChar { get; set; }
    public string Word { get; private set; }
    private int CurrentSuffixStartIndex { get; set; }
    private int CurrentSuffixEndIndex { get; set; }
    @@ -18,39 +20,57 @@ public class SuffixTree
    private int DistanceIntoActiveEdge { get; set; }
    private char LastCharacterOfCurrentSuffix { get; set; }
    private int NextNodeNumber { get; set; }
    private int NextEdgeNumber { get; set; }

    private SuffixTree(string word)
    public SuffixTree(string word)
    {
    Word = word;
    RootNode = new Node(this);
    ActiveNode = RootNode;
    }

    public event Action<SuffixTree> Changed;
    private void TriggerChanged()
    {
    var handler = Changed;
    if(handler != null)
    handler(this);
    }

    public event Action<string, object[]> Message;
    private void SendMessage(string format, params object[] args)
    {
    var handler = Message;
    if(handler != null)
    handler(format, args);
    }

    public static SuffixTree Create(string word, char canonizationChar = '$')
    {
    var tree = new SuffixTree(word);
    tree.Build(canonizationChar);
    return tree;
    }

    private void Build(char canonizationChar)
    public void Build(char canonizationChar)
    {
    var n = Word.IndexOf(Word[Word.Length - 1]);
    var mustCanonize = n < Word.Length - 1;
    if(mustCanonize)
    {
    CanonizationChar = canonizationChar;
    Word = string.Concat(Word, canonizationChar);
    }

    for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++)
    {
    Console.WriteLine("=== ITERATION {0} ===", CurrentSuffixEndIndex);
    SendMessage("=== ITERATION {0} ===", CurrentSuffixEndIndex);
    LastCreatedNodeInCurrentIteration = null;
    LastCharacterOfCurrentSuffix = Word[CurrentSuffixEndIndex];

    for(CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++)
    {
    var wasImplicitlyAdded = !AddNextSuffix();
    Console.WriteLine();
    Console.WriteLine(RenderTree());
    if(wasImplicitlyAdded)
    {
    UnresolvedSuffixes++;
    @@ -65,11 +85,11 @@ private void Build(char canonizationChar)
    private bool AddNextSuffix()
    {
    var suffix = string.Concat(Word.Substring(CurrentSuffixStartIndex, CurrentSuffixEndIndex - CurrentSuffixStartIndex), "{", Word[CurrentSuffixEndIndex], "}");
    Console.WriteLine("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix, CurrentSuffixStartIndex, CurrentSuffixEndIndex);
    Console.WriteLine(" => ActiveNode: {0}", ActiveNode);
    Console.WriteLine(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString());
    Console.WriteLine(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge);
    Console.WriteLine(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes);
    SendMessage("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix, CurrentSuffixStartIndex, CurrentSuffixEndIndex);
    SendMessage(" => ActiveNode: {0}", ActiveNode);
    SendMessage(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString());
    SendMessage(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge);
    SendMessage(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes);
    if(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    throw new Exception("BOUNDARY EXCEEDED");

    @@ -80,6 +100,8 @@ private bool AddNextSuffix()
    return false;

    ActiveNode.AddNewEdge();
    TriggerChanged();

    UpdateActivePointAfterAddingNewEdge();
    return true;
    }
    @@ -89,17 +111,19 @@ private bool GetExistingEdgeAndSetAsActive()
    Edge edge;
    if(ActiveNode.Edges.TryGetValue(LastCharacterOfCurrentSuffix, out edge))
    {
    Console.WriteLine("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix);
    SendMessage("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix);
    ActiveEdge = edge;
    DistanceIntoActiveEdge = 1;
    TriggerChanged();

    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);
    Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge);
    Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    Console.WriteLine(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes);
    SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
    SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    SendMessage(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes);

    return true;
    }
    Console.WriteLine("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix);
    SendMessage("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix);
    return false;
    }

    @@ -108,16 +132,20 @@ private bool AddCurrentSuffixToActiveEdge()
    var nextCharacterOnEdge = Word[ActiveEdge.StartIndex + DistanceIntoActiveEdge];
    if(nextCharacterOnEdge == LastCharacterOfCurrentSuffix)
    {
    Console.WriteLine("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix);
    SendMessage("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix);
    DistanceIntoActiveEdge++;
    Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    TriggerChanged();

    SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);

    return false;
    }

    SplitActiveEdge();
    ActiveEdge.Tail.AddNewEdge();
    TriggerChanged();

    UpdateActivePointAfterAddingNewEdge();

    return true;
    @@ -129,8 +157,13 @@ private void UpdateActivePointAfterAddingNewEdge()
    {
    if(DistanceIntoActiveEdge > 0)
    {
    SendMessage("New edge has been added and the active node is root. The active edge will now be updated.");
    DistanceIntoActiveEdge--;
    SendMessage(" => DistanceIntoActiveEdge decremented to: {0}", DistanceIntoActiveEdge);
    ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
    TriggerChanged();

    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(CurrentSuffixStartIndex + 1);
    }
    }
    @@ -143,9 +176,9 @@ private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOri
    var walkDistance = 0;
    while(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    {
    Console.WriteLine("Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary");
    SendMessage("Active point is at or beyond edge boundary and will be moved until it falls inside an edge boundary");
    DistanceIntoActiveEdge -= ActiveEdge.Length;
    ActiveNode = ActiveEdge.Tail;
    ActiveNode = ActiveEdge.Tail ?? RootNode;
    if(DistanceIntoActiveEdge == 0)
    ActiveEdge = null;
    else
    @@ -154,38 +187,44 @@ private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOri
    var c = Word[firstIndexOfOriginalActiveEdge + walkDistance];
    ActiveEdge = ActiveNode.Edges[c];
    }
    TriggerChanged();
    }
    }

    private void SplitActiveEdge()
    {
    ActiveEdge = ActiveEdge.SplitAtIndex(ActiveEdge.StartIndex + DistanceIntoActiveEdge);
    Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge);
    SendMessage(" => ActiveEdge is now: {0}", ActiveEdge);
    TriggerChanged();
    if(LastCreatedNodeInCurrentIteration != null)
    {
    LastCreatedNodeInCurrentIteration.LinkedNode = ActiveEdge.Tail;
    Console.WriteLine(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveEdge.Tail);
    SendMessage(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveEdge.Tail);
    TriggerChanged();
    }
    LastCreatedNodeInCurrentIteration = ActiveEdge.Tail;
    }

    private void UpdateActivePointToLinkedNodeOrRoot()
    {
    Console.WriteLine("The linked node for active node {0} is {1}", ActiveNode, ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString());
    SendMessage("The linked node for active node {0} is {1}", ActiveNode, ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString());
    if(ActiveNode.LinkedNode != null)
    {
    ActiveNode = ActiveNode.LinkedNode;
    Console.WriteLine(" => ActiveNode is now: {0}", ActiveNode);
    SendMessage(" => ActiveNode is now: {0}", ActiveNode);
    }
    else
    {
    ActiveNode = RootNode;
    SendMessage(" => ActiveNode is now ROOT", ActiveNode);
    }
    TriggerChanged();

    if(ActiveEdge != null)
    {
    var firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex;
    ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]];
    TriggerChanged();
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge);
    }
    }
    @@ -197,6 +236,40 @@ public string RenderTree()
    return writer.ToString();
    }

    public string WriteDotGraph()
    {
    var sb = new StringBuilder();
    sb.AppendLine("digraph {");
    sb.AppendLine("rankdir = LR;");
    sb.AppendLine("edge [arrowsize=0.5,fontsize=11];");
    for(var i = 0; i < NextNodeNumber; i++)
    sb.AppendFormat("node{0} [label=\"{0}\",style=filled,fillcolor={1},shape=circle,width=.1,height=.1,fontsize=11,margin=0.01];",
    i, ActiveNode.NodeNumber == i ? "cyan" : "lightgrey").AppendLine();
    RootNode.WriteDotGraph(sb);
    sb.AppendLine("}");
    return sb.ToString();
    }

    public HashSet<string> ExtractAllSubstrings()
    {
    var set = new HashSet<string>();
    ExtractAllSubstrings("", set, RootNode);
    return set;
    }

    private void ExtractAllSubstrings(string str, HashSet<string> set, Node node)
    {
    foreach(var edge in node.Edges.Values)
    {
    var edgeStr = edge.StringWithoutCanonizationChar;
    var edgeLength = !edge.EndIndex.HasValue && CanonizationChar.HasValue ? edge.Length - 1 : edge.Length; // assume tailing canonization char
    for(var length = 1; length <= edgeLength; length++)
    set.Add(string.Concat(str, edgeStr.Substring(0, length)));
    if(edge.Tail != null)
    ExtractAllSubstrings(string.Concat(str, edge.StringWithoutCanonizationChar), set, edge.Tail);
    }
    }

    public List<string> ExtractSubstringsForIndexing(int? maxLength = null)
    {
    var list = new List<string>();
    @@ -225,17 +298,19 @@ public Edge(SuffixTree tree, Node head)
    _tree = tree;
    Head = head;
    StartIndex = tree.CurrentSuffixEndIndex;
    EdgeNumber = _tree.NextEdgeNumber++;
    }

    public Node Head { get; private set; }
    public Node Tail { get; private set; }
    public int StartIndex { get; private set; }
    public int? EndIndex { get; set; }
    public int EdgeNumber { get; private set; }
    public int Length { get { return (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1; } }

    public Edge SplitAtIndex(int index)
    {
    Console.WriteLine("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]);
    _tree.SendMessage("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]);
    var newEdge = new Edge(_tree, Head);
    var newNode = new Node(_tree);
    newEdge.Tail = newNode;
    @@ -245,7 +320,7 @@ public Edge SplitAtIndex(int index)
    StartIndex = index;
    newNode.Edges.Add(_tree.Word[StartIndex], this);
    newEdge.Head.Edges[_tree.Word[newEdge.StartIndex]] = newEdge;
    Console.WriteLine(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode, this);
    _tree.SendMessage(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode, this);
    return newEdge;
    }

    @@ -255,9 +330,14 @@ public override string ToString()
    StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")");
    }

    public string StringWithoutCanonizationChar
    {
    get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex - (_tree.CanonizationChar.HasValue ? 1 : 0)) - StartIndex + 1); }
    }

    public string String
    {
    get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1); }
    get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1); }
    }

    public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength)
    @@ -273,6 +353,38 @@ public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength)
    Tail.RenderTree(writer, string.Concat(prefix, new string(' ', strEdge.Length + line.Length)));
    }
    }

    public void WriteDotGraph(StringBuilder sb)
    {
    if(Tail == null)
    sb.AppendFormat("leaf{0} [label=\"\",shape=point]", EdgeNumber).AppendLine();
    string label, weight, color;
    if(_tree.ActiveEdge != null && ReferenceEquals(this, _tree.ActiveEdge))
    {
    if(_tree.ActiveEdge.Length == 0)
    label = "";
    else if(_tree.DistanceIntoActiveEdge > Length)
    label = "<<FONT COLOR=\"red\" SIZE=\"11\"><B>" + String + "</B> (" + _tree.DistanceIntoActiveEdge + ")</FONT>>";
    else if(_tree.DistanceIntoActiveEdge == Length)
    label = "<<FONT COLOR=\"red\" SIZE=\"11\">" + String + "</FONT>>";
    else if(_tree.DistanceIntoActiveEdge > 0)
    label = "<<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\"><TR><TD><FONT COLOR=\"blue\"><B>" + String.Substring(0, _tree.DistanceIntoActiveEdge) + "</B></FONT></TD><TD COLOR=\"black\">" + String.Substring(_tree.DistanceIntoActiveEdge) + "</TD></TR></TABLE>>";
    else
    label = "\"" + String + "\"";
    color = "blue";
    weight = "5";
    }
    else
    {
    label = "\"" + String + "\"";
    color = "black";
    weight = "3";
    }
    var tail = Tail == null ? "leaf" + EdgeNumber : "node" + Tail.NodeNumber;
    sb.AppendFormat("node{0} -> {1} [label={2},weight={3},color={4},size=11]", Head.NodeNumber, tail, label, weight, color).AppendLine();
    if(Tail != null)
    Tail.WriteDotGraph(sb);
    }
    }

    public class Node
    @@ -292,10 +404,10 @@ public Node(SuffixTree tree)

    public void AddNewEdge()
    {
    Console.WriteLine("Adding new edge to {0}", this);
    _tree.SendMessage("Adding new edge to {0}", this);
    var edge = new Edge(_tree, this);
    Edges.Add(_tree.Word[_tree.CurrentSuffixEndIndex], edge);
    Console.WriteLine(" => {0} --> {1}", this, edge);
    _tree.SendMessage(" => {0} --> {1}", this, edge);
    }

    public void RenderTree(TextWriter writer, string prefix)
    @@ -342,6 +454,14 @@ public override string ToString()
    {
    return string.Concat("node #", NodeNumber);
    }

    public void WriteDotGraph(StringBuilder sb)
    {
    if(LinkedNode != null)
    sb.AppendFormat("node{0} -> node{1} [label=\"\",weight=.01,style=dotted]", NodeNumber, LinkedNode.NodeNumber).AppendLine();
    foreach(var edge in Edges.Values)
    edge.WriteDotGraph(sb);
    }
    }

    public static class RenderChars
    @@ -353,4 +473,4 @@ public static class RenderChars
    public const char CornerRight = '└';
    }
    }
    }
    }
  4. @axefrog axefrog revised this gist May 2, 2012. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -131,7 +131,7 @@ private void UpdateActivePointAfterAddingNewEdge()
    {
    DistanceIntoActiveEdge--;
    ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge == null ? 0 : ActiveEdge.StartIndex);
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(CurrentSuffixStartIndex + 1);
    }
    }
    else
    @@ -353,4 +353,4 @@ public static class RenderChars
    public const char CornerRight = '└';
    }
    }
    }
    }
  5. @axefrog axefrog revised this gist Apr 27, 2012. 2 changed files with 346 additions and 4 deletions.
    26 changes: 22 additions & 4 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -38,7 +38,7 @@ private void Build(char canonizationChar)
    var n = Word.IndexOf(Word[Word.Length - 1]);
    var mustCanonize = n < Word.Length - 1;
    if(mustCanonize)
    Word = string.Concat(Word, canonizationChar);
    Word = string.Concat(Word, canonizationChar);

    for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++)
    {
    @@ -130,9 +130,8 @@ private void UpdateActivePointAfterAddingNewEdge()
    if(DistanceIntoActiveEdge > 0)
    {
    DistanceIntoActiveEdge--;
    var firstIndex = ActiveEdge.StartIndex;
    ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndex);
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge == null ? 0 : ActiveEdge.StartIndex);
    }
    }
    else
    @@ -198,6 +197,25 @@ public string RenderTree()
    return writer.ToString();
    }

    public List<string> ExtractSubstringsForIndexing(int? maxLength = null)
    {
    var list = new List<string>();
    ExtractSubstringsForIndexing("", list, maxLength ?? Word.Length, RootNode);
    return list;
    }

    private void ExtractSubstringsForIndexing(string str, List<string> list, int len, Node node)
    {
    foreach(var edge in node.Edges.Values)
    {
    var newstr = string.Concat(str, Word.Substring(edge.StartIndex, Math.Min(len, edge.Length)));
    if(len > edge.Length && edge.Tail != null)
    ExtractSubstringsForIndexing(newstr, list, len - edge.Length, edge.Tail);
    else
    list.Add(newstr);
    }
    }

    public class Edge
    {
    private readonly SuffixTree _tree;
    @@ -335,4 +353,4 @@ public static class RenderChars
    public const char CornerRight = '└';
    }
    }
    }
    }
    324 changes: 324 additions & 0 deletions 8.output.almasamolmaz.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,324 @@
    === ITERATION 0 ===
    The next suffix of 'almasamolmaz' to add is '{a}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' not found
    Adding new edge to node #0
    => node #0 --> a(0,#)

    (0)──a

    === ITERATION 1 ===
    The next suffix of 'almasamolmaz' to add is '{l}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'l' not found
    Adding new edge to node #0
    => node #0 --> l(1,#)

    (0)┬─al
    └─l

    === ITERATION 2 ===
    The next suffix of 'almasamolmaz' to add is '{m}' at indices 2,2
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'm' not found
    Adding new edge to node #0
    => node #0 --> m(2,#)

    (0)┬─alm
    ├─lm
    └─m

    === ITERATION 3 ===
    The next suffix of 'almasamolmaz' to add is '{a}' at indices 3,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    => ActiveEdge is now: alma(0,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─alma
    ├─lma
    └─ma

    === ITERATION 4 ===
    The next suffix of 'almasamolmaz' to add is 'a{s}' at indices 3,4
    => ActiveNode: node #0
    => ActiveEdge: almas(0,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge almas(0,#) at index 1 ('l')
    => Hierarchy is now: node #0 --> a(0,0) --> node #1 --> lmas(1,#)
    => ActiveEdge is now: a(0,0)
    Adding new edge to node #1
    => node #1 --> s(4,#)

    (0)┬─a────(1)┬─lmas
    │ └─s
    ├─lmas
    └─mas

    The next suffix of 'almasamolmaz' to add is '{s}' at indices 4,4
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 's' not found
    Adding new edge to node #0
    => node #0 --> s(4,#)

    (0)┬─a────(1)┬─lmas
    │ └─s
    ├─lmas
    ├─mas
    └─s

    === ITERATION 5 ===
    The next suffix of 'almasamolmaz' to add is '{a}' at indices 5,5
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─a─────(1)┬─lmasa
    │ └─sa
    ├─lmasa
    ├─masa
    └─sa

    === ITERATION 6 ===
    The next suffix of 'almasamolmaz' to add is 'a{m}' at indices 5,6
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with 'm' not found
    Adding new edge to node #1
    => node #1 --> m(6,#)
    The linked node for active node node #1 is [null]

    (0)┬─a──────(1)┬─lmasam
    │ ├─m
    │ └─sam
    ├─lmasam
    ├─masam
    └─sam

    The next suffix of 'almasamolmaz' to add is '{m}' at indices 6,6
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'm' found. Values adjusted to:
    => ActiveEdge is now: masam(2,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─a──────(1)┬─lmasam
    │ ├─m
    │ └─sam
    ├─lmasam
    ├─masam
    └─sam

    === ITERATION 7 ===
    The next suffix of 'almasamolmaz' to add is 'm{o}' at indices 6,7
    => ActiveNode: node #0
    => ActiveEdge: masamo(2,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge masamo(2,#) at index 3 ('a')
    => Hierarchy is now: node #0 --> m(2,2) --> node #2 --> asamo(3,#)
    => ActiveEdge is now: m(2,2)
    Adding new edge to node #2
    => node #2 --> o(7,#)

    (0)┬─a───────(1)┬─lmasamo
    │ ├─mo
    │ └─samo
    ├─lmasamo
    ├─m───────(2)┬─asamo
    │ └─o
    └─samo

    The next suffix of 'almasamolmaz' to add is '{o}' at indices 7,7
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' not found
    Adding new edge to node #0
    => node #0 --> o(7,#)

    (0)┬─a───────(1)┬─lmasamo
    │ ├─mo
    │ └─samo
    ├─lmasamo
    ├─m───────(2)┬─asamo
    │ └─o
    ├─o
    └─samo

    === ITERATION 8 ===
    The next suffix of 'almasamolmaz' to add is '{l}' at indices 8,8
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'l' found. Values adjusted to:
    => ActiveEdge is now: lmasamol(1,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─a────────(1)┬─lmasamol
    │ ├─mol
    │ └─samol
    ├─lmasamol
    ├─m────────(2)┬─asamol
    │ └─ol
    ├─ol
    └─samol

    === ITERATION 9 ===
    The next suffix of 'almasamolmaz' to add is 'l{m}' at indices 8,9
    => ActiveNode: node #0
    => ActiveEdge: lmasamolm(1,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    The next character on the current edge is 'm' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─a─────────(1)┬─lmasamolm
    │ ├─molm
    │ └─samolm
    ├─lmasamolm
    ├─m─────────(2)┬─asamolm
    │ └─olm
    ├─olm
    └─samolm

    === ITERATION 10 ===
    The next suffix of 'almasamolmaz' to add is 'lm{a}' at indices 8,10
    => ActiveNode: node #0
    => ActiveEdge: lmasamolma(1,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    The next character on the current edge is 'a' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 3

    (0)┬─a──────────(1)┬─lmasamolma
    │ ├─molma
    │ └─samolma
    ├─lmasamolma
    ├─m──────────(2)┬─asamolma
    │ └─olma
    ├─olma
    └─samolma

    === ITERATION 11 ===
    The next suffix of 'almasamolmaz' to add is 'lma{z}' at indices 8,11
    => ActiveNode: node #0
    => ActiveEdge: lmasamolmaz(1,#)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 3
    Splitting edge lmasamolmaz(1,#) at index 4 ('s')
    => Hierarchy is now: node #0 --> lma(1,3) --> node #3 --> samolmaz(4,#)
    => ActiveEdge is now: lma(1,3)
    Adding new edge to node #3
    => node #3 --> z(11,#)
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─a────────(1)┬─lmasamolmaz
    │ ├─molmaz
    │ └─samolmaz
    ├─lma──────(3)┬─samolmaz
    │ └─z
    ├─m────────(2)┬─asamolmaz
    │ └─olmaz
    ├─olmaz
    └─samolmaz

    The next suffix of 'almasamolmaz' to add is 'ma{z}' at indices 9,11
    => ActiveNode: node #2
    => ActiveEdge: asamolmaz(3,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    Splitting edge asamolmaz(3,#) at index 4 ('s')
    => Hierarchy is now: node #2 --> a(3,3) --> node #4 --> samolmaz(4,#)
    => ActiveEdge is now: a(3,3)
    => Connected node #3 to node #4
    Adding new edge to node #4
    => node #4 --> z(11,#)
    The linked node for active node node #2 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─a────────(1)┬─lmasamolmaz
    │ ├─molmaz
    │ └─samolmaz
    ├─lma──────(3)┬─samolmaz
    │ └─z
    ├─m────────(2)┬─a─────(4)┬─samolmaz
    │ │ └─z
    │ └─olmaz
    ├─olmaz
    └─samolmaz

    The next suffix of 'almasamolmaz' to add is 'a{z}' at indices 10,11
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with 'z' not found
    Adding new edge to node #1
    => node #1 --> z(11,#)
    The linked node for active node node #1 is [null]

    (0)┬─a────────(1)┬─lmasamolmaz
    │ ├─molmaz
    │ ├─samolmaz
    │ └─z
    ├─lma──────(3)┬─samolmaz
    │ └─z
    ├─m────────(2)┬─a─────(4)┬─samolmaz
    │ │ └─z
    │ └─olmaz
    ├─olmaz
    └─samolmaz

    The next suffix of 'almasamolmaz' to add is '{z}' at indices 11,11
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'z' not found
    Adding new edge to node #0
    => node #0 --> z(11,#)

    (0)┬─a────────(1)┬─lmasamolmaz
    │ ├─molmaz
    │ ├─samolmaz
    │ └─z
    ├─lma──────(3)┬─samolmaz
    │ └─z
    ├─m────────(2)┬─a─────(4)┬─samolmaz
    │ │ └─z
    │ └─olmaz
    ├─olmaz
    ├─samolmaz
    └─z
  6. @axefrog axefrog revised this gist Apr 15, 2012. 1 changed file with 190 additions and 241 deletions.
    431 changes: 190 additions & 241 deletions 6.output.ooooooooo.txt
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,5 @@
    === ITERATION 0 ===
    The next suffix of 'okoooooooo$' to add is '{o}' at indices 0,0
    The next suffix of 'ooooooooo$' to add is '{o}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    @@ -11,310 +11,259 @@ Adding new edge to node #0
    (0)──o

    === ITERATION 1 ===
    The next suffix of 'okoooooooo$' to add is '{k}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'k' not found
    Adding new edge to node #0
    => node #0 --> k(1,#)

    (0)┬─k
    └─ok

    === ITERATION 2 ===
    The next suffix of 'okoooooooo$' to add is '{o}' at indices 2,2
    The next suffix of 'ooooooooo$' to add is '{o}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' found. Values adjusted to:
    => ActiveEdge is now: oko(0,#)
    => ActiveEdge is now: oo(0,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─ko
    └─oko
    (0)──oo

    === ITERATION 3 ===
    The next suffix of 'okoooooooo$' to add is 'o{o}' at indices 2,3
    === ITERATION 2 ===
    The next suffix of 'ooooooooo$' to add is 'o{o}' at indices 1,2
    => ActiveNode: node #0
    => ActiveEdge: okoo(0,#)
    => ActiveEdge: ooo(0,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge okoo(0,#) at index 1 ('k')
    => Hierarchy is now: node #0 --> o(0,0) --> node #1 --> koo(1,#)
    => ActiveEdge is now: o(0,0)
    Adding new edge to node #1
    => node #1 --> o(3,#)
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─koo
    └─o───(1)┬─koo
    └─o
    (0)──ooo

    The next suffix of 'okoooooooo$' to add is '{o}' at indices 3,3
    === ITERATION 3 ===
    The next suffix of 'ooooooooo$' to add is 'oo{o}' at indices 1,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0
    => ActiveEdge: oooo(0,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 3

    (0)┬─koo
    └─o───(1)┬─koo
    └─o
    (0)──oooo

    === ITERATION 4 ===
    The next suffix of 'okoooooooo$' to add is 'o{o}' at indices 3,4
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with 'o' found. Values adjusted to:
    => ActiveEdge is now: oo(3,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 1
    The next suffix of 'ooooooooo$' to add is 'ooo{o}' at indices 1,4
    => ActiveNode: node #0
    => ActiveEdge: ooooo(0,#)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 3
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 4

    (0)┬─kooo
    └─o────(1)┬─kooo
    └─oo
    (0)──ooooo

    === ITERATION 5 ===
    The next suffix of 'okoooooooo$' to add is 'oo{o}' at indices 3,5
    => ActiveNode: node #1
    => ActiveEdge: ooo(3,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    The next suffix of 'ooooooooo$' to add is 'oooo{o}' at indices 1,5
    => ActiveNode: node #0
    => ActiveEdge: oooooo(0,#)
    => DistanceIntoActiveEdge: 4
    => UnresolvedSuffixes: 4
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2
    => DistanceIntoActiveEdge is now: 5

    (0)┬─koooo
    └─o─────(1)┬─koooo
    └─ooo
    (0)──oooooo

    === ITERATION 6 ===
    The next suffix of 'okoooooooo$' to add is 'ooo{o}' at indices 3,6
    => ActiveNode: node #1
    => ActiveEdge: oooo(3,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 3
    The next suffix of 'ooooooooo$' to add is 'ooooo{o}' at indices 1,6
    => ActiveNode: node #0
    => ActiveEdge: ooooooo(0,#)
    => DistanceIntoActiveEdge: 5
    => UnresolvedSuffixes: 5
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 3
    => DistanceIntoActiveEdge is now: 6

    (0)┬─kooooo
    └─o──────(1)┬─kooooo
    └─oooo
    (0)──ooooooo

    === ITERATION 7 ===
    The next suffix of 'okoooooooo$' to add is 'oooo{o}' at indices 3,7
    => ActiveNode: node #1
    => ActiveEdge: ooooo(3,#)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 4
    The next suffix of 'ooooooooo$' to add is 'oooooo{o}' at indices 1,7
    => ActiveNode: node #0
    => ActiveEdge: oooooooo(0,#)
    => DistanceIntoActiveEdge: 6
    => UnresolvedSuffixes: 6
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 4
    => DistanceIntoActiveEdge is now: 7

    (0)┬─koooooo
    └─o───────(1)┬─koooooo
    └─ooooo
    (0)──oooooooo

    === ITERATION 8 ===
    The next suffix of 'okoooooooo$' to add is 'ooooo{o}' at indices 3,8
    => ActiveNode: node #1
    => ActiveEdge: oooooo(3,#)
    => DistanceIntoActiveEdge: 4
    => UnresolvedSuffixes: 5
    The next suffix of 'ooooooooo$' to add is 'ooooooo{o}' at indices 1,8
    => ActiveNode: node #0
    => ActiveEdge: ooooooooo(0,#)
    => DistanceIntoActiveEdge: 7
    => UnresolvedSuffixes: 7
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 5
    => DistanceIntoActiveEdge is now: 8

    (0)┬─kooooooo
    └─o────────(1)┬─kooooooo
    └─oooooo
    (0)──ooooooooo

    === ITERATION 9 ===
    The next suffix of 'okoooooooo$' to add is 'oooooo{o}' at indices 3,9
    => ActiveNode: node #1
    => ActiveEdge: ooooooo(3,#)
    => DistanceIntoActiveEdge: 5
    => UnresolvedSuffixes: 6
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 6
    The next suffix of 'ooooooooo$' to add is 'oooooooo{$}' at indices 1,9
    => ActiveNode: node #0
    => ActiveEdge: ooooooooo$(0,#)
    => DistanceIntoActiveEdge: 8
    => UnresolvedSuffixes: 8
    Splitting edge ooooooooo$(0,#) at index 8 ('o')
    => Hierarchy is now: node #0 --> oooooooo(0,7) --> node #1 --> o$(8,#)
    => ActiveEdge is now: oooooooo(0,7)
    Adding new edge to node #1
    => node #1 --> $(9,#)

    (0)┬─koooooooo
    └─o─────────(1)┬─koooooooo
    └─ooooooo
    (0)──oooooooo─(1)┬─$
    └─o$

    === ITERATION 10 ===
    The next suffix of 'okoooooooo$' to add is 'ooooooo{$}' at indices 3,10
    => ActiveNode: node #1
    => ActiveEdge: ooooooo$(3,#)
    => DistanceIntoActiveEdge: 6
    The next suffix of 'ooooooooo$' to add is 'ooooooo{$}' at indices 2,9
    => ActiveNode: node #0
    => ActiveEdge: oooooooo(0,7)
    => DistanceIntoActiveEdge: 7
    => UnresolvedSuffixes: 7
    Splitting edge ooooooo$(3,#) at index 9 ('o')
    => Hierarchy is now: node #1 --> oooooo(3,8) --> node #2 --> o$(9,#)
    => ActiveEdge is now: oooooo(3,8)
    Splitting edge oooooooo(0,7) at index 7 ('o')
    => Hierarchy is now: node #0 --> ooooooo(0,6) --> node #2 --> o(7,7)
    => ActiveEdge is now: ooooooo(0,6)
    => Connected node #1 to node #2
    Adding new edge to node #2
    => node #2 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => node #2 --> $(9,#)

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─oooooo─────(2)┬─$
    └─o$
    (0)──ooooooo─(2)┬─$
    └─o─(1)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'oooooo{$}' at indices 4,10
    => ActiveNode: node #1
    => ActiveEdge: oooooo(3,8)
    => DistanceIntoActiveEdge: 5
    The next suffix of 'ooooooooo$' to add is 'oooooo{$}' at indices 3,9
    => ActiveNode: node #0
    => ActiveEdge: ooooooo(0,6)
    => DistanceIntoActiveEdge: 6
    => UnresolvedSuffixes: 6
    Splitting edge oooooo(3,8) at index 8 ('o')
    => Hierarchy is now: node #1 --> ooooo(3,7) --> node #3 --> o(8,8)
    => ActiveEdge is now: ooooo(3,7)
    Splitting edge ooooooo(0,6) at index 6 ('o')
    => Hierarchy is now: node #0 --> oooooo(0,5) --> node #3 --> o(6,6)
    => ActiveEdge is now: oooooo(0,5)
    => Connected node #2 to node #3
    Adding new edge to node #3
    => node #3 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─ooooo──────(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'ooooo{$}' at indices 5,10
    => ActiveNode: node #1
    => ActiveEdge: ooooo(3,7)
    => DistanceIntoActiveEdge: 4
    => node #3 --> $(9,#)

    (0)──oooooo─(3)┬─$
    └─o─(2)┬─$
    └─o─(1)┬─$
    └─o$

    The next suffix of 'ooooooooo$' to add is 'ooooo{$}' at indices 4,9
    => ActiveNode: node #0
    => ActiveEdge: oooooo(0,5)
    => DistanceIntoActiveEdge: 5
    => UnresolvedSuffixes: 5
    Splitting edge ooooo(3,7) at index 7 ('o')
    => Hierarchy is now: node #1 --> oooo(3,6) --> node #4 --> o(7,7)
    => ActiveEdge is now: oooo(3,6)
    Splitting edge oooooo(0,5) at index 5 ('o')
    => Hierarchy is now: node #0 --> ooooo(0,4) --> node #4 --> o(5,5)
    => ActiveEdge is now: ooooo(0,4)
    => Connected node #3 to node #4
    Adding new edge to node #4
    => node #4 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─oooo───────(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'oooo{$}' at indices 6,10
    => ActiveNode: node #1
    => ActiveEdge: oooo(3,6)
    => DistanceIntoActiveEdge: 3
    => node #4 --> $(9,#)

    (0)──ooooo─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o─(1)┬─$
    └─o$

    The next suffix of 'ooooooooo$' to add is 'oooo{$}' at indices 5,9
    => ActiveNode: node #0
    => ActiveEdge: ooooo(0,4)
    => DistanceIntoActiveEdge: 4
    => UnresolvedSuffixes: 4
    Splitting edge oooo(3,6) at index 6 ('o')
    => Hierarchy is now: node #1 --> ooo(3,5) --> node #5 --> o(6,6)
    => ActiveEdge is now: ooo(3,5)
    Splitting edge ooooo(0,4) at index 4 ('o')
    => Hierarchy is now: node #0 --> oooo(0,3) --> node #5 --> o(4,4)
    => ActiveEdge is now: oooo(0,3)
    => Connected node #4 to node #5
    Adding new edge to node #5
    => node #5 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─ooo────────(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'ooo{$}' at indices 7,10
    => ActiveNode: node #1
    => ActiveEdge: ooo(3,5)
    => DistanceIntoActiveEdge: 2
    => node #5 --> $(9,#)

    (0)──oooo─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o─(1)┬─$
    └─o$

    The next suffix of 'ooooooooo$' to add is 'ooo{$}' at indices 6,9
    => ActiveNode: node #0
    => ActiveEdge: oooo(0,3)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 3
    Splitting edge ooo(3,5) at index 5 ('o')
    => Hierarchy is now: node #1 --> oo(3,4) --> node #6 --> o(5,5)
    => ActiveEdge is now: oo(3,4)
    Splitting edge oooo(0,3) at index 3 ('o')
    => Hierarchy is now: node #0 --> ooo(0,2) --> node #6 --> o(3,3)
    => ActiveEdge is now: ooo(0,2)
    => Connected node #5 to node #6
    Adding new edge to node #6
    => node #6 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─oo─────────(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'oo{$}' at indices 8,10
    => ActiveNode: node #1
    => ActiveEdge: oo(3,4)
    => DistanceIntoActiveEdge: 1
    => node #6 --> $(9,#)

    (0)──ooo─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o─(1)┬─$
    └─o$

    The next suffix of 'ooooooooo$' to add is 'oo{$}' at indices 7,9
    => ActiveNode: node #0
    => ActiveEdge: ooo(0,2)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    Splitting edge oo(3,4) at index 4 ('o')
    => Hierarchy is now: node #1 --> o(3,3) --> node #7 --> o(4,4)
    => ActiveEdge is now: o(3,3)
    Splitting edge ooo(0,2) at index 2 ('o')
    => Hierarchy is now: node #0 --> oo(0,1) --> node #7 --> o(2,2)
    => ActiveEdge is now: oo(0,1)
    => Connected node #6 to node #7
    Adding new edge to node #7
    => node #7 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─o──────────(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'o{$}' at indices 9,10
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => node #7 --> $(9,#)

    (0)──oo─(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o─(1)┬─$
    └─o$

    The next suffix of 'ooooooooo$' to add is 'o{$}' at indices 8,9
    => ActiveNode: node #0
    => ActiveEdge: oo(0,1)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with '$' not found
    Adding new edge to node #1
    => node #1 --> $(10,#)
    The linked node for active node node #1 is [null]

    (0)┬─koooooooo$
    └─o──────────(1)┬─$
    ├─koooooooo$
    └─o──────────(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is '{$}' at indices 10,10
    Splitting edge oo(0,1) at index 1 ('o')
    => Hierarchy is now: node #0 --> o(0,0) --> node #8 --> o(1,1)
    => ActiveEdge is now: o(0,0)
    => Connected node #7 to node #8
    Adding new edge to node #8
    => node #8 --> $(9,#)

    (0)──o─(8)┬─$
    └─o─(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o─(1)┬─$
    └─o$

    The next suffix of 'ooooooooo$' to add is '{$}' at indices 9,9
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with '$' not found
    Adding new edge to node #0
    => node #0 --> $(10,#)
    => node #0 --> $(9,#)

    (0)┬─$
    ├─koooooooo$
    └─o──────────(1)┬─$
    ├─koooooooo$
    └─o──────────(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$
    └─o─(8)┬─$
    └─o─(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o─(1)┬─$
    └─o$
  7. @axefrog axefrog revised this gist Apr 15, 2012. 7 changed files with 1928 additions and 1 deletion.
    7 changes: 6 additions & 1 deletion 1.test.cs
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,9 @@
    static void Main()
    {
    var tree = SuffixTree.Create("abcabxabcd");
    SuffixTree.Create("abcabxabcd");
    SuffixTree.Create("abcdefabxybcdmnabcdex");
    SuffixTree.Create("abcadak");
    SuffixTree.Create("dedododeeodo");
    SuffixTree.Create("ooooooooo");
    SuffixTree.Create("mississippi");
    }
    File renamed without changes.
    716 changes: 716 additions & 0 deletions 3.output.abcdefabxybcdmnabcdex.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,716 @@
    === ITERATION 0 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{a}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' not found
    Adding new edge to node #0
    => node #0 --> a(0,#)

    (0)──a

    === ITERATION 1 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{b}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'b' not found
    Adding new edge to node #0
    => node #0 --> b(1,#)

    (0)┬─ab
    └─b

    === ITERATION 2 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{c}' at indices 2,2
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'c' not found
    Adding new edge to node #0
    => node #0 --> c(2,#)

    (0)┬─abc
    ├─bc
    └─c

    === ITERATION 3 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{d}' at indices 3,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'd' not found
    Adding new edge to node #0
    => node #0 --> d(3,#)

    (0)┬─abcd
    ├─bcd
    ├─cd
    └─d

    === ITERATION 4 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{e}' at indices 4,4
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'e' not found
    Adding new edge to node #0
    => node #0 --> e(4,#)

    (0)┬─abcde
    ├─bcde
    ├─cde
    ├─de
    └─e

    === ITERATION 5 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{f}' at indices 5,5
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'f' not found
    Adding new edge to node #0
    => node #0 --> f(5,#)

    (0)┬─abcdef
    ├─bcdef
    ├─cdef
    ├─def
    ├─ef
    └─f

    === ITERATION 6 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{a}' at indices 6,6
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    => ActiveEdge is now: abcdefa(0,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─abcdefa
    ├─bcdefa
    ├─cdefa
    ├─defa
    ├─efa
    └─fa

    === ITERATION 7 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'a{b}' at indices 6,7
    => ActiveNode: node #0
    => ActiveEdge: abcdefab(0,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    The next character on the current edge is 'b' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─abcdefab
    ├─bcdefab
    ├─cdefab
    ├─defab
    ├─efab
    └─fab

    === ITERATION 8 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'ab{x}' at indices 6,8
    => ActiveNode: node #0
    => ActiveEdge: abcdefabx(0,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    Splitting edge abcdefabx(0,#) at index 2 ('c')
    => Hierarchy is now: node #0 --> ab(0,1) --> node #1 --> cdefabx(2,#)
    => ActiveEdge is now: ab(0,1)
    Adding new edge to node #1
    => node #1 --> x(8,#)

    (0)┬─ab───────(1)┬─cdefabx
    │ └─x
    ├─bcdefabx
    ├─cdefabx
    ├─defabx
    ├─efabx
    └─fabx

    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'b{x}' at indices 7,8
    => ActiveNode: node #0
    => ActiveEdge: bcdefabx(1,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge bcdefabx(1,#) at index 2 ('c')
    => Hierarchy is now: node #0 --> b(1,1) --> node #2 --> cdefabx(2,#)
    => ActiveEdge is now: b(1,1)
    => Connected node #1 to node #2
    Adding new edge to node #2
    => node #2 --> x(8,#)

    (0)┬─ab──────(1)┬─cdefabx
    │ └─x
    ├─b───────(2)┬─cdefabx
    │ └─x
    ├─cdefabx
    ├─defabx
    ├─efabx
    └─fabx

    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{x}' at indices 8,8
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'x' not found
    Adding new edge to node #0
    => node #0 --> x(8,#)

    (0)┬─ab──────(1)┬─cdefabx
    │ └─x
    ├─b───────(2)┬─cdefabx
    │ └─x
    ├─cdefabx
    ├─defabx
    ├─efabx
    ├─fabx
    └─x

    === ITERATION 9 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{y}' at indices 9,9
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'y' not found
    Adding new edge to node #0
    => node #0 --> y(9,#)

    (0)┬─ab───────(1)┬─cdefabxy
    │ └─xy
    ├─b────────(2)┬─cdefabxy
    │ └─xy
    ├─cdefabxy
    ├─defabxy
    ├─efabxy
    ├─fabxy
    ├─xy
    └─y

    === ITERATION 10 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{b}' at indices 10,10
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'b' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─ab────────(1)┬─cdefabxyb
    │ └─xyb
    ├─b─────────(2)┬─cdefabxyb
    │ └─xyb
    ├─cdefabxyb
    ├─defabxyb
    ├─efabxyb
    ├─fabxyb
    ├─xyb
    └─yb

    === ITERATION 11 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'b{c}' at indices 10,11
    => ActiveNode: node #2
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #2 starting with 'c' found. Values adjusted to:
    => ActiveEdge is now: cdefabxybc(2,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 1

    (0)┬─ab─────────(1)┬─cdefabxybc
    │ └─xybc
    ├─b──────────(2)┬─cdefabxybc
    │ └─xybc
    ├─cdefabxybc
    ├─defabxybc
    ├─efabxybc
    ├─fabxybc
    ├─xybc
    └─ybc

    === ITERATION 12 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'bc{d}' at indices 10,12
    => ActiveNode: node #2
    => ActiveEdge: cdefabxybcd(2,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    The next character on the current edge is 'd' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─ab──────────(1)┬─cdefabxybcd
    │ └─xybcd
    ├─b───────────(2)┬─cdefabxybcd
    │ └─xybcd
    ├─cdefabxybcd
    ├─defabxybcd
    ├─efabxybcd
    ├─fabxybcd
    ├─xybcd
    └─ybcd

    === ITERATION 13 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'bcd{m}' at indices 10,13
    => ActiveNode: node #2
    => ActiveEdge: cdefabxybcdm(2,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 3
    Splitting edge cdefabxybcdm(2,#) at index 4 ('e')
    => Hierarchy is now: node #2 --> cd(2,3) --> node #3 --> efabxybcdm(4,#)
    => ActiveEdge is now: cd(2,3)
    Adding new edge to node #3
    => node #3 --> m(13,#)
    The linked node for active node node #2 is [null]

    (0)┬─ab───────────(1)┬─cdefabxybcdm
    │ └─xybcdm
    ├─b────────────(2)┬─cd─────(3)┬─efabxybcdm
    │ │ └─m
    │ └─xybcdm
    ├─cdefabxybcdm
    ├─defabxybcdm
    ├─efabxybcdm
    ├─fabxybcdm
    ├─xybcdm
    └─ybcdm

    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'cd{m}' at indices 11,13
    => ActiveNode: node #0
    => ActiveEdge: cdefabxybcdm(2,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    Splitting edge cdefabxybcdm(2,#) at index 4 ('e')
    => Hierarchy is now: node #0 --> cd(2,3) --> node #4 --> efabxybcdm(4,#)
    => ActiveEdge is now: cd(2,3)
    => Connected node #3 to node #4
    Adding new edge to node #4
    => node #4 --> m(13,#)

    (0)┬─ab──────────(1)┬─cdefabxybcdm
    │ └─xybcdm
    ├─b───────────(2)┬─cd─────(3)┬─efabxybcdm
    │ │ └─m
    │ └─xybcdm
    ├─cd──────────(4)┬─efabxybcdm
    │ └─m
    ├─defabxybcdm
    ├─efabxybcdm
    ├─fabxybcdm
    ├─xybcdm
    └─ybcdm

    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'd{m}' at indices 12,13
    => ActiveNode: node #0
    => ActiveEdge: defabxybcdm(3,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge defabxybcdm(3,#) at index 4 ('e')
    => Hierarchy is now: node #0 --> d(3,3) --> node #5 --> efabxybcdm(4,#)
    => ActiveEdge is now: d(3,3)
    => Connected node #4 to node #5
    Adding new edge to node #5
    => node #5 --> m(13,#)

    (0)┬─ab─────────(1)┬─cdefabxybcdm
    │ └─xybcdm
    ├─b──────────(2)┬─cd─────(3)┬─efabxybcdm
    │ │ └─m
    │ └─xybcdm
    ├─cd─────────(4)┬─efabxybcdm
    │ └─m
    ├─d──────────(5)┬─efabxybcdm
    │ └─m
    ├─efabxybcdm
    ├─fabxybcdm
    ├─xybcdm
    └─ybcdm

    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{m}' at indices 13,13
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'm' not found
    Adding new edge to node #0
    => node #0 --> m(13,#)

    (0)┬─ab─────────(1)┬─cdefabxybcdm
    │ └─xybcdm
    ├─b──────────(2)┬─cd─────(3)┬─efabxybcdm
    │ │ └─m
    │ └─xybcdm
    ├─cd─────────(4)┬─efabxybcdm
    │ └─m
    ├─d──────────(5)┬─efabxybcdm
    │ └─m
    ├─efabxybcdm
    ├─fabxybcdm
    ├─m
    ├─xybcdm
    └─ybcdm

    === ITERATION 14 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{n}' at indices 14,14
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'n' not found
    Adding new edge to node #0
    => node #0 --> n(14,#)

    (0)┬─ab──────────(1)┬─cdefabxybcdmn
    │ └─xybcdmn
    ├─b───────────(2)┬─cd──────(3)┬─efabxybcdmn
    │ │ └─mn
    │ └─xybcdmn
    ├─cd──────────(4)┬─efabxybcdmn
    │ └─mn
    ├─d───────────(5)┬─efabxybcdmn
    │ └─mn
    ├─efabxybcdmn
    ├─fabxybcdmn
    ├─mn
    ├─n
    ├─xybcdmn
    └─ybcdmn

    === ITERATION 15 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{a}' at indices 15,15
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    => ActiveEdge is now: ab(0,1)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─ab───────────(1)┬─cdefabxybcdmna
    │ └─xybcdmna
    ├─b────────────(2)┬─cd───────(3)┬─efabxybcdmna
    │ │ └─mna
    │ └─xybcdmna
    ├─cd───────────(4)┬─efabxybcdmna
    │ └─mna
    ├─d────────────(5)┬─efabxybcdmna
    │ └─mna
    ├─efabxybcdmna
    ├─fabxybcdmna
    ├─mna
    ├─na
    ├─xybcdmna
    └─ybcdmna

    === ITERATION 16 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'a{b}' at indices 15,16
    => ActiveNode: node #0
    => ActiveEdge: ab(0,1)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    The next character on the current edge is 'b' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─ab────────────(1)┬─cdefabxybcdmnab
    │ └─xybcdmnab
    ├─b─────────────(2)┬─cd────────(3)┬─efabxybcdmnab
    │ │ └─mnab
    │ └─xybcdmnab
    ├─cd────────────(4)┬─efabxybcdmnab
    │ └─mnab
    ├─d─────────────(5)┬─efabxybcdmnab
    │ └─mnab
    ├─efabxybcdmnab
    ├─fabxybcdmnab
    ├─mnab
    ├─nab
    ├─xybcdmnab
    └─ybcdmnab

    === ITERATION 17 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'ab{c}' at indices 15,17
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 2
    Existing edge for node #1 starting with 'c' found. Values adjusted to:
    => ActiveEdge is now: cdefabxybcdmnabc(2,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 2

    (0)┬─ab─────────────(1)┬─cdefabxybcdmnabc
    │ └─xybcdmnabc
    ├─b──────────────(2)┬─cd─────────(3)┬─efabxybcdmnabc
    │ │ └─mnabc
    │ └─xybcdmnabc
    ├─cd─────────────(4)┬─efabxybcdmnabc
    │ └─mnabc
    ├─d──────────────(5)┬─efabxybcdmnabc
    │ └─mnabc
    ├─efabxybcdmnabc
    ├─fabxybcdmnabc
    ├─mnabc
    ├─nabc
    ├─xybcdmnabc
    └─ybcdmnabc

    === ITERATION 18 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'abc{d}' at indices 15,18
    => ActiveNode: node #1
    => ActiveEdge: cdefabxybcdmnabcd(2,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 3
    The next character on the current edge is 'd' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─ab──────────────(1)┬─cdefabxybcdmnabcd
    │ └─xybcdmnabcd
    ├─b───────────────(2)┬─cd──────────(3)┬─efabxybcdmnabcd
    │ │ └─mnabcd
    │ └─xybcdmnabcd
    ├─cd──────────────(4)┬─efabxybcdmnabcd
    │ └─mnabcd
    ├─d───────────────(5)┬─efabxybcdmnabcd
    │ └─mnabcd
    ├─efabxybcdmnabcd
    ├─fabxybcdmnabcd
    ├─mnabcd
    ├─nabcd
    ├─xybcdmnabcd
    └─ybcdmnabcd

    === ITERATION 19 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'abcd{e}' at indices 15,19
    => ActiveNode: node #1
    => ActiveEdge: cdefabxybcdmnabcde(2,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 4
    The next character on the current edge is 'e' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 3

    (0)┬─ab───────────────(1)┬─cdefabxybcdmnabcde
    │ └─xybcdmnabcde
    ├─b────────────────(2)┬─cd───────────(3)┬─efabxybcdmnabcde
    │ │ └─mnabcde
    │ └─xybcdmnabcde
    ├─cd───────────────(4)┬─efabxybcdmnabcde
    │ └─mnabcde
    ├─d────────────────(5)┬─efabxybcdmnabcde
    │ └─mnabcde
    ├─efabxybcdmnabcde
    ├─fabxybcdmnabcde
    ├─mnabcde
    ├─nabcde
    ├─xybcdmnabcde
    └─ybcdmnabcde

    === ITERATION 20 ===
    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'abcde{x}' at indices 15,20
    => ActiveNode: node #1
    => ActiveEdge: cdefabxybcdmnabcdex(2,#)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 5
    Splitting edge cdefabxybcdmnabcdex(2,#) at index 5 ('f')
    => Hierarchy is now: node #1 --> cde(2,4) --> node #6 --> fabxybcdmnabcdex(5,#)
    => ActiveEdge is now: cde(2,4)
    Adding new edge to node #6
    => node #6 --> x(20,#)
    The linked node for active node node #1 is node #2
    => ActiveNode is now: node #2
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─ab────────────────(1)┬─cde───────────(6)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─xybcdmnabcdex
    ├─b─────────────────(2)┬─cd────────────(3)┬─efabxybcdmnabcdex
    │ │ └─mnabcdex
    │ └─xybcdmnabcdex
    ├─cd────────────────(4)┬─efabxybcdmnabcdex
    │ └─mnabcdex
    ├─d─────────────────(5)┬─efabxybcdmnabcdex
    │ └─mnabcdex
    ├─efabxybcdmnabcdex
    ├─fabxybcdmnabcdex
    ├─mnabcdex
    ├─nabcdex
    ├─xybcdmnabcdex
    └─ybcdmnabcdex

    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'bcde{x}' at indices 16,20
    => ActiveNode: node #3
    => ActiveEdge: efabxybcdmnabcdex(4,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 4
    Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f')
    => Hierarchy is now: node #3 --> e(4,4) --> node #7 --> fabxybcdmnabcdex(5,#)
    => ActiveEdge is now: e(4,4)
    => Connected node #6 to node #7
    Adding new edge to node #7
    => node #7 --> x(20,#)
    The linked node for active node node #3 is node #4
    => ActiveNode is now: node #4

    (0)┬─ab────────────────(1)┬─cde───────────(6)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─xybcdmnabcdex
    ├─b─────────────────(2)┬─cd────────────(3)┬─e────────(7)┬─fabxybcdmnabcdex
    │ │ │ └─x
    │ │ └─mnabcdex
    │ └─xybcdmnabcdex
    ├─cd────────────────(4)┬─efabxybcdmnabcdex
    │ └─mnabcdex
    ├─d─────────────────(5)┬─efabxybcdmnabcdex
    │ └─mnabcdex
    ├─efabxybcdmnabcdex
    ├─fabxybcdmnabcdex
    ├─mnabcdex
    ├─nabcdex
    ├─xybcdmnabcdex
    └─ybcdmnabcdex

    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'cde{x}' at indices 17,20
    => ActiveNode: node #4
    => ActiveEdge: efabxybcdmnabcdex(4,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 3
    Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f')
    => Hierarchy is now: node #4 --> e(4,4) --> node #8 --> fabxybcdmnabcdex(5,#)
    => ActiveEdge is now: e(4,4)
    => Connected node #7 to node #8
    Adding new edge to node #8
    => node #8 --> x(20,#)
    The linked node for active node node #4 is node #5
    => ActiveNode is now: node #5

    (0)┬─ab────────────────(1)┬─cde───────────(6)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─xybcdmnabcdex
    ├─b─────────────────(2)┬─cd────────────(3)┬─e────────(7)┬─fabxybcdmnabcdex
    │ │ │ └─x
    │ │ └─mnabcdex
    │ └─xybcdmnabcdex
    ├─cd────────────────(4)┬─e────────(8)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─mnabcdex
    ├─d─────────────────(5)┬─efabxybcdmnabcdex
    │ └─mnabcdex
    ├─efabxybcdmnabcdex
    ├─fabxybcdmnabcdex
    ├─mnabcdex
    ├─nabcdex
    ├─xybcdmnabcdex
    └─ybcdmnabcdex

    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'de{x}' at indices 18,20
    => ActiveNode: node #5
    => ActiveEdge: efabxybcdmnabcdex(4,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f')
    => Hierarchy is now: node #5 --> e(4,4) --> node #9 --> fabxybcdmnabcdex(5,#)
    => ActiveEdge is now: e(4,4)
    => Connected node #8 to node #9
    Adding new edge to node #9
    => node #9 --> x(20,#)
    The linked node for active node node #5 is [null]

    (00)┬─ab────────────────(01)┬─cde───────────(06)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─xybcdmnabcdex
    ├─b─────────────────(02)┬─cd────────────(03)┬─e────────(07)┬─fabxybcdmnabcdex
    │ │ │ └─x
    │ │ └─mnabcdex
    │ └─xybcdmnabcdex
    ├─cd────────────────(04)┬─e────────(08)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─mnabcdex
    ├─d─────────────────(05)┬─e────────(09)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─mnabcdex
    ├─efabxybcdmnabcdex
    ├─fabxybcdmnabcdex
    ├─mnabcdex
    ├─nabcdex
    ├─xybcdmnabcdex
    └─ybcdmnabcdex

    The next suffix of 'abcdefabxybcdmnabcdex' to add is 'e{x}' at indices 19,20
    => ActiveNode: node #0
    => ActiveEdge: efabxybcdmnabcdex(4,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f')
    => Hierarchy is now: node #0 --> e(4,4) --> node #10 --> fabxybcdmnabcdex(5,#)
    => ActiveEdge is now: e(4,4)
    => Connected node #9 to node #10
    Adding new edge to node #10
    => node #10 --> x(20,#)

    (00)┬─ab───────────────(01)┬─cde───────────(06)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─xybcdmnabcdex
    ├─b────────────────(02)┬─cd────────────(03)┬─e────────(07)┬─fabxybcdmnabcdex
    │ │ │ └─x
    │ │ └─mnabcdex
    │ └─xybcdmnabcdex
    ├─cd───────────────(04)┬─e────────(08)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─mnabcdex
    ├─d────────────────(05)┬─e────────(09)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─mnabcdex
    ├─e────────────────(10)┬─fabxybcdmnabcdex
    │ └─x
    ├─fabxybcdmnabcdex
    ├─mnabcdex
    ├─nabcdex
    ├─xybcdmnabcdex
    └─ybcdmnabcdex

    The next suffix of 'abcdefabxybcdmnabcdex' to add is '{x}' at indices 20,20
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'x' found. Values adjusted to:
    => ActiveEdge is now: xybcdmnabcdex(8,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (00)┬─ab───────────────(01)┬─cde───────────(06)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─xybcdmnabcdex
    ├─b────────────────(02)┬─cd────────────(03)┬─e────────(07)┬─fabxybcdmnabcdex
    │ │ │ └─x
    │ │ └─mnabcdex
    │ └─xybcdmnabcdex
    ├─cd───────────────(04)┬─e────────(08)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─mnabcdex
    ├─d────────────────(05)┬─e────────(09)┬─fabxybcdmnabcdex
    │ │ └─x
    │ └─mnabcdex
    ├─e────────────────(10)┬─fabxybcdmnabcdex
    │ └─x
    ├─fabxybcdmnabcdex
    ├─mnabcdex
    ├─nabcdex
    ├─xybcdmnabcdex
    └─ybcdmnabcdex
    138 changes: 138 additions & 0 deletions 4.output.abcadak.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,138 @@
    === ITERATION 0 ===
    The next suffix of 'abcadak' to add is '{a}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' not found
    Adding new edge to node #0
    => node #0 --> a(0,#)

    (0)──a

    === ITERATION 1 ===
    The next suffix of 'abcadak' to add is '{b}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'b' not found
    Adding new edge to node #0
    => node #0 --> b(1,#)

    (0)┬─ab
    └─b

    === ITERATION 2 ===
    The next suffix of 'abcadak' to add is '{c}' at indices 2,2
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'c' not found
    Adding new edge to node #0
    => node #0 --> c(2,#)

    (0)┬─abc
    ├─bc
    └─c

    === ITERATION 3 ===
    The next suffix of 'abcadak' to add is '{a}' at indices 3,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    => ActiveEdge is now: abca(0,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─abca
    ├─bca
    └─ca

    === ITERATION 4 ===
    The next suffix of 'abcadak' to add is 'a{d}' at indices 3,4
    => ActiveNode: node #0
    => ActiveEdge: abcad(0,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge abcad(0,#) at index 1 ('b')
    => Hierarchy is now: node #0 --> a(0,0) --> node #1 --> bcad(1,#)
    => ActiveEdge is now: a(0,0)
    Adding new edge to node #1
    => node #1 --> d(4,#)

    (0)┬─a────(1)┬─bcad
    │ └─d
    ├─bcad
    └─cad

    The next suffix of 'abcadak' to add is '{d}' at indices 4,4
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'd' not found
    Adding new edge to node #0
    => node #0 --> d(4,#)

    (0)┬─a────(1)┬─bcad
    │ └─d
    ├─bcad
    ├─cad
    └─d

    === ITERATION 5 ===
    The next suffix of 'abcadak' to add is '{a}' at indices 5,5
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─a─────(1)┬─bcada
    │ └─da
    ├─bcada
    ├─cada
    └─da

    === ITERATION 6 ===
    The next suffix of 'abcadak' to add is 'a{k}' at indices 5,6
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with 'k' not found
    Adding new edge to node #1
    => node #1 --> k(6,#)
    The linked node for active node node #1 is [null]

    (0)┬─a──────(1)┬─bcadak
    │ ├─dak
    │ └─k
    ├─bcadak
    ├─cadak
    └─dak

    The next suffix of 'abcadak' to add is '{k}' at indices 6,6
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'k' not found
    Adding new edge to node #0
    => node #0 --> k(6,#)

    (0)┬─a──────(1)┬─bcadak
    │ ├─dak
    │ └─k
    ├─bcadak
    ├─cadak
    ├─dak
    └─k
    409 changes: 409 additions & 0 deletions 5.output.dedododeeodo.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,409 @@
    === ITERATION 0 ===
    The next suffix of 'dedododeeodo$' to add is '{d}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'd' not found
    Adding new edge to node #0
    => node #0 --> d(0,#)

    (0)──d

    === ITERATION 1 ===
    The next suffix of 'dedododeeodo$' to add is '{e}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'e' not found
    Adding new edge to node #0
    => node #0 --> e(1,#)

    (0)┬─de
    └─e

    === ITERATION 2 ===
    The next suffix of 'dedododeeodo$' to add is '{d}' at indices 2,2
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'd' found. Values adjusted to:
    => ActiveEdge is now: ded(0,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─ded
    └─ed

    === ITERATION 3 ===
    The next suffix of 'dedododeeodo$' to add is 'd{o}' at indices 2,3
    => ActiveNode: node #0
    => ActiveEdge: dedo(0,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge dedo(0,#) at index 1 ('e')
    => Hierarchy is now: node #0 --> d(0,0) --> node #1 --> edo(1,#)
    => ActiveEdge is now: d(0,0)
    Adding new edge to node #1
    => node #1 --> o(3,#)

    (0)┬─d───(1)┬─edo
    │ └─o
    └─edo

    The next suffix of 'dedododeeodo$' to add is '{o}' at indices 3,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' not found
    Adding new edge to node #0
    => node #0 --> o(3,#)

    (0)┬─d───(1)┬─edo
    │ └─o
    ├─edo
    └─o

    === ITERATION 4 ===
    The next suffix of 'dedododeeodo$' to add is '{d}' at indices 4,4
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'd' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─d────(1)┬─edod
    │ └─od
    ├─edod
    └─od

    === ITERATION 5 ===
    The next suffix of 'dedododeeodo$' to add is 'd{o}' at indices 4,5
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with 'o' found. Values adjusted to:
    => ActiveEdge is now: odo(3,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 1

    (0)┬─d─────(1)┬─edodo
    │ └─odo
    ├─edodo
    └─odo

    === ITERATION 6 ===
    The next suffix of 'dedododeeodo$' to add is 'do{d}' at indices 4,6
    => ActiveNode: node #1
    => ActiveEdge: odod(3,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    The next character on the current edge is 'd' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─d──────(1)┬─edodod
    │ └─odod
    ├─edodod
    └─odod

    === ITERATION 7 ===
    The next suffix of 'dedododeeodo$' to add is 'dod{e}' at indices 4,7
    => ActiveNode: node #1
    => ActiveEdge: odode(3,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 3
    Splitting edge odode(3,#) at index 5 ('o')
    => Hierarchy is now: node #1 --> od(3,4) --> node #2 --> ode(5,#)
    => ActiveEdge is now: od(3,4)
    Adding new edge to node #2
    => node #2 --> e(7,#)
    The linked node for active node node #1 is [null]

    (0)┬─d───────(1)┬─edodode
    │ └─od──────(2)┬─e
    │ └─ode
    ├─edodode
    └─odode

    The next suffix of 'dedododeeodo$' to add is 'od{e}' at indices 5,7
    => ActiveNode: node #0
    => ActiveEdge: odode(3,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    Splitting edge odode(3,#) at index 5 ('o')
    => Hierarchy is now: node #0 --> od(3,4) --> node #3 --> ode(5,#)
    => ActiveEdge is now: od(3,4)
    => Connected node #2 to node #3
    Adding new edge to node #3
    => node #3 --> e(7,#)
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─d───────(1)┬─edodode
    │ └─od──────(2)┬─e
    │ └─ode
    ├─edodode
    └─od──────(3)┬─e
    └─ode

    The next suffix of 'dedododeeodo$' to add is 'd{e}' at indices 6,7
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with 'e' found. Values adjusted to:
    => ActiveEdge is now: edodode(1,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 1

    (0)┬─d───────(1)┬─edodode
    │ └─od──────(2)┬─e
    │ └─ode
    ├─edodode
    └─od──────(3)┬─e
    └─ode

    === ITERATION 8 ===
    The next suffix of 'dedododeeodo$' to add is 'de{e}' at indices 6,8
    => ActiveNode: node #1
    => ActiveEdge: edododee(1,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    Splitting edge edododee(1,#) at index 2 ('d')
    => Hierarchy is now: node #1 --> e(1,1) --> node #4 --> dododee(2,#)
    => ActiveEdge is now: e(1,1)
    Adding new edge to node #4
    => node #4 --> e(8,#)
    The linked node for active node node #1 is [null]

    (0)┬─d────────(1)┬─e──(4)┬─dododee
    │ │ └─e
    │ └─od─(2)┬─ee
    │ └─odee
    ├─edododee
    └─od───────(3)┬─ee
    └─odee

    The next suffix of 'dedododeeodo$' to add is 'e{e}' at indices 7,8
    => ActiveNode: node #0
    => ActiveEdge: edododee(1,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge edododee(1,#) at index 2 ('d')
    => Hierarchy is now: node #0 --> e(1,1) --> node #5 --> dododee(2,#)
    => ActiveEdge is now: e(1,1)
    => Connected node #4 to node #5
    Adding new edge to node #5
    => node #5 --> e(8,#)

    (0)┬─d──(1)┬─e──(4)┬─dododee
    │ │ └─e
    │ └─od─(2)┬─ee
    │ └─odee
    ├─e──(5)┬─dododee
    │ └─e
    └─od─(3)┬─ee
    └─odee

    The next suffix of 'dedododeeodo$' to add is '{e}' at indices 8,8
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'e' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─d──(1)┬─e──(4)┬─dododee
    │ │ └─e
    │ └─od─(2)┬─ee
    │ └─odee
    ├─e──(5)┬─dododee
    │ └─e
    └─od─(3)┬─ee
    └─odee

    === ITERATION 9 ===
    The next suffix of 'dedododeeodo$' to add is 'e{o}' at indices 8,9
    => ActiveNode: node #5
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #5 starting with 'o' not found
    Adding new edge to node #5
    => node #5 --> o(9,#)
    The linked node for active node node #5 is [null]

    (0)┬─d──(1)┬─e──(4)┬─dododeeo
    │ │ └─eo
    │ └─od─(2)┬─eeo
    │ └─odeeo
    ├─e──(5)┬─dododeeo
    │ ├─eo
    │ └─o
    └─od─(3)┬─eeo
    └─odeeo

    The next suffix of 'dedododeeodo$' to add is '{o}' at indices 9,9
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' found. Values adjusted to:
    => ActiveEdge is now: od(3,4)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─d──(1)┬─e──(4)┬─dododeeo
    │ │ └─eo
    │ └─od─(2)┬─eeo
    │ └─odeeo
    ├─e──(5)┬─dododeeo
    │ ├─eo
    │ └─o
    └─od─(3)┬─eeo
    └─odeeo

    === ITERATION 10 ===
    The next suffix of 'dedododeeodo$' to add is 'o{d}' at indices 9,10
    => ActiveNode: node #0
    => ActiveEdge: od(3,4)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    The next character on the current edge is 'd' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─d──(1)┬─e──(4)┬─dododeeod
    │ │ └─eod
    │ └─od─(2)┬─eeod
    │ └─odeeod
    ├─e──(5)┬─dododeeod
    │ ├─eod
    │ └─od
    └─od─(3)┬─eeod
    └─odeeod

    === ITERATION 11 ===
    The next suffix of 'dedododeeodo$' to add is 'od{o}' at indices 9,11
    => ActiveNode: node #3
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 2
    Existing edge for node #3 starting with 'o' found. Values adjusted to:
    => ActiveEdge is now: odeeodo(5,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 2

    (0)┬─d──(1)┬─e──(4)┬─dododeeodo
    │ │ └─eodo
    │ └─od─(2)┬─eeodo
    │ └─odeeodo
    ├─e──(5)┬─dododeeodo
    │ ├─eodo
    │ └─odo
    └─od─(3)┬─eeodo
    └─odeeodo

    === ITERATION 12 ===
    The next suffix of 'dedododeeodo$' to add is 'odo{$}' at indices 9,12
    => ActiveNode: node #3
    => ActiveEdge: odeeodo$(5,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 3
    Splitting edge odeeodo$(5,#) at index 6 ('d')
    => Hierarchy is now: node #3 --> o(5,5) --> node #6 --> deeodo$(6,#)
    => ActiveEdge is now: o(5,5)
    Adding new edge to node #6
    => node #6 --> $(12,#)
    The linked node for active node node #3 is [null]

    (0)┬─d──(1)┬─e──(4)┬─dododeeodo$
    │ │ └─eodo$
    │ └─od─(2)┬─eeodo$
    │ └─odeeodo$
    ├─e──(5)┬─dododeeodo$
    │ ├─eodo$
    │ └─odo$
    └─od─(3)┬─eeodo$
    └─o──────(6)┬─$
    └─deeodo$

    The next suffix of 'dedododeeodo$' to add is 'do{$}' at indices 10,12
    => ActiveNode: node #0
    => ActiveEdge: od(3,4)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    Splitting edge od(3,4) at index 4 ('d')
    => Hierarchy is now: node #0 --> o(3,3) --> node #7 --> d(4,4)
    => ActiveEdge is now: o(3,3)
    => Connected node #6 to node #7
    Adding new edge to node #7
    => node #7 --> $(12,#)

    (0)┬─d─(1)┬─e──(4)┬─dododeeodo$
    │ │ └─eodo$
    │ └─od─(2)┬─eeodo$
    │ └─odeeodo$
    ├─e─(5)┬─dododeeodo$
    │ ├─eodo$
    │ └─odo$
    └─o─(7)┬─$
    └─d─(3)┬─eeodo$
    └─o──────(6)┬─$
    └─deeodo$

    The next suffix of 'dedododeeodo$' to add is 'o{$}' at indices 11,12
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #0 starting with '$' not found
    Adding new edge to node #0
    => node #0 --> $(12,#)

    (0)┬─$
    ├─d─(1)┬─e──(4)┬─dododeeodo$
    │ │ └─eodo$
    │ └─od─(2)┬─eeodo$
    │ └─odeeodo$
    ├─e─(5)┬─dododeeodo$
    │ ├─eodo$
    │ └─odo$
    └─o─(7)┬─$
    └─d─(3)┬─eeodo$
    └─o──────(6)┬─$
    └─deeodo$

    The next suffix of 'dedododeeodo$' to add is '{$}' at indices 12,12
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with '$' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─$
    ├─d─(1)┬─e──(4)┬─dododeeodo$
    │ │ └─eodo$
    │ └─od─(2)┬─eeodo$
    │ └─odeeodo$
    ├─e─(5)┬─dododeeodo$
    │ ├─eodo$
    │ └─odo$
    └─o─(7)┬─$
    └─d─(3)┬─eeodo$
    └─o──────(6)┬─$
    └─deeodo$
    320 changes: 320 additions & 0 deletions 6.output.ooooooooo.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,320 @@
    === ITERATION 0 ===
    The next suffix of 'okoooooooo$' to add is '{o}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' not found
    Adding new edge to node #0
    => node #0 --> o(0,#)

    (0)──o

    === ITERATION 1 ===
    The next suffix of 'okoooooooo$' to add is '{k}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'k' not found
    Adding new edge to node #0
    => node #0 --> k(1,#)

    (0)┬─k
    └─ok

    === ITERATION 2 ===
    The next suffix of 'okoooooooo$' to add is '{o}' at indices 2,2
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' found. Values adjusted to:
    => ActiveEdge is now: oko(0,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─ko
    └─oko

    === ITERATION 3 ===
    The next suffix of 'okoooooooo$' to add is 'o{o}' at indices 2,3
    => ActiveNode: node #0
    => ActiveEdge: okoo(0,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge okoo(0,#) at index 1 ('k')
    => Hierarchy is now: node #0 --> o(0,0) --> node #1 --> koo(1,#)
    => ActiveEdge is now: o(0,0)
    Adding new edge to node #1
    => node #1 --> o(3,#)

    (0)┬─koo
    └─o───(1)┬─koo
    └─o

    The next suffix of 'okoooooooo$' to add is '{o}' at indices 3,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'o' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─koo
    └─o───(1)┬─koo
    └─o

    === ITERATION 4 ===
    The next suffix of 'okoooooooo$' to add is 'o{o}' at indices 3,4
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with 'o' found. Values adjusted to:
    => ActiveEdge is now: oo(3,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 1

    (0)┬─kooo
    └─o────(1)┬─kooo
    └─oo

    === ITERATION 5 ===
    The next suffix of 'okoooooooo$' to add is 'oo{o}' at indices 3,5
    => ActiveNode: node #1
    => ActiveEdge: ooo(3,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─koooo
    └─o─────(1)┬─koooo
    └─ooo

    === ITERATION 6 ===
    The next suffix of 'okoooooooo$' to add is 'ooo{o}' at indices 3,6
    => ActiveNode: node #1
    => ActiveEdge: oooo(3,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 3
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 3

    (0)┬─kooooo
    └─o──────(1)┬─kooooo
    └─oooo

    === ITERATION 7 ===
    The next suffix of 'okoooooooo$' to add is 'oooo{o}' at indices 3,7
    => ActiveNode: node #1
    => ActiveEdge: ooooo(3,#)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 4
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 4

    (0)┬─koooooo
    └─o───────(1)┬─koooooo
    └─ooooo

    === ITERATION 8 ===
    The next suffix of 'okoooooooo$' to add is 'ooooo{o}' at indices 3,8
    => ActiveNode: node #1
    => ActiveEdge: oooooo(3,#)
    => DistanceIntoActiveEdge: 4
    => UnresolvedSuffixes: 5
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 5

    (0)┬─kooooooo
    └─o────────(1)┬─kooooooo
    └─oooooo

    === ITERATION 9 ===
    The next suffix of 'okoooooooo$' to add is 'oooooo{o}' at indices 3,9
    => ActiveNode: node #1
    => ActiveEdge: ooooooo(3,#)
    => DistanceIntoActiveEdge: 5
    => UnresolvedSuffixes: 6
    The next character on the current edge is 'o' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 6

    (0)┬─koooooooo
    └─o─────────(1)┬─koooooooo
    └─ooooooo

    === ITERATION 10 ===
    The next suffix of 'okoooooooo$' to add is 'ooooooo{$}' at indices 3,10
    => ActiveNode: node #1
    => ActiveEdge: ooooooo$(3,#)
    => DistanceIntoActiveEdge: 6
    => UnresolvedSuffixes: 7
    Splitting edge ooooooo$(3,#) at index 9 ('o')
    => Hierarchy is now: node #1 --> oooooo(3,8) --> node #2 --> o$(9,#)
    => ActiveEdge is now: oooooo(3,8)
    Adding new edge to node #2
    => node #2 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─oooooo─────(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'oooooo{$}' at indices 4,10
    => ActiveNode: node #1
    => ActiveEdge: oooooo(3,8)
    => DistanceIntoActiveEdge: 5
    => UnresolvedSuffixes: 6
    Splitting edge oooooo(3,8) at index 8 ('o')
    => Hierarchy is now: node #1 --> ooooo(3,7) --> node #3 --> o(8,8)
    => ActiveEdge is now: ooooo(3,7)
    => Connected node #2 to node #3
    Adding new edge to node #3
    => node #3 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─ooooo──────(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'ooooo{$}' at indices 5,10
    => ActiveNode: node #1
    => ActiveEdge: ooooo(3,7)
    => DistanceIntoActiveEdge: 4
    => UnresolvedSuffixes: 5
    Splitting edge ooooo(3,7) at index 7 ('o')
    => Hierarchy is now: node #1 --> oooo(3,6) --> node #4 --> o(7,7)
    => ActiveEdge is now: oooo(3,6)
    => Connected node #3 to node #4
    Adding new edge to node #4
    => node #4 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─oooo───────(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'oooo{$}' at indices 6,10
    => ActiveNode: node #1
    => ActiveEdge: oooo(3,6)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 4
    Splitting edge oooo(3,6) at index 6 ('o')
    => Hierarchy is now: node #1 --> ooo(3,5) --> node #5 --> o(6,6)
    => ActiveEdge is now: ooo(3,5)
    => Connected node #4 to node #5
    Adding new edge to node #5
    => node #5 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─ooo────────(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'ooo{$}' at indices 7,10
    => ActiveNode: node #1
    => ActiveEdge: ooo(3,5)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 3
    Splitting edge ooo(3,5) at index 5 ('o')
    => Hierarchy is now: node #1 --> oo(3,4) --> node #6 --> o(5,5)
    => ActiveEdge is now: oo(3,4)
    => Connected node #5 to node #6
    Adding new edge to node #6
    => node #6 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─oo─────────(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'oo{$}' at indices 8,10
    => ActiveNode: node #1
    => ActiveEdge: oo(3,4)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    Splitting edge oo(3,4) at index 4 ('o')
    => Hierarchy is now: node #1 --> o(3,3) --> node #7 --> o(4,4)
    => ActiveEdge is now: o(3,3)
    => Connected node #6 to node #7
    Adding new edge to node #7
    => node #7 --> $(10,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─koooooooo$
    └─o──────────(1)┬─koooooooo$
    └─o──────────(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is 'o{$}' at indices 9,10
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #1 starting with '$' not found
    Adding new edge to node #1
    => node #1 --> $(10,#)
    The linked node for active node node #1 is [null]

    (0)┬─koooooooo$
    └─o──────────(1)┬─$
    ├─koooooooo$
    └─o──────────(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$

    The next suffix of 'okoooooooo$' to add is '{$}' at indices 10,10
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with '$' not found
    Adding new edge to node #0
    => node #0 --> $(10,#)

    (0)┬─$
    ├─koooooooo$
    └─o──────────(1)┬─$
    ├─koooooooo$
    └─o──────────(7)┬─$
    └─o─(6)┬─$
    └─o─(5)┬─$
    └─o─(4)┬─$
    └─o─(3)┬─$
    └─o─(2)┬─$
    └─o$
    339 changes: 339 additions & 0 deletions 7.output.mississippi.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,339 @@
    === ITERATION 0 ===
    The next suffix of 'mississippi$' to add is '{m}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'm' not found
    Adding new edge to node #0
    => node #0 --> m(0,#)

    (0)──m

    === ITERATION 1 ===
    The next suffix of 'mississippi$' to add is '{i}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'i' not found
    Adding new edge to node #0
    => node #0 --> i(1,#)

    (0)┬─i
    └─mi

    === ITERATION 2 ===
    The next suffix of 'mississippi$' to add is '{s}' at indices 2,2
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 's' not found
    Adding new edge to node #0
    => node #0 --> s(2,#)

    (0)┬─is
    ├─mis
    └─s

    === ITERATION 3 ===
    The next suffix of 'mississippi$' to add is '{s}' at indices 3,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 's' found. Values adjusted to:
    => ActiveEdge is now: ss(2,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─iss
    ├─miss
    └─ss

    === ITERATION 4 ===
    The next suffix of 'mississippi$' to add is 's{i}' at indices 3,4
    => ActiveNode: node #0
    => ActiveEdge: ssi(2,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge ssi(2,#) at index 3 ('s')
    => Hierarchy is now: node #0 --> s(2,2) --> node #1 --> si(3,#)
    => ActiveEdge is now: s(2,2)
    Adding new edge to node #1
    => node #1 --> i(4,#)

    (0)┬─issi
    ├─missi
    └─s─────(1)┬─i
    └─si

    The next suffix of 'mississippi$' to add is '{i}' at indices 4,4
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'i' found. Values adjusted to:
    => ActiveEdge is now: issi(1,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─issi
    ├─missi
    └─s─────(1)┬─i
    └─si

    === ITERATION 5 ===
    The next suffix of 'mississippi$' to add is 'i{s}' at indices 4,5
    => ActiveNode: node #0
    => ActiveEdge: issis(1,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    The next character on the current edge is 's' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─issis
    ├─missis
    └─s──────(1)┬─is
    └─sis

    === ITERATION 6 ===
    The next suffix of 'mississippi$' to add is 'is{s}' at indices 4,6
    => ActiveNode: node #0
    => ActiveEdge: ississ(1,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    The next character on the current edge is 's' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 3

    (0)┬─ississ
    ├─mississ
    └─s───────(1)┬─iss
    └─siss

    === ITERATION 7 ===
    The next suffix of 'mississippi$' to add is 'iss{i}' at indices 4,7
    => ActiveNode: node #0
    => ActiveEdge: ississi(1,#)
    => DistanceIntoActiveEdge: 3
    => UnresolvedSuffixes: 3
    The next character on the current edge is 'i' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 4

    (0)┬─ississi
    ├─mississi
    └─s────────(1)┬─issi
    └─sissi

    === ITERATION 8 ===
    The next suffix of 'mississippi$' to add is 'issi{p}' at indices 4,8
    => ActiveNode: node #0
    => ActiveEdge: ississip(1,#)
    => DistanceIntoActiveEdge: 4
    => UnresolvedSuffixes: 4
    Splitting edge ississip(1,#) at index 5 ('s')
    => Hierarchy is now: node #0 --> issi(1,4) --> node #2 --> ssip(5,#)
    => ActiveEdge is now: issi(1,4)
    Adding new edge to node #2
    => node #2 --> p(8,#)
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─issi──────(2)┬─p
    │ └─ssip
    ├─mississip
    └─s─────────(1)┬─issip
    └─sissip

    The next suffix of 'mississippi$' to add is 'ssi{p}' at indices 5,8
    => ActiveNode: node #1
    => ActiveEdge: sissip(3,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 3
    Splitting edge sissip(3,#) at index 5 ('s')
    => Hierarchy is now: node #1 --> si(3,4) --> node #3 --> ssip(5,#)
    => ActiveEdge is now: si(3,4)
    => Connected node #2 to node #3
    Adding new edge to node #3
    => node #3 --> p(8,#)
    The linked node for active node node #1 is [null]
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─issi──────(2)┬─p
    │ └─ssip
    ├─mississip
    └─s─────────(1)┬─issip
    └─si────(3)┬─p
    └─ssip

    The next suffix of 'mississippi$' to add is 'si{p}' at indices 6,8
    => ActiveNode: node #1
    => ActiveEdge: issip(4,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    Splitting edge issip(4,#) at index 5 ('s')
    => Hierarchy is now: node #1 --> i(4,4) --> node #4 --> ssip(5,#)
    => ActiveEdge is now: i(4,4)
    => Connected node #3 to node #4
    Adding new edge to node #4
    => node #4 --> p(8,#)
    The linked node for active node node #1 is [null]

    (0)┬─issi──────(2)┬─p
    │ └─ssip
    ├─mississip
    └─s─────────(1)┬─i──(4)┬─p
    │ └─ssip
    └─si─(3)┬─p
    └─ssip

    The next suffix of 'mississippi$' to add is 'i{p}' at indices 7,8
    => ActiveNode: node #0
    => ActiveEdge: issi(1,4)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge issi(1,4) at index 2 ('s')
    => Hierarchy is now: node #0 --> i(1,1) --> node #5 --> ssi(2,4)
    => ActiveEdge is now: i(1,1)
    => Connected node #4 to node #5
    Adding new edge to node #5
    => node #5 --> p(8,#)

    (0)┬─i─────────(5)┬─p
    │ └─ssi─(2)┬─p
    │ └─ssip
    ├─mississip
    └─s─────────(1)┬─i──(4)┬─p
    │ └─ssip
    └─si─(3)┬─p
    └─ssip

    The next suffix of 'mississippi$' to add is '{p}' at indices 8,8
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'p' not found
    Adding new edge to node #0
    => node #0 --> p(8,#)

    (0)┬─i─────────(5)┬─p
    │ └─ssi─(2)┬─p
    │ └─ssip
    ├─mississip
    ├─p
    └─s─────────(1)┬─i──(4)┬─p
    │ └─ssip
    └─si─(3)┬─p
    └─ssip

    === ITERATION 9 ===
    The next suffix of 'mississippi$' to add is '{p}' at indices 9,9
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'p' found. Values adjusted to:
    => ActiveEdge is now: pp(8,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─i──────────(5)┬─pp
    │ └─ssi─(2)┬─pp
    │ └─ssipp
    ├─mississipp
    ├─pp
    └─s──────────(1)┬─i──(4)┬─pp
    │ └─ssipp
    └─si─(3)┬─pp
    └─ssipp

    === ITERATION 10 ===
    The next suffix of 'mississippi$' to add is 'p{i}' at indices 9,10
    => ActiveNode: node #0
    => ActiveEdge: ppi(8,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge ppi(8,#) at index 9 ('p')
    => Hierarchy is now: node #0 --> p(8,8) --> node #6 --> pi(9,#)
    => ActiveEdge is now: p(8,8)
    Adding new edge to node #6
    => node #6 --> i(10,#)

    (0)┬─i───────────(5)┬─ppi
    │ └─ssi─(2)┬─ppi
    │ └─ssippi
    ├─mississippi
    ├─p───────────(6)┬─i
    │ └─pi
    └─s───────────(1)┬─i──(4)┬─ppi
    │ └─ssippi
    └─si─(3)┬─ppi
    └─ssippi

    The next suffix of 'mississippi$' to add is '{i}' at indices 10,10
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'i' found. Values adjusted to:
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary
    => ActiveEdge is now:
    => DistanceIntoActiveEdge is now: 0
    => UnresolvedSuffixes is now: 0

    (0)┬─i───────────(5)┬─ppi
    │ └─ssi─(2)┬─ppi
    │ └─ssippi
    ├─mississippi
    ├─p───────────(6)┬─i
    │ └─pi
    └─s───────────(1)┬─i──(4)┬─ppi
    │ └─ssippi
    └─si─(3)┬─ppi
    └─ssippi

    === ITERATION 11 ===
    The next suffix of 'mississippi$' to add is 'i{$}' at indices 10,11
    => ActiveNode: node #5
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 1
    Existing edge for node #5 starting with '$' not found
    Adding new edge to node #5
    => node #5 --> $(11,#)
    The linked node for active node node #5 is [null]

    (0)┬─i────────────(5)┬─$
    │ ├─ppi$
    │ └─ssi──(2)┬─ppi$
    │ └─ssippi$
    ├─mississippi$
    ├─p────────────(6)┬─i$
    │ └─pi$
    └─s────────────(1)┬─i──(4)┬─ppi$
    │ └─ssippi$
    └─si─(3)┬─ppi$
    └─ssippi$

    The next suffix of 'mississippi$' to add is '{$}' at indices 11,11
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with '$' not found
    Adding new edge to node #0
    => node #0 --> $(11,#)

    (0)┬─$
    ├─i────────────(5)┬─$
    │ ├─ppi$
    │ └─ssi──(2)┬─ppi$
    │ └─ssippi$
    ├─mississippi$
    ├─p────────────(6)┬─i$
    │ └─pi$
    └─s────────────(1)┬─i──(4)┬─ppi$
    │ └─ssippi$
    └─si─(3)┬─ppi$
    └─ssippi$
  8. @axefrog axefrog revised this gist Apr 15, 2012. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    using System.IO;
    using System.Linq;

    namespace Shiny
    namespace SuffixTreeAlgorithm
    {
    public class SuffixTree
    {
    @@ -335,4 +335,4 @@ public static class RenderChars
    public const char CornerRight = '└';
    }
    }
    }
    }
  9. @axefrog axefrog revised this gist Apr 15, 2012. 1 changed file with 22 additions and 11 deletions.
    33 changes: 22 additions & 11 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    using System.IO;
    using System.Linq;

    namespace SuffixTreeAlgorithm
    namespace Shiny
    {
    public class SuffixTree
    {
    @@ -38,7 +38,7 @@ private void Build(char canonizationChar)
    var n = Word.IndexOf(Word[Word.Length - 1]);
    var mustCanonize = n < Word.Length - 1;
    if(mustCanonize)
    Word = string.Concat(Word, canonizationChar);
    Word = string.Concat(Word, canonizationChar);

    for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++)
    {
    @@ -80,6 +80,7 @@ private bool AddNextSuffix()
    return false;

    ActiveNode.AddNewEdge();
    UpdateActivePointAfterAddingNewEdge();
    return true;
    }

    @@ -117,18 +118,25 @@ private bool AddCurrentSuffixToActiveEdge()

    SplitActiveEdge();
    ActiveEdge.Tail.AddNewEdge();
    UpdateActivePointAfterAddingNewEdge();

    return true;
    }

    private void UpdateActivePointAfterAddingNewEdge()
    {
    if(ReferenceEquals(ActiveNode, RootNode))
    {
    DistanceIntoActiveEdge--;
    var firstIndex = ActiveEdge.StartIndex;
    ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndex);
    if(DistanceIntoActiveEdge > 0)
    {
    DistanceIntoActiveEdge--;
    var firstIndex = ActiveEdge.StartIndex;
    ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndex);
    }
    }
    else
    UpdateActivePointToLinkedNodeOrRoot();

    return true;
    }

    private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge)
    @@ -174,10 +182,13 @@ private void UpdateActivePointToLinkedNodeOrRoot()
    {
    ActiveNode = RootNode;
    }
    var firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex;
    ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]];

    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge);
    if(ActiveEdge != null)
    {
    var firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex;
    ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]];
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge);
    }
    }

    public string RenderTree()
  10. @axefrog axefrog revised this gist Apr 15, 2012. 2 changed files with 31 additions and 37 deletions.
    64 changes: 29 additions & 35 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -3,10 +3,7 @@
    using System.IO;
    using System.Linq;

    // Implemented by Nathan Ridley ([email protected]) based on the explanation by jogojapan at:
    // http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english

    namespace Shiny
    namespace SuffixTreeAlgorithm
    {
    public class SuffixTree
    {
    @@ -15,7 +12,7 @@ public class SuffixTree
    private int CurrentSuffixEndIndex { get; set; }
    private Node LastCreatedNodeInCurrentIteration { get; set; }
    private int UnresolvedSuffixes { get; set; }
    private Node RootNode { get; set; }
    public Node RootNode { get; private set; }
    private Node ActiveNode { get; set; }
    private Edge ActiveEdge { get; set; }
    private int DistanceIntoActiveEdge { get; set; }
    @@ -39,7 +36,8 @@ public static SuffixTree Create(string word, char canonizationChar = '$')
    private void Build(char canonizationChar)
    {
    var n = Word.IndexOf(Word[Word.Length - 1]);
    if(n < Word.Length - 1)
    var mustCanonize = n < Word.Length - 1;
    if(mustCanonize)
    Word = string.Concat(Word, canonizationChar);

    for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++)
    @@ -93,7 +91,7 @@ private bool GetExistingEdgeAndSetAsActive()
    Console.WriteLine("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix);
    ActiveEdge = edge;
    DistanceIntoActiveEdge = 1;
    NormalizeActivePointIfNowAtEdgeBoundary();
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);
    Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge);
    Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    Console.WriteLine(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes);
    @@ -112,7 +110,8 @@ private bool AddCurrentSuffixToActiveEdge()
    Console.WriteLine("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix);
    DistanceIntoActiveEdge++;
    Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    NormalizeActivePointIfNowAtEdgeBoundary();
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex);

    return false;
    }

    @@ -122,23 +121,32 @@ private bool AddCurrentSuffixToActiveEdge()
    if(ReferenceEquals(ActiveNode, RootNode))
    {
    DistanceIntoActiveEdge--;
    var firstIndex = ActiveEdge.StartIndex;
    ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    NormalizeActivePointIfNowAtEdgeBoundary();
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndex);
    }
    else
    UpdateActivePointToLinkedNodeOrRoot();

    return true;
    }

    private void NormalizeActivePointIfNowAtEdgeBoundary()
    private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge)
    {
    if(ActiveEdge != null && DistanceIntoActiveEdge == ActiveEdge.Length)
    var walkDistance = 0;
    while(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    {
    Console.WriteLine("Active point is now at edge boundary and will be reset to the tail node of the active edge");
    DistanceIntoActiveEdge = 0;
    ActiveNode = ActiveEdge.Tail ?? RootNode;
    ActiveEdge = null;
    Console.WriteLine("Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary");
    DistanceIntoActiveEdge -= ActiveEdge.Length;
    ActiveNode = ActiveEdge.Tail;
    if(DistanceIntoActiveEdge == 0)
    ActiveEdge = null;
    else
    {
    walkDistance += ActiveEdge.Length;
    var c = Word[firstIndexOfOriginalActiveEdge + walkDistance];
    ActiveEdge = ActiveNode.Edges[c];
    }
    }
    }

    @@ -172,25 +180,6 @@ private void UpdateActivePointToLinkedNodeOrRoot()
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge);
    }

    private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge)
    {
    var walkDistance = 0;
    while(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    {
    Console.WriteLine("Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary");
    DistanceIntoActiveEdge -= ActiveEdge.Length;
    ActiveNode = ActiveEdge.Tail;
    if(DistanceIntoActiveEdge == 0)
    ActiveEdge = null;
    else
    {
    walkDistance += ActiveEdge.Length;
    var c = Word[firstIndexOfOriginalActiveEdge + walkDistance];
    ActiveEdge = ActiveNode.Edges[c];
    }
    }
    }

    public string RenderTree()
    {
    var writer = new StringWriter();
    @@ -237,6 +226,11 @@ public override string ToString()
    StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")");
    }

    public string String
    {
    get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1); }
    }

    public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength)
    {
    var strEdge = _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1);
    @@ -330,4 +324,4 @@ public static class RenderChars
    public const char CornerRight = '└';
    }
    }
    }
    }
    4 changes: 2 additions & 2 deletions 2.output.txt
    Original file line number Diff line number Diff line change
    @@ -142,7 +142,7 @@ The next suffix of 'abcabxabcd' to add is 'a{b}' at indices 6,7
    => UnresolvedSuffixes: 1
    The next character on the current edge is 'b' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2
    Active point is now at edge boundary and will be reset to the tail node of the active edge
    Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary

    (0)┬─ab─────(1)┬─cabxab
    │ └─xab
    @@ -253,4 +253,4 @@ Adding new edge to node #0
    ├─c─────(5)┬─abxabcd
    │ └─d
    ├─d
    └─xabcd
    └─xabcd
  11. @axefrog axefrog revised this gist Apr 15, 2012. 1 changed file with 2 additions and 1 deletion.
    3 changes: 2 additions & 1 deletion 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,8 @@
    using System.IO;
    using System.Linq;

    // Implemented based on the explanation at http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english
    // Implemented by Nathan Ridley ([email protected]) based on the explanation by jogojapan at:
    // http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english

    namespace Shiny
    {
  12. @axefrog axefrog revised this gist Apr 15, 2012. 1 changed file with 3 additions and 1 deletion.
    4 changes: 3 additions & 1 deletion 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -3,6 +3,8 @@
    using System.IO;
    using System.Linq;

    // Implemented based on the explanation at http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english

    namespace Shiny
    {
    public class SuffixTree
    @@ -327,4 +329,4 @@ public static class RenderChars
    public const char CornerRight = '└';
    }
    }
    }
    }
  13. @axefrog axefrog revised this gist Apr 15, 2012. 3 changed files with 529 additions and 322 deletions.
    437 changes: 272 additions & 165 deletions 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -3,221 +3,328 @@
    using System.IO;
    using System.Linq;

    namespace StringAlgorithms
    namespace Shiny
    {
    public class SuffixTree
    {
    // http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english
    public string Word { get; private set; }
    private int CurrentSuffixStartIndex { get; set; }
    private int CurrentSuffixEndIndex { get; set; }
    private Node LastCreatedNodeInCurrentIteration { get; set; }
    private int UnresolvedSuffixes { get; set; }
    private Node RootNode { get; set; }
    private Node ActiveNode { get; set; }
    private Edge ActiveEdge { get; set; }
    private int DistanceIntoActiveEdge { get; set; }
    private char LastCharacterOfCurrentSuffix { get; set; }
    private int NextNodeNumber { get; set; }

    private ActivePoint _activePoint;

    public string Word { get; set; }
    public Node RootNode { get; set; }
    public int InsertionPoint { get; set; }
    public int NextNodeNumber { get; set; }

    public SuffixTree(string word)
    private SuffixTree(string word)
    {
    Word = word;
    RootNode = new Node { NodeNumber = NextNodeNumber++ };
    Build();
    RootNode = new Node(this);
    ActiveNode = RootNode;
    }

    private void Build()
    public static SuffixTree Create(string word, char canonizationChar = '$')
    {
    _activePoint = new ActivePoint
    {
    ActiveNode = RootNode,
    ActiveEdgeStartsWith = '\0',
    ActiveLength = 0
    };
    var remainder = 1;

    InsertionPoint = 0;
    for(InsertionPoint = 0; InsertionPoint < Word.Length; InsertionPoint++)
    {
    var c = Word[InsertionPoint];
    Console.WriteLine("Adding: '" + c.ToString().ToUpper() + "'");
    var suffixesToInsert = remainder;
    Node previousNode = null;
    var implicitComplete = false;
    Console.WriteLine("Remainder is " + remainder);
    for(var i = 0; i < suffixesToInsert && !implicitComplete; i++)
    {
    Console.WriteLine("The active point is currently: " + _activePoint);
    Edge edge;
    if(_activePoint.ActiveEdgeStartsWith != '\0')
    {
    Console.WriteLine("Looking for edge starting with '" + _activePoint.ActiveEdgeStartsWith.ToString().ToUpper() + "'");
    edge = _activePoint.ActiveNode.Edges[_activePoint.ActiveEdgeStartsWith];
    if(Word[edge.StartIndex + _activePoint.ActiveLength] == c)
    {
    Console.WriteLine("Found implicit match in edge " + edge);
    _activePoint.ActiveLength++;
    remainder++;
    if(_activePoint.ActiveLength == edge.Length)
    {
    Console.WriteLine("Match is at end of edge; active point will be moved to the connecting node.");
    _activePoint.ActiveNode = edge.Tail;
    _activePoint.ActiveEdgeStartsWith = '\0';
    _activePoint.ActiveLength = 0;
    }
    implicitComplete = true;
    }
    else
    {
    Console.Write("Splitting edge " + edge + " --> ");
    edge.EndIndex = edge.StartIndex + _activePoint.ActiveLength - 1;
    edge.Tail = new Node { Stem = edge, NodeNumber = NextNodeNumber++ };
    var subdivisionStartIndex = edge.EndIndex.Value + 1;
    var subdividedStartChar = Word[subdivisionStartIndex];
    edge.Tail.Edges.Add(subdividedStartChar, new Edge(this) { StartIndex = subdivisionStartIndex, Head = edge.Tail });
    edge.Tail.Edges.Add(c, new Edge(this) { StartIndex = InsertionPoint, Head = edge.Tail });
    Console.WriteLine(edge + " => " + edge.Tail.NodeNumber + " => " + edge.Tail.Edges.First().Value + " / " + edge.Tail.Edges.Last().Value);
    if(i > 0 && previousNode != null)
    {
    Console.WriteLine("Connected node " + previousNode.NodeNumber + " ---> node " + edge.Tail.NodeNumber);
    previousNode.LinkedNode = edge.Tail;
    }
    previousNode = edge.Tail;
    var tree = new SuffixTree(word);
    tree.Build(canonizationChar);
    return tree;
    }

    remainder--;
    if(_activePoint.ActiveNode.NodeNumber > 0)
    {
    Console.Write("Changed the active node from " + _activePoint.ActiveNode.NodeNumber + " to ");
    _activePoint.ActiveNode = _activePoint.ActiveNode.LinkedNode ?? RootNode;
    Console.WriteLine(_activePoint.ActiveNode.NodeNumber);
    }
    else
    {
    _activePoint.ActiveLength--;
    _activePoint.ActiveEdgeStartsWith = _activePoint.ActiveLength == 0 ? '\0' : Word[InsertionPoint - _activePoint.ActiveLength];
    }
    }
    }
    else if(_activePoint.ActiveNode.Edges.TryGetValue(c, out edge))
    {
    Console.WriteLine("Found '" + c.ToString().ToUpper() + "' on edge " + edge + "; updating active point.");
    _activePoint.ActiveEdgeStartsWith = c;
    _activePoint.ActiveLength = 1;
    remainder++;
    implicitComplete = true;
    }
    else
    private void Build(char canonizationChar)
    {
    var n = Word.IndexOf(Word[Word.Length - 1]);
    if(n < Word.Length - 1)
    Word = string.Concat(Word, canonizationChar);

    for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++)
    {
    Console.WriteLine("=== ITERATION {0} ===", CurrentSuffixEndIndex);
    LastCreatedNodeInCurrentIteration = null;
    LastCharacterOfCurrentSuffix = Word[CurrentSuffixEndIndex];

    for(CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++)
    {
    var wasImplicitlyAdded = !AddNextSuffix();
    Console.WriteLine();
    Console.WriteLine(RenderTree());
    if(wasImplicitlyAdded)
    {
    Console.WriteLine("Inserting singular edge for '" + c.ToString().ToUpper() + "' from node " + _activePoint.ActiveNode.NodeNumber);
    edge = new Edge(this) { StartIndex = InsertionPoint, Head = _activePoint.ActiveNode };
    _activePoint.ActiveNode.Edges.Add(c, edge);
    UnresolvedSuffixes++;
    break;
    }
    Console.WriteLine(WriteText());
    if(UnresolvedSuffixes > 0)
    UnresolvedSuffixes--;
    }
    }
    }

    public string WriteText()
    private bool AddNextSuffix()
    {
    var writer = new StringWriter();
    RootNode.WriteText(writer, 0);
    return writer.ToString();
    }
    }
    var suffix = string.Concat(Word.Substring(CurrentSuffixStartIndex, CurrentSuffixEndIndex - CurrentSuffixStartIndex), "{", Word[CurrentSuffixEndIndex], "}");
    Console.WriteLine("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix, CurrentSuffixStartIndex, CurrentSuffixEndIndex);
    Console.WriteLine(" => ActiveNode: {0}", ActiveNode);
    Console.WriteLine(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString());
    Console.WriteLine(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge);
    Console.WriteLine(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes);
    if(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    throw new Exception("BOUNDARY EXCEEDED");

    public class Node
    {
    public Dictionary<char, Edge> Edges { get; private set; }
    public Node LinkedNode { get; set; }
    public Edge Stem { get; set; }
    public int NodeNumber { get; set; }
    if(ActiveEdge != null)
    return AddCurrentSuffixToActiveEdge();

    if(GetExistingEdgeAndSetAsActive())
    return false;

    public Node()
    ActiveNode.AddNewEdge();
    return true;
    }

    private bool GetExistingEdgeAndSetAsActive()
    {
    Edges = new Dictionary<char, Edge>();
    Edge edge;
    if(ActiveNode.Edges.TryGetValue(LastCharacterOfCurrentSuffix, out edge))
    {
    Console.WriteLine("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix);
    ActiveEdge = edge;
    DistanceIntoActiveEdge = 1;
    NormalizeActivePointIfNowAtEdgeBoundary();
    Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge);
    Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    Console.WriteLine(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes);

    return true;
    }
    Console.WriteLine("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix);
    return false;
    }

    public void WriteText(TextWriter writer, int indent)
    private bool AddCurrentSuffixToActiveEdge()
    {
    writer.Write(NodeNumber);
    var nextCharacterOnEdge = Word[ActiveEdge.StartIndex + DistanceIntoActiveEdge];
    if(nextCharacterOnEdge == LastCharacterOfCurrentSuffix)
    {
    Console.WriteLine("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix);
    DistanceIntoActiveEdge++;
    Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge);
    NormalizeActivePointIfNowAtEdgeBoundary();
    return false;
    }

    SplitActiveEdge();
    ActiveEdge.Tail.AddNewEdge();

    var i = 0;
    var maxEdgeLength = Edges.Values.Max(e => (e.EndIndex ?? e.Tree.InsertionPoint) - e.StartIndex);
    foreach(var edge in Edges.Values)
    if(ReferenceEquals(ActiveNode, RootNode))
    {
    if(i++ == 0)
    writer.Write(" +");
    else
    {
    writer.Write(" ");
    if(indent > 0)
    {
    writer.Write("|");
    writer.Write(new string(' ', indent - 1));
    }
    writer.Write("+");
    }
    edge.WriteText(writer, indent + 3, maxEdgeLength);
    DistanceIntoActiveEdge--;
    ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]];
    NormalizeActivePointIfNowAtEdgeBoundary();
    }
    else
    UpdateActivePointToLinkedNodeOrRoot();

    return true;
    }

    public int Depth
    private void NormalizeActivePointIfNowAtEdgeBoundary()
    {
    get { return Stem == null ? 0 : Stem.Length + Stem.Head.Depth; }
    if(ActiveEdge != null && DistanceIntoActiveEdge == ActiveEdge.Length)
    {
    Console.WriteLine("Active point is now at edge boundary and will be reset to the tail node of the active edge");
    DistanceIntoActiveEdge = 0;
    ActiveNode = ActiveEdge.Tail ?? RootNode;
    ActiveEdge = null;
    }
    }

    public override string ToString()
    private void SplitActiveEdge()
    {
    return "Node " + NodeNumber;
    ActiveEdge = ActiveEdge.SplitAtIndex(ActiveEdge.StartIndex + DistanceIntoActiveEdge);
    Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge);
    if(LastCreatedNodeInCurrentIteration != null)
    {
    LastCreatedNodeInCurrentIteration.LinkedNode = ActiveEdge.Tail;
    Console.WriteLine(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveEdge.Tail);
    }
    LastCreatedNodeInCurrentIteration = ActiveEdge.Tail;
    }
    }

    public class Edge
    {
    public Edge(SuffixTree tree)
    private void UpdateActivePointToLinkedNodeOrRoot()
    {
    Tree = tree;
    }
    Console.WriteLine("The linked node for active node {0} is {1}", ActiveNode, ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString());
    if(ActiveNode.LinkedNode != null)
    {
    ActiveNode = ActiveNode.LinkedNode;
    Console.WriteLine(" => ActiveNode is now: {0}", ActiveNode);
    }
    else
    {
    ActiveNode = RootNode;
    }
    var firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex;
    ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]];

    public SuffixTree Tree { get; set; }
    public Node Head { get; set; }
    public Node Tail { get; set; }
    public int StartIndex { get; set; }
    public int? EndIndex { get; set; }
    NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge);
    }

    public override string ToString()
    private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge)
    {
    return string.Concat("[", Head.NodeNumber, ":'", Tree.Word.Substring(StartIndex, Length).ToUpper(), "']");
    var walkDistance = 0;
    while(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length)
    {
    Console.WriteLine("Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary");
    DistanceIntoActiveEdge -= ActiveEdge.Length;
    ActiveNode = ActiveEdge.Tail;
    if(DistanceIntoActiveEdge == 0)
    ActiveEdge = null;
    else
    {
    walkDistance += ActiveEdge.Length;
    var c = Word[firstIndexOfOriginalActiveEdge + walkDistance];
    ActiveEdge = ActiveNode.Edges[c];
    }
    }
    }

    public int Length
    public string RenderTree()
    {
    get { return (EndIndex ?? Math.Min(Tree.Word.Length - 1, Tree.InsertionPoint)) - StartIndex + 1; }
    var writer = new StringWriter();
    RootNode.RenderTree(writer, "");
    return writer.ToString();
    }

    public void WriteText(TextWriter writer, int indent, int maxEdgeLength)
    public class Edge
    {
    writer.Write(" - ");
    var str = Tree.Word.Substring(StartIndex, Length).ToUpper();
    writer.Write(str);
    if(Tail == null)
    writer.WriteLine("");
    else
    private readonly SuffixTree _tree;

    public Edge(SuffixTree tree, Node head)
    {
    writer.Write(" ");
    writer.Write(new string('-', 1 + maxEdgeLength - str.Length));
    writer.Write(" ");
    Tail.WriteText(writer, indent + maxEdgeLength + 6);
    _tree = tree;
    Head = head;
    StartIndex = tree.CurrentSuffixEndIndex;
    }

    public Node Head { get; private set; }
    public Node Tail { get; private set; }
    public int StartIndex { get; private set; }
    public int? EndIndex { get; set; }
    public int Length { get { return (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1; } }

    public Edge SplitAtIndex(int index)
    {
    Console.WriteLine("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]);
    var newEdge = new Edge(_tree, Head);
    var newNode = new Node(_tree);
    newEdge.Tail = newNode;
    newEdge.StartIndex = StartIndex;
    newEdge.EndIndex = index - 1;
    Head = newNode;
    StartIndex = index;
    newNode.Edges.Add(_tree.Word[StartIndex], this);
    newEdge.Head.Edges[_tree.Word[newEdge.StartIndex]] = newEdge;
    Console.WriteLine(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode, this);
    return newEdge;
    }

    public override string ToString()
    {
    return string.Concat(_tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1), "(",
    StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")");
    }

    public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength)
    {
    var strEdge = _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1);
    writer.Write(strEdge);
    if(Tail == null)
    writer.WriteLine();
    else
    {
    var line = new string(RenderChars.HorizontalLine, maxEdgeLength - strEdge.Length + 1);
    writer.Write(line);
    Tail.RenderTree(writer, string.Concat(prefix, new string(' ', strEdge.Length + line.Length)));
    }
    }
    }
    }

    public class ActivePoint
    {
    public Node ActiveNode { get; set; }
    public char ActiveEdgeStartsWith { get; set; }
    public int ActiveLength { get; set; }
    /// <summary>
    /// A node of the suffix tree. It owns its outgoing edges, keyed by a character,
    /// and takes its number from the owning tree's NextNodeNumber counter.
    /// </summary>
    public class Node
    {
        private readonly SuffixTree _tree;

        /// <summary>Creates a node with no edges and assigns it the tree's next node number.</summary>
        /// <param name="tree">The tree this node belongs to.</param>
        public Node(SuffixTree tree)
        {
            _tree = tree;
            Edges = new Dictionary<char, Edge>();
            NodeNumber = tree.NextNodeNumber++;
        }

        /// <summary>Outgoing edges, keyed by a character of the tree's word.</summary>
        public Dictionary<char, Edge> Edges { get; private set; }

        /// <summary>Another node this node is linked to; assigned from outside this class.</summary>
        // NOTE(review): presumably the suffix link of Ukkonen's algorithm - confirm against the builder.
        public Node LinkedNode { get; set; }

        /// <summary>Number assigned at construction; used in log messages and rendering.</summary>
        public int NodeNumber { get; private set; }

        /// <summary>
        /// Creates a new edge leaving this node and registers it under the word character at
        /// the tree's CurrentSuffixEndIndex, logging before and after.
        /// </summary>
        public void AddNewEdge()
        {
            Console.WriteLine("Adding new edge to {0}", this);
            var created = new Edge(_tree, this);
            var firstChar = _tree.Word[_tree.CurrentSuffixEndIndex];
            Edges.Add(firstChar, created);
            Console.WriteLine(" => {0} --> {1}", this, created);
        }

        /// <summary>
        /// Writes this node as "(n)" followed by its edges, one per output line, ordered by the
        /// word character at each edge's start index and joined with box-drawing connectors.
        /// </summary>
        /// <param name="writer">Destination for the rendered text.</param>
        /// <param name="prefix">Text written at the start of every continuation line of this subtree.</param>
        public void RenderTree(TextWriter writer, string prefix)
        {
            // Zero-pad the node number to the width of the tree's next node number.
            var digits = _tree.NextNodeNumber.ToString().Length;
            var label = "(" + NodeNumber.ToString(new string('0', digits)) + ")";
            writer.Write(label);

            var sorted = Edges.Values.OrderBy(e => _tree.Word[e.StartIndex]).ToArray();
            if(sorted.Length == 0)
                return;

            var padded = prefix + new string(' ', label.Length);
            // Length of the longest edge; edges without an end index run to CurrentSuffixEndIndex.
            var widest = sorted.Max(e => (e.EndIndex ?? _tree.CurrentSuffixEndIndex) - e.StartIndex + 1);
            var lastIndex = sorted.Length - 1;

            for(var index = 0; index <= lastIndex; index++)
            {
                var isFirst = index == 0;
                var isLast = index == lastIndex;

                // The first edge continues the line the node label was written on;
                // every later edge starts a fresh line and needs the padding.
                if(!isFirst)
                    writer.Write(padded);

                char connector;
                if(isFirst)
                    connector = isLast ? RenderChars.HorizontalLine : RenderChars.TJunctionDown;
                else
                    connector = isLast ? RenderChars.CornerRight : RenderChars.TJunctionRight;

                // A vertical line is carried down only while more sibling edges follow.
                var extender = isLast ? ' ' : RenderChars.VerticalLine;

                writer.Write(new string(new[] { connector, RenderChars.HorizontalLine }));
                var childPrefix = padded + extender + ' ';
                sorted[index].RenderTree(writer, childPrefix, widest);
            }
        }

        /// <summary>Returns "node #" followed by the node number.</summary>
        public override string ToString()
        {
            return "node #" + NodeNumber;
        }
    }

    public override string ToString()
    public static class RenderChars
    {
    return string.Concat("{", ActiveNode, ",'", ActiveEdgeStartsWith == '\0' ? "\\0" : ActiveEdgeStartsWith.ToString(), "',", ActiveLength, "}");
    public const char TJunctionDown = '┬';
    public const char HorizontalLine = '─';
    public const char VerticalLine = '│';
    public const char TJunctionRight = '├';
    public const char CornerRight = '└';
    }
    }
    }
    2 changes: 1 addition & 1 deletion 1.test.cs
    Original file line number Diff line number Diff line change
    @@ -1,4 +1,4 @@
    static void Main()
    {
    var tree = new SuffixTree("abcabxabcd");
    var tree = SuffixTree.Create("abcabxabcd");
    }
    412 changes: 256 additions & 156 deletions 2.output.txt
    Original file line number Diff line number Diff line change
    @@ -1,156 +1,256 @@
    Adding: 'A'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'A' from node 0
    0 + - A

    Adding: 'B'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'B' from node 0
    0 + - AB
    + - B

    Adding: 'C'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'C' from node 0
    0 + - ABC
    + - BC
    + - C

    Adding: 'A'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Found 'A' on edge [0:'ABCA']; updating active point.
    0 + - ABCA
    + - BCA
    + - CA

    Adding: 'B'
    Remainder is 2
    The active point is currently: {Node 0,'a',1}
    Looking for edge starting with 'A'
    Found implicit match in edge [0:'ABCAB']
    0 + - ABCAB
    + - BCAB
    + - CAB

    Adding: 'X'
    Remainder is 3
    The active point is currently: {Node 0,'a',2}
    Looking for edge starting with 'A'
    Splitting edge [0:'ABCABX'] --> [0:'AB'] => 1 => [1:'CABX'] / [1:'X']
    0 + - AB --- 1 + - CABX
    | + - X
    + - BCABX
    + - CABX

    The active point is currently: {Node 0,'b',1}
    Looking for edge starting with 'B'
    Splitting edge [0:'BCABX'] --> [0:'B'] => 2 => [2:'CABX'] / [2:'X']
    Connected node 1 ---> node 2
    0 + - AB -- 1 + - CABX
    | + - X
    + - B --- 2 + - CABX
    | + - X
    + - CABX

    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'X' from node 0
    0 + - AB -- 1 + - CABX
    | + - X
    + - B --- 2 + - CABX
    | + - X
    + - CABX
    + - X

    Adding: 'A'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Found 'A' on edge [0:'AB']; updating active point.
    0 + - AB --- 1 + - CABXA
    | + - XA
    + - B ---- 2 + - CABXA
    | + - XA
    + - CABXA
    + - XA

    Adding: 'B'
    Remainder is 2
    The active point is currently: {Node 0,'a',1}
    Looking for edge starting with 'A'
    Found implicit match in edge [0:'AB']
    Match is at end of edge; active point will be moved to the connecting node.
    0 + - AB ---- 1 + - CABXAB
    | + - XAB
    + - B ----- 2 + - CABXAB
    | + - XAB
    + - CABXAB
    + - XAB

    Adding: 'C'
    Remainder is 3
    The active point is currently: {Node 1,'\0',0}
    Found 'C' on edge [1:'CABXABC']; updating active point.
    0 + - AB ----- 1 + - CABXABC
    | + - XABC
    + - B ------ 2 + - CABXABC
    | + - XABC
    + - CABXABC
    + - XABC

    Adding: 'D'
    Remainder is 4
    The active point is currently: {Node 1,'c',1}
    Looking for edge starting with 'C'
    Splitting edge [1:'CABXABCD'] --> [1:'C'] => 3 => [3:'ABXABCD'] / [3:'D']
    Changed the active node from 1 to 2
    0 + - AB ------ 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ------- 2 + - CABXABCD
    | + - XABCD
    + - CABXABCD
    + - XABCD

    The active point is currently: {Node 2,'c',1}
    Looking for edge starting with 'C'
    Splitting edge [2:'CABXABCD'] --> [2:'C'] => 4 => [4:'ABXABCD'] / [4:'D']
    Connected node 3 ---> node 4
    Changed the active node from 2 to 0
    0 + - AB ------ 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ------- 2 + - C ---- 4 + - ABXABCD
    | + - D
    | + - XABCD
    + - CABXABCD
    + - XABCD

    The active point is currently: {Node 0,'c',1}
    Looking for edge starting with 'C'
    Splitting edge [0:'CABXABCD'] --> [0:'C'] => 5 => [5:'ABXABCD'] / [5:'D']
    Connected node 4 ---> node 5
    0 + - AB --- 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ---- 2 + - C ---- 4 + - ABXABCD
    | + - D
    | + - XABCD
    + - C ---- 5 + - ABXABCD
    | + - D
    + - XABCD

    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'D' from node 0
    0 + - AB --- 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ---- 2 + - C ---- 4 + - ABXABCD
    | + - D
    | + - XABCD
    + - C ---- 5 + - ABXABCD
    | + - D
    + - XABCD
    + - D
    === ITERATION 0 ===
    The next suffix of 'abcabxabcd' to add is '{a}' at indices 0,0
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' not found
    Adding new edge to node #0
    => node #0 --> a(0,#)

    (0)──a

    === ITERATION 1 ===
    The next suffix of 'abcabxabcd' to add is '{b}' at indices 1,1
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'b' not found
    Adding new edge to node #0
    => node #0 --> b(1,#)

    (0)┬─ab
    └─b

    === ITERATION 2 ===
    The next suffix of 'abcabxabcd' to add is '{c}' at indices 2,2
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'c' not found
    Adding new edge to node #0
    => node #0 --> c(2,#)

    (0)┬─abc
    ├─bc
    └─c

    === ITERATION 3 ===
    The next suffix of 'abcabxabcd' to add is '{a}' at indices 3,3
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    => ActiveEdge is now: abca(0,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─abca
    ├─bca
    └─ca

    === ITERATION 4 ===
    The next suffix of 'abcabxabcd' to add is 'a{b}' at indices 3,4
    => ActiveNode: node #0
    => ActiveEdge: abcab(0,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    The next character on the current edge is 'b' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2

    (0)┬─abcab
    ├─bcab
    └─cab

    === ITERATION 5 ===
    The next suffix of 'abcabxabcd' to add is 'ab{x}' at indices 3,5
    => ActiveNode: node #0
    => ActiveEdge: abcabx(0,#)
    => DistanceIntoActiveEdge: 2
    => UnresolvedSuffixes: 2
    Splitting edge abcabx(0,#) at index 2 ('c')
    => Hierarchy is now: node #0 --> ab(0,1) --> node #1 --> cabx(2,#)
    => ActiveEdge is now: ab(0,1)
    Adding new edge to node #1
    => node #1 --> x(5,#)

    (0)┬─ab────(1)┬─cabx
    │ └─x
    ├─bcabx
    └─cabx

    The next suffix of 'abcabxabcd' to add is 'b{x}' at indices 4,5
    => ActiveNode: node #0
    => ActiveEdge: bcabx(1,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge bcabx(1,#) at index 2 ('c')
    => Hierarchy is now: node #0 --> b(1,1) --> node #2 --> cabx(2,#)
    => ActiveEdge is now: b(1,1)
    => Connected node #1 to node #2
    Adding new edge to node #2
    => node #2 --> x(5,#)

    (0)┬─ab───(1)┬─cabx
    │ └─x
    ├─b────(2)┬─cabx
    │ └─x
    └─cabx

    The next suffix of 'abcabxabcd' to add is '{x}' at indices 5,5
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'x' not found
    Adding new edge to node #0
    => node #0 --> x(5,#)

    (0)┬─ab───(1)┬─cabx
    │ └─x
    ├─b────(2)┬─cabx
    │ └─x
    ├─cabx
    └─x

    === ITERATION 6 ===
    The next suffix of 'abcabxabcd' to add is '{a}' at indices 6,6
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'a' found. Values adjusted to:
    => ActiveEdge is now: ab(0,1)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 0

    (0)┬─ab────(1)┬─cabxa
    │ └─xa
    ├─b─────(2)┬─cabxa
    │ └─xa
    ├─cabxa
    └─xa

    === ITERATION 7 ===
    The next suffix of 'abcabxabcd' to add is 'a{b}' at indices 6,7
    => ActiveNode: node #0
    => ActiveEdge: ab(0,1)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    The next character on the current edge is 'b' (suffix added implicitly)
    => DistanceIntoActiveEdge is now: 2
    Active point is now at edge boundary and will be reset to the tail node of the active edge

    (0)┬─ab─────(1)┬─cabxab
    │ └─xab
    ├─b──────(2)┬─cabxab
    │ └─xab
    ├─cabxab
    └─xab

    === ITERATION 8 ===
    The next suffix of 'abcabxabcd' to add is 'ab{c}' at indices 6,8
    => ActiveNode: node #1
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 2
    Existing edge for node #1 starting with 'c' found. Values adjusted to:
    => ActiveEdge is now: cabxabc(2,#)
    => DistanceIntoActiveEdge is now: 1
    => UnresolvedSuffixes is now: 2

    (0)┬─ab──────(1)┬─cabxabc
    │ └─xabc
    ├─b───────(2)┬─cabxabc
    │ └─xabc
    ├─cabxabc
    └─xabc

    === ITERATION 9 ===
    The next suffix of 'abcabxabcd' to add is 'abc{d}' at indices 6,9
    => ActiveNode: node #1
    => ActiveEdge: cabxabcd(2,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 3
    Splitting edge cabxabcd(2,#) at index 3 ('a')
    => Hierarchy is now: node #1 --> c(2,2) --> node #3 --> abxabcd(3,#)
    => ActiveEdge is now: c(2,2)
    Adding new edge to node #3
    => node #3 --> d(9,#)
    The linked node for active node node #1 is node #2
    => ActiveNode is now: node #2

    (0)┬─ab───────(1)┬─c─────(3)┬─abxabcd
    │ │ └─d
    │ └─xabcd
    ├─b────────(2)┬─cabxabcd
    │ └─xabcd
    ├─cabxabcd
    └─xabcd

    The next suffix of 'abcabxabcd' to add is 'bc{d}' at indices 7,9
    => ActiveNode: node #2
    => ActiveEdge: cabxabcd(2,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 2
    Splitting edge cabxabcd(2,#) at index 3 ('a')
    => Hierarchy is now: node #2 --> c(2,2) --> node #4 --> abxabcd(3,#)
    => ActiveEdge is now: c(2,2)
    => Connected node #3 to node #4
    Adding new edge to node #4
    => node #4 --> d(9,#)
    The linked node for active node node #2 is [null]

    (0)┬─ab───────(1)┬─c─────(3)┬─abxabcd
    │ │ └─d
    │ └─xabcd
    ├─b────────(2)┬─c─────(4)┬─abxabcd
    │ │ └─d
    │ └─xabcd
    ├─cabxabcd
    └─xabcd

    The next suffix of 'abcabxabcd' to add is 'c{d}' at indices 8,9
    => ActiveNode: node #0
    => ActiveEdge: cabxabcd(2,#)
    => DistanceIntoActiveEdge: 1
    => UnresolvedSuffixes: 1
    Splitting edge cabxabcd(2,#) at index 3 ('a')
    => Hierarchy is now: node #0 --> c(2,2) --> node #5 --> abxabcd(3,#)
    => ActiveEdge is now: c(2,2)
    => Connected node #4 to node #5
    Adding new edge to node #5
    => node #5 --> d(9,#)

    (0)┬─ab────(1)┬─c─────(3)┬─abxabcd
    │ │ └─d
    │ └─xabcd
    ├─b─────(2)┬─c─────(4)┬─abxabcd
    │ │ └─d
    │ └─xabcd
    ├─c─────(5)┬─abxabcd
    │ └─d
    └─xabcd

    The next suffix of 'abcabxabcd' to add is '{d}' at indices 9,9
    => ActiveNode: node #0
    => ActiveEdge: none
    => DistanceIntoActiveEdge: 0
    => UnresolvedSuffixes: 0
    Existing edge for node #0 starting with 'd' not found
    Adding new edge to node #0
    => node #0 --> d(9,#)

    (0)┬─ab────(1)┬─c─────(3)┬─abxabcd
    │ │ └─d
    │ └─xabcd
    ├─b─────(2)┬─c─────(4)┬─abxabcd
    │ │ └─d
    │ └─xabcd
    ├─c─────(5)┬─abxabcd
    │ └─d
    ├─d
    └─xabcd
  14. @axefrog axefrog revised this gist Apr 13, 2012. 3 changed files with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion suffixtree.cs → 0.suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -3,7 +3,7 @@
    using System.IO;
    using System.Linq;

    namespace Shiny
    namespace StringAlgorithms
    {
    public class SuffixTree
    {
    File renamed without changes.
    File renamed without changes.
  15. @axefrog axefrog created this gist Apr 13, 2012.
    156 changes: 156 additions & 0 deletions output.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,156 @@
    Adding: 'A'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'A' from node 0
    0 + - A

    Adding: 'B'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'B' from node 0
    0 + - AB
    + - B

    Adding: 'C'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'C' from node 0
    0 + - ABC
    + - BC
    + - C

    Adding: 'A'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Found 'A' on edge [0:'ABCA']; updating active point.
    0 + - ABCA
    + - BCA
    + - CA

    Adding: 'B'
    Remainder is 2
    The active point is currently: {Node 0,'a',1}
    Looking for edge starting with 'A'
    Found implicit match in edge [0:'ABCAB']
    0 + - ABCAB
    + - BCAB
    + - CAB

    Adding: 'X'
    Remainder is 3
    The active point is currently: {Node 0,'a',2}
    Looking for edge starting with 'A'
    Splitting edge [0:'ABCABX'] --> [0:'AB'] => 1 => [1:'CABX'] / [1:'X']
    0 + - AB --- 1 + - CABX
    | + - X
    + - BCABX
    + - CABX

    The active point is currently: {Node 0,'b',1}
    Looking for edge starting with 'B'
    Splitting edge [0:'BCABX'] --> [0:'B'] => 2 => [2:'CABX'] / [2:'X']
    Connected node 1 ---> node 2
    0 + - AB -- 1 + - CABX
    | + - X
    + - B --- 2 + - CABX
    | + - X
    + - CABX

    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'X' from node 0
    0 + - AB -- 1 + - CABX
    | + - X
    + - B --- 2 + - CABX
    | + - X
    + - CABX
    + - X

    Adding: 'A'
    Remainder is 1
    The active point is currently: {Node 0,'\0',0}
    Found 'A' on edge [0:'AB']; updating active point.
    0 + - AB --- 1 + - CABXA
    | + - XA
    + - B ---- 2 + - CABXA
    | + - XA
    + - CABXA
    + - XA

    Adding: 'B'
    Remainder is 2
    The active point is currently: {Node 0,'a',1}
    Looking for edge starting with 'A'
    Found implicit match in edge [0:'AB']
    Match is at end of edge; active point will be moved to the connecting node.
    0 + - AB ---- 1 + - CABXAB
    | + - XAB
    + - B ----- 2 + - CABXAB
    | + - XAB
    + - CABXAB
    + - XAB

    Adding: 'C'
    Remainder is 3
    The active point is currently: {Node 1,'\0',0}
    Found 'C' on edge [1:'CABXABC']; updating active point.
    0 + - AB ----- 1 + - CABXABC
    | + - XABC
    + - B ------ 2 + - CABXABC
    | + - XABC
    + - CABXABC
    + - XABC

    Adding: 'D'
    Remainder is 4
    The active point is currently: {Node 1,'c',1}
    Looking for edge starting with 'C'
    Splitting edge [1:'CABXABCD'] --> [1:'C'] => 3 => [3:'ABXABCD'] / [3:'D']
    Changed the active node from 1 to 2
    0 + - AB ------ 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ------- 2 + - CABXABCD
    | + - XABCD
    + - CABXABCD
    + - XABCD

    The active point is currently: {Node 2,'c',1}
    Looking for edge starting with 'C'
    Splitting edge [2:'CABXABCD'] --> [2:'C'] => 4 => [4:'ABXABCD'] / [4:'D']
    Connected node 3 ---> node 4
    Changed the active node from 2 to 0
    0 + - AB ------ 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ------- 2 + - C ---- 4 + - ABXABCD
    | + - D
    | + - XABCD
    + - CABXABCD
    + - XABCD

    The active point is currently: {Node 0,'c',1}
    Looking for edge starting with 'C'
    Splitting edge [0:'CABXABCD'] --> [0:'C'] => 5 => [5:'ABXABCD'] / [5:'D']
    Connected node 4 ---> node 5
    0 + - AB --- 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ---- 2 + - C ---- 4 + - ABXABCD
    | + - D
    | + - XABCD
    + - C ---- 5 + - ABXABCD
    | + - D
    + - XABCD

    The active point is currently: {Node 0,'\0',0}
    Inserting singular edge for 'D' from node 0
    0 + - AB --- 1 + - C ---- 3 + - ABXABCD
    | + - D
    | + - XABCD
    + - B ---- 2 + - C ---- 4 + - ABXABCD
    | + - D
    | + - XABCD
    + - C ---- 5 + - ABXABCD
    | + - D
    + - XABCD
    + - D
    223 changes: 223 additions & 0 deletions suffixtree.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,223 @@
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;

    namespace Shiny
    {
    // Suffix tree over Word. The whole tree is built inside the constructor (see Build),
    // and every construction step is logged to the console together with a text dump of the tree.
    public class SuffixTree
    {
    // http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english

    // Active point used by Build(): a node, the first character of the active edge
    // ('\0' when no edge is active) and the length walked along that edge.
    private ActivePoint _activePoint;

    // The text the tree is built from.
    public string Word { get; set; }
    // Root of the tree; created in the constructor.
    public Node RootNode { get; set; }
    // Index into Word of the character currently being added by Build().
    // Edges without an EndIndex use this value as their current end (see Edge.Length).
    public int InsertionPoint { get; set; }
    // Counter handing out node numbers; the root takes the first value (0).
    public int NextNodeNumber { get; set; }

    // Stores the word, creates the root node and immediately builds the tree.
    public SuffixTree(string word)
    {
    Word = word;
    RootNode = new Node { NodeNumber = NextNodeNumber++ };
    Build();
    }

    // Adds the characters of Word one at a time. For each character it performs up to
    // `remainder` insertion attempts, stopping early once the character is found to be
    // already present at the active point ("implicit match").
    private void Build()
    {
    _activePoint = new ActivePoint
    {
    ActiveNode = RootNode,
    ActiveEdgeStartsWith = '\0',
    ActiveLength = 0
    };
    // Starts at 1; incremented on every implicit match and decremented on every edge split.
    var remainder = 1;

    // Redundant with the loop initializer below.
    InsertionPoint = 0;
    for(InsertionPoint = 0; InsertionPoint < Word.Length; InsertionPoint++)
    {
    var c = Word[InsertionPoint];
    Console.WriteLine("Adding: '" + c.ToString().ToUpper() + "'");
    // Snapshot: changes to `remainder` inside the inner loop do not change its bound.
    var suffixesToInsert = remainder;
    // Node created by the previous split in this step, if any; used to link nodes together.
    Node previousNode = null;
    var implicitComplete = false;
    Console.WriteLine("Remainder is " + remainder);
    for(var i = 0; i < suffixesToInsert && !implicitComplete; i++)
    {
    Console.WriteLine("The active point is currently: " + _activePoint);
    Edge edge;
    // Case 1: the active point lies on an edge.
    if(_activePoint.ActiveEdgeStartsWith != '\0')
    {
    Console.WriteLine("Looking for edge starting with '" + _activePoint.ActiveEdgeStartsWith.ToString().ToUpper() + "'");
    // NOTE(review): the indexer throws KeyNotFoundException if the active node has no edge
    // for this character (e.g. after ActiveNode was changed below) - confirm this cannot happen.
    edge = _activePoint.ActiveNode.Edges[_activePoint.ActiveEdgeStartsWith];
    // NOTE(review): ActiveLength is not checked against edge.Length here - confirm it always fits.
    if(Word[edge.StartIndex + _activePoint.ActiveLength] == c)
    {
    // The next character on the edge is already c: just advance along the edge.
    Console.WriteLine("Found implicit match in edge " + edge);
    _activePoint.ActiveLength++;
    remainder++;
    if(_activePoint.ActiveLength == edge.Length)
    {
    // Reached the end of the edge: restart from the node at its tail.
    // NOTE(review): edge.Tail is null for edges that were never split - confirm this branch is
    // only reachable for edges with a tail node.
    Console.WriteLine("Match is at end of edge; active point will be moved to the connecting node.");
    _activePoint.ActiveNode = edge.Tail;
    _activePoint.ActiveEdgeStartsWith = '\0';
    _activePoint.ActiveLength = 0;
    }
    implicitComplete = true;
    }
    else
    {
    // Mismatch: cut the edge after ActiveLength characters and hang a new node there with two
    // children - the rest of the old edge and a new edge starting at the current character.
    Console.Write("Splitting edge " + edge + " --> ");
    edge.EndIndex = edge.StartIndex + _activePoint.ActiveLength - 1;
    // NOTE(review): any Tail the edge already had is replaced here, and the continuation edge
    // created below gets neither an EndIndex nor a Tail - looks like an existing subtree would be
    // dropped when an already-split edge is split again; confirm.
    edge.Tail = new Node { Stem = edge, NodeNumber = NextNodeNumber++ };
    var subdivisionStartIndex = edge.EndIndex.Value + 1;
    // This is the character that was just compared against c and found different,
    // so the two Add calls below use distinct keys.
    var subdividedStartChar = Word[subdivisionStartIndex];
    edge.Tail.Edges.Add(subdividedStartChar, new Edge(this) { StartIndex = subdivisionStartIndex, Head = edge.Tail });
    edge.Tail.Edges.Add(c, new Edge(this) { StartIndex = InsertionPoint, Head = edge.Tail });
    // NOTE(review): First()/Last() rely on the dictionary enumerating in insertion order (log output only).
    Console.WriteLine(edge + " => " + edge.Tail.NodeNumber + " => " + edge.Tail.Edges.First().Value + " / " + edge.Tail.Edges.Last().Value);
    // Link the node created by the previous split in this step to the one just created.
    if(i > 0 && previousNode != null)
    {
    Console.WriteLine("Connected node " + previousNode.NodeNumber + " ---> node " + edge.Tail.NodeNumber);
    previousNode.LinkedNode = edge.Tail;
    }
    previousNode = edge.Tail;

    remainder--;
    // Move the active point for the next, shorter suffix.
    if(_activePoint.ActiveNode.NodeNumber > 0)
    {
    // Not at the root (the root has number 0): follow the node's link, or fall back to the root.
    Console.Write("Changed the active node from " + _activePoint.ActiveNode.NodeNumber + " to ");
    _activePoint.ActiveNode = _activePoint.ActiveNode.LinkedNode ?? RootNode;
    Console.WriteLine(_activePoint.ActiveNode.NodeNumber);
    }
    else
    {
    // At the root: shorten the active length by one and re-derive the active edge's first
    // character from the word, or clear it when nothing is left.
    _activePoint.ActiveLength--;
    _activePoint.ActiveEdgeStartsWith = _activePoint.ActiveLength == 0 ? '\0' : Word[InsertionPoint - _activePoint.ActiveLength];
    }
    }
    }
    // Case 2: no active edge, but the active node already has an edge starting with c.
    else if(_activePoint.ActiveNode.Edges.TryGetValue(c, out edge))
    {
    Console.WriteLine("Found '" + c.ToString().ToUpper() + "' on edge " + edge + "; updating active point.");
    _activePoint.ActiveEdgeStartsWith = c;
    _activePoint.ActiveLength = 1;
    remainder++;
    implicitComplete = true;
    }
    // Case 3: no active edge and no edge for c: add a new open-ended edge at the active node.
    else
    {
    Console.WriteLine("Inserting singular edge for '" + c.ToString().ToUpper() + "' from node " + _activePoint.ActiveNode.NodeNumber);
    edge = new Edge(this) { StartIndex = InsertionPoint, Head = _activePoint.ActiveNode };
    _activePoint.ActiveNode.Edges.Add(c, edge);
    }
    // Dump the whole tree after every insertion attempt.
    Console.WriteLine(WriteText());
    }
    }
    }

    // Renders the tree, starting at the root, into a string.
    public string WriteText()
    {
    var writer = new StringWriter();
    RootNode.WriteText(writer, 0);
    return writer.ToString();
    }
    }

    /// <summary>
    /// A node of the suffix tree: its outgoing edges keyed by a character, an optional link to
    /// another node, and the edge (Stem) that leads into it.
    /// </summary>
    public class Node
    {
        /// <summary>Outgoing edges, keyed by a character.</summary>
        public Dictionary<char, Edge> Edges { get; private set; }

        /// <summary>Another node this node is linked to; null when no link was set.</summary>
        public Node LinkedNode { get; set; }

        /// <summary>The edge leading into this node; null when none was assigned.</summary>
        public Edge Stem { get; set; }

        /// <summary>Number used to identify the node in log and text output.</summary>
        public int NodeNumber { get; set; }

        /// <summary>Creates a node with an empty edge table.</summary>
        public Node()
        {
            Edges = new Dictionary<char, Edge>();
        }

        /// <summary>
        /// Writes the node number followed by each outgoing edge (and, through the edge, its subtree).
        /// A node with no edges is written as its number followed by a line break.
        /// </summary>
        /// <param name="writer">Destination for the rendered text.</param>
        /// <param name="indent">Number of columns continuation lines of this node are indented by.</param>
        public void WriteText(TextWriter writer, int indent)
        {
            writer.Write(NodeNumber);

            // Enumerable.Max throws InvalidOperationException on an empty sequence, so a node
            // without edges must be handled before computing the longest edge. Terminate the
            // line the same way a leaf edge does.
            if(Edges.Count == 0)
            {
                writer.WriteLine("");
                return;
            }

            var i = 0;
            // Largest (end - start) over all edges; edges without an end index use the
            // tree's current insertion point as their end.
            var maxEdgeLength = Edges.Values.Max(e => (e.EndIndex ?? e.Tree.InsertionPoint) - e.StartIndex);
            foreach(var edge in Edges.Values)
            {
                if(i++ == 0)
                    writer.Write(" +");
                else
                {
                    // Later edges start on a fresh line and are indented back under the first one.
                    writer.Write(" ");
                    if(indent > 0)
                    {
                        writer.Write("|");
                        writer.Write(new string(' ', indent - 1));
                    }
                    writer.Write("+");
                }
                edge.WriteText(writer, indent + 3, maxEdgeLength);
            }
        }

        /// <summary>
        /// Sum of the lengths of the Stem edges on the chain from this node back to a node
        /// without a Stem; 0 when this node has no Stem.
        /// </summary>
        public int Depth
        {
            get { return Stem == null ? 0 : Stem.Length + Stem.Head.Depth; }
        }

        /// <summary>Returns "Node " followed by the node number.</summary>
        public override string ToString()
        {
            return "Node " + NodeNumber;
        }
    }

    /// <summary>
    /// An edge of the suffix tree, labelled by the characters of the tree's word from StartIndex
    /// to EndIndex. While EndIndex is null the edge runs up to the tree's current insertion point.
    /// </summary>
    public class Edge
    {
        /// <summary>Creates an edge belonging to the given tree.</summary>
        /// <param name="tree">The tree whose word this edge indexes into.</param>
        public Edge(SuffixTree tree)
        {
            Tree = tree;
        }

        /// <summary>The tree this edge belongs to.</summary>
        public SuffixTree Tree { get; set; }

        /// <summary>The node this edge leaves from.</summary>
        public Node Head { get; set; }

        /// <summary>The node this edge leads to; null when the edge ends in no node.</summary>
        public Node Tail { get; set; }

        /// <summary>Index in the word of the first character of the label.</summary>
        public int StartIndex { get; set; }

        /// <summary>Index in the word of the last character of the label, or null while open-ended.</summary>
        public int? EndIndex { get; set; }

        /// <summary>Returns "[headNumber:'LABEL']" with the label upper-cased.</summary>
        public override string ToString()
        {
            var headNumber = Head.NodeNumber;
            var label = Tree.Word.Substring(StartIndex, Length).ToUpper();
            return "[" + headNumber + ":'" + label + "']";
        }

        /// <summary>
        /// Number of characters on the edge. An open-ended edge ends at the tree's insertion point,
        /// capped at the last index of the word.
        /// </summary>
        public int Length
        {
            get
            {
                int lastIndex;
                if(EndIndex.HasValue)
                    lastIndex = EndIndex.Value;
                else
                    lastIndex = Math.Min(Tree.Word.Length - 1, Tree.InsertionPoint);
                return lastIndex - StartIndex + 1;
            }
        }

        /// <summary>
        /// Writes " - LABEL"; then either ends the line (no tail node) or pads with dashes
        /// and writes the tail node.
        /// </summary>
        /// <param name="writer">Destination for the rendered text.</param>
        /// <param name="indent">Indent of the node this edge leaves from.</param>
        /// <param name="maxEdgeLength">Width value supplied by the caller, used to size the dash padding.</param>
        public void WriteText(TextWriter writer, int indent, int maxEdgeLength)
        {
            writer.Write(" - ");
            var label = Tree.Word.Substring(StartIndex, Length).ToUpper();
            writer.Write(label);

            if(Tail == null)
            {
                writer.WriteLine("");
                return;
            }

            var dashCount = 1 + maxEdgeLength - label.Length;
            writer.Write(" ");
            writer.Write(new string('-', dashCount));
            writer.Write(" ");
            Tail.WriteText(writer, indent + maxEdgeLength + 6);
        }
    }

    /// <summary>
    /// The position the tree builder is currently working at: a node, the first character of
    /// the edge being followed ('\0' for none) and the length walked along that edge.
    /// </summary>
    public class ActivePoint
    {
        /// <summary>The node the active point hangs off.</summary>
        public Node ActiveNode { get; set; }

        /// <summary>First character of the active edge, or '\0' when no edge is active.</summary>
        public char ActiveEdgeStartsWith { get; set; }

        /// <summary>Number of characters walked along the active edge.</summary>
        public int ActiveLength { get; set; }

        /// <summary>Returns "{node,'c',length}", showing the '\0' character as a backslash followed by 0.</summary>
        public override string ToString()
        {
            string edgeChar;
            if(ActiveEdgeStartsWith != '\0')
                edgeChar = ActiveEdgeStartsWith.ToString();
            else
                edgeChar = "\\0";

            return "{" + ActiveNode + ",'" + edgeChar + "'," + ActiveLength + "}";
        }
    }
    }
    4 changes: 4 additions & 0 deletions test.cs
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,4 @@
    // Entry point of the sample: constructs a suffix tree for the word "abcabxabcd".
    static void Main()
    {
        SuffixTree sample = new SuffixTree("abcabxabcd");
    }