Revisions
-
matklad revised this gist
May 7, 2012 . 1 changed file with 590 additions and 471 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -6,482 +6,601 @@ namespace SuffixTreeAlgorithm { public class SuffixTree { public SuffixTree(string word) { Word = word; RootNode = new Node(this); ActiveNode = RootNode; //Message = TreeUpdate; } public char? CanonizationChar { get; set; } public string Word { get; private set; } private int CurrentSuffixStartIndex { get; set; } private int CurrentSuffixEndIndex { get; set; } private Node LastCreatedNodeInCurrentIteration { get; set; } private int UnresolvedSuffixes { get; set; } public Node RootNode { get; private set; } private Node ActiveNode { get; set; } private Edge ActiveEdge { get; set; } private int DistanceIntoActiveEdge { get; set; } private char LastCharacterOfCurrentSuffix { get; set; } private int NextNodeNumber { get; set; } private int NextEdgeNumber { get; set; } public static void Main() { Test(); Console.ReadKey(); } public static void Test() { const int testCount = 1000; const int strLength = 20; var r = new Random(); for (int testNo = 0; testNo < testCount; testNo++) { var sb = new StringBuilder(); for (int i = 0; i < strLength; i++) { char c = r.Next(2) == 0 ? 'a' : 'b'; sb.Append(c); } String s = sb.ToString(); int st = CountSubstringsWithSuffixTree(s); int bf = CountSubstringsWithBruteForce(s); if (st != bf) { Console.WriteLine("Error"); Console.WriteLine(s); Console.WriteLine(st); Console.WriteLine(bf); break; } } Console.WriteLine("Test ended"); } public static int CountSubstringsWithSuffixTree(string s) { SuffixTree t = Create(s); return CountEdgesLength(t.RootNode) - s.Length - 1; } private static int CountEdgesLength(Node node) { int ans = 0; foreach (Edge edge in node.Edges.Values) { ans += edge.Length; if (edge.Tail != null) { ans += CountEdgesLength(edge.Tail); } } return ans; } public static int CountSubstringsWithBruteForce(string s) { var substrs = new HashSet<string>(); for (int i = 0; i < s.Length; i++) { for (int l = 1; l <= s.Length - i; l++) { string subString = s.Substring(i, l); substrs.Add(subString); } } return substrs.Count(); } private static void TreeUpdate(string f, object[] args) { Console.WriteLine(f, args); } public event Action<SuffixTree> Changed; private void TriggerChanged() { Action<SuffixTree> handler = Changed; if (handler != null) handler(this); } public event Action<string, object[]> Message; private void SendMessage(string format, params object[] args) { Action<string, object[]> handler = Message; if (handler != null) handler(format, args); } public static SuffixTree Create(string word, char canonizationChar = '$') { var tree = new SuffixTree(word); tree.Build(canonizationChar); return tree; } public void Build(char canonizationChar) { CanonizationChar = canonizationChar; Word = string.Concat(Word, canonizationChar); for (CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++) { SendMessage("=== ITERATION {0} ===", CurrentSuffixEndIndex); LastCreatedNodeInCurrentIteration = null; LastCharacterOfCurrentSuffix = Word[CurrentSuffixEndIndex]; for (CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++) { bool wasImplicitlyAdded = !AddNextSuffix(); SendMessage("\n{0}", RenderTree()); if (wasImplicitlyAdded) { UnresolvedSuffixes++; break; } if (UnresolvedSuffixes > 0) UnresolvedSuffixes--; } } } private bool AddNextSuffix() { string suffix = string.Concat(Word.Substring(CurrentSuffixStartIndex, CurrentSuffixEndIndex - CurrentSuffixStartIndex), "{", Word[CurrentSuffixEndIndex], "}"); SendMessage("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix, CurrentSuffixStartIndex, CurrentSuffixEndIndex); SendMessage(" => ActiveNode: {0}", ActiveNode); SendMessage(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString()); SendMessage(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge); SendMessage(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes); if (ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length) throw new Exception("BOUNDARY EXCEEDED"); if (ActiveEdge != null) return AddCurrentSuffixToActiveEdge(); if (LastCreatedNodeInCurrentIteration != null) { LastCreatedNodeInCurrentIteration.LinkedNode = ActiveNode; SendMessage(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveNode); TriggerChanged(); } if (GetExistingEdgeAndSetAsActive()) return false; ActiveNode.AddNewEdge(); TriggerChanged(); UpdateActivePointAfterAddingNewEdge(); return true; } private bool GetExistingEdgeAndSetAsActive() { Edge edge; if (ActiveNode.Edges.TryGetValue(LastCharacterOfCurrentSuffix, out edge)) { SendMessage("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix); ActiveEdge = edge; DistanceIntoActiveEdge = 1; TriggerChanged(); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex); SendMessage(" => ActiveEdge is now: {0}", ActiveEdge); SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); SendMessage(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes); return true; } SendMessage("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix); return false; } private bool AddCurrentSuffixToActiveEdge() { char nextCharacterOnEdge = Word[ActiveEdge.StartIndex + DistanceIntoActiveEdge]; if (nextCharacterOnEdge == LastCharacterOfCurrentSuffix) { SendMessage("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix); DistanceIntoActiveEdge++; TriggerChanged(); SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex); return false; } SplitActiveEdge(); ActiveEdge.Tail.AddNewEdge(); TriggerChanged(); UpdateActivePointAfterAddingNewEdge(); return true; } private void UpdateActivePointAfterAddingNewEdge() { if (ReferenceEquals(ActiveNode, RootNode)) { if (DistanceIntoActiveEdge > 0) { SendMessage( "New edge has been added and the active node is root. The active edge will now be updated."); DistanceIntoActiveEdge--; SendMessage(" => DistanceIntoActiveEdge decremented to: {0}", DistanceIntoActiveEdge); ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]]; SendMessage(" => ActiveEdge is now: {0}", ActiveEdge); TriggerChanged(); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(CurrentSuffixStartIndex + 1); } } else UpdateActivePointToLinkedNodeOrRoot(); } private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge) { int walkDistance = 0; while (ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length) { SendMessage( "Active point is at or beyond edge boundary and will be moved until it falls inside an edge boundary"); DistanceIntoActiveEdge -= ActiveEdge.Length; ActiveNode = ActiveEdge.Tail ?? RootNode; if (DistanceIntoActiveEdge == 0) ActiveEdge = null; else { walkDistance += ActiveEdge.Length; char c = Word[firstIndexOfOriginalActiveEdge + walkDistance]; ActiveEdge = ActiveNode.Edges[c]; } TriggerChanged(); } } private void SplitActiveEdge() { ActiveEdge = ActiveEdge.SplitAtIndex(ActiveEdge.StartIndex + DistanceIntoActiveEdge); SendMessage(" => ActiveEdge is now: {0}", ActiveEdge); TriggerChanged(); if (LastCreatedNodeInCurrentIteration != null) { LastCreatedNodeInCurrentIteration.LinkedNode = ActiveEdge.Tail; SendMessage(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveEdge.Tail); TriggerChanged(); } LastCreatedNodeInCurrentIteration = ActiveEdge.Tail; LastCreatedNodeInCurrentIteration = ActiveEdge.Tail; } private void UpdateActivePointToLinkedNodeOrRoot() { SendMessage("The linked node for active node {0} is {1}", ActiveNode, ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString()); if (ActiveNode.LinkedNode != null) { ActiveNode = ActiveNode.LinkedNode; SendMessage(" => ActiveNode is now: {0}", ActiveNode); } else { ActiveNode = RootNode; SendMessage(" => ActiveNode is now ROOT", ActiveNode); } TriggerChanged(); if (ActiveEdge != null) { int firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex; ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]]; TriggerChanged(); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge); } } public string RenderTree() { var writer = new StringWriter(); RootNode.RenderTree(writer, ""); return writer.ToString(); } public string WriteDotGraph() { var sb = new StringBuilder(); sb.AppendLine("digraph {"); sb.AppendLine("rankdir = LR;"); sb.AppendLine("edge [arrowsize=0.5,fontsize=11];"); for (int i = 0; i < NextNodeNumber; i++) sb.AppendFormat( "node{0} [label=\"{0}\",style=filled,fillcolor={1},shape=circle,width=.1,height=.1,fontsize=11,margin=0.01];", i, ActiveNode.NodeNumber == i ? "cyan" : "lightgrey").AppendLine(); RootNode.WriteDotGraph(sb); sb.AppendLine("}"); return sb.ToString(); } public HashSet<string> ExtractAllSubstrings() { var set = new HashSet<string>(); ExtractAllSubstrings("", set, RootNode); return set; } private void ExtractAllSubstrings(string str, HashSet<string> set, Node node) { foreach (Edge edge in node.Edges.Values) { string edgeStr = edge.StringWithoutCanonizationChar; int edgeLength = !edge.EndIndex.HasValue && CanonizationChar.HasValue ? edge.Length - 1 : edge.Length; // assume tailing canonization char for (int length = 1; length <= edgeLength; length++) set.Add(string.Concat(str, edgeStr.Substring(0, length))); if (edge.Tail != null) ExtractAllSubstrings(string.Concat(str, edge.StringWithoutCanonizationChar), set, edge.Tail); } } public List<string> ExtractSubstringsForIndexing(int? maxLength = null) { var list = new List<string>(); ExtractSubstringsForIndexing("", list, maxLength ?? Word.Length, RootNode); return list; } private void ExtractSubstringsForIndexing(string str, List<string> list, int len, Node node) { foreach (Edge edge in node.Edges.Values) { string newstr = string.Concat(str, Word.Substring(edge.StartIndex, Math.Min(len, edge.Length))); if (len > edge.Length && edge.Tail != null) ExtractSubstringsForIndexing(newstr, list, len - edge.Length, edge.Tail); else list.Add(newstr); } } #region Nested type: Edge public class Edge { private readonly SuffixTree _tree; public Edge(SuffixTree tree, Node head) { _tree = tree; Head = head; StartIndex = tree.CurrentSuffixEndIndex; EdgeNumber = _tree.NextEdgeNumber++; } public Node Head { get; private set; } public Node Tail { get; private set; } public int StartIndex { get; private set; } public int? EndIndex { get; set; } public int EdgeNumber { get; private set; } public int Length { get { return (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1; } } public string StringWithoutCanonizationChar { get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex - (_tree.CanonizationChar.HasValue ? 1 : 0)) - StartIndex + 1); } } public string String { get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1); } } public Edge SplitAtIndex(int index) { _tree.SendMessage("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]); var newEdge = new Edge(_tree, Head); var newNode = new Node(_tree); newEdge.Tail = newNode; newEdge.StartIndex = StartIndex; newEdge.EndIndex = index - 1; Head = newNode; StartIndex = index; newNode.Edges.Add(_tree.Word[StartIndex], this); newEdge.Head.Edges[_tree.Word[newEdge.StartIndex]] = newEdge; _tree.SendMessage(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode, this); return newEdge; } public override string ToString() { return string.Concat( _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1), "(", StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")"); } public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength) { string strEdge = _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1); writer.Write(strEdge); if (Tail == null) writer.WriteLine(); else { var line = new string(RenderChars.HorizontalLine, maxEdgeLength - strEdge.Length + 1); writer.Write(line); Tail.RenderTree(writer, string.Concat(prefix, new string(' ', strEdge.Length + line.Length))); } } public void WriteDotGraph(StringBuilder sb) { if (Tail == null) sb.AppendFormat("leaf{0} [label=\"\",shape=point]", EdgeNumber).AppendLine(); string label, weight, color; if (_tree.ActiveEdge != null && ReferenceEquals(this, _tree.ActiveEdge)) { if (_tree.ActiveEdge.Length == 0) label = ""; else if (_tree.DistanceIntoActiveEdge > Length) label = "<<FONT COLOR=\"red\" SIZE=\"11\"><B>" + String + "</B> (" + _tree.DistanceIntoActiveEdge + ")</FONT>>"; else if (_tree.DistanceIntoActiveEdge == Length) label = "<<FONT COLOR=\"red\" SIZE=\"11\">" + String + "</FONT>>"; else if (_tree.DistanceIntoActiveEdge > 0) label = "<<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\"><TR><TD><FONT COLOR=\"blue\"><B>" + String.Substring(0, _tree.DistanceIntoActiveEdge) + "</B></FONT></TD><TD COLOR=\"black\">" + String.Substring(_tree.DistanceIntoActiveEdge) + "</TD></TR></TABLE>>"; else label = "\"" + String + "\""; color = "blue"; weight = "5"; } else { label = "\"" + String + "\""; color = "black"; weight = "3"; } string tail = Tail == null ? "leaf" + EdgeNumber : "node" + Tail.NodeNumber; sb.AppendFormat("node{0} -> {1} [label={2},weight={3},color={4},size=11]", Head.NodeNumber, tail, label, weight, color).AppendLine(); if (Tail != null) Tail.WriteDotGraph(sb); } } #endregion #region Nested type: Node public class Node { private readonly SuffixTree _tree; public Node(SuffixTree tree) { _tree = tree; Edges = new Dictionary<char, Edge>(); NodeNumber = _tree.NextNodeNumber++; } public Dictionary<char, Edge> Edges { get; private set; } public Node LinkedNode { get; set; } public int NodeNumber { get; private set; } public void AddNewEdge() { _tree.SendMessage("Adding new edge to {0}", this); var edge = new Edge(_tree, this); Edges.Add(_tree.Word[_tree.CurrentSuffixEndIndex], edge); _tree.SendMessage(" => {0} --> {1}", this, edge); } public void RenderTree(TextWriter writer, string prefix) { string strNode = string.Concat("(", NodeNumber.ToString(new string('0', _tree.NextNodeNumber.ToString().Length)), ")"); writer.Write(strNode); Edge[] edges = Edges.Select(kvp => kvp.Value).OrderBy(e => _tree.Word[e.StartIndex]).ToArray(); if (edges.Any()) { string prefixWithNodePadding = prefix + new string(' ', strNode.Length); int maxEdgeLength = edges.Max(e => (e.EndIndex ?? _tree.CurrentSuffixEndIndex) - e.StartIndex + 1); for (int i = 0; i < edges.Length; i++) { char connector, extender = ' '; if (i == 0) { if (edges.Length > 1) { connector = RenderChars.TJunctionDown; extender = RenderChars.VerticalLine; } else connector = RenderChars.HorizontalLine; } else { writer.Write(prefixWithNodePadding); if (i == edges.Length - 1) connector = RenderChars.CornerRight; else { connector = RenderChars.TJunctionRight; extender = RenderChars.VerticalLine; } } writer.Write(string.Concat(connector, RenderChars.HorizontalLine)); string newPrefix = string.Concat(prefixWithNodePadding, extender, ' '); edges[i].RenderTree(writer, newPrefix, maxEdgeLength); } } } public override string ToString() { return string.Concat("node #", NodeNumber); } public void WriteDotGraph(StringBuilder sb) { if (LinkedNode != null) sb.AppendFormat("node{0} -> node{1} [label=\"\",weight=.01,style=dotted]", NodeNumber, LinkedNode.NodeNumber).AppendLine(); foreach (Edge edge in Edges.Values) edge.WriteDotGraph(sb); } } #endregion #region Nested type: RenderChars public static class RenderChars { public const char TJunctionDown = '┬'; public const char HorizontalLine = '─'; public const char VerticalLine = '│'; public const char TJunctionRight = '├'; public const char CornerRight = '└'; } #endregion } } -
matklad revised this gist
May 7, 2012 . 1 changed file with 11 additions and 0 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -8,6 +8,15 @@ namespace SuffixTreeAlgorithm { public class SuffixTree { static void Main() { SuffixTree.Create("aabaaa"); Console.ReadKey(); } private void TreeUpdate(string f, object[] args){ Console.WriteLine(f, args); } public char? CanonizationChar { get; set; } public string Word { get; private set; } private int CurrentSuffixStartIndex { get; set; } @@ -27,6 +36,7 @@ public SuffixTree(string word) Word = word; RootNode = new Node(this); ActiveNode = RootNode; Message = TreeUpdate; } public event Action<SuffixTree> Changed; @@ -71,6 +81,7 @@ public void Build(char canonizationChar) for(CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++) { var wasImplicitlyAdded = !AddNextSuffix(); SendMessage("\n{0}", RenderTree()); if(wasImplicitlyAdded) { UnresolvedSuffixes++; -
axefrog revised this gist
May 6, 2012 . 1 changed file with 149 additions and 29 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -2,11 +2,13 @@ using System.Collections.Generic; using System.IO; using System.Linq; using System.Text; namespace SuffixTreeAlgorithm { public class SuffixTree { public char? CanonizationChar { get; set; } public string Word { get; private set; } private int CurrentSuffixStartIndex { get; set; } private int CurrentSuffixEndIndex { get; set; } @@ -18,39 +20,57 @@ public class SuffixTree private int DistanceIntoActiveEdge { get; set; } private char LastCharacterOfCurrentSuffix { get; set; } private int NextNodeNumber { get; set; } private int NextEdgeNumber { get; set; } public SuffixTree(string word) { Word = word; RootNode = new Node(this); ActiveNode = RootNode; } public event Action<SuffixTree> Changed; private void TriggerChanged() { var handler = Changed; if(handler != null) handler(this); } public event Action<string, object[]> Message; private void SendMessage(string format, params object[] args) { var handler = Message; if(handler != null) handler(format, args); } public static SuffixTree Create(string word, char canonizationChar = '$') { var tree = new SuffixTree(word); tree.Build(canonizationChar); return tree; } public void Build(char canonizationChar) { var n = Word.IndexOf(Word[Word.Length - 1]); var mustCanonize = n < Word.Length - 1; if(mustCanonize) { CanonizationChar = canonizationChar; Word = string.Concat(Word, canonizationChar); } for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++) { SendMessage("=== ITERATION {0} ===", CurrentSuffixEndIndex); LastCreatedNodeInCurrentIteration = null; LastCharacterOfCurrentSuffix = Word[CurrentSuffixEndIndex]; for(CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++) { var wasImplicitlyAdded = !AddNextSuffix(); if(wasImplicitlyAdded) { UnresolvedSuffixes++; @@ -65,11 +85,11 @@ private void Build(char canonizationChar) private bool AddNextSuffix() { var suffix = string.Concat(Word.Substring(CurrentSuffixStartIndex, CurrentSuffixEndIndex - CurrentSuffixStartIndex), "{", Word[CurrentSuffixEndIndex], "}"); SendMessage("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix, CurrentSuffixStartIndex, CurrentSuffixEndIndex); SendMessage(" => ActiveNode: {0}", ActiveNode); SendMessage(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString()); SendMessage(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge); SendMessage(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes); if(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length) throw new Exception("BOUNDARY EXCEEDED"); @@ -80,6 +100,8 @@ private bool AddNextSuffix() return false; ActiveNode.AddNewEdge(); TriggerChanged(); UpdateActivePointAfterAddingNewEdge(); return true; } @@ -89,17 +111,19 @@ private bool GetExistingEdgeAndSetAsActive() Edge edge; if(ActiveNode.Edges.TryGetValue(LastCharacterOfCurrentSuffix, out edge)) { SendMessage("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix); ActiveEdge = edge; DistanceIntoActiveEdge = 1; TriggerChanged(); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex); SendMessage(" => ActiveEdge is now: {0}", ActiveEdge); SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); SendMessage(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes); return true; } SendMessage("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix); return false; } @@ -108,16 +132,20 @@ private bool AddCurrentSuffixToActiveEdge() var nextCharacterOnEdge = Word[ActiveEdge.StartIndex + DistanceIntoActiveEdge]; if(nextCharacterOnEdge == LastCharacterOfCurrentSuffix) { SendMessage("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix); DistanceIntoActiveEdge++; TriggerChanged(); SendMessage(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex); return false; } SplitActiveEdge(); ActiveEdge.Tail.AddNewEdge(); TriggerChanged(); UpdateActivePointAfterAddingNewEdge(); return true; @@ -129,8 +157,13 @@ private void UpdateActivePointAfterAddingNewEdge() { if(DistanceIntoActiveEdge > 0) { SendMessage("New edge has been added and the active node is root. The active edge will now be updated."); DistanceIntoActiveEdge--; SendMessage(" => DistanceIntoActiveEdge decremented to: {0}", DistanceIntoActiveEdge); ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]]; SendMessage(" => ActiveEdge is now: {0}", ActiveEdge); TriggerChanged(); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(CurrentSuffixStartIndex + 1); } } @@ -143,9 +176,9 @@ private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOri var walkDistance = 0; while(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length) { SendMessage("Active point is at or beyond edge boundary and will be moved until it falls inside an edge boundary"); DistanceIntoActiveEdge -= ActiveEdge.Length; ActiveNode = ActiveEdge.Tail ?? RootNode; if(DistanceIntoActiveEdge == 0) ActiveEdge = null; else @@ -154,38 +187,44 @@ private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOri var c = Word[firstIndexOfOriginalActiveEdge + walkDistance]; ActiveEdge = ActiveNode.Edges[c]; } TriggerChanged(); } } private void SplitActiveEdge() { ActiveEdge = ActiveEdge.SplitAtIndex(ActiveEdge.StartIndex + DistanceIntoActiveEdge); SendMessage(" => ActiveEdge is now: {0}", ActiveEdge); TriggerChanged(); if(LastCreatedNodeInCurrentIteration != null) { LastCreatedNodeInCurrentIteration.LinkedNode = ActiveEdge.Tail; SendMessage(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveEdge.Tail); TriggerChanged(); } LastCreatedNodeInCurrentIteration = ActiveEdge.Tail; } private void UpdateActivePointToLinkedNodeOrRoot() { SendMessage("The linked node for active node {0} is {1}", ActiveNode, ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString()); if(ActiveNode.LinkedNode != null) { ActiveNode = ActiveNode.LinkedNode; SendMessage(" => ActiveNode is now: {0}", ActiveNode); } else { ActiveNode = RootNode; SendMessage(" => ActiveNode is now ROOT", ActiveNode); } TriggerChanged(); if(ActiveEdge != null) { var firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex; ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]]; TriggerChanged(); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge); } } @@ -197,6 +236,40 @@ public string RenderTree() return writer.ToString(); } public string WriteDotGraph() { var sb = new StringBuilder(); sb.AppendLine("digraph {"); sb.AppendLine("rankdir = LR;"); sb.AppendLine("edge [arrowsize=0.5,fontsize=11];"); for(var i = 0; i < NextNodeNumber; i++) sb.AppendFormat("node{0} [label=\"{0}\",style=filled,fillcolor={1},shape=circle,width=.1,height=.1,fontsize=11,margin=0.01];", i, ActiveNode.NodeNumber == i ? "cyan" : "lightgrey").AppendLine(); RootNode.WriteDotGraph(sb); sb.AppendLine("}"); return sb.ToString(); } public HashSet<string> ExtractAllSubstrings() { var set = new HashSet<string>(); ExtractAllSubstrings("", set, RootNode); return set; } private void ExtractAllSubstrings(string str, HashSet<string> set, Node node) { foreach(var edge in node.Edges.Values) { var edgeStr = edge.StringWithoutCanonizationChar; var edgeLength = !edge.EndIndex.HasValue && CanonizationChar.HasValue ? edge.Length - 1 : edge.Length; // assume tailing canonization char for(var length = 1; length <= edgeLength; length++) set.Add(string.Concat(str, edgeStr.Substring(0, length))); if(edge.Tail != null) ExtractAllSubstrings(string.Concat(str, edge.StringWithoutCanonizationChar), set, edge.Tail); } } public List<string> ExtractSubstringsForIndexing(int? maxLength = null) { var list = new List<string>(); @@ -225,17 +298,19 @@ public Edge(SuffixTree tree, Node head) _tree = tree; Head = head; StartIndex = tree.CurrentSuffixEndIndex; EdgeNumber = _tree.NextEdgeNumber++; } public Node Head { get; private set; } public Node Tail { get; private set; } public int StartIndex { get; private set; } public int? EndIndex { get; set; } public int EdgeNumber { get; private set; } public int Length { get { return (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1; } } public Edge SplitAtIndex(int index) { _tree.SendMessage("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]); var newEdge = new Edge(_tree, Head); var newNode = new Node(_tree); newEdge.Tail = newNode; @@ -245,7 +320,7 @@ public Edge SplitAtIndex(int index) StartIndex = index; newNode.Edges.Add(_tree.Word[StartIndex], this); newEdge.Head.Edges[_tree.Word[newEdge.StartIndex]] = newEdge; _tree.SendMessage(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode, this); return newEdge; } @@ -255,9 +330,14 @@ public override string ToString() StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")"); } public string StringWithoutCanonizationChar { get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex - (_tree.CanonizationChar.HasValue ? 1 : 0)) - StartIndex + 1); } } public string String { get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1); } } public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength) @@ -273,6 +353,38 @@ public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength) Tail.RenderTree(writer, string.Concat(prefix, new string(' ', strEdge.Length + line.Length))); } } public void WriteDotGraph(StringBuilder sb) { if(Tail == null) sb.AppendFormat("leaf{0} [label=\"\",shape=point]", EdgeNumber).AppendLine(); string label, weight, color; if(_tree.ActiveEdge != null && ReferenceEquals(this, _tree.ActiveEdge)) { if(_tree.ActiveEdge.Length == 0) label = ""; else if(_tree.DistanceIntoActiveEdge > Length) label = "<<FONT COLOR=\"red\" SIZE=\"11\"><B>" + String + "</B> (" + _tree.DistanceIntoActiveEdge + ")</FONT>>"; else if(_tree.DistanceIntoActiveEdge == Length) label = "<<FONT COLOR=\"red\" SIZE=\"11\">" + String + "</FONT>>"; else if(_tree.DistanceIntoActiveEdge > 0) label = "<<TABLE BORDER=\"0\" CELLPADDING=\"0\" CELLSPACING=\"0\"><TR><TD><FONT COLOR=\"blue\"><B>" + String.Substring(0, _tree.DistanceIntoActiveEdge) + "</B></FONT></TD><TD COLOR=\"black\">" + String.Substring(_tree.DistanceIntoActiveEdge) + "</TD></TR></TABLE>>"; else label = "\"" + String + "\""; color = "blue"; weight = "5"; } else { label = "\"" + String + "\""; color = "black"; weight = "3"; } var tail = Tail == null ? "leaf" + EdgeNumber : "node" + Tail.NodeNumber; sb.AppendFormat("node{0} -> {1} [label={2},weight={3},color={4},size=11]", Head.NodeNumber, tail, label, weight, color).AppendLine(); if(Tail != null) Tail.WriteDotGraph(sb); } } public class Node @@ -292,10 +404,10 @@ public Node(SuffixTree tree) public void AddNewEdge() { _tree.SendMessage("Adding new edge to {0}", this); var edge = new Edge(_tree, this); Edges.Add(_tree.Word[_tree.CurrentSuffixEndIndex], edge); _tree.SendMessage(" => {0} --> {1}", this, edge); } public void RenderTree(TextWriter writer, string prefix) @@ -342,6 +454,14 @@ public override string ToString() { return string.Concat("node #", NodeNumber); } public void WriteDotGraph(StringBuilder sb) { if(LinkedNode != null) sb.AppendFormat("node{0} -> node{1} [label=\"\",weight=.01,style=dotted]", NodeNumber, LinkedNode.NodeNumber).AppendLine(); foreach(var edge in Edges.Values) edge.WriteDotGraph(sb); } } public static class RenderChars @@ -353,4 +473,4 @@ public static class RenderChars public const char CornerRight = '└'; } } } -
axefrog revised this gist
May 2, 2012 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -131,7 +131,7 @@ private void UpdateActivePointAfterAddingNewEdge() { DistanceIntoActiveEdge--; ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]]; NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(CurrentSuffixStartIndex + 1); } } else @@ -353,4 +353,4 @@ public static class RenderChars public const char CornerRight = '└'; } } } -
axefrog revised this gist
Apr 27, 2012 . 2 changed files with 346 additions and 4 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -38,7 +38,7 @@ private void Build(char canonizationChar) var n = Word.IndexOf(Word[Word.Length - 1]); var mustCanonize = n < Word.Length - 1; if(mustCanonize) Word = string.Concat(Word, canonizationChar); for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++) { @@ -130,9 +130,8 @@ private void UpdateActivePointAfterAddingNewEdge() if(DistanceIntoActiveEdge > 0) { DistanceIntoActiveEdge--; ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]]; NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge == null ? 0 : ActiveEdge.StartIndex); } } else @@ -198,6 +197,25 @@ public string RenderTree() return writer.ToString(); } public List<string> ExtractSubstringsForIndexing(int? maxLength = null) { var list = new List<string>(); ExtractSubstringsForIndexing("", list, maxLength ?? Word.Length, RootNode); return list; } private void ExtractSubstringsForIndexing(string str, List<string> list, int len, Node node) { foreach(var edge in node.Edges.Values) { var newstr = string.Concat(str, Word.Substring(edge.StartIndex, Math.Min(len, edge.Length))); if(len > edge.Length && edge.Tail != null) ExtractSubstringsForIndexing(newstr, list, len - edge.Length, edge.Tail); else list.Add(newstr); } } public class Edge { private readonly SuffixTree _tree; @@ -335,4 +353,4 @@ public static class RenderChars public const char CornerRight = '└'; } } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,324 @@ === ITERATION 0 === The next suffix of 'almasamolmaz' to add is '{a}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' not found Adding new edge to node #0 => node #0 --> a(0,#) (0)──a === ITERATION 1 === The next suffix of 'almasamolmaz' to add is '{l}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'l' not found Adding new edge to node #0 => node #0 --> l(1,#) (0)┬─al └─l === ITERATION 2 === The next suffix of 'almasamolmaz' to add is '{m}' at indices 2,2 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'm' not found Adding new edge to node #0 => node #0 --> m(2,#) (0)┬─alm ├─lm └─m === ITERATION 3 === The next suffix of 'almasamolmaz' to add is '{a}' at indices 3,3 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: => ActiveEdge is now: alma(0,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─alma ├─lma └─ma === ITERATION 4 === The next suffix of 'almasamolmaz' to add is 'a{s}' at indices 3,4 => ActiveNode: node #0 => ActiveEdge: almas(0,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge almas(0,#) at index 1 ('l') => Hierarchy is now: node #0 --> a(0,0) --> node #1 --> lmas(1,#) => ActiveEdge is now: a(0,0) Adding new edge to node #1 => node #1 --> s(4,#) (0)┬─a────(1)┬─lmas │ └─s ├─lmas └─mas The next suffix of 'almasamolmaz' to add is '{s}' at indices 4,4 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 's' not found Adding new edge to node #0 => node #0 --> s(4,#) (0)┬─a────(1)┬─lmas │ └─s ├─lmas ├─mas └─s === ITERATION 5 === The next suffix of 'almasamolmaz' to add is '{a}' at indices 5,5 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─a─────(1)┬─lmasa │ └─sa ├─lmasa ├─masa └─sa === ITERATION 6 === The next suffix of 'almasamolmaz' to add is 'a{m}' at indices 5,6 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #1 starting with 'm' not found Adding new edge to node #1 => node #1 --> m(6,#) The linked node for active node node #1 is [null] (0)┬─a──────(1)┬─lmasam │ ├─m │ └─sam ├─lmasam ├─masam └─sam The next suffix of 'almasamolmaz' to add is '{m}' at indices 6,6 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'm' found. Values adjusted to: => ActiveEdge is now: masam(2,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─a──────(1)┬─lmasam │ ├─m │ └─sam ├─lmasam ├─masam └─sam === ITERATION 7 === The next suffix of 'almasamolmaz' to add is 'm{o}' at indices 6,7 => ActiveNode: node #0 => ActiveEdge: masamo(2,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge masamo(2,#) at index 3 ('a') => Hierarchy is now: node #0 --> m(2,2) --> node #2 --> asamo(3,#) => ActiveEdge is now: m(2,2) Adding new edge to node #2 => node #2 --> o(7,#) (0)┬─a───────(1)┬─lmasamo │ ├─mo │ └─samo ├─lmasamo ├─m───────(2)┬─asamo │ └─o └─samo The next suffix of 'almasamolmaz' to add is '{o}' at indices 7,7 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'o' not found Adding new edge to node #0 => node #0 --> o(7,#) (0)┬─a───────(1)┬─lmasamo │ ├─mo │ └─samo ├─lmasamo ├─m───────(2)┬─asamo │ └─o ├─o └─samo === ITERATION 8 === The next suffix of 'almasamolmaz' to add is '{l}' at indices 8,8 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'l' found. Values adjusted to: => ActiveEdge is now: lmasamol(1,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─a────────(1)┬─lmasamol │ ├─mol │ └─samol ├─lmasamol ├─m────────(2)┬─asamol │ └─ol ├─ol └─samol === ITERATION 9 === The next suffix of 'almasamolmaz' to add is 'l{m}' at indices 8,9 => ActiveNode: node #0 => ActiveEdge: lmasamolm(1,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 'm' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─a─────────(1)┬─lmasamolm │ ├─molm │ └─samolm ├─lmasamolm ├─m─────────(2)┬─asamolm │ └─olm ├─olm └─samolm === ITERATION 10 === The next suffix of 'almasamolmaz' to add is 'lm{a}' at indices 8,10 => ActiveNode: node #0 => ActiveEdge: lmasamolma(1,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 The next character on the current edge is 'a' (suffix added implicitly) => DistanceIntoActiveEdge is now: 3 (0)┬─a──────────(1)┬─lmasamolma │ ├─molma │ └─samolma ├─lmasamolma ├─m──────────(2)┬─asamolma │ └─olma ├─olma └─samolma === ITERATION 11 === The next suffix of 'almasamolmaz' to add is 'lma{z}' at indices 8,11 => ActiveNode: node #0 => ActiveEdge: lmasamolmaz(1,#) => DistanceIntoActiveEdge: 3 => UnresolvedSuffixes: 3 Splitting edge lmasamolmaz(1,#) at index 4 ('s') => Hierarchy is now: node #0 --> lma(1,3) --> node #3 --> samolmaz(4,#) => ActiveEdge is now: lma(1,3) Adding new edge to node #3 => node #3 --> z(11,#) Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─a────────(1)┬─lmasamolmaz │ ├─molmaz │ └─samolmaz ├─lma──────(3)┬─samolmaz │ └─z ├─m────────(2)┬─asamolmaz │ └─olmaz ├─olmaz └─samolmaz The next suffix of 'almasamolmaz' to add is 'ma{z}' at indices 9,11 => ActiveNode: node #2 => ActiveEdge: asamolmaz(3,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 Splitting edge asamolmaz(3,#) at index 4 ('s') => Hierarchy is now: node #2 --> a(3,3) --> node #4 --> samolmaz(4,#) => ActiveEdge is now: a(3,3) => Connected node #3 to node #4 Adding new edge to node #4 => node #4 --> z(11,#) The linked node for active node node #2 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─a────────(1)┬─lmasamolmaz │ ├─molmaz │ └─samolmaz ├─lma──────(3)┬─samolmaz │ └─z ├─m────────(2)┬─a─────(4)┬─samolmaz │ │ └─z │ └─olmaz ├─olmaz └─samolmaz The next suffix of 'almasamolmaz' to add is 'a{z}' at indices 10,11 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #1 starting with 'z' not found Adding new edge to node #1 => node #1 --> z(11,#) The linked node for active node node #1 is [null] (0)┬─a────────(1)┬─lmasamolmaz │ ├─molmaz │ ├─samolmaz │ └─z ├─lma──────(3)┬─samolmaz │ └─z ├─m────────(2)┬─a─────(4)┬─samolmaz │ │ └─z │ └─olmaz ├─olmaz └─samolmaz The next suffix of 'almasamolmaz' to add is '{z}' at indices 11,11 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'z' not found Adding new edge to node #0 => node #0 --> z(11,#) (0)┬─a────────(1)┬─lmasamolmaz │ ├─molmaz │ ├─samolmaz │ └─z ├─lma──────(3)┬─samolmaz │ └─z ├─m────────(2)┬─a─────(4)┬─samolmaz │ │ └─z │ └─olmaz ├─olmaz ├─samolmaz └─z -
axefrog revised this gist
Apr 15, 2012 . 1 changed file with 190 additions and 241 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,5 +1,5 @@ === ITERATION 0 === The next suffix of 'ooooooooo$' to add is '{o}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 @@ -11,310 +11,259 @@ Adding new edge to node #0 (0)──o === ITERATION 1 === The next suffix of 'ooooooooo$' to add is '{o}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'o' found. Values adjusted to: => ActiveEdge is now: oo(0,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)──oo === ITERATION 2 === The next suffix of 'ooooooooo$' to add is 'o{o}' at indices 1,2 => ActiveNode: node #0 => ActiveEdge: ooo(0,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)──ooo === ITERATION 3 === The next suffix of 'ooooooooo$' to add is 'oo{o}' at indices 1,3 => ActiveNode: node #0 => ActiveEdge: oooo(0,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 3 (0)──oooo === ITERATION 4 === The next suffix of 'ooooooooo$' to add is 'ooo{o}' at indices 1,4 => ActiveNode: node #0 => ActiveEdge: ooooo(0,#) => DistanceIntoActiveEdge: 3 => UnresolvedSuffixes: 3 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 4 (0)──ooooo === ITERATION 5 === The next suffix of 'ooooooooo$' to add is 'oooo{o}' at indices 1,5 => ActiveNode: node #0 => ActiveEdge: oooooo(0,#) => DistanceIntoActiveEdge: 4 => UnresolvedSuffixes: 4 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 5 (0)──oooooo === ITERATION 6 === The next suffix of 'ooooooooo$' to add is 'ooooo{o}' at indices 1,6 => ActiveNode: node #0 => ActiveEdge: ooooooo(0,#) => DistanceIntoActiveEdge: 5 => UnresolvedSuffixes: 5 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 6 (0)──ooooooo === ITERATION 7 === The next suffix of 'ooooooooo$' to add is 'oooooo{o}' at indices 1,7 => ActiveNode: node #0 => ActiveEdge: oooooooo(0,#) => DistanceIntoActiveEdge: 6 => UnresolvedSuffixes: 6 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 7 (0)──oooooooo === ITERATION 8 === The next suffix of 'ooooooooo$' to add is 'ooooooo{o}' at indices 1,8 => ActiveNode: node #0 => ActiveEdge: ooooooooo(0,#) => DistanceIntoActiveEdge: 7 => UnresolvedSuffixes: 7 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 8 (0)──ooooooooo === ITERATION 9 === The next suffix of 'ooooooooo$' to add is 'oooooooo{$}' at indices 1,9 => ActiveNode: node #0 => ActiveEdge: ooooooooo$(0,#) => DistanceIntoActiveEdge: 8 => UnresolvedSuffixes: 8 Splitting edge ooooooooo$(0,#) at index 8 ('o') => Hierarchy is now: node #0 --> oooooooo(0,7) --> node #1 --> o$(8,#) => ActiveEdge is now: oooooooo(0,7) Adding new edge to node #1 => node #1 --> $(9,#) (0)──oooooooo─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is 'ooooooo{$}' at indices 2,9 => ActiveNode: node #0 => ActiveEdge: oooooooo(0,7) => DistanceIntoActiveEdge: 7 => UnresolvedSuffixes: 7 Splitting edge oooooooo(0,7) at index 7 ('o') => Hierarchy is now: node #0 --> ooooooo(0,6) --> node #2 --> o(7,7) => ActiveEdge is now: ooooooo(0,6) => Connected node #1 to node #2 Adding new edge to node #2 => node #2 --> $(9,#) (0)──ooooooo─(2)┬─$ └─o─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is 'oooooo{$}' at indices 3,9 => ActiveNode: node #0 => ActiveEdge: ooooooo(0,6) => DistanceIntoActiveEdge: 6 => UnresolvedSuffixes: 6 Splitting edge ooooooo(0,6) at index 6 ('o') => Hierarchy is now: node #0 --> oooooo(0,5) --> node #3 --> o(6,6) => ActiveEdge is now: oooooo(0,5) => Connected node #2 to node #3 Adding new edge to node #3 => node #3 --> $(9,#) (0)──oooooo─(3)┬─$ └─o─(2)┬─$ └─o─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is 'ooooo{$}' at indices 4,9 => ActiveNode: node #0 => ActiveEdge: oooooo(0,5) => DistanceIntoActiveEdge: 5 => UnresolvedSuffixes: 5 Splitting edge oooooo(0,5) at index 5 ('o') => Hierarchy is now: node #0 --> ooooo(0,4) --> node #4 --> o(5,5) => ActiveEdge is now: ooooo(0,4) => Connected node #3 to node #4 Adding new edge to node #4 => node #4 --> $(9,#) (0)──ooooo─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is 'oooo{$}' at indices 5,9 => ActiveNode: node #0 => ActiveEdge: ooooo(0,4) => DistanceIntoActiveEdge: 4 => UnresolvedSuffixes: 4 Splitting edge ooooo(0,4) at index 4 ('o') => Hierarchy is now: node #0 --> oooo(0,3) --> node #5 --> o(4,4) => ActiveEdge is now: oooo(0,3) => Connected node #4 to node #5 Adding new edge to node #5 => node #5 --> $(9,#) (0)──oooo─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is 'ooo{$}' at indices 6,9 => ActiveNode: node #0 => ActiveEdge: oooo(0,3) => DistanceIntoActiveEdge: 3 => UnresolvedSuffixes: 3 Splitting edge oooo(0,3) at index 3 ('o') => Hierarchy is now: node #0 --> ooo(0,2) --> node #6 --> o(3,3) => ActiveEdge is now: ooo(0,2) => Connected node #5 to node #6 Adding new edge to node #6 => node #6 --> $(9,#) (0)──ooo─(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is 'oo{$}' at indices 7,9 => ActiveNode: node #0 => ActiveEdge: ooo(0,2) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 Splitting edge ooo(0,2) at index 2 ('o') => Hierarchy is now: node #0 --> oo(0,1) --> node #7 --> o(2,2) => ActiveEdge is now: oo(0,1) => Connected node #6 to node #7 Adding new edge to node #7 => node #7 --> $(9,#) (0)──oo─(7)┬─$ └─o─(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is 'o{$}' at indices 8,9 => ActiveNode: node #0 => ActiveEdge: oo(0,1) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge oo(0,1) at index 1 ('o') => Hierarchy is now: node #0 --> o(0,0) --> node #8 --> o(1,1) => ActiveEdge is now: o(0,0) => Connected node #7 to node #8 Adding new edge to node #8 => node #8 --> $(9,#) (0)──o─(8)┬─$ └─o─(7)┬─$ └─o─(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o─(1)┬─$ └─o$ The next suffix of 'ooooooooo$' to add is '{$}' at indices 9,9 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with '$' not found Adding new edge to node #0 => node #0 --> $(9,#) (0)┬─$ └─o─(8)┬─$ └─o─(7)┬─$ └─o─(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o─(1)┬─$ └─o$ -
axefrog revised this gist
Apr 15, 2012 . 7 changed files with 1928 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,9 @@ static void Main() { SuffixTree.Create("abcabxabcd"); SuffixTree.Create("abcdefabxybcdmnabcdex"); SuffixTree.Create("abcadak"); SuffixTree.Create("dedododeeodo"); SuffixTree.Create("ooooooooo"); SuffixTree.Create("mississippi"); } File renamed without changes.This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,716 @@ === ITERATION 0 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{a}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' not found Adding new edge to node #0 => node #0 --> a(0,#) (0)──a === ITERATION 1 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{b}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'b' not found Adding new edge to node #0 => node #0 --> b(1,#) (0)┬─ab └─b === ITERATION 2 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{c}' at indices 2,2 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'c' not found Adding new edge to node #0 => node #0 --> c(2,#) (0)┬─abc ├─bc └─c === ITERATION 3 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{d}' at indices 3,3 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'd' not found Adding new edge to node #0 => node #0 --> d(3,#) (0)┬─abcd ├─bcd ├─cd └─d === ITERATION 4 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{e}' at indices 4,4 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'e' not found Adding new edge to node #0 => node #0 --> e(4,#) (0)┬─abcde ├─bcde ├─cde ├─de └─e === ITERATION 5 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{f}' at indices 5,5 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'f' not found Adding new edge to node #0 => node #0 --> f(5,#) (0)┬─abcdef ├─bcdef ├─cdef ├─def ├─ef └─f === ITERATION 6 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{a}' at indices 6,6 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: => ActiveEdge is now: abcdefa(0,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─abcdefa ├─bcdefa ├─cdefa ├─defa ├─efa └─fa === ITERATION 7 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'a{b}' at indices 6,7 => ActiveNode: node #0 => ActiveEdge: abcdefab(0,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 'b' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─abcdefab ├─bcdefab ├─cdefab ├─defab ├─efab └─fab === ITERATION 8 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'ab{x}' at indices 6,8 => ActiveNode: node #0 => ActiveEdge: abcdefabx(0,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 Splitting edge abcdefabx(0,#) at index 2 ('c') => Hierarchy is now: node #0 --> ab(0,1) --> node #1 --> cdefabx(2,#) => ActiveEdge is now: ab(0,1) Adding new edge to node #1 => node #1 --> x(8,#) (0)┬─ab───────(1)┬─cdefabx │ └─x ├─bcdefabx ├─cdefabx ├─defabx ├─efabx └─fabx The next suffix of 'abcdefabxybcdmnabcdex' to add is 'b{x}' at indices 7,8 => ActiveNode: node #0 => ActiveEdge: bcdefabx(1,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge bcdefabx(1,#) at index 2 ('c') => Hierarchy is now: node #0 --> b(1,1) --> node #2 --> cdefabx(2,#) => ActiveEdge is now: b(1,1) => Connected node #1 to node #2 Adding new edge to node #2 => node #2 --> x(8,#) (0)┬─ab──────(1)┬─cdefabx │ └─x ├─b───────(2)┬─cdefabx │ └─x ├─cdefabx ├─defabx ├─efabx └─fabx The next suffix of 'abcdefabxybcdmnabcdex' to add is '{x}' at indices 8,8 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'x' not found Adding new edge to node #0 => node #0 --> x(8,#) (0)┬─ab──────(1)┬─cdefabx │ └─x ├─b───────(2)┬─cdefabx │ └─x ├─cdefabx ├─defabx ├─efabx ├─fabx └─x === ITERATION 9 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{y}' at indices 9,9 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'y' not found Adding new edge to node #0 => node #0 --> y(9,#) (0)┬─ab───────(1)┬─cdefabxy │ └─xy ├─b────────(2)┬─cdefabxy │ └─xy ├─cdefabxy ├─defabxy ├─efabxy ├─fabxy ├─xy └─y === ITERATION 10 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{b}' at indices 10,10 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'b' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─ab────────(1)┬─cdefabxyb │ └─xyb ├─b─────────(2)┬─cdefabxyb │ └─xyb ├─cdefabxyb ├─defabxyb ├─efabxyb ├─fabxyb ├─xyb └─yb === ITERATION 11 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'b{c}' at indices 10,11 => ActiveNode: node #2 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #2 starting with 'c' found. Values adjusted to: => ActiveEdge is now: cdefabxybc(2,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 1 (0)┬─ab─────────(1)┬─cdefabxybc │ └─xybc ├─b──────────(2)┬─cdefabxybc │ └─xybc ├─cdefabxybc ├─defabxybc ├─efabxybc ├─fabxybc ├─xybc └─ybc === ITERATION 12 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'bc{d}' at indices 10,12 => ActiveNode: node #2 => ActiveEdge: cdefabxybcd(2,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 The next character on the current edge is 'd' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─ab──────────(1)┬─cdefabxybcd │ └─xybcd ├─b───────────(2)┬─cdefabxybcd │ └─xybcd ├─cdefabxybcd ├─defabxybcd ├─efabxybcd ├─fabxybcd ├─xybcd └─ybcd === ITERATION 13 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'bcd{m}' at indices 10,13 => ActiveNode: node #2 => ActiveEdge: cdefabxybcdm(2,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 3 Splitting edge cdefabxybcdm(2,#) at index 4 ('e') => Hierarchy is now: node #2 --> cd(2,3) --> node #3 --> efabxybcdm(4,#) => ActiveEdge is now: cd(2,3) Adding new edge to node #3 => node #3 --> m(13,#) The linked node for active node node #2 is [null] (0)┬─ab───────────(1)┬─cdefabxybcdm │ └─xybcdm ├─b────────────(2)┬─cd─────(3)┬─efabxybcdm │ │ └─m │ └─xybcdm ├─cdefabxybcdm ├─defabxybcdm ├─efabxybcdm ├─fabxybcdm ├─xybcdm └─ybcdm The next suffix of 'abcdefabxybcdmnabcdex' to add is 'cd{m}' at indices 11,13 => ActiveNode: node #0 => ActiveEdge: cdefabxybcdm(2,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 Splitting edge cdefabxybcdm(2,#) at index 4 ('e') => Hierarchy is now: node #0 --> cd(2,3) --> node #4 --> efabxybcdm(4,#) => ActiveEdge is now: cd(2,3) => Connected node #3 to node #4 Adding new edge to node #4 => node #4 --> m(13,#) (0)┬─ab──────────(1)┬─cdefabxybcdm │ └─xybcdm ├─b───────────(2)┬─cd─────(3)┬─efabxybcdm │ │ └─m │ └─xybcdm ├─cd──────────(4)┬─efabxybcdm │ └─m ├─defabxybcdm ├─efabxybcdm ├─fabxybcdm ├─xybcdm └─ybcdm The next suffix of 'abcdefabxybcdmnabcdex' to add is 'd{m}' at indices 12,13 => ActiveNode: node #0 => ActiveEdge: defabxybcdm(3,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge defabxybcdm(3,#) at index 4 ('e') => Hierarchy is now: node #0 --> d(3,3) --> node #5 --> efabxybcdm(4,#) => ActiveEdge is now: d(3,3) => Connected node #4 to node #5 Adding new edge to node #5 => node #5 --> m(13,#) (0)┬─ab─────────(1)┬─cdefabxybcdm │ └─xybcdm ├─b──────────(2)┬─cd─────(3)┬─efabxybcdm │ │ └─m │ └─xybcdm ├─cd─────────(4)┬─efabxybcdm │ └─m ├─d──────────(5)┬─efabxybcdm │ └─m ├─efabxybcdm ├─fabxybcdm ├─xybcdm └─ybcdm The next suffix of 'abcdefabxybcdmnabcdex' to add is '{m}' at indices 13,13 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'm' not found Adding new edge to node #0 => node #0 --> m(13,#) (0)┬─ab─────────(1)┬─cdefabxybcdm │ └─xybcdm ├─b──────────(2)┬─cd─────(3)┬─efabxybcdm │ │ └─m │ └─xybcdm ├─cd─────────(4)┬─efabxybcdm │ └─m ├─d──────────(5)┬─efabxybcdm │ └─m ├─efabxybcdm ├─fabxybcdm ├─m ├─xybcdm └─ybcdm === ITERATION 14 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{n}' at indices 14,14 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'n' not found Adding new edge to node #0 => node #0 --> n(14,#) (0)┬─ab──────────(1)┬─cdefabxybcdmn │ └─xybcdmn ├─b───────────(2)┬─cd──────(3)┬─efabxybcdmn │ │ └─mn │ └─xybcdmn ├─cd──────────(4)┬─efabxybcdmn │ └─mn ├─d───────────(5)┬─efabxybcdmn │ └─mn ├─efabxybcdmn ├─fabxybcdmn ├─mn ├─n ├─xybcdmn └─ybcdmn === ITERATION 15 === The next suffix of 'abcdefabxybcdmnabcdex' to add is '{a}' at indices 15,15 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: => ActiveEdge is now: ab(0,1) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─ab───────────(1)┬─cdefabxybcdmna │ └─xybcdmna ├─b────────────(2)┬─cd───────(3)┬─efabxybcdmna │ │ └─mna │ └─xybcdmna ├─cd───────────(4)┬─efabxybcdmna │ └─mna ├─d────────────(5)┬─efabxybcdmna │ └─mna ├─efabxybcdmna ├─fabxybcdmna ├─mna ├─na ├─xybcdmna └─ybcdmna === ITERATION 16 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'a{b}' at indices 15,16 => ActiveNode: node #0 => ActiveEdge: ab(0,1) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 'b' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─ab────────────(1)┬─cdefabxybcdmnab │ └─xybcdmnab ├─b─────────────(2)┬─cd────────(3)┬─efabxybcdmnab │ │ └─mnab │ └─xybcdmnab ├─cd────────────(4)┬─efabxybcdmnab │ └─mnab ├─d─────────────(5)┬─efabxybcdmnab │ └─mnab ├─efabxybcdmnab ├─fabxybcdmnab ├─mnab ├─nab ├─xybcdmnab └─ybcdmnab === ITERATION 17 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'ab{c}' at indices 15,17 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 2 Existing edge for node #1 starting with 'c' found. Values adjusted to: => ActiveEdge is now: cdefabxybcdmnabc(2,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 2 (0)┬─ab─────────────(1)┬─cdefabxybcdmnabc │ └─xybcdmnabc ├─b──────────────(2)┬─cd─────────(3)┬─efabxybcdmnabc │ │ └─mnabc │ └─xybcdmnabc ├─cd─────────────(4)┬─efabxybcdmnabc │ └─mnabc ├─d──────────────(5)┬─efabxybcdmnabc │ └─mnabc ├─efabxybcdmnabc ├─fabxybcdmnabc ├─mnabc ├─nabc ├─xybcdmnabc └─ybcdmnabc === ITERATION 18 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'abc{d}' at indices 15,18 => ActiveNode: node #1 => ActiveEdge: cdefabxybcdmnabcd(2,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 3 The next character on the current edge is 'd' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─ab──────────────(1)┬─cdefabxybcdmnabcd │ └─xybcdmnabcd ├─b───────────────(2)┬─cd──────────(3)┬─efabxybcdmnabcd │ │ └─mnabcd │ └─xybcdmnabcd ├─cd──────────────(4)┬─efabxybcdmnabcd │ └─mnabcd ├─d───────────────(5)┬─efabxybcdmnabcd │ └─mnabcd ├─efabxybcdmnabcd ├─fabxybcdmnabcd ├─mnabcd ├─nabcd ├─xybcdmnabcd └─ybcdmnabcd === ITERATION 19 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'abcd{e}' at indices 15,19 => ActiveNode: node #1 => ActiveEdge: cdefabxybcdmnabcde(2,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 4 The next character on the current edge is 'e' (suffix added implicitly) => DistanceIntoActiveEdge is now: 3 (0)┬─ab───────────────(1)┬─cdefabxybcdmnabcde │ └─xybcdmnabcde ├─b────────────────(2)┬─cd───────────(3)┬─efabxybcdmnabcde │ │ └─mnabcde │ └─xybcdmnabcde ├─cd───────────────(4)┬─efabxybcdmnabcde │ └─mnabcde ├─d────────────────(5)┬─efabxybcdmnabcde │ └─mnabcde ├─efabxybcdmnabcde ├─fabxybcdmnabcde ├─mnabcde ├─nabcde ├─xybcdmnabcde └─ybcdmnabcde === ITERATION 20 === The next suffix of 'abcdefabxybcdmnabcdex' to add is 'abcde{x}' at indices 15,20 => ActiveNode: node #1 => ActiveEdge: cdefabxybcdmnabcdex(2,#) => DistanceIntoActiveEdge: 3 => UnresolvedSuffixes: 5 Splitting edge cdefabxybcdmnabcdex(2,#) at index 5 ('f') => Hierarchy is now: node #1 --> cde(2,4) --> node #6 --> fabxybcdmnabcdex(5,#) => ActiveEdge is now: cde(2,4) Adding new edge to node #6 => node #6 --> x(20,#) The linked node for active node node #1 is node #2 => ActiveNode is now: node #2 Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─ab────────────────(1)┬─cde───────────(6)┬─fabxybcdmnabcdex │ │ └─x │ └─xybcdmnabcdex ├─b─────────────────(2)┬─cd────────────(3)┬─efabxybcdmnabcdex │ │ └─mnabcdex │ └─xybcdmnabcdex ├─cd────────────────(4)┬─efabxybcdmnabcdex │ └─mnabcdex ├─d─────────────────(5)┬─efabxybcdmnabcdex │ └─mnabcdex ├─efabxybcdmnabcdex ├─fabxybcdmnabcdex ├─mnabcdex ├─nabcdex ├─xybcdmnabcdex └─ybcdmnabcdex The next suffix of 'abcdefabxybcdmnabcdex' to add is 'bcde{x}' at indices 16,20 => ActiveNode: node #3 => ActiveEdge: efabxybcdmnabcdex(4,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 4 Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f') => Hierarchy is now: node #3 --> e(4,4) --> node #7 --> fabxybcdmnabcdex(5,#) => ActiveEdge is now: e(4,4) => Connected node #6 to node #7 Adding new edge to node #7 => node #7 --> x(20,#) The linked node for active node node #3 is node #4 => ActiveNode is now: node #4 (0)┬─ab────────────────(1)┬─cde───────────(6)┬─fabxybcdmnabcdex │ │ └─x │ └─xybcdmnabcdex ├─b─────────────────(2)┬─cd────────────(3)┬─e────────(7)┬─fabxybcdmnabcdex │ │ │ └─x │ │ └─mnabcdex │ └─xybcdmnabcdex ├─cd────────────────(4)┬─efabxybcdmnabcdex │ └─mnabcdex ├─d─────────────────(5)┬─efabxybcdmnabcdex │ └─mnabcdex ├─efabxybcdmnabcdex ├─fabxybcdmnabcdex ├─mnabcdex ├─nabcdex ├─xybcdmnabcdex └─ybcdmnabcdex The next suffix of 'abcdefabxybcdmnabcdex' to add is 'cde{x}' at indices 17,20 => ActiveNode: node #4 => ActiveEdge: efabxybcdmnabcdex(4,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 3 Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f') => Hierarchy is now: node #4 --> e(4,4) --> node #8 --> fabxybcdmnabcdex(5,#) => ActiveEdge is now: e(4,4) => Connected node #7 to node #8 Adding new edge to node #8 => node #8 --> x(20,#) The linked node for active node node #4 is node #5 => ActiveNode is now: node #5 (0)┬─ab────────────────(1)┬─cde───────────(6)┬─fabxybcdmnabcdex │ │ └─x │ └─xybcdmnabcdex ├─b─────────────────(2)┬─cd────────────(3)┬─e────────(7)┬─fabxybcdmnabcdex │ │ │ └─x │ │ └─mnabcdex │ └─xybcdmnabcdex ├─cd────────────────(4)┬─e────────(8)┬─fabxybcdmnabcdex │ │ └─x │ └─mnabcdex ├─d─────────────────(5)┬─efabxybcdmnabcdex │ └─mnabcdex ├─efabxybcdmnabcdex ├─fabxybcdmnabcdex ├─mnabcdex ├─nabcdex ├─xybcdmnabcdex └─ybcdmnabcdex The next suffix of 'abcdefabxybcdmnabcdex' to add is 'de{x}' at indices 18,20 => ActiveNode: node #5 => ActiveEdge: efabxybcdmnabcdex(4,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f') => Hierarchy is now: node #5 --> e(4,4) --> node #9 --> fabxybcdmnabcdex(5,#) => ActiveEdge is now: e(4,4) => Connected node #8 to node #9 Adding new edge to node #9 => node #9 --> x(20,#) The linked node for active node node #5 is [null] (00)┬─ab────────────────(01)┬─cde───────────(06)┬─fabxybcdmnabcdex │ │ └─x │ └─xybcdmnabcdex ├─b─────────────────(02)┬─cd────────────(03)┬─e────────(07)┬─fabxybcdmnabcdex │ │ │ └─x │ │ └─mnabcdex │ └─xybcdmnabcdex ├─cd────────────────(04)┬─e────────(08)┬─fabxybcdmnabcdex │ │ └─x │ └─mnabcdex ├─d─────────────────(05)┬─e────────(09)┬─fabxybcdmnabcdex │ │ └─x │ └─mnabcdex ├─efabxybcdmnabcdex ├─fabxybcdmnabcdex ├─mnabcdex ├─nabcdex ├─xybcdmnabcdex └─ybcdmnabcdex The next suffix of 'abcdefabxybcdmnabcdex' to add is 'e{x}' at indices 19,20 => ActiveNode: node #0 => ActiveEdge: efabxybcdmnabcdex(4,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge efabxybcdmnabcdex(4,#) at index 5 ('f') => Hierarchy is now: node #0 --> e(4,4) --> node #10 --> fabxybcdmnabcdex(5,#) => ActiveEdge is now: e(4,4) => Connected node #9 to node #10 Adding new edge to node #10 => node #10 --> x(20,#) (00)┬─ab───────────────(01)┬─cde───────────(06)┬─fabxybcdmnabcdex │ │ └─x │ └─xybcdmnabcdex ├─b────────────────(02)┬─cd────────────(03)┬─e────────(07)┬─fabxybcdmnabcdex │ │ │ └─x │ │ └─mnabcdex │ └─xybcdmnabcdex ├─cd───────────────(04)┬─e────────(08)┬─fabxybcdmnabcdex │ │ └─x │ └─mnabcdex ├─d────────────────(05)┬─e────────(09)┬─fabxybcdmnabcdex │ │ └─x │ └─mnabcdex ├─e────────────────(10)┬─fabxybcdmnabcdex │ └─x ├─fabxybcdmnabcdex ├─mnabcdex ├─nabcdex ├─xybcdmnabcdex └─ybcdmnabcdex The next suffix of 'abcdefabxybcdmnabcdex' to add is '{x}' at indices 20,20 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'x' found. Values adjusted to: => ActiveEdge is now: xybcdmnabcdex(8,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (00)┬─ab───────────────(01)┬─cde───────────(06)┬─fabxybcdmnabcdex │ │ └─x │ └─xybcdmnabcdex ├─b────────────────(02)┬─cd────────────(03)┬─e────────(07)┬─fabxybcdmnabcdex │ │ │ └─x │ │ └─mnabcdex │ └─xybcdmnabcdex ├─cd───────────────(04)┬─e────────(08)┬─fabxybcdmnabcdex │ │ └─x │ └─mnabcdex ├─d────────────────(05)┬─e────────(09)┬─fabxybcdmnabcdex │ │ └─x │ └─mnabcdex ├─e────────────────(10)┬─fabxybcdmnabcdex │ └─x ├─fabxybcdmnabcdex ├─mnabcdex ├─nabcdex ├─xybcdmnabcdex └─ybcdmnabcdex This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,138 @@ === ITERATION 0 === The next suffix of 'abcadak' to add is '{a}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' not found Adding new edge to node #0 => node #0 --> a(0,#) (0)──a === ITERATION 1 === The next suffix of 'abcadak' to add is '{b}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'b' not found Adding new edge to node #0 => node #0 --> b(1,#) (0)┬─ab └─b === ITERATION 2 === The next suffix of 'abcadak' to add is '{c}' at indices 2,2 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'c' not found Adding new edge to node #0 => node #0 --> c(2,#) (0)┬─abc ├─bc └─c === ITERATION 3 === The next suffix of 'abcadak' to add is '{a}' at indices 3,3 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: => ActiveEdge is now: abca(0,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─abca ├─bca └─ca === ITERATION 4 === The next suffix of 'abcadak' to add is 'a{d}' at indices 3,4 => ActiveNode: node #0 => ActiveEdge: abcad(0,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge abcad(0,#) at index 1 ('b') => Hierarchy is now: node #0 --> a(0,0) --> node #1 --> bcad(1,#) => ActiveEdge is now: a(0,0) Adding new edge to node #1 => node #1 --> d(4,#) (0)┬─a────(1)┬─bcad │ └─d ├─bcad └─cad The next suffix of 'abcadak' to add is '{d}' at indices 4,4 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'd' not found Adding new edge to node #0 => node #0 --> d(4,#) (0)┬─a────(1)┬─bcad │ └─d ├─bcad ├─cad └─d === ITERATION 5 === The next suffix of 'abcadak' to add is '{a}' at indices 5,5 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─a─────(1)┬─bcada │ └─da ├─bcada ├─cada └─da === ITERATION 6 === The next suffix of 'abcadak' to add is 'a{k}' at indices 5,6 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #1 starting with 'k' not found Adding new edge to node #1 => node #1 --> k(6,#) The linked node for active node node #1 is [null] (0)┬─a──────(1)┬─bcadak │ ├─dak │ └─k ├─bcadak ├─cadak └─dak The next suffix of 'abcadak' to add is '{k}' at indices 6,6 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'k' not found Adding new edge to node #0 => node #0 --> k(6,#) (0)┬─a──────(1)┬─bcadak │ ├─dak │ └─k ├─bcadak ├─cadak ├─dak └─k This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,409 @@ === ITERATION 0 === The next suffix of 'dedododeeodo$' to add is '{d}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'd' not found Adding new edge to node #0 => node #0 --> d(0,#) (0)──d === ITERATION 1 === The next suffix of 'dedododeeodo$' to add is '{e}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'e' not found Adding new edge to node #0 => node #0 --> e(1,#) (0)┬─de └─e === ITERATION 2 === The next suffix of 'dedododeeodo$' to add is '{d}' at indices 2,2 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'd' found. Values adjusted to: => ActiveEdge is now: ded(0,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─ded └─ed === ITERATION 3 === The next suffix of 'dedododeeodo$' to add is 'd{o}' at indices 2,3 => ActiveNode: node #0 => ActiveEdge: dedo(0,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge dedo(0,#) at index 1 ('e') => Hierarchy is now: node #0 --> d(0,0) --> node #1 --> edo(1,#) => ActiveEdge is now: d(0,0) Adding new edge to node #1 => node #1 --> o(3,#) (0)┬─d───(1)┬─edo │ └─o └─edo The next suffix of 'dedododeeodo$' to add is '{o}' at indices 3,3 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'o' not found Adding new edge to node #0 => node #0 --> o(3,#) (0)┬─d───(1)┬─edo │ └─o ├─edo └─o === ITERATION 4 === The next suffix of 'dedododeeodo$' to add is '{d}' at indices 4,4 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'd' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─d────(1)┬─edod │ └─od ├─edod └─od === ITERATION 5 === The next suffix of 'dedododeeodo$' to add is 'd{o}' at indices 4,5 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #1 starting with 'o' found. Values adjusted to: => ActiveEdge is now: odo(3,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 1 (0)┬─d─────(1)┬─edodo │ └─odo ├─edodo └─odo === ITERATION 6 === The next suffix of 'dedododeeodo$' to add is 'do{d}' at indices 4,6 => ActiveNode: node #1 => ActiveEdge: odod(3,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 The next character on the current edge is 'd' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─d──────(1)┬─edodod │ └─odod ├─edodod └─odod === ITERATION 7 === The next suffix of 'dedododeeodo$' to add is 'dod{e}' at indices 4,7 => ActiveNode: node #1 => ActiveEdge: odode(3,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 3 Splitting edge odode(3,#) at index 5 ('o') => Hierarchy is now: node #1 --> od(3,4) --> node #2 --> ode(5,#) => ActiveEdge is now: od(3,4) Adding new edge to node #2 => node #2 --> e(7,#) The linked node for active node node #1 is [null] (0)┬─d───────(1)┬─edodode │ └─od──────(2)┬─e │ └─ode ├─edodode └─odode The next suffix of 'dedododeeodo$' to add is 'od{e}' at indices 5,7 => ActiveNode: node #0 => ActiveEdge: odode(3,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 Splitting edge odode(3,#) at index 5 ('o') => Hierarchy is now: node #0 --> od(3,4) --> node #3 --> ode(5,#) => ActiveEdge is now: od(3,4) => Connected node #2 to node #3 Adding new edge to node #3 => node #3 --> e(7,#) Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─d───────(1)┬─edodode │ └─od──────(2)┬─e │ └─ode ├─edodode └─od──────(3)┬─e └─ode The next suffix of 'dedododeeodo$' to add is 'd{e}' at indices 6,7 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #1 starting with 'e' found. Values adjusted to: => ActiveEdge is now: edodode(1,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 1 (0)┬─d───────(1)┬─edodode │ └─od──────(2)┬─e │ └─ode ├─edodode └─od──────(3)┬─e └─ode === ITERATION 8 === The next suffix of 'dedododeeodo$' to add is 'de{e}' at indices 6,8 => ActiveNode: node #1 => ActiveEdge: edododee(1,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 Splitting edge edododee(1,#) at index 2 ('d') => Hierarchy is now: node #1 --> e(1,1) --> node #4 --> dododee(2,#) => ActiveEdge is now: e(1,1) Adding new edge to node #4 => node #4 --> e(8,#) The linked node for active node node #1 is [null] (0)┬─d────────(1)┬─e──(4)┬─dododee │ │ └─e │ └─od─(2)┬─ee │ └─odee ├─edododee └─od───────(3)┬─ee └─odee The next suffix of 'dedododeeodo$' to add is 'e{e}' at indices 7,8 => ActiveNode: node #0 => ActiveEdge: edododee(1,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge edododee(1,#) at index 2 ('d') => Hierarchy is now: node #0 --> e(1,1) --> node #5 --> dododee(2,#) => ActiveEdge is now: e(1,1) => Connected node #4 to node #5 Adding new edge to node #5 => node #5 --> e(8,#) (0)┬─d──(1)┬─e──(4)┬─dododee │ │ └─e │ └─od─(2)┬─ee │ └─odee ├─e──(5)┬─dododee │ └─e └─od─(3)┬─ee └─odee The next suffix of 'dedododeeodo$' to add is '{e}' at indices 8,8 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'e' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─d──(1)┬─e──(4)┬─dododee │ │ └─e │ └─od─(2)┬─ee │ └─odee ├─e──(5)┬─dododee │ └─e └─od─(3)┬─ee └─odee === ITERATION 9 === The next suffix of 'dedododeeodo$' to add is 'e{o}' at indices 8,9 => ActiveNode: node #5 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #5 starting with 'o' not found Adding new edge to node #5 => node #5 --> o(9,#) The linked node for active node node #5 is [null] (0)┬─d──(1)┬─e──(4)┬─dododeeo │ │ └─eo │ └─od─(2)┬─eeo │ └─odeeo ├─e──(5)┬─dododeeo │ ├─eo │ └─o └─od─(3)┬─eeo └─odeeo The next suffix of 'dedododeeodo$' to add is '{o}' at indices 9,9 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'o' found. Values adjusted to: => ActiveEdge is now: od(3,4) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─d──(1)┬─e──(4)┬─dododeeo │ │ └─eo │ └─od─(2)┬─eeo │ └─odeeo ├─e──(5)┬─dododeeo │ ├─eo │ └─o └─od─(3)┬─eeo └─odeeo === ITERATION 10 === The next suffix of 'dedododeeodo$' to add is 'o{d}' at indices 9,10 => ActiveNode: node #0 => ActiveEdge: od(3,4) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 'd' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─d──(1)┬─e──(4)┬─dododeeod │ │ └─eod │ └─od─(2)┬─eeod │ └─odeeod ├─e──(5)┬─dododeeod │ ├─eod │ └─od └─od─(3)┬─eeod └─odeeod === ITERATION 11 === The next suffix of 'dedododeeodo$' to add is 'od{o}' at indices 9,11 => ActiveNode: node #3 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 2 Existing edge for node #3 starting with 'o' found. Values adjusted to: => ActiveEdge is now: odeeodo(5,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 2 (0)┬─d──(1)┬─e──(4)┬─dododeeodo │ │ └─eodo │ └─od─(2)┬─eeodo │ └─odeeodo ├─e──(5)┬─dododeeodo │ ├─eodo │ └─odo └─od─(3)┬─eeodo └─odeeodo === ITERATION 12 === The next suffix of 'dedododeeodo$' to add is 'odo{$}' at indices 9,12 => ActiveNode: node #3 => ActiveEdge: odeeodo$(5,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 3 Splitting edge odeeodo$(5,#) at index 6 ('d') => Hierarchy is now: node #3 --> o(5,5) --> node #6 --> deeodo$(6,#) => ActiveEdge is now: o(5,5) Adding new edge to node #6 => node #6 --> $(12,#) The linked node for active node node #3 is [null] (0)┬─d──(1)┬─e──(4)┬─dododeeodo$ │ │ └─eodo$ │ └─od─(2)┬─eeodo$ │ └─odeeodo$ ├─e──(5)┬─dododeeodo$ │ ├─eodo$ │ └─odo$ └─od─(3)┬─eeodo$ └─o──────(6)┬─$ └─deeodo$ The next suffix of 'dedododeeodo$' to add is 'do{$}' at indices 10,12 => ActiveNode: node #0 => ActiveEdge: od(3,4) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 Splitting edge od(3,4) at index 4 ('d') => Hierarchy is now: node #0 --> o(3,3) --> node #7 --> d(4,4) => ActiveEdge is now: o(3,3) => Connected node #6 to node #7 Adding new edge to node #7 => node #7 --> $(12,#) (0)┬─d─(1)┬─e──(4)┬─dododeeodo$ │ │ └─eodo$ │ └─od─(2)┬─eeodo$ │ └─odeeodo$ ├─e─(5)┬─dododeeodo$ │ ├─eodo$ │ └─odo$ └─o─(7)┬─$ └─d─(3)┬─eeodo$ └─o──────(6)┬─$ └─deeodo$ The next suffix of 'dedododeeodo$' to add is 'o{$}' at indices 11,12 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #0 starting with '$' not found Adding new edge to node #0 => node #0 --> $(12,#) (0)┬─$ ├─d─(1)┬─e──(4)┬─dododeeodo$ │ │ └─eodo$ │ └─od─(2)┬─eeodo$ │ └─odeeodo$ ├─e─(5)┬─dododeeodo$ │ ├─eodo$ │ └─odo$ └─o─(7)┬─$ └─d─(3)┬─eeodo$ └─o──────(6)┬─$ └─deeodo$ The next suffix of 'dedododeeodo$' to add is '{$}' at indices 12,12 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with '$' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─$ ├─d─(1)┬─e──(4)┬─dododeeodo$ │ │ └─eodo$ │ └─od─(2)┬─eeodo$ │ └─odeeodo$ ├─e─(5)┬─dododeeodo$ │ ├─eodo$ │ └─odo$ └─o─(7)┬─$ └─d─(3)┬─eeodo$ └─o──────(6)┬─$ └─deeodo$ This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,320 @@ === ITERATION 0 === The next suffix of 'okoooooooo$' to add is '{o}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'o' not found Adding new edge to node #0 => node #0 --> o(0,#) (0)──o === ITERATION 1 === The next suffix of 'okoooooooo$' to add is '{k}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'k' not found Adding new edge to node #0 => node #0 --> k(1,#) (0)┬─k └─ok === ITERATION 2 === The next suffix of 'okoooooooo$' to add is '{o}' at indices 2,2 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'o' found. Values adjusted to: => ActiveEdge is now: oko(0,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─ko └─oko === ITERATION 3 === The next suffix of 'okoooooooo$' to add is 'o{o}' at indices 2,3 => ActiveNode: node #0 => ActiveEdge: okoo(0,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge okoo(0,#) at index 1 ('k') => Hierarchy is now: node #0 --> o(0,0) --> node #1 --> koo(1,#) => ActiveEdge is now: o(0,0) Adding new edge to node #1 => node #1 --> o(3,#) (0)┬─koo └─o───(1)┬─koo └─o The next suffix of 'okoooooooo$' to add is '{o}' at indices 3,3 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'o' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─koo └─o───(1)┬─koo └─o === ITERATION 4 === The next suffix of 'okoooooooo$' to add is 'o{o}' at indices 3,4 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #1 starting with 'o' found. Values adjusted to: => ActiveEdge is now: oo(3,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 1 (0)┬─kooo └─o────(1)┬─kooo └─oo === ITERATION 5 === The next suffix of 'okoooooooo$' to add is 'oo{o}' at indices 3,5 => ActiveNode: node #1 => ActiveEdge: ooo(3,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─koooo └─o─────(1)┬─koooo └─ooo === ITERATION 6 === The next suffix of 'okoooooooo$' to add is 'ooo{o}' at indices 3,6 => ActiveNode: node #1 => ActiveEdge: oooo(3,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 3 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 3 (0)┬─kooooo └─o──────(1)┬─kooooo └─oooo === ITERATION 7 === The next suffix of 'okoooooooo$' to add is 'oooo{o}' at indices 3,7 => ActiveNode: node #1 => ActiveEdge: ooooo(3,#) => DistanceIntoActiveEdge: 3 => UnresolvedSuffixes: 4 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 4 (0)┬─koooooo └─o───────(1)┬─koooooo └─ooooo === ITERATION 8 === The next suffix of 'okoooooooo$' to add is 'ooooo{o}' at indices 3,8 => ActiveNode: node #1 => ActiveEdge: oooooo(3,#) => DistanceIntoActiveEdge: 4 => UnresolvedSuffixes: 5 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 5 (0)┬─kooooooo └─o────────(1)┬─kooooooo └─oooooo === ITERATION 9 === The next suffix of 'okoooooooo$' to add is 'oooooo{o}' at indices 3,9 => ActiveNode: node #1 => ActiveEdge: ooooooo(3,#) => DistanceIntoActiveEdge: 5 => UnresolvedSuffixes: 6 The next character on the current edge is 'o' (suffix added implicitly) => DistanceIntoActiveEdge is now: 6 (0)┬─koooooooo └─o─────────(1)┬─koooooooo └─ooooooo === ITERATION 10 === The next suffix of 'okoooooooo$' to add is 'ooooooo{$}' at indices 3,10 => ActiveNode: node #1 => ActiveEdge: ooooooo$(3,#) => DistanceIntoActiveEdge: 6 => UnresolvedSuffixes: 7 Splitting edge ooooooo$(3,#) at index 9 ('o') => Hierarchy is now: node #1 --> oooooo(3,8) --> node #2 --> o$(9,#) => ActiveEdge is now: oooooo(3,8) Adding new edge to node #2 => node #2 --> $(10,#) The linked node for active node node #1 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─koooooooo$ └─o──────────(1)┬─koooooooo$ └─oooooo─────(2)┬─$ └─o$ The next suffix of 'okoooooooo$' to add is 'oooooo{$}' at indices 4,10 => ActiveNode: node #1 => ActiveEdge: oooooo(3,8) => DistanceIntoActiveEdge: 5 => UnresolvedSuffixes: 6 Splitting edge oooooo(3,8) at index 8 ('o') => Hierarchy is now: node #1 --> ooooo(3,7) --> node #3 --> o(8,8) => ActiveEdge is now: ooooo(3,7) => Connected node #2 to node #3 Adding new edge to node #3 => node #3 --> $(10,#) The linked node for active node node #1 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─koooooooo$ └─o──────────(1)┬─koooooooo$ └─ooooo──────(3)┬─$ └─o─(2)┬─$ └─o$ The next suffix of 'okoooooooo$' to add is 'ooooo{$}' at indices 5,10 => ActiveNode: node #1 => ActiveEdge: ooooo(3,7) => DistanceIntoActiveEdge: 4 => UnresolvedSuffixes: 5 Splitting edge ooooo(3,7) at index 7 ('o') => Hierarchy is now: node #1 --> oooo(3,6) --> node #4 --> o(7,7) => ActiveEdge is now: oooo(3,6) => Connected node #3 to node #4 Adding new edge to node #4 => node #4 --> $(10,#) The linked node for active node node #1 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─koooooooo$ └─o──────────(1)┬─koooooooo$ └─oooo───────(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o$ The next suffix of 'okoooooooo$' to add is 'oooo{$}' at indices 6,10 => ActiveNode: node #1 => ActiveEdge: oooo(3,6) => DistanceIntoActiveEdge: 3 => UnresolvedSuffixes: 4 Splitting edge oooo(3,6) at index 6 ('o') => Hierarchy is now: node #1 --> ooo(3,5) --> node #5 --> o(6,6) => ActiveEdge is now: ooo(3,5) => Connected node #4 to node #5 Adding new edge to node #5 => node #5 --> $(10,#) The linked node for active node node #1 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─koooooooo$ └─o──────────(1)┬─koooooooo$ └─ooo────────(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o$ The next suffix of 'okoooooooo$' to add is 'ooo{$}' at indices 7,10 => ActiveNode: node #1 => ActiveEdge: ooo(3,5) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 3 Splitting edge ooo(3,5) at index 5 ('o') => Hierarchy is now: node #1 --> oo(3,4) --> node #6 --> o(5,5) => ActiveEdge is now: oo(3,4) => Connected node #5 to node #6 Adding new edge to node #6 => node #6 --> $(10,#) The linked node for active node node #1 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─koooooooo$ └─o──────────(1)┬─koooooooo$ └─oo─────────(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o$ The next suffix of 'okoooooooo$' to add is 'oo{$}' at indices 8,10 => ActiveNode: node #1 => ActiveEdge: oo(3,4) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 Splitting edge oo(3,4) at index 4 ('o') => Hierarchy is now: node #1 --> o(3,3) --> node #7 --> o(4,4) => ActiveEdge is now: o(3,3) => Connected node #6 to node #7 Adding new edge to node #7 => node #7 --> $(10,#) The linked node for active node node #1 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─koooooooo$ └─o──────────(1)┬─koooooooo$ └─o──────────(7)┬─$ └─o─(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o$ The next suffix of 'okoooooooo$' to add is 'o{$}' at indices 9,10 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #1 starting with '$' not found Adding new edge to node #1 => node #1 --> $(10,#) The linked node for active node node #1 is [null] (0)┬─koooooooo$ └─o──────────(1)┬─$ ├─koooooooo$ └─o──────────(7)┬─$ └─o─(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o$ The next suffix of 'okoooooooo$' to add is '{$}' at indices 10,10 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with '$' not found Adding new edge to node #0 => node #0 --> $(10,#) (0)┬─$ ├─koooooooo$ └─o──────────(1)┬─$ ├─koooooooo$ └─o──────────(7)┬─$ └─o─(6)┬─$ └─o─(5)┬─$ └─o─(4)┬─$ └─o─(3)┬─$ └─o─(2)┬─$ └─o$ This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,339 @@ === ITERATION 0 === The next suffix of 'mississippi$' to add is '{m}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'm' not found Adding new edge to node #0 => node #0 --> m(0,#) (0)──m === ITERATION 1 === The next suffix of 'mississippi$' to add is '{i}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'i' not found Adding new edge to node #0 => node #0 --> i(1,#) (0)┬─i └─mi === ITERATION 2 === The next suffix of 'mississippi$' to add is '{s}' at indices 2,2 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 's' not found Adding new edge to node #0 => node #0 --> s(2,#) (0)┬─is ├─mis └─s === ITERATION 3 === The next suffix of 'mississippi$' to add is '{s}' at indices 3,3 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 's' found. Values adjusted to: => ActiveEdge is now: ss(2,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─iss ├─miss └─ss === ITERATION 4 === The next suffix of 'mississippi$' to add is 's{i}' at indices 3,4 => ActiveNode: node #0 => ActiveEdge: ssi(2,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge ssi(2,#) at index 3 ('s') => Hierarchy is now: node #0 --> s(2,2) --> node #1 --> si(3,#) => ActiveEdge is now: s(2,2) Adding new edge to node #1 => node #1 --> i(4,#) (0)┬─issi ├─missi └─s─────(1)┬─i └─si The next suffix of 'mississippi$' to add is '{i}' at indices 4,4 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'i' found. Values adjusted to: => ActiveEdge is now: issi(1,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─issi ├─missi └─s─────(1)┬─i └─si === ITERATION 5 === The next suffix of 'mississippi$' to add is 'i{s}' at indices 4,5 => ActiveNode: node #0 => ActiveEdge: issis(1,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 's' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─issis ├─missis └─s──────(1)┬─is └─sis === ITERATION 6 === The next suffix of 'mississippi$' to add is 'is{s}' at indices 4,6 => ActiveNode: node #0 => ActiveEdge: ississ(1,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 The next character on the current edge is 's' (suffix added implicitly) => DistanceIntoActiveEdge is now: 3 (0)┬─ississ ├─mississ └─s───────(1)┬─iss └─siss === ITERATION 7 === The next suffix of 'mississippi$' to add is 'iss{i}' at indices 4,7 => ActiveNode: node #0 => ActiveEdge: ississi(1,#) => DistanceIntoActiveEdge: 3 => UnresolvedSuffixes: 3 The next character on the current edge is 'i' (suffix added implicitly) => DistanceIntoActiveEdge is now: 4 (0)┬─ississi ├─mississi └─s────────(1)┬─issi └─sissi === ITERATION 8 === The next suffix of 'mississippi$' to add is 'issi{p}' at indices 4,8 => ActiveNode: node #0 => ActiveEdge: ississip(1,#) => DistanceIntoActiveEdge: 4 => UnresolvedSuffixes: 4 Splitting edge ississip(1,#) at index 5 ('s') => Hierarchy is now: node #0 --> issi(1,4) --> node #2 --> ssip(5,#) => ActiveEdge is now: issi(1,4) Adding new edge to node #2 => node #2 --> p(8,#) Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─issi──────(2)┬─p │ └─ssip ├─mississip └─s─────────(1)┬─issip └─sissip The next suffix of 'mississippi$' to add is 'ssi{p}' at indices 5,8 => ActiveNode: node #1 => ActiveEdge: sissip(3,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 3 Splitting edge sissip(3,#) at index 5 ('s') => Hierarchy is now: node #1 --> si(3,4) --> node #3 --> ssip(5,#) => ActiveEdge is now: si(3,4) => Connected node #2 to node #3 Adding new edge to node #3 => node #3 --> p(8,#) The linked node for active node node #1 is [null] Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─issi──────(2)┬─p │ └─ssip ├─mississip └─s─────────(1)┬─issip └─si────(3)┬─p └─ssip The next suffix of 'mississippi$' to add is 'si{p}' at indices 6,8 => ActiveNode: node #1 => ActiveEdge: issip(4,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 Splitting edge issip(4,#) at index 5 ('s') => Hierarchy is now: node #1 --> i(4,4) --> node #4 --> ssip(5,#) => ActiveEdge is now: i(4,4) => Connected node #3 to node #4 Adding new edge to node #4 => node #4 --> p(8,#) The linked node for active node node #1 is [null] (0)┬─issi──────(2)┬─p │ └─ssip ├─mississip └─s─────────(1)┬─i──(4)┬─p │ └─ssip └─si─(3)┬─p └─ssip The next suffix of 'mississippi$' to add is 'i{p}' at indices 7,8 => ActiveNode: node #0 => ActiveEdge: issi(1,4) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge issi(1,4) at index 2 ('s') => Hierarchy is now: node #0 --> i(1,1) --> node #5 --> ssi(2,4) => ActiveEdge is now: i(1,1) => Connected node #4 to node #5 Adding new edge to node #5 => node #5 --> p(8,#) (0)┬─i─────────(5)┬─p │ └─ssi─(2)┬─p │ └─ssip ├─mississip └─s─────────(1)┬─i──(4)┬─p │ └─ssip └─si─(3)┬─p └─ssip The next suffix of 'mississippi$' to add is '{p}' at indices 8,8 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'p' not found Adding new edge to node #0 => node #0 --> p(8,#) (0)┬─i─────────(5)┬─p │ └─ssi─(2)┬─p │ └─ssip ├─mississip ├─p └─s─────────(1)┬─i──(4)┬─p │ └─ssip └─si─(3)┬─p └─ssip === ITERATION 9 === The next suffix of 'mississippi$' to add is '{p}' at indices 9,9 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'p' found. Values adjusted to: => ActiveEdge is now: pp(8,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─i──────────(5)┬─pp │ └─ssi─(2)┬─pp │ └─ssipp ├─mississipp ├─pp └─s──────────(1)┬─i──(4)┬─pp │ └─ssipp └─si─(3)┬─pp └─ssipp === ITERATION 10 === The next suffix of 'mississippi$' to add is 'p{i}' at indices 9,10 => ActiveNode: node #0 => ActiveEdge: ppi(8,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge ppi(8,#) at index 9 ('p') => Hierarchy is now: node #0 --> p(8,8) --> node #6 --> pi(9,#) => ActiveEdge is now: p(8,8) Adding new edge to node #6 => node #6 --> i(10,#) (0)┬─i───────────(5)┬─ppi │ └─ssi─(2)┬─ppi │ └─ssippi ├─mississippi ├─p───────────(6)┬─i │ └─pi └─s───────────(1)┬─i──(4)┬─ppi │ └─ssippi └─si─(3)┬─ppi └─ssippi The next suffix of 'mississippi$' to add is '{i}' at indices 10,10 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'i' found. Values adjusted to: Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary => ActiveEdge is now: => DistanceIntoActiveEdge is now: 0 => UnresolvedSuffixes is now: 0 (0)┬─i───────────(5)┬─ppi │ └─ssi─(2)┬─ppi │ └─ssippi ├─mississippi ├─p───────────(6)┬─i │ └─pi └─s───────────(1)┬─i──(4)┬─ppi │ └─ssippi └─si─(3)┬─ppi └─ssippi === ITERATION 11 === The next suffix of 'mississippi$' to add is 'i{$}' at indices 10,11 => ActiveNode: node #5 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 1 Existing edge for node #5 starting with '$' not found Adding new edge to node #5 => node #5 --> $(11,#) The linked node for active node node #5 is [null] (0)┬─i────────────(5)┬─$ │ ├─ppi$ │ └─ssi──(2)┬─ppi$ │ └─ssippi$ ├─mississippi$ ├─p────────────(6)┬─i$ │ └─pi$ └─s────────────(1)┬─i──(4)┬─ppi$ │ └─ssippi$ └─si─(3)┬─ppi$ └─ssippi$ The next suffix of 'mississippi$' to add is '{$}' at indices 11,11 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with '$' not found Adding new edge to node #0 => node #0 --> $(11,#) (0)┬─$ ├─i────────────(5)┬─$ │ ├─ppi$ │ └─ssi──(2)┬─ppi$ │ └─ssippi$ ├─mississippi$ ├─p────────────(6)┬─i$ │ └─pi$ └─s────────────(1)┬─i──(4)┬─ppi$ │ └─ssippi$ └─si─(3)┬─ppi$ └─ssippi$ -
axefrog revised this gist
Apr 15, 2012 . 1 changed file with 2 additions and 2 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,7 +3,7 @@ using System.IO; using System.Linq; namespace SuffixTreeAlgorithm { public class SuffixTree { @@ -335,4 +335,4 @@ public static class RenderChars public const char CornerRight = '└'; } } } -
axefrog revised this gist
Apr 15, 2012 . 1 changed file with 22 additions and 11 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,7 +3,7 @@ using System.IO; using System.Linq; namespace Shiny { public class SuffixTree { @@ -38,7 +38,7 @@ private void Build(char canonizationChar) var n = Word.IndexOf(Word[Word.Length - 1]); var mustCanonize = n < Word.Length - 1; if(mustCanonize) Word = string.Concat(Word, canonizationChar); for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++) { @@ -80,6 +80,7 @@ private bool AddNextSuffix() return false; ActiveNode.AddNewEdge(); UpdateActivePointAfterAddingNewEdge(); return true; } @@ -117,18 +118,25 @@ private bool AddCurrentSuffixToActiveEdge() SplitActiveEdge(); ActiveEdge.Tail.AddNewEdge(); UpdateActivePointAfterAddingNewEdge(); return true; } private void UpdateActivePointAfterAddingNewEdge() { if(ReferenceEquals(ActiveNode, RootNode)) { if(DistanceIntoActiveEdge > 0) { DistanceIntoActiveEdge--; var firstIndex = ActiveEdge.StartIndex; ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]]; NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndex); } } else UpdateActivePointToLinkedNodeOrRoot(); } private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge) @@ -174,10 +182,13 @@ private void UpdateActivePointToLinkedNodeOrRoot() { ActiveNode = RootNode; } if(ActiveEdge != null) { var firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex; ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]]; NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge); } } public string RenderTree() -
axefrog revised this gist
Apr 15, 2012 . 2 changed files with 31 additions and 37 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,10 +3,7 @@ using System.IO; using System.Linq; namespace SuffixTreeAlgorithm { public class SuffixTree { @@ -15,7 +12,7 @@ public class SuffixTree private int CurrentSuffixEndIndex { get; set; } private Node LastCreatedNodeInCurrentIteration { get; set; } private int UnresolvedSuffixes { get; set; } public Node RootNode { get; private set; } private Node ActiveNode { get; set; } private Edge ActiveEdge { get; set; } private int DistanceIntoActiveEdge { get; set; } @@ -39,7 +36,8 @@ public static SuffixTree Create(string word, char canonizationChar = '$') private void Build(char canonizationChar) { var n = Word.IndexOf(Word[Word.Length - 1]); var mustCanonize = n < Word.Length - 1; if(mustCanonize) Word = string.Concat(Word, canonizationChar); for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++) @@ -93,7 +91,7 @@ private bool GetExistingEdgeAndSetAsActive() Console.WriteLine("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix); ActiveEdge = edge; DistanceIntoActiveEdge = 1; NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex); Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge); Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); Console.WriteLine(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes); @@ -112,7 +110,8 @@ private bool AddCurrentSuffixToActiveEdge() Console.WriteLine("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix); DistanceIntoActiveEdge++; Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(ActiveEdge.StartIndex); return false; } @@ -122,23 +121,32 @@ private bool AddCurrentSuffixToActiveEdge() if(ReferenceEquals(ActiveNode, RootNode)) { DistanceIntoActiveEdge--; var firstIndex = ActiveEdge.StartIndex; ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]]; NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndex); } else UpdateActivePointToLinkedNodeOrRoot(); return true; } private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge) { var walkDistance = 0; while(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length) { Console.WriteLine("Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary"); DistanceIntoActiveEdge -= ActiveEdge.Length; ActiveNode = ActiveEdge.Tail; if(DistanceIntoActiveEdge == 0) ActiveEdge = null; else { walkDistance += ActiveEdge.Length; var c = Word[firstIndexOfOriginalActiveEdge + walkDistance]; ActiveEdge = ActiveNode.Edges[c]; } } } @@ -172,25 +180,6 @@ private void UpdateActivePointToLinkedNodeOrRoot() NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge); } public string RenderTree() { var writer = new StringWriter(); @@ -237,6 +226,11 @@ public override string ToString() StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")"); } public string String { get { return _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1); } } public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength) { var strEdge = _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1); @@ -330,4 +324,4 @@ public static class RenderChars public const char CornerRight = '└'; } } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -142,7 +142,7 @@ The next suffix of 'abcabxabcd' to add is 'a{b}' at indices 6,7 => UnresolvedSuffixes: 1 The next character on the current edge is 'b' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary (0)┬─ab─────(1)┬─cabxab │ └─xab @@ -253,4 +253,4 @@ Adding new edge to node #0 ├─c─────(5)┬─abxabcd │ └─d ├─d └─xabcd -
axefrog revised this gist
Apr 15, 2012 . 1 changed file with 2 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,7 +3,8 @@ using System.IO; using System.Linq; // Implemented by Nathan Ridley ([email protected]) based on the explanation by jogojapan at: // http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english namespace Shiny { -
axefrog revised this gist
Apr 15, 2012 . 1 changed file with 3 additions and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,6 +3,8 @@ using System.IO; using System.Linq; // Implemented based on the explanation at http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english namespace Shiny { public class SuffixTree @@ -327,4 +329,4 @@ public static class RenderChars public const char CornerRight = '└'; } } } -
axefrog revised this gist
Apr 15, 2012 . 3 changed files with 529 additions and 322 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,221 +3,328 @@ using System.IO; using System.Linq; namespace Shiny { public class SuffixTree { public string Word { get; private set; } private int CurrentSuffixStartIndex { get; set; } private int CurrentSuffixEndIndex { get; set; } private Node LastCreatedNodeInCurrentIteration { get; set; } private int UnresolvedSuffixes { get; set; } private Node RootNode { get; set; } private Node ActiveNode { get; set; } private Edge ActiveEdge { get; set; } private int DistanceIntoActiveEdge { get; set; } private char LastCharacterOfCurrentSuffix { get; set; } private int NextNodeNumber { get; set; } private SuffixTree(string word) { Word = word; RootNode = new Node(this); ActiveNode = RootNode; } public static SuffixTree Create(string word, char canonizationChar = '$') { var tree = new SuffixTree(word); tree.Build(canonizationChar); return tree; } private void Build(char canonizationChar) { var n = Word.IndexOf(Word[Word.Length - 1]); if(n < Word.Length - 1) Word = string.Concat(Word, canonizationChar); for(CurrentSuffixEndIndex = 0; CurrentSuffixEndIndex < Word.Length; CurrentSuffixEndIndex++) { Console.WriteLine("=== ITERATION {0} ===", CurrentSuffixEndIndex); LastCreatedNodeInCurrentIteration = null; LastCharacterOfCurrentSuffix = Word[CurrentSuffixEndIndex]; for(CurrentSuffixStartIndex = CurrentSuffixEndIndex - UnresolvedSuffixes; CurrentSuffixStartIndex <= CurrentSuffixEndIndex; CurrentSuffixStartIndex++) { var wasImplicitlyAdded = !AddNextSuffix(); Console.WriteLine(); Console.WriteLine(RenderTree()); if(wasImplicitlyAdded) { UnresolvedSuffixes++; break; } if(UnresolvedSuffixes > 0) UnresolvedSuffixes--; } } } private bool AddNextSuffix() { var suffix = string.Concat(Word.Substring(CurrentSuffixStartIndex, CurrentSuffixEndIndex - CurrentSuffixStartIndex), "{", Word[CurrentSuffixEndIndex], "}"); Console.WriteLine("The next suffix of '{0}' to add is '{1}' at indices {2},{3}", Word, suffix, CurrentSuffixStartIndex, CurrentSuffixEndIndex); Console.WriteLine(" => ActiveNode: {0}", ActiveNode); Console.WriteLine(" => ActiveEdge: {0}", ActiveEdge == null ? "none" : ActiveEdge.ToString()); Console.WriteLine(" => DistanceIntoActiveEdge: {0}", DistanceIntoActiveEdge); Console.WriteLine(" => UnresolvedSuffixes: {0}", UnresolvedSuffixes); if(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length) throw new Exception("BOUNDARY EXCEEDED"); if(ActiveEdge != null) return AddCurrentSuffixToActiveEdge(); if(GetExistingEdgeAndSetAsActive()) return false; ActiveNode.AddNewEdge(); return true; } private bool GetExistingEdgeAndSetAsActive() { Edge edge; if(ActiveNode.Edges.TryGetValue(LastCharacterOfCurrentSuffix, out edge)) { Console.WriteLine("Existing edge for {0} starting with '{1}' found. Values adjusted to:", ActiveNode, LastCharacterOfCurrentSuffix); ActiveEdge = edge; DistanceIntoActiveEdge = 1; NormalizeActivePointIfNowAtEdgeBoundary(); Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge); Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); Console.WriteLine(" => UnresolvedSuffixes is now: {0}", UnresolvedSuffixes); return true; } Console.WriteLine("Existing edge for {0} starting with '{1}' not found", ActiveNode, LastCharacterOfCurrentSuffix); return false; } private bool AddCurrentSuffixToActiveEdge() { var nextCharacterOnEdge = Word[ActiveEdge.StartIndex + DistanceIntoActiveEdge]; if(nextCharacterOnEdge == LastCharacterOfCurrentSuffix) { Console.WriteLine("The next character on the current edge is '{0}' (suffix added implicitly)", LastCharacterOfCurrentSuffix); DistanceIntoActiveEdge++; Console.WriteLine(" => DistanceIntoActiveEdge is now: {0}", DistanceIntoActiveEdge); NormalizeActivePointIfNowAtEdgeBoundary(); return false; } SplitActiveEdge(); ActiveEdge.Tail.AddNewEdge(); if(ReferenceEquals(ActiveNode, RootNode)) { DistanceIntoActiveEdge--; ActiveEdge = DistanceIntoActiveEdge == 0 ? null : ActiveNode.Edges[Word[CurrentSuffixStartIndex + 1]]; NormalizeActivePointIfNowAtEdgeBoundary(); } else UpdateActivePointToLinkedNodeOrRoot(); return true; } private void NormalizeActivePointIfNowAtEdgeBoundary() { if(ActiveEdge != null && DistanceIntoActiveEdge == ActiveEdge.Length) { Console.WriteLine("Active point is now at edge boundary and will be reset to the tail node of the active edge"); DistanceIntoActiveEdge = 0; ActiveNode = ActiveEdge.Tail ?? RootNode; ActiveEdge = null; } } private void SplitActiveEdge() { ActiveEdge = ActiveEdge.SplitAtIndex(ActiveEdge.StartIndex + DistanceIntoActiveEdge); Console.WriteLine(" => ActiveEdge is now: {0}", ActiveEdge); if(LastCreatedNodeInCurrentIteration != null) { LastCreatedNodeInCurrentIteration.LinkedNode = ActiveEdge.Tail; Console.WriteLine(" => Connected {0} to {1}", LastCreatedNodeInCurrentIteration, ActiveEdge.Tail); } LastCreatedNodeInCurrentIteration = ActiveEdge.Tail; } private void UpdateActivePointToLinkedNodeOrRoot() { Console.WriteLine("The linked node for active node {0} is {1}", ActiveNode, ActiveNode.LinkedNode == null ? "[null]" : ActiveNode.LinkedNode.ToString()); if(ActiveNode.LinkedNode != null) { ActiveNode = ActiveNode.LinkedNode; Console.WriteLine(" => ActiveNode is now: {0}", ActiveNode); } else { ActiveNode = RootNode; } var firstIndexOfOriginalActiveEdge = ActiveEdge.StartIndex; ActiveEdge = ActiveNode.Edges[Word[ActiveEdge.StartIndex]]; NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(firstIndexOfOriginalActiveEdge); } private void NormalizeActivePointIfNowAtOrBeyondEdgeBoundary(int firstIndexOfOriginalActiveEdge) { var walkDistance = 0; while(ActiveEdge != null && DistanceIntoActiveEdge >= ActiveEdge.Length) { Console.WriteLine("Active point is now at or beyond edge boundary and will be moved until it falls inside an edge boundary"); DistanceIntoActiveEdge -= ActiveEdge.Length; ActiveNode = ActiveEdge.Tail; if(DistanceIntoActiveEdge == 0) ActiveEdge = null; else { walkDistance += ActiveEdge.Length; var c = Word[firstIndexOfOriginalActiveEdge + walkDistance]; ActiveEdge = ActiveNode.Edges[c]; } } } public string RenderTree() { var writer = new StringWriter(); RootNode.RenderTree(writer, ""); return writer.ToString(); } public class Edge { private readonly SuffixTree _tree; public Edge(SuffixTree tree, Node head) { _tree = tree; Head = head; StartIndex = tree.CurrentSuffixEndIndex; } public Node Head { get; private set; } public Node Tail { get; private set; } public int StartIndex { get; private set; } public int? EndIndex { get; set; } public int Length { get { return (EndIndex ?? _tree.Word.Length - 1) - StartIndex + 1; } } public Edge SplitAtIndex(int index) { Console.WriteLine("Splitting edge {0} at index {1} ('{2}')", this, index, _tree.Word[index]); var newEdge = new Edge(_tree, Head); var newNode = new Node(_tree); newEdge.Tail = newNode; newEdge.StartIndex = StartIndex; newEdge.EndIndex = index - 1; Head = newNode; StartIndex = index; newNode.Edges.Add(_tree.Word[StartIndex], this); newEdge.Head.Edges[_tree.Word[newEdge.StartIndex]] = newEdge; Console.WriteLine(" => Hierarchy is now: {0} --> {1} --> {2} --> {3}", newEdge.Head, newEdge, newNode, this); return newEdge; } public override string ToString() { return string.Concat(_tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1), "(", StartIndex, ",", EndIndex.HasValue ? EndIndex.ToString() : "#", ")"); } public void RenderTree(TextWriter writer, string prefix, int maxEdgeLength) { var strEdge = _tree.Word.Substring(StartIndex, (EndIndex ?? _tree.CurrentSuffixEndIndex) - StartIndex + 1); writer.Write(strEdge); if(Tail == null) writer.WriteLine(); else { var line = new string(RenderChars.HorizontalLine, maxEdgeLength - strEdge.Length + 1); writer.Write(line); Tail.RenderTree(writer, string.Concat(prefix, new string(' ', strEdge.Length + line.Length))); } } } public class Node { private readonly SuffixTree _tree; public Node(SuffixTree tree) { _tree = tree; Edges = new Dictionary<char, Edge>(); NodeNumber = _tree.NextNodeNumber++; } public Dictionary<char, Edge> Edges { get; private set; } public Node LinkedNode { get; set; } public int NodeNumber { get; private set; } public void AddNewEdge() { Console.WriteLine("Adding new edge to {0}", this); var edge = new Edge(_tree, this); Edges.Add(_tree.Word[_tree.CurrentSuffixEndIndex], edge); Console.WriteLine(" => {0} --> {1}", this, edge); } public void RenderTree(TextWriter writer, string prefix) { var strNode = string.Concat("(", NodeNumber.ToString(new string('0', _tree.NextNodeNumber.ToString().Length)), ")"); writer.Write(strNode); var edges = Edges.Select(kvp => kvp.Value).OrderBy(e => _tree.Word[e.StartIndex]).ToArray(); if(edges.Any()) { var prefixWithNodePadding = prefix + new string(' ', strNode.Length); var maxEdgeLength = edges.Max(e => (e.EndIndex ?? _tree.CurrentSuffixEndIndex) - e.StartIndex + 1); for(var i = 0; i < edges.Length; i++) { char connector, extender = ' '; if(i == 0) { if(edges.Length > 1) { connector = RenderChars.TJunctionDown; extender = RenderChars.VerticalLine; } else connector = RenderChars.HorizontalLine; } else { writer.Write(prefixWithNodePadding); if(i == edges.Length - 1) connector = RenderChars.CornerRight; else { connector = RenderChars.TJunctionRight; extender = RenderChars.VerticalLine; } } writer.Write(string.Concat(connector, RenderChars.HorizontalLine)); var newPrefix = string.Concat(prefixWithNodePadding, extender, ' '); edges[i].RenderTree(writer, newPrefix, maxEdgeLength); } } } public override string ToString() { return string.Concat("node #", NodeNumber); } } public static class RenderChars { public const char TJunctionDown = '┬'; public const char HorizontalLine = '─'; public const char VerticalLine = '│'; public const char TJunctionRight = '├'; public const char CornerRight = '└'; } } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,4 +1,4 @@ static void Main() { var tree = SuffixTree.Create("abcabxabcd"); } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -1,156 +1,256 @@ === ITERATION 0 === The next suffix of 'abcabxabcd' to add is '{a}' at indices 0,0 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' not found Adding new edge to node #0 => node #0 --> a(0,#) (0)──a === ITERATION 1 === The next suffix of 'abcabxabcd' to add is '{b}' at indices 1,1 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'b' not found Adding new edge to node #0 => node #0 --> b(1,#) (0)┬─ab └─b === ITERATION 2 === The next suffix of 'abcabxabcd' to add is '{c}' at indices 2,2 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'c' not found Adding new edge to node #0 => node #0 --> c(2,#) (0)┬─abc ├─bc └─c === ITERATION 3 === The next suffix of 'abcabxabcd' to add is '{a}' at indices 3,3 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: => ActiveEdge is now: abca(0,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─abca ├─bca └─ca === ITERATION 4 === The next suffix of 'abcabxabcd' to add is 'a{b}' at indices 3,4 => ActiveNode: node #0 => ActiveEdge: abcab(0,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 'b' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 (0)┬─abcab ├─bcab └─cab === ITERATION 5 === The next suffix of 'abcabxabcd' to add is 'ab{x}' at indices 3,5 => ActiveNode: node #0 => ActiveEdge: abcabx(0,#) => DistanceIntoActiveEdge: 2 => UnresolvedSuffixes: 2 Splitting edge abcabx(0,#) at index 2 ('c') => Hierarchy is now: node #0 --> ab(0,1) --> node #1 --> cabx(2,#) => ActiveEdge is now: ab(0,1) Adding new edge to node #1 => node #1 --> x(5,#) (0)┬─ab────(1)┬─cabx │ └─x ├─bcabx └─cabx The next suffix of 'abcabxabcd' to add is 'b{x}' at indices 4,5 => ActiveNode: node #0 => ActiveEdge: bcabx(1,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge bcabx(1,#) at index 2 ('c') => Hierarchy is now: node #0 --> b(1,1) --> node #2 --> cabx(2,#) => ActiveEdge is now: b(1,1) => Connected node #1 to node #2 Adding new edge to node #2 => node #2 --> x(5,#) (0)┬─ab───(1)┬─cabx │ └─x ├─b────(2)┬─cabx │ └─x └─cabx The next suffix of 'abcabxabcd' to add is '{x}' at indices 5,5 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'x' not found Adding new edge to node #0 => node #0 --> x(5,#) (0)┬─ab───(1)┬─cabx │ └─x ├─b────(2)┬─cabx │ └─x ├─cabx └─x === ITERATION 6 === The next suffix of 'abcabxabcd' to add is '{a}' at indices 6,6 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'a' found. Values adjusted to: => ActiveEdge is now: ab(0,1) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 0 (0)┬─ab────(1)┬─cabxa │ └─xa ├─b─────(2)┬─cabxa │ └─xa ├─cabxa └─xa === ITERATION 7 === The next suffix of 'abcabxabcd' to add is 'a{b}' at indices 6,7 => ActiveNode: node #0 => ActiveEdge: ab(0,1) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 The next character on the current edge is 'b' (suffix added implicitly) => DistanceIntoActiveEdge is now: 2 Active point is now at edge boundary and will be reset to the tail node of the active edge (0)┬─ab─────(1)┬─cabxab │ └─xab ├─b──────(2)┬─cabxab │ └─xab ├─cabxab └─xab === ITERATION 8 === The next suffix of 'abcabxabcd' to add is 'ab{c}' at indices 6,8 => ActiveNode: node #1 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 2 Existing edge for node #1 starting with 'c' found. Values adjusted to: => ActiveEdge is now: cabxabc(2,#) => DistanceIntoActiveEdge is now: 1 => UnresolvedSuffixes is now: 2 (0)┬─ab──────(1)┬─cabxabc │ └─xabc ├─b───────(2)┬─cabxabc │ └─xabc ├─cabxabc └─xabc === ITERATION 9 === The next suffix of 'abcabxabcd' to add is 'abc{d}' at indices 6,9 => ActiveNode: node #1 => ActiveEdge: cabxabcd(2,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 3 Splitting edge cabxabcd(2,#) at index 3 ('a') => Hierarchy is now: node #1 --> c(2,2) --> node #3 --> abxabcd(3,#) => ActiveEdge is now: c(2,2) Adding new edge to node #3 => node #3 --> d(9,#) The linked node for active node node #1 is node #2 => ActiveNode is now: node #2 (0)┬─ab───────(1)┬─c─────(3)┬─abxabcd │ │ └─d │ └─xabcd ├─b────────(2)┬─cabxabcd │ └─xabcd ├─cabxabcd └─xabcd The next suffix of 'abcabxabcd' to add is 'bc{d}' at indices 7,9 => ActiveNode: node #2 => ActiveEdge: cabxabcd(2,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 2 Splitting edge cabxabcd(2,#) at index 3 ('a') => Hierarchy is now: node #2 --> c(2,2) --> node #4 --> abxabcd(3,#) => ActiveEdge is now: c(2,2) => Connected node #3 to node #4 Adding new edge to node #4 => node #4 --> d(9,#) The linked node for active node node #2 is [null] (0)┬─ab───────(1)┬─c─────(3)┬─abxabcd │ │ └─d │ └─xabcd ├─b────────(2)┬─c─────(4)┬─abxabcd │ │ └─d │ └─xabcd ├─cabxabcd └─xabcd The next suffix of 'abcabxabcd' to add is 'c{d}' at indices 8,9 => ActiveNode: node #0 => ActiveEdge: cabxabcd(2,#) => DistanceIntoActiveEdge: 1 => UnresolvedSuffixes: 1 Splitting edge cabxabcd(2,#) at index 3 ('a') => Hierarchy is now: node #0 --> c(2,2) --> node #5 --> abxabcd(3,#) => ActiveEdge is now: c(2,2) => Connected node #4 to node #5 Adding new edge to node #5 => node #5 --> d(9,#) (0)┬─ab────(1)┬─c─────(3)┬─abxabcd │ │ └─d │ └─xabcd ├─b─────(2)┬─c─────(4)┬─abxabcd │ │ └─d │ └─xabcd ├─c─────(5)┬─abxabcd │ └─d └─xabcd The next suffix of 'abcabxabcd' to add is '{d}' at indices 9,9 => ActiveNode: node #0 => ActiveEdge: none => DistanceIntoActiveEdge: 0 => UnresolvedSuffixes: 0 Existing edge for node #0 starting with 'd' not found Adding new edge to node #0 => node #0 --> d(9,#) (0)┬─ab────(1)┬─c─────(3)┬─abxabcd │ │ └─d │ └─xabcd ├─b─────(2)┬─c─────(4)┬─abxabcd │ │ └─d │ └─xabcd ├─c─────(5)┬─abxabcd │ └─d ├─d └─xabcd -
axefrog revised this gist
Apr 13, 2012 . 3 changed files with 1 addition and 1 deletion.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -3,7 +3,7 @@ using System.IO; using System.Linq; namespace StringAlgorithms { public class SuffixTree { File renamed without changes.File renamed without changes. -
axefrog created this gist
Apr 13, 2012 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,156 @@ Adding: 'A' Remainder is 1 The active point is currently: {Node 0,'\0',0} Inserting singular edge for 'A' from node 0 0 + - A Adding: 'B' Remainder is 1 The active point is currently: {Node 0,'\0',0} Inserting singular edge for 'B' from node 0 0 + - AB + - B Adding: 'C' Remainder is 1 The active point is currently: {Node 0,'\0',0} Inserting singular edge for 'C' from node 0 0 + - ABC + - BC + - C Adding: 'A' Remainder is 1 The active point is currently: {Node 0,'\0',0} Found 'A' on edge [0:'ABCA']; updating active point. 0 + - ABCA + - BCA + - CA Adding: 'B' Remainder is 2 The active point is currently: {Node 0,'a',1} Looking for edge starting with 'A' Found implicit match in edge [0:'ABCAB'] 0 + - ABCAB + - BCAB + - CAB Adding: 'X' Remainder is 3 The active point is currently: {Node 0,'a',2} Looking for edge starting with 'A' Splitting edge [0:'ABCABX'] --> [0:'AB'] => 1 => [1:'CABX'] / [1:'X'] 0 + - AB --- 1 + - CABX | + - X + - BCABX + - CABX The active point is currently: {Node 0,'b',1} Looking for edge starting with 'B' Splitting edge [0:'BCABX'] --> [0:'B'] => 2 => [2:'CABX'] / [2:'X'] Connected node 1 ---> node 2 0 + - AB -- 1 + - CABX | + - X + - B --- 2 + - CABX | + - X + - CABX The active point is currently: {Node 0,'\0',0} Inserting singular edge for 'X' from node 0 0 + - AB -- 1 + - CABX | + - X + - B --- 2 + - CABX | + - X + - CABX + - X Adding: 'A' Remainder is 1 The active point is currently: {Node 0,'\0',0} Found 'A' on edge [0:'AB']; updating active point. 0 + - AB --- 1 + - CABXA | + - XA + - B ---- 2 + - CABXA | + - XA + - CABXA + - XA Adding: 'B' Remainder is 2 The active point is currently: {Node 0,'a',1} Looking for edge starting with 'A' Found implicit match in edge [0:'AB'] Match is at end of edge; active point will be moved to the connecting node. 0 + - AB ---- 1 + - CABXAB | + - XAB + - B ----- 2 + - CABXAB | + - XAB + - CABXAB + - XAB Adding: 'C' Remainder is 3 The active point is currently: {Node 1,'\0',0} Found 'C' on edge [1:'CABXABC']; updating active point. 0 + - AB ----- 1 + - CABXABC | + - XABC + - B ------ 2 + - CABXABC | + - XABC + - CABXABC + - XABC Adding: 'D' Remainder is 4 The active point is currently: {Node 1,'c',1} Looking for edge starting with 'C' Splitting edge [1:'CABXABCD'] --> [1:'C'] => 3 => [3:'ABXABCD'] / [3:'D'] Changed the active node from 1 to 2 0 + - AB ------ 1 + - C ---- 3 + - ABXABCD | + - D | + - XABCD + - B ------- 2 + - CABXABCD | + - XABCD + - CABXABCD + - XABCD The active point is currently: {Node 2,'c',1} Looking for edge starting with 'C' Splitting edge [2:'CABXABCD'] --> [2:'C'] => 4 => [4:'ABXABCD'] / [4:'D'] Connected node 3 ---> node 4 Changed the active node from 2 to 0 0 + - AB ------ 1 + - C ---- 3 + - ABXABCD | + - D | + - XABCD + - B ------- 2 + - C ---- 4 + - ABXABCD | + - D | + - XABCD + - CABXABCD + - XABCD The active point is currently: {Node 0,'c',1} Looking for edge starting with 'C' Splitting edge [0:'CABXABCD'] --> [0:'C'] => 5 => [5:'ABXABCD'] / [5:'D'] Connected node 4 ---> node 5 0 + - AB --- 1 + - C ---- 3 + - ABXABCD | + - D | + - XABCD + - B ---- 2 + - C ---- 4 + - ABXABCD | + - D | + - XABCD + - C ---- 5 + - ABXABCD | + - D + - XABCD The active point is currently: {Node 0,'\0',0} Inserting singular edge for 'D' from node 0 0 + - AB --- 1 + - C ---- 3 + - ABXABCD | + - D | + - XABCD + - B ---- 2 + - C ---- 4 + - ABXABCD | + - D | + - XABCD + - C ---- 5 + - ABXABCD | + - D + - XABCD + - D This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,223 @@ using System; using System.Collections.Generic; using System.IO; using System.Linq; namespace Shiny { public class SuffixTree { // http://stackoverflow.com/questions/9452701/can-someone-please-explain-ukkonens-suffix-tree-algorithm-in-plain-english private ActivePoint _activePoint; public string Word { get; set; } public Node RootNode { get; set; } public int InsertionPoint { get; set; } public int NextNodeNumber { get; set; } public SuffixTree(string word) { Word = word; RootNode = new Node { NodeNumber = NextNodeNumber++ }; Build(); } private void Build() { _activePoint = new ActivePoint { ActiveNode = RootNode, ActiveEdgeStartsWith = '\0', ActiveLength = 0 }; var remainder = 1; InsertionPoint = 0; for(InsertionPoint = 0; InsertionPoint < Word.Length; InsertionPoint++) { var c = Word[InsertionPoint]; Console.WriteLine("Adding: '" + c.ToString().ToUpper() + "'"); var suffixesToInsert = remainder; Node previousNode = null; var implicitComplete = false; Console.WriteLine("Remainder is " + remainder); for(var i = 0; i < suffixesToInsert && !implicitComplete; i++) { Console.WriteLine("The active point is currently: " + _activePoint); Edge edge; if(_activePoint.ActiveEdgeStartsWith != '\0') { Console.WriteLine("Looking for edge starting with '" + _activePoint.ActiveEdgeStartsWith.ToString().ToUpper() + "'"); edge = _activePoint.ActiveNode.Edges[_activePoint.ActiveEdgeStartsWith]; if(Word[edge.StartIndex + _activePoint.ActiveLength] == c) { Console.WriteLine("Found implicit match in edge " + edge); _activePoint.ActiveLength++; remainder++; if(_activePoint.ActiveLength == edge.Length) { Console.WriteLine("Match is at end of edge; active point will be moved to the connecting node."); _activePoint.ActiveNode = edge.Tail; _activePoint.ActiveEdgeStartsWith = '\0'; _activePoint.ActiveLength = 0; } implicitComplete = true; } else { Console.Write("Splitting edge " + edge + " --> "); edge.EndIndex = edge.StartIndex + _activePoint.ActiveLength - 1; edge.Tail = new Node { Stem = edge, NodeNumber = NextNodeNumber++ }; var subdivisionStartIndex = edge.EndIndex.Value + 1; var subdividedStartChar = Word[subdivisionStartIndex]; edge.Tail.Edges.Add(subdividedStartChar, new Edge(this) { StartIndex = subdivisionStartIndex, Head = edge.Tail }); edge.Tail.Edges.Add(c, new Edge(this) { StartIndex = InsertionPoint, Head = edge.Tail }); Console.WriteLine(edge + " => " + edge.Tail.NodeNumber + " => " + edge.Tail.Edges.First().Value + " / " + edge.Tail.Edges.Last().Value); if(i > 0 && previousNode != null) { Console.WriteLine("Connected node " + previousNode.NodeNumber + " ---> node " + edge.Tail.NodeNumber); previousNode.LinkedNode = edge.Tail; } previousNode = edge.Tail; remainder--; if(_activePoint.ActiveNode.NodeNumber > 0) { Console.Write("Changed the active node from " + _activePoint.ActiveNode.NodeNumber + " to "); _activePoint.ActiveNode = _activePoint.ActiveNode.LinkedNode ?? RootNode; Console.WriteLine(_activePoint.ActiveNode.NodeNumber); } else { _activePoint.ActiveLength--; _activePoint.ActiveEdgeStartsWith = _activePoint.ActiveLength == 0 ? '\0' : Word[InsertionPoint - _activePoint.ActiveLength]; } } } else if(_activePoint.ActiveNode.Edges.TryGetValue(c, out edge)) { Console.WriteLine("Found '" + c.ToString().ToUpper() + "' on edge " + edge + "; updating active point."); _activePoint.ActiveEdgeStartsWith = c; _activePoint.ActiveLength = 1; remainder++; implicitComplete = true; } else { Console.WriteLine("Inserting singular edge for '" + c.ToString().ToUpper() + "' from node " + _activePoint.ActiveNode.NodeNumber); edge = new Edge(this) { StartIndex = InsertionPoint, Head = _activePoint.ActiveNode }; _activePoint.ActiveNode.Edges.Add(c, edge); } Console.WriteLine(WriteText()); } } } public string WriteText() { var writer = new StringWriter(); RootNode.WriteText(writer, 0); return writer.ToString(); } } public class Node { public Dictionary<char, Edge> Edges { get; private set; } public Node LinkedNode { get; set; } public Edge Stem { get; set; } public int NodeNumber { get; set; } public Node() { Edges = new Dictionary<char, Edge>(); } public void WriteText(TextWriter writer, int indent) { writer.Write(NodeNumber); var i = 0; var maxEdgeLength = Edges.Values.Max(e => (e.EndIndex ?? e.Tree.InsertionPoint) - e.StartIndex); foreach(var edge in Edges.Values) { if(i++ == 0) writer.Write(" +"); else { writer.Write(" "); if(indent > 0) { writer.Write("|"); writer.Write(new string(' ', indent - 1)); } writer.Write("+"); } edge.WriteText(writer, indent + 3, maxEdgeLength); } } public int Depth { get { return Stem == null ? 0 : Stem.Length + Stem.Head.Depth; } } public override string ToString() { return "Node " + NodeNumber; } } public class Edge { public Edge(SuffixTree tree) { Tree = tree; } public SuffixTree Tree { get; set; } public Node Head { get; set; } public Node Tail { get; set; } public int StartIndex { get; set; } public int? EndIndex { get; set; } public override string ToString() { return string.Concat("[", Head.NodeNumber, ":'", Tree.Word.Substring(StartIndex, Length).ToUpper(), "']"); } public int Length { get { return (EndIndex ?? Math.Min(Tree.Word.Length - 1, Tree.InsertionPoint)) - StartIndex + 1; } } public void WriteText(TextWriter writer, int indent, int maxEdgeLength) { writer.Write(" - "); var str = Tree.Word.Substring(StartIndex, Length).ToUpper(); writer.Write(str); if(Tail == null) writer.WriteLine(""); else { writer.Write(" "); writer.Write(new string('-', 1 + maxEdgeLength - str.Length)); writer.Write(" "); Tail.WriteText(writer, indent + maxEdgeLength + 6); } } } public class ActivePoint { public Node ActiveNode { get; set; } public char ActiveEdgeStartsWith { get; set; } public int ActiveLength { get; set; } public override string ToString() { return string.Concat("{", ActiveNode, ",'", ActiveEdgeStartsWith == '\0' ? "\\0" : ActiveEdgeStartsWith.ToString(), "',", ActiveLength, "}"); } } } This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,4 @@ static void Main() { var tree = new SuffixTree("abcabxabcd"); }