Created
April 10, 2016 06:43
-
-
Save matanox/ba788225081a6d013763ef9b1c0840ed to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Randomised cross-check of `WeightedWagnerFischer` (constructed with `DefaultCosts`)
 * against the Levenshtein implementation of the external
 * `info.debatty.java.stringsimilarity` library.
 *
 * Running the object generates 100 pairs of similar strings from a fixed random seed
 * and asserts, for every pair, that both implementations report the same distance.
 */
object WeightedWagnerFischerTest extends App {

  /* currently tests for the default weights */

  val externalLibraryImpl = new info.debatty.java.stringsimilarity.Levenshtein

  val random = new scala.util.Random(333) // pin down random seed

  /** A random alphanumeric string of the given length. */
  def randomString(length: Int) = random.alphanumeric.take(length).mkString

  /**
   * Generates a random string together with a randomly edited variant of it.
   *
   * Between 1 and `maxTouches` single-character edits (substitution, addition or
   * deletion) are applied one after another, each to the result of the previous one.
   *
   * @param length     length of the original string
   * @param maxTouches upper bound (inclusive) on the number of edits; must be positive
   * @return the original string and its edited variant
   */
  def randomPair(length: Int, maxTouches: Int): (String, String) = {
    require(maxTouches > 0, s"maxTouches must be positive, but was $maxTouches")

    val string1 = randomString(length)

    // At least one touch is always applied, at most maxTouches.
    val touches = random.nextInt(maxTouches) + 1

    val string2 = (1 to touches).foldLeft(string1) { (current, _) =>
      // An empty string can neither be substituted into nor deleted from, only grown.
      val touchKind = if (current.isEmpty) 1 else random.nextInt(3)

      touchKind match {
        case 0 => // substitution
          val (prefix, suffix) = current.splitAt(random.nextInt(current.length))
          prefix + random.alphanumeric.head + suffix.drop(1)

        case 1 => // addition (the end of the string is a valid insertion point too)
          val (prefix, suffix) = current.splitAt(random.nextInt(current.length + 1))
          prefix + random.alphanumeric.head + suffix

        case _ => // deletion
          val (prefix, suffix) = current.splitAt(random.nextInt(current.length))
          prefix + suffix.drop(1)
      }
    }

    (string1, string2)
  }

  /**
   * Computes the edit distance of the pair with `WeightedWagnerFischer` and asserts
   * that it equals the distance reported by the external library.
   *
   * @param pair the two strings to compare
   * @return the distance computed by `WeightedWagnerFischer`
   */
  def assertedEditDistance(pair: (String, String)) = {
    val (first, second) = pair

    val externalLibraryResult = externalLibraryImpl.distance(first, second)
    val computed = new WeightedWagnerFischer(DefaultCosts).distance(first, second)

    assert(
      computed == externalLibraryResult,
      s"Computed Levenshtein distance is: $computed, but should be $externalLibraryResult according to external library, for $pair"
    )

    computed
  }

  val testPairs = Seq.fill(100)(randomPair(10, 5))

  // Only the assertions matter here, so no result collection is built.
  testPairs.foreach(pair => assertedEditDistance(pair))
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment