Last active
July 28, 2021 16:14
-
-
Save Chlumsky/459df0599c41e85fc7ad78443d036028 to your computer and use it in GitHub Desktop.
Modified Levenshtein distance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#pragma once | |
#include <algorithm> | |
#include <vector> | |
template <typename T> struct NeverEmptyComparator { bool operator==(const T &) const { return false; } }; | |
/// Computes the Levenshtein distance between a and b but insertion/deletion of elements equal to EmptyComparator() is free | |
template <typename T, class EmptyComparator = NeverEmptyComparator<T> > | |
int levenshtein(const std::vector<T> &a, const std::vector<T> &b) { | |
size_t n = b.size()+1, m = 1, k = 0; | |
while (m <= n) | |
m <<= 1; | |
std::vector<int> d(m--); | |
d[k] = 0; | |
for (size_t i = 0; i < b.size(); ++i) | |
++k, d[k] = d[k-1]+!(EmptyComparator() == b[i]); | |
for (size_t i = 0; i < a.size(); ++i) { | |
int aEmpty = !(EmptyComparator() == a[i]); | |
++k, d[k&m] = d[(k-n)&m]+aEmpty; | |
for (size_t j = 0; j < b.size(); ++j) { | |
++k, d[k&m] = std::min(std::min( | |
d[(k-1)&m]+!(EmptyComparator() == b[j]), | |
d[(k-n)&m]+aEmpty | |
), | |
d[(k-n-1)&m]+!(a[i] == b[j]) | |
); | |
} | |
} | |
return d[k&m]; | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment