Skip to content

Instantly share code, notes, and snippets.

@hanny24
Created February 8, 2014 13:45

Revisions

  1. hanny24 created this gist Feb 8, 2014.
    55 changes: 55 additions & 0 deletions String distances
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,55 @@
    function leven_damerau(a::AbstractVector, b::AbstractVector, width::Int, fill_fun)
    a_n = length(a)
    b_n = length(b)
    n = max(a_n,b_n)
    tmp = Array(Int,n+1,width)
    idx = [i for i in 1:width]

    for i in 1:n+1
    tmp[i, idx[width] - 1] = i-1
    for j in 1:width - 2
    tmp[i, j] = 0
    end
    end
    for i in 1:n
    tmp[1,idx[width]] = i
    for j in 2:n+1
    tmp[j,idx[width]] = fill_fun(i,j,idx,tmp)
    end
    idx = circshift(idx,width-1)
    end
    tmp[n+1, idx[width-1]]
    end

    function levenshtein(a::AbstractVector, b::AbstractVector)
    a_n = length(a)
    b_n = length(b)
    leven_damerau(a, b, 2, (i,j,idx,tmp) ->
    min(tmp[j-1,idx[2]] + 1, tmp[j,idx[1]] + 1,
    if i <= a_n && j-1 <= b_n && a[i] == b[j-1]
    tmp[j-1,idx[1]]
    else
    tmp[j-1,idx[1]] + 1
    end
    ))
    end

    function damerau(a::AbstractVector, b::AbstractVector)
    a_n = length(a)
    b_n = length(b)
    leven_damerau(a, b, 3, (i,j,idx,tmp) ->
    min(tmp[j-1,idx[3]] + 1, tmp[j,idx[2]] + 1,
    if i <= a_n && j-1 <= b_n && a[i] == b[j-1]
    tmp[j-1,idx[2]]
    else
    tmp[j-1,idx[2]] + 1
    end,
    if i > 1 && i <= a_n && j > 2 j <= b_n && a[i-1] == b[j] && a[i] == b[j-1]
    print (i,j)
    tmp[j-2, idx[1]] + 1
    else
    Inf
    end
    )
    )
    end