Last active
January 6, 2021 05:52
-
-
Save xylcbd/effb388b185321a313fb72442c8912ee to your computer and use it in GitHub Desktop.
Calculate CR and AR for OCR benchmarks.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def custom_min(data):
    """Return the smallest item of ``data`` together with its index.

    ``data`` must support ``len()`` and integer indexing.  When several
    items tie for the minimum, the index of the first one is returned.

    Returns:
        A ``(min_item, min_index)`` tuple.

    Raises:
        AssertionError: if ``data`` is empty (when assertions are enabled).
    """
    assert len(data) >= 1
    # Only reachable when assertions are stripped (python -O): hand the
    # empty input back unchanged with index 0.
    if len(data) <= 0:
        return data, 0
    # min() keeps the first minimal element, and it compares with "<",
    # so ties resolve to the lowest index.
    best_pos = min(range(len(data)), key=lambda pos: data[pos])
    return data[best_pos], best_pos
def wer(r, h):
    """Compute the edit distance between ``r`` and ``h`` with a per-type breakdown.

    ``r`` is the reference sequence and ``h`` the hypothesis; both must
    support ``len()`` and integer indexing, and their elements are compared
    with ``==``.  A dynamic-programming table is filled first, then one
    optimal alignment is walked back from the bottom-right corner to count
    the edit operations on it.

    Operation meaning (relative to the reference ``r``):
        * substitution -- an element of ``r`` was replaced by a different one
        * insertion    -- ``h`` contains an extra element not in ``r``
        * deletion     -- an element of ``r`` is missing from ``h``

    When several optimal alignments exist, the back-trace prefers a
    substitution, then an insertion, then a deletion.

    Returns:
        ``(dist, substitutions, insertions, deletions)`` as plain ints, where
        ``dist == substitutions + insertions + deletions``.
    """
    ref_len = len(r)
    hyp_len = len(h)

    # d[i][j] holds the edit distance between r[:i] and h[:j].  Plain Python
    # ints are used so the distance cannot overflow on long inputs.
    d = [[0] * (hyp_len + 1) for _ in range(ref_len + 1)]
    for i in range(1, ref_len + 1):
        d[i][0] = i  # delete every element of r[:i]
    for j in range(1, hyp_len + 1):
        d[0][j] = j  # insert every element of h[:j]

    # computation
    for i in range(1, ref_len + 1):
        for j in range(1, hyp_len + 1):
            if r[i - 1] == h[j - 1]:
                d[i][j] = d[i - 1][j - 1]
            else:
                d[i][j] = 1 + min(
                    d[i - 1][j - 1],  # substitution
                    d[i][j - 1],      # insertion
                    d[i - 1][j],      # deletion
                )
    dist = d[ref_len][hyp_len]

    # Back-trace: walk from (ref_len, hyp_len) to (0, 0).  Every index is
    # bounds-checked so the walk never reads d[-1][...] / d[...][-1].
    substitutions = 0
    insertions = 0
    deletions = 0
    i = ref_len
    j = hyp_len
    while i > 0 or j > 0:
        if i > 0 and j > 0 and r[i - 1] == h[j - 1]:
            # Matching elements: free diagonal step (the table was filled
            # with d[i][j] == d[i-1][j-1] for this case).
            i -= 1
            j -= 1
        elif i > 0 and j > 0 and d[i][j] == d[i - 1][j - 1] + 1:
            substitutions += 1
            i -= 1
            j -= 1
        elif j > 0 and d[i][j] == d[i][j - 1] + 1:
            # Consumes an element of h only: extra element in the hypothesis.
            insertions += 1
            j -= 1
        else:
            # Consumes an element of r only: element missing from the hypothesis.
            deletions += 1
            i -= 1
    return dist, substitutions, insertions, deletions
def cr_ar(subsitution, insertion, deletion, golden_len):
    """Turn edit-operation counts into the ``(cr, ar)`` rate pair.

    ``cr = (golden_len - deletion - subsitution) / golden_len``
    ``ar = (golden_len - deletion - subsitution - insertion) / golden_len``

    NOTE(review): cr/ar presumably stand for "correct rate" and
    "accurate rate" as used in OCR evaluation -- confirm with the author.

    Args:
        subsitution: number of substitutions (spelling kept for callers).
        insertion: number of insertions.
        deletion: number of deletions.
        golden_len: length of the golden (reference) sequence.

    Returns:
        ``(cr, ar)`` as floats.

    Raises:
        ZeroDivisionError: if ``golden_len`` is 0.
    """
    denominator = float(golden_len)
    # Golden elements that were neither deleted nor substituted.
    kept = golden_len - deletion - subsitution
    cr = float(kept) / denominator
    # ar additionally subtracts insertions, so it can be negative.
    ar = float(kept - insertion) / denominator
    return cr, ar
def score(golden, predicts):
    """Return the ``(cr, ar)`` pair for ``predicts`` measured against ``golden``.

    Runs ``wer(golden, predicts)``, discards the overall distance, and
    passes the remaining three counts (read as substitution, insertion,
    deletion, in that order) together with ``len(golden)`` to ``cr_ar``.
    """
    edit_counts = wer(golden, predicts)[1:]
    return cr_ar(edit_counts[0], edit_counts[1], edit_counts[2], len(golden))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment