Created
January 3, 2019 06:07
-
-
Save Deepayan137/d3e3dc73286c9265af6bcd9d07b7cff6 to your computer and use it in GitHub Desktop.
CA and WA
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import sys | |
import os | |
import tempfile | |
import subprocess | |
import pdb | |
import pandas as pd | |
import numpy as np | |
from collections import defaultdict | |
from ocr.baselines.base_config import * | |
def calculate_word_accuracy(**kwargs):
    """Score every ground-truth/OCR file pair in a directory and report the mean.

    Each file in ``path`` whose name does not contain ``_ocr`` is treated as a
    ground-truth file; its OCR output is expected next to it as
    ``<stem>_ocr.txt``, where ``<stem>`` is the file name up to its first dot.
    The external evaluation tool is run once per pair and the leading
    percentage on the fifth line of its standard output is recorded as that
    file's accuracy.

    Keyword Args:
        path: Directory holding the ground-truth and ``*_ocr.txt`` files.
        accuracy: Passed verbatim as the first argument of the evaluation
            tool (presumably the tool's sub-command selecting character- or
            word-level accuracy -- confirm against the tool's usage).
        tool: Optional path of the evaluation executable. Defaults to
            ``ocr-evaluation-tools/dist/bin/ocrevalutf8.fix``.
        output: Optional path of the CSV report. Defaults to
            ``ocr/stats/rahul.csv``. Missing parent directories are created.

    Returns:
        The mean accuracy over all successfully scored files, or ``nan`` when
        no file could be scored. The same value is printed.
    """
    path = kwargs['path']
    acc = kwargs['accuracy']
    tool = kwargs.get('tool', 'ocr-evaluation-tools/dist/bin/ocrevalutf8.fix')
    output = kwargs.get('output', 'ocr/stats/{}.csv'.format('rahul'))

    def clean(gt_path):
        # Derive the stem from the file name only, so that dots in directory
        # components (e.g. "./data" or "run.v1/") do not truncate the path.
        directory, name = os.path.split(gt_path)
        return os.path.join(directory, name.split('.')[0] + '_ocr.txt')

    count = 0
    ch_acc = {}
    # Sorted so the CSV row order is reproducible across runs and platforms.
    for name in sorted(os.listdir(path)):
        # Test the file name, not the full path: a directory called e.g.
        # "batch_ocr" must not cause every file to be skipped.
        if '_ocr' in name:
            continue
        gt_file = os.path.join(path, name)
        if not os.path.isfile(gt_file):
            continue
        pr_file = clean(gt_file)
        count += 1
        print(count)
        try:
            cmd = [tool, '{}'.format(acc), gt_file, pr_file]
            process = subprocess.run(cmd, stdout=subprocess.PIPE)
            report = process.stdout.decode().splitlines()
            # The fifth report line starts with the accuracy, e.g. "97.31%".
            ch_acc[gt_file] = float(report[4].strip().split()[0].replace('%', ''))
        except (OSError, IndexError, ValueError) as e:
            # Best effort: a missing tool, a short/empty report or an
            # unparsable number skips this file but keeps the run going.
            print('{}: {}'.format(gt_file, e))

    df = pd.DataFrame(list(ch_acc.items()), columns=['file', 'accuracy'])
    out_dir = os.path.dirname(output)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    df.to_csv(output)

    # Avoid numpy's "mean of empty slice" warning when nothing was scored.
    mean = float(np.mean(list(ch_acc.values()))) if ch_acc else float('nan')
    print(mean)
    return mean
def main(**kwargs):
    """Parse command-line options into a ``Config`` and run the evaluation.

    The keyword arguments are handed to ``Config._parse``; the resulting
    ``path`` and ``accuracy`` options are forwarded to
    ``calculate_word_accuracy``.
    """
    config = Config()
    config._parse(kwargs)
    calculate_word_accuracy(path=config.path, accuracy=config.accuracy)
if __name__ == '__main__':
    # Expose main() as a command-line interface; its keyword arguments
    # become --flags.
    from fire import Fire

    Fire(main)
# python -m ocr.baselines.evaluate --path=<path>
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment