Created
March 2, 2021 17:34
-
-
Save Soumi7/f0d06804db2f3725387289f640b16598 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from flask import Flask, request, jsonify, render_template, url_for, make_response | |
from werkzeug.utils import secure_filename | |
import os | |
import json | |
import numpy as np | |
# The WSGI application object that every route below is registered on.
app = Flask(__name__)
# Folder configured for uploaded images; the visible routes decode uploads in
# memory and do not write to it.
app.config.update(UPLOAD_FOLDER="./imgdir")
import numpy as np | |
import cv2 | |
import pandas as pd | |
import pytesseract | |
import io | |
from PIL import Image | |
@app.route('/')
def home():
    """Render and return the ``index.html`` template for the site root."""
    page = render_template("index.html")
    return page
# Contours whose bounding box reaches these sizes are not used as cells
# (presumably they are the table/page outline rather than a cell -- confirm).
_MAX_CELL_WIDTH = 1000
_MAX_CELL_HEIGHT = 500


def _decode_grayscale(file):
    """Decode an uploaded file object into a single-channel image.

    Returns the decoded image, or ``None`` when the upload is empty or is not
    an image OpenCV can decode.
    """
    data = file.read()
    if not data:
        return None
    # Flag 0 asks OpenCV for a grayscale decode.
    return cv2.imdecode(np.frombuffer(data, np.uint8), 0)


def _isolate_grid(img):
    """Return a binary image built from the vertical and horizontal lines of ``img``."""
    _, img_bin = cv2.threshold(img, 128, 255, cv2.THRESH_BINARY)
    img_bin = 255 - img_bin
    _, img_bin_otsu = cv2.threshold(
        255 - img, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU
    )

    # Kernel length scales with the image width; clamp to 1 so images narrower
    # than 100 px do not request a zero-sized structuring element.
    line_length = max(1, img.shape[1] // 100)

    vertical_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (1, line_length))
    vertical_lines = cv2.erode(img_bin_otsu, vertical_kernel, iterations=3)
    vertical_lines = cv2.dilate(vertical_lines, vertical_kernel, iterations=3)

    hor_kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (line_length, 1))
    horizontal_lines = cv2.erode(img_bin, hor_kernel, iterations=5)
    horizontal_lines = cv2.dilate(horizontal_lines, hor_kernel, iterations=5)

    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 2))
    grid = cv2.addWeighted(vertical_lines, 0.5, horizontal_lines, 0.5, 0.0)
    grid = cv2.erode(~grid, kernel, iterations=3)
    _, grid = cv2.threshold(grid, 128, 255, cv2.THRESH_BINARY | cv2.THRESH_OTSU)
    return grid


def _group_into_rows(boxes, mean_height):
    """Split ``boxes`` (already sorted by y) into rows.

    A box joins the current row while its y is within half of ``mean_height``
    of the previous box's y; otherwise it starts a new row.
    """
    rows = []
    current = [boxes[0]]
    previous = boxes[0]
    for box in boxes[1:]:
        if box[1] <= previous[1] + mean_height / 2:
            current.append(box)
        else:
            rows.append(current)
            current = [box]
        previous = box
    # Always flush the row still being built; otherwise a final box that
    # starts its own row (or a lone box) would be lost.
    rows.append(current)
    return rows


def _assign_columns(rows):
    """Place every box of every row into a column slot.

    The widest row defines the number of columns and their x centres; each
    box goes to the slot whose centre is nearest. Returns one list of slots
    per row, each slot being a (possibly empty) list of boxes.
    """
    widest = max(rows, key=len)
    total_cells = len(widest)
    center = np.array([int(x + w / 2) for x, _, w, _ in widest])
    center.sort()

    table = []
    for row in rows:
        slots = [[] for _ in range(total_cells)]
        for box in row:
            # NOTE(review): the probe point is x + w/4 while the column
            # centres use x + w/2; kept as found -- confirm it is intended.
            diff = np.abs(center - (box[0] + box[2] / 4))
            slots[int(np.argmin(diff))].append(box)
        table.append(slots)
    return table


def _ocr_cell(bitnot, box):
    """Run Tesseract on the region of ``bitnot`` covered by ``box`` (x, y, w, h)."""
    x, y, w, h = box
    roi = bitnot[y:y + h, x:x + w]
    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (2, 1))
    border = cv2.copyMakeBorder(
        roi, 2, 2, 2, 2, cv2.BORDER_CONSTANT, value=[255, 255]
    )
    resizing = cv2.resize(border, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    dilation = cv2.dilate(resizing, kernel, iterations=1)
    erosion = cv2.erode(dilation, kernel, iterations=2)
    return pytesseract.image_to_string(erosion)


def _extract_table(file):
    """Detect the table grid in an uploaded image and OCR each cell.

    Args:
        file: the uploaded file object (anything with a ``read()`` method
            returning the encoded image bytes).

    Returns:
        A ``pandas.DataFrame`` with one row per detected table row and one
        column per detected table column. Empty slots hold ``' '``; other
        cells hold the OCR text of their boxes, each prefixed by a space.

    Raises:
        ValueError: if the upload cannot be decoded as an image or no table
            cells are found in it.
    """
    img = _decode_grayscale(file)
    if img is None:
        raise ValueError("Uploaded file could not be decoded as an image.")

    grid = _isolate_grid(img)
    # Image handed to the OCR step: XOR of the page with the grid image, inverted.
    bitnot = cv2.bitwise_not(cv2.bitwise_xor(img, grid))

    contours, _ = cv2.findContours(grid, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
    # Top-to-bottom order (stable sort on the box's y coordinate).
    bounding_boxes = sorted(
        (cv2.boundingRect(contour) for contour in contours),
        key=lambda rect: rect[1],
    )
    boxes = [
        list(rect)
        for rect in bounding_boxes
        if rect[2] < _MAX_CELL_WIDTH and rect[3] < _MAX_CELL_HEIGHT
    ]
    if not boxes:
        raise ValueError("No table cells were detected in the image.")

    # The mean is taken over every bounding box, including the ones filtered
    # out above, exactly as the row threshold was computed before.
    mean_height = np.mean([rect[3] for rect in bounding_boxes])

    rows = _group_into_rows(boxes, mean_height)
    table = _assign_columns(rows)

    cells = [
        [
            "".join(" " + _ocr_cell(bitnot, box) for box in slot) if slot else " "
            for slot in slots
        ]
        for slots in table
    ]
    return pd.DataFrame(cells)


@app.route('/predict_api', methods=["GET", "POST"])
def list_post():
    """JSON API: OCR the table in the uploaded image.

    Reads the upload from the ``file`` form field. On success the response is
    ``{"result": <string>}`` where the string is the table serialised as
    pretty-printed JSON in pandas' ``split`` orientation. The frame is first
    round-tripped through CSV (in memory, instead of a shared ``output.csv``
    on disk) so the payload keeps the shape produced by that round trip.

    Returns a 400 JSON error when the upload is not a decodable image or no
    table cells are found.
    """
    file = request.files['file']
    try:
        dataframe = _extract_table(file)
    except ValueError as exc:
        return jsonify(error=str(exc)), 400

    dataframe = pd.read_csv(io.StringIO(dataframe.to_csv()))
    parsed = json.loads(dataframe.to_json(orient="split"))
    return jsonify(result=json.dumps(parsed, indent=4))


@app.route('/', methods=['POST', 'GET'])
def predict():
    """HTML form handler: OCR the uploaded PNG and return it as a CSV download.

    Reads the upload from the ``file`` form field. Files whose extension is
    not ``png`` (case-insensitive) get the ``Wrong_file_type.html`` template.
    On success the response is the table as CSV with an attachment
    disposition named ``table.csv``.

    Returns a 400 plain-text error when the upload is not a decodable image
    or no table cells are found.

    NOTE(review): ``home`` is also routed on ``'/'``; GET requests are
    presumably answered there -- verify the intended split.
    """
    file = request.files['file']
    extension = (file.filename or "").split(".")[-1].lower()
    if extension != "png":
        return render_template('Wrong_file_type.html')

    try:
        dataframe = _extract_table(file)
    except ValueError as exc:
        return make_response(str(exc), 400)

    resp = make_response(dataframe.to_csv())
    resp.headers["Content-Disposition"] = "attachment; filename=table.csv"
    resp.headers["Content-Type"] = "text/csv"
    return resp
if __name__ == "__main__":
    # When run as a script, start the Flask server on all interfaces, port 5000.
    app.run(port=5000, host="0.0.0.0")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment