-
-
Save MrAch26/5e2aa7e73b508f8ba9133d468efa4348 to your computer and use it in GitHub Desktop.
from PIL import Image | |
from scipy.ndimage import gaussian_filter | |
import numpy | |
import pytesseract | |
from PIL import ImageFilter | |
def solve_captcha(filename):
    """Run OCR on a digit-only captcha image and return the recognised text.

    The image goes through these stages, and each intermediate result is
    written to the current working directory for inspection
    (``original.png``, ``black_and_white.png``, ``first_threshold.png``,
    ``blurred.png``, ``final.png``):

    1. convert to 8-bit grayscale,
    2. binarise with ``th1``,
    3. Gaussian-blur with ``sig``,
    4. binarise again with ``th2``,
    5. edge-enhance and sharpen,
    6. run Tesseract restricted to the characters ``0123456789``.

    Args:
        filename: Image source handed straight to ``Image.open``.

    Returns:
        The string produced by Tesseract with surrounding whitespace
        stripped.
    """
    th1 = 140  # threshold applied to the grayscale image
    th2 = 140  # threshold applied after blurring
    sig = 1.5  # sigma of the Gaussian blur

    # The context manager closes the source file once the pixel data has
    # been pulled in by save()/convert().
    with Image.open(filename) as original:
        original.save("original.png")
        black_and_white = original.convert("L")
    black_and_white.save("black_and_white.png")

    # Pixels brighter than th1 become white (255), everything else black (0).
    first_threshold = black_and_white.point(lambda p: 255 if p > th1 else 0)
    first_threshold.save("first_threshold.png")

    # Blur on the raw array, then wrap the result back into a PIL image.
    blurred = Image.fromarray(
        gaussian_filter(numpy.array(first_threshold), sigma=sig)
    )
    blurred.save("blurred.png")

    final = blurred.point(lambda p: 255 if p > th2 else 0)
    final = final.filter(ImageFilter.EDGE_ENHANCE_MORE)
    final = final.filter(ImageFilter.SHARPEN)
    final.save("final.png")

    # OCR the in-memory image directly; re-opening final.png would only
    # create a second file handle that is never closed.
    number = pytesseract.image_to_string(
        final,
        lang='eng',
        config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789',
    ).strip()

    print("RESULT OF CAPTCHA:")
    print(number)
    print("===================")
    return number
@arunpurshotam
That’s because you have a setting on line 29 that specifically says: tessedit_char_whitelist=0123456789')
you should change or remove that
@AbdulMobinFata
Change line 29 to this :
result = pytesseract.image_to_string(Image.open('final.png'), lang='eng', config='--psm 10 --oem 3')
How do I use my own captcha to test this?
@MikeyD-rbg
Can you send more examples of your captcha and with better resolution ?
This snippet is meant for numbers, as mentioned on line 28: tessedit_char_whitelist=0123456789
@Manedi send more examples
will try
@MikeyD-rbg Can you send more examples of your captcha and with better resolution ?
This snippet is meant for numbers, as mentioned on line 28:
tessedit_char_whitelist=0123456789
I've sent you 10 more examples. unfortunately all the captchas are from a site and all this size.
How many more examples would you like?
I've managed to solve one of the captchas but the z is not capitalized, how do I ensure all the letters are capitals?
Added this line but still lowercase z;
config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
I have got good results with this script. The image preprocessing works really good and is the key to improve the code. Thanks @MrAch26!
@MrAch26 I've now downloaded over 200k captchas 😂 unfortunately all the same resolution, how many should I upload?
Didn't work for attached image
