Created
December 4, 2022 08:36
-
-
Save MrAch26/5e2aa7e73b508f8ba9133d468efa4348 to your computer and use it in GitHub Desktop.
Captcha Solver with python
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from PIL import Image | |
from scipy.ndimage import gaussian_filter | |
import numpy | |
import pytesseract | |
from PIL import ImageFilter | |
def solve_captcha(filename, *, th1=140, th2=140, sig=1.5):
    """Clean up a captcha image and OCR the digits it contains.

    The image is converted to greyscale, binarised, blurred with a Gaussian
    filter, binarised again, edge-enhanced and sharpened, and then passed to
    Tesseract restricted to the characters ``0123456789``.

    Each intermediate stage is written to the current working directory as
    ``original.png``, ``black_and_white.png``, ``first_threshold.png``,
    ``blurred.png`` and ``final.png``.

    Args:
        filename: Anything ``PIL.Image.open`` accepts as the captcha image.
        th1: Pixel threshold applied to the greyscale image; values above it
            become 255, everything else 0.
        th2: Pixel threshold applied again after blurring.
        sig: Sigma passed to ``gaussian_filter`` for the blur.

    Returns:
        The text recognised by Tesseract with surrounding whitespace stripped.
    """
    # The context manager closes the source file handle once the greyscale
    # copy has been produced; the original never closed it.
    with Image.open(filename) as original:
        original.save("original.png")
        black_and_white = original.convert("L")
    black_and_white.save("black_and_white.png")

    # First binarisation: pixels brighter than th1 turn white, the rest black.
    first_threshold = black_and_white.point(lambda p: 255 if p > th1 else 0)
    first_threshold.save("first_threshold.png")

    # Blur the binarised image via its array form, then go back to an Image.
    blurred = Image.fromarray(
        gaussian_filter(numpy.array(first_threshold), sigma=sig)
    )
    blurred.save("blurred.png")

    # Second binarisation followed by edge enhancement and sharpening.
    final = blurred.point(lambda p: 255 if p > th2 else 0)
    final = final.filter(ImageFilter.EDGE_ENHANCE_MORE)
    final = final.filter(ImageFilter.SHARPEN)
    final.save("final.png")

    # OCR the in-memory image directly instead of re-opening final.png, which
    # left a second file handle open.
    number = pytesseract.image_to_string(
        final,
        lang='eng',
        config='--psm 10 --oem 3 -c tessedit_char_whitelist=0123456789',
    ).strip()

    print("RESULT OF CAPTCHA:")
    print(number)
    print("===================")
    return number
I have got good results with this script. The image preprocessing works really well and is the key to improving the code. Thanks @MrAch26!
@MrAch26 I've now downloaded over 200k captchas 😂 unfortunately all the same resolution, how many should I upload?
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
It gives the result Captured CAPTCHA: 2Y2z7