Created
February 19, 2024 23:23
-
-
Save EDDxample/085a7d1fe01dd1ec1c70a40b8c44331f to your computer and use it in GitHub Desktop.
Applies OCR to the screen rectangle covered by the resizable window that pops up (press 'd' to trigger the OCR).
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# pip install pygame pywin32 pytesseract
# tesseract binary: https://digi.bib.uni-mannheim.de/tesseract/
from queue import Queue | |
from threading import Thread | |
import pygame | |
import win32api | |
import win32con | |
import win32gui | |
from PIL import Image, ImageGrab | |
from pytesseract import image_to_string, pytesseract | |
def main():
    """Run the transparent capture window and queue screenshots for OCR.

    Starts ``ocr_thread`` as a daemon worker so recognition does not block
    the window's event loop. Pressing 'd' grabs the screen rectangle
    reported for the window, saves it as ``screenshot_<n>.png`` (relative
    to the current working directory) and puts that filename on the queue.
    Closing the window ends the loop.
    """
    pytesseract.tesseract_cmd = r"C:/Program Files/Tesseract-OCR/tesseract.exe"

    queue = Queue()
    Thread(target=ocr_thread, args=(queue,), daemon=True).start()

    # init "transparent" window
    pygame.init()
    try:
        color_key = (255, 0, 128)
        window_handler, surface = init_screen(color_key)

        # Without a frame limiter the loop redraws as fast as it can and
        # keeps one CPU core fully busy while the window is merely idle.
        clock = pygame.time.Clock()

        i = 0
        done = False
        while not done:
            for event in pygame.event.get():
                if event.type == pygame.QUIT:
                    done = True
                elif event.type == pygame.KEYDOWN and event.unicode == "d":
                    # NOTE: the position of the rect is based on the Display1
                    # NOTE(review): GetWindowRect returns the outer window
                    # rectangle, so the capture presumably includes the
                    # title bar and borders - confirm this is intended.
                    box = win32gui.GetWindowRect(window_handler)
                    filename = f"screenshot_{i}.png"
                    ImageGrab.grab(box).save(filename)
                    queue.put(filename)
                    i += 1

            # Repaint with the key colour so the window stays see-through.
            surface.fill(color_key)
            pygame.display.update()
            clock.tick(60)
    finally:
        # Release the display even if the loop exits through an exception.
        pygame.quit()
def init_screen(color_key: tuple[int, int, int]) -> tuple[int, pygame.Surface]:
    """Create the resizable 800x600 window and key out *color_key*.

    Returns the native window handle together with the pygame display
    surface, in that order.
    """
    display = pygame.display.set_mode((800, 600), pygame.RESIZABLE)
    hwnd = pygame.display.get_wm_info()["window"]

    # Keep whatever extended styles are already set and add the layered flag.
    ex_style = win32gui.GetWindowLong(hwnd, win32con.GWL_EXSTYLE)
    win32gui.SetWindowLong(
        hwnd, win32con.GWL_EXSTYLE, ex_style | win32con.WS_EX_LAYERED
    )

    # Register the key colour with the layered window (LWA_COLORKEY mode).
    key_colour = win32api.RGB(*color_key)
    win32gui.SetLayeredWindowAttributes(
        hwnd, key_colour, 0, win32con.LWA_COLORKEY
    )

    return hwnd, display
def ocr_thread(queue: Queue[str]):
    """Consume screenshot paths from *queue* and print the recognised text.

    Blocks on ``queue.get()`` for each item and returns as soon as a falsy
    item (for example an empty string) is received. Every image is passed
    to ``image_to_string`` with ``lang="jpn"``.
    """
    while screenshot := queue.get():
        # Image.open leaves the underlying file open until the image is
        # closed; the context manager stops one handle leaking per capture.
        with Image.open(screenshot) as image:
            text = image_to_string(image, lang="jpn")
        print(f"------------\n{text}")
# Start the tool only when this file is executed as a script.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment