Created
July 9, 2025 01:53
-
-
Save hikaMaeng/eb431f5d47113447fbdb223f0b2b5146 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
package kore.ocr | |
import kore.vo.VO | |
import kore.vo.field.value.float | |
import kore.vo.field.value.int | |
import kore.vo.field.value.string | |
import kore.vo.field.voList | |
import net.sourceforge.tess4j.Tesseract | |
import java.awt.image.BufferedImage | |
import java.awt.image.ConvolveOp | |
import java.awt.image.Kernel | |
/**
 * OCR helper built on a tess4j [Tesseract] engine.
 *
 * Instances are created through the companion `invoke` operator, e.g. `Tess(path)`;
 * the constructor itself is internal. The companion object also offers image
 * pre-processing helpers ([preprocessImage], [otsuThreshold]) that do not need an engine.
 *
 * @param path value handed to `Tesseract.setDatapath`.
 * @param lang value handed to `Tesseract.setLanguage`.
 */
class Tess internal constructor(path: String, lang: String) {
    /**
     * Result of one [extractCharacters] call.
     *
     * [extractCharacters] only fills [list]; [width] and [height] keep their
     * default of 0 unless the caller assigns them.
     */
    class OCRList : VO() {
        var width by int(0)
        var height by int(0)
        val list by voList(::OCRWord) {
            default { arrayListOf() }
        }
    }

    /**
     * One recognized element and its bounding box.
     *
     * [extractCharacters] assigns every field except [data], which keeps its default.
     */
    class OCRWord : VO() {
        var index by int
        var char by string
        var x by int
        var y by int
        var width by int
        var height by int
        var data by string("")
        var accuracy by float(0f)
    }

    /**
     * Granularity of the elements returned by [extractCharacters].
     *
     * The constant's `ordinal` is passed to `Tesseract.getWords` as the page iterator
     * level, so the declaration order is significant and must not be changed.
     * NOTE(review): presumably mirrors Tesseract's RIL_BLOCK..RIL_SYMBOL (0..4) — confirm.
     */
    enum class OCRLevel {
        BLOCK, PARA, TEXTLINE, WORD, SYMBOL
    }

    private val tess = Tesseract().apply {
        setDatapath(path)
        setLanguage(lang)
    }

    /**
     * Runs OCR on [image] and collects every non-blank result at the requested [level].
     *
     * Recognized text is trimmed; results that are empty after trimming are skipped.
     * Kept entries are numbered consecutively from 0 in the order the engine returns them.
     *
     * @param image image to recognize.
     * @param level granularity of the returned elements; defaults to single symbols.
     * @return a new [OCRList] whose `list` holds one [OCRWord] per kept result.
     */
    fun extractCharacters(image: BufferedImage, level: OCRLevel = OCRLevel.SYMBOL): OCRList {
        val result = OCRList()
        var i = 0
        for (word in tess.getWords(image, level.ordinal)) {
            val text = word.text.trim()
            // Test the trimmed text: whitespace-only results must not become empty entries.
            if (text.isEmpty()) continue
            val boundingBox = word.boundingBox
            println("OCR: $text, $boundingBox")
            // Explicit assignments on a named local avoid any name shadowing by the VO receiver.
            val entry = OCRWord()
            entry.index = i++
            entry.char = text
            entry.x = boundingBox.x
            entry.y = boundingBox.y
            entry.width = boundingBox.width
            entry.height = boundingBox.height
            entry.accuracy = word.confidence
            result.list.add(entry)
        }
        return result
    }

    companion object {
        /** Number of distinct sample values counted by the histogram. */
        private const val GRAY_LEVELS = 256

        /**
         * Factory entry point: `Tess(path)` or `Tess(path, lang)`.
         *
         * @param path value handed to `Tesseract.setDatapath`.
         * @param lang value handed to `Tesseract.setLanguage`; defaults to `"eng+kor"`.
         */
        operator fun invoke(path: String, lang: String = "eng+kor"): Tess = Tess(path, lang)

        /**
         * Prepares an image for OCR: grayscale conversion, 3x3 sharpening, Otsu binarization.
         *
         * @param originalImage source image; it is not modified.
         * @return a new `TYPE_BYTE_BINARY` image of the same size.
         */
        fun preprocessImage(originalImage: BufferedImage): BufferedImage {
            // Drawing onto a TYPE_BYTE_GRAY canvas performs the grayscale conversion.
            val grayscale = BufferedImage(originalImage.width, originalImage.height, BufferedImage.TYPE_BYTE_GRAY)
            val g = grayscale.createGraphics()
            g.drawImage(originalImage, 0, 0, null)
            g.dispose()

            val sharpenKernel = floatArrayOf(
                0f, -1f, 0f,
                -1f, 5f, -1f,
                0f, -1f, 0f,
            )
            // EDGE_NO_OP: border pixels are copied unchanged instead of being convolved.
            val convolveOp = ConvolveOp(Kernel(3, 3, sharpenKernel), ConvolveOp.EDGE_NO_OP, null)
            val sharpened = convolveOp.filter(grayscale, null)
            return otsuThreshold(sharpened)
        }

        /**
         * Binarizes [image] with a global threshold chosen by Otsu's method
         * (the level that maximizes the between-class variance of the histogram).
         *
         * Pixels whose sample is strictly greater than the threshold become white,
         * all others black.
         *
         * @param image a `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY` image; it is not modified.
         * @return a new `TYPE_BYTE_BINARY` image of the same size.
         * @throws IllegalArgumentException if [image] has any other type.
         */
        fun otsuThreshold(image: BufferedImage): BufferedImage {
            require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
                "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
            }
            val width = image.width
            val height = image.height
            val raster = image.raster

            val histogram = IntArray(GRAY_LEVELS)
            for (y in 0 until height) {
                for (x in 0 until width) {
                    histogram[raster.getSample(x, y, 0)]++
                }
            }

            val threshold = findOtsuThreshold(histogram, width * height)

            val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
            val binaryRaster = binary.raster
            for (y in 0 until height) {
                for (x in 0 until width) {
                    // The destination holds 1-bit samples: 1 is white, 0 is black.
                    val bit = if (raster.getSample(x, y, 0) > threshold) 1 else 0
                    binaryRaster.setSample(x, y, 0, bit)
                }
            }
            return binary
        }

        /**
         * Returns the histogram level that maximizes the between-class variance,
         * or 0 when no split into two non-empty classes exists.
         */
        private fun findOtsuThreshold(histogram: IntArray, totalPixels: Int): Int {
            // Multiply in Double: `level * count` overflows Int as soon as one level
            // holds more than about 8.4 million pixels (e.g. a mostly white large scan).
            var sumAll = 0.0
            for (level in histogram.indices) {
                sumAll += level.toDouble() * histogram[level]
            }

            var sumBackground = 0.0
            var weightBackground = 0
            var maxVariance = 0.0
            var threshold = 0
            for (level in histogram.indices) {
                weightBackground += histogram[level]
                if (weightBackground == 0) continue // no background pixels yet
                val weightForeground = totalPixels - weightBackground
                if (weightForeground == 0) break // every pixel is background from here on

                sumBackground += level.toDouble() * histogram[level]
                val meanBackground = sumBackground / weightBackground
                val meanForeground = (sumAll - sumBackground) / weightForeground
                val meanGap = meanBackground - meanForeground
                val betweenVariance = weightBackground.toDouble() * weightForeground.toDouble() * meanGap * meanGap
                if (betweenVariance > maxVariance) {
                    maxVariance = betweenVariance
                    threshold = level
                }
            }
            return threshold
        }
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment