Skip to content

Instantly share code, notes, and snippets.

@hikaMaeng
Created July 9, 2025 01:53
Show Gist options
  • Save hikaMaeng/eb431f5d47113447fbdb223f0b2b5146 to your computer and use it in GitHub Desktop.
Save hikaMaeng/eb431f5d47113447fbdb223f0b2b5146 to your computer and use it in GitHub Desktop.
package kore.ocr
import kore.vo.VO
import kore.vo.field.value.float
import kore.vo.field.value.int
import kore.vo.field.value.string
import kore.vo.field.voList
import net.sourceforge.tess4j.Tesseract
import java.awt.image.BufferedImage
import java.awt.image.ConvolveOp
import java.awt.image.Kernel
/**
 * OCR helper built around a single Tess4J [Tesseract] engine.
 *
 * The constructor is `internal`; callers obtain an instance through the companion
 * `invoke` operator, i.e. `Tess(path)` or `Tess(path, lang)`.
 *
 * @param path value handed to `Tesseract.setDatapath`.
 * @param lang value handed to `Tesseract.setLanguage`.
 */
class Tess internal constructor(path:String, lang:String) {
    /**
     * Result container returned by [extractCharacters].
     *
     * Only [list] is filled in by [extractCharacters]; [width] and [height] keep
     * their default of 0 unless the caller sets them.
     */
    class OCRList:VO(){
        var width by int(0)
        var height by int(0)
        val list by voList(::OCRWord){
            default{arrayListOf()}
        }
    }

    /**
     * One recognised element together with its bounding box.
     *
     * [data] defaults to an empty string and is not written by [extractCharacters];
     * [accuracy] defaults to 0 and receives the confidence reported by Tesseract.
     */
    class OCRWord:VO(){
        var index by int
        var char by string
        var x by int
        var y by int
        var width by int
        var height by int
        var data by string("")
        var accuracy by float(0f)
    }

    /**
     * Granularity requested from Tesseract.
     *
     * The `ordinal` of each entry is passed straight to `Tesseract.getWords`, so the
     * declaration order is significant.
     * NOTE(review): presumably mirrors Tesseract's page-iterator levels
     * (BLOCK=0 .. SYMBOL=4) — confirm before reordering or inserting entries.
     */
    enum class OCRLevel{
        BLOCK, PARA, TEXTLINE, WORD, SYMBOL
    }

    // Single engine instance, configured once from the constructor arguments.
    private val tess = Tesseract().also{
        it.setDatapath(path)
        it.setLanguage(lang)
    }

    /**
     * Runs OCR on [image] and collects every non-blank result into an [OCRList].
     *
     * Each recognised text is trimmed; results that are empty after trimming are
     * skipped, so [OCRWord.index] numbers only the entries that are actually stored
     * (0, 1, 2, ... with no gaps). Every stored entry is also printed to stdout.
     *
     * @param image image to recognise.
     * @param level granularity of the returned elements, [OCRLevel.SYMBOL] by default.
     * @return a new [OCRList] whose `list` holds one [OCRWord] per non-blank result.
     */
    fun extractCharacters(image:BufferedImage, level:OCRLevel = OCRLevel.SYMBOL):OCRList{
        val result = OCRList()
        var nextIndex = 0
        for(word in tess.getWords(image, level.ordinal)){
            val text = word.text.trim()
            // Test the trimmed text: whitespace-only results must not be stored.
            if(text.isEmpty()) continue
            val boundingBox = word.boundingBox
            val confidence = word.confidence
            println("OCR: $text, $boundingBox")
            result.list.add(OCRWord().apply{
                index = nextIndex++
                char = text
                x = boundingBox.x
                y = boundingBox.y
                width = boundingBox.width
                height = boundingBox.height
                accuracy = confidence
            })
        }
        return result
    }

    companion object{
        /** Public factory: `Tess(path)` / `Tess(path, lang)`; [lang] defaults to `"eng+kor"`. */
        operator fun invoke(path:String, lang:String = "eng+kor"):Tess = Tess(path, lang)

        /**
         * Prepares an image for OCR: converts it to 8-bit grayscale, applies a 3x3
         * sharpening convolution and binarises the result with [otsuThreshold].
         *
         * @param originalImage source image; it is not modified.
         * @return a new `TYPE_BYTE_BINARY` image with the same dimensions.
         */
        fun preprocessImage(originalImage:BufferedImage):BufferedImage{
            val grayscale = BufferedImage(originalImage.width, originalImage.height, BufferedImage.TYPE_BYTE_GRAY)
            val g = grayscale.createGraphics()
            try{
                g.drawImage(originalImage, 0, 0, null)
            }finally{
                // Release the graphics context even if drawing fails.
                g.dispose()
            }
            val sharpenKernel = floatArrayOf(
                0f, -1f, 0f,
                -1f, 5f, -1f,
                0f, -1f, 0f
            )
            val convolveOp = ConvolveOp(Kernel(3, 3, sharpenKernel), ConvolveOp.EDGE_NO_OP, null)
            return otsuThreshold(convolveOp.filter(grayscale, null))
        }

        /**
         * Binarises [image] with a threshold chosen by maximising the between-class
         * variance of its band-0 histogram (Otsu's method).
         *
         * Pixels whose sample is strictly greater than the threshold are written as
         * 255, all others as 0.
         *
         * @param image a `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY` image; it is not modified.
         * @return a new `TYPE_BYTE_BINARY` image with the same dimensions.
         * @throws IllegalArgumentException if [image] has any other type.
         */
        fun otsuThreshold(image:BufferedImage): BufferedImage {
            require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
                "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
            }
            val width = image.width
            val height = image.height
            val raster = image.raster

            val histogram = IntArray(256)
            for (y in 0 until height) {
                for (x in 0 until width) {
                    histogram[raster.getSample(x, y, 0)]++
                }
            }

            val threshold = selectOtsuLevel(histogram, width * height)

            val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
            val binaryRaster = binary.raster
            for (y in 0 until height) {
                for (x in 0 until width) {
                    // 255 is written into the binary raster as in the original code.
                    // NOTE(review): presumably the 1-bit sample model keeps only the low bit — confirm.
                    val newVal = if (raster.getSample(x, y, 0) > threshold) 255 else 0
                    binaryRaster.setSample(x, y, 0, newVal)
                }
            }
            return binary
        }

        /**
         * Returns the level `t` that maximises the between-class variance when the
         * histogram is split into `[0..t]` (background) and `(t..255]` (foreground).
         * Returns 0 when no split has a positive variance (e.g. a uniform image).
         */
        private fun selectOtsuLevel(histogram:IntArray, totalPixels:Int):Int{
            // Products are formed in Double: level * count overflows Int once a
            // level holds more than ~8.4M pixels (e.g. a mostly white scanned page).
            var sumAll = 0.0
            for (level in histogram.indices) {
                sumAll += level.toDouble() * histogram[level]
            }

            var sumBackground = 0.0
            var wBackground = 0
            var maxVariance = 0.0
            var threshold = 0
            for (t in histogram.indices) {
                wBackground += histogram[t]
                if (wBackground == 0) continue          // nothing in the background class yet
                val wForeground = totalPixels - wBackground
                if (wForeground == 0) break             // every pixel is background from here on
                sumBackground += t.toDouble() * histogram[t]
                val meanBackground = sumBackground / wBackground
                val meanForeground = (sumAll - sumBackground) / wForeground
                val meanGap = meanBackground - meanForeground
                val betweenVariance = wBackground.toDouble() * wForeground.toDouble() * meanGap * meanGap
                if (betweenVariance > maxVariance) {
                    maxVariance = betweenVariance
                    threshold = t
                }
            }
            return threshold
        }
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment