hikaMaeng · July 9, 2025 01:53
diff --git a/Tess.kt b/Tess.kt
 package kore.ocr

 import kore.vo.VO
 import kore.vo.field.value.float
 import kore.vo.field.value.int
 import kore.vo.field.value.string
 import kore.vo.field.voList
 import net.sourceforge.tess4j.Tesseract
 import java.awt.image.BufferedImage
 import java.awt.image.ConvolveOp
 import java.awt.image.Kernel

 class Tess internal constructor(path:String, lang:String) {
    /**
     * Value object returned by `extractCharacters`: a list of [OCRWord] entries plus two
     * integer fields, `width` and `height`, that default to 0.
     */
    class OCRList:VO(){
        // NOTE(review): presumably the dimensions of the recognised image; extractCharacters
        // does not assign them, so they stay 0 unless a caller sets them — confirm intended use.
        var width by int(0)
        var height by int(0)
        // Recognised entries; the default block yields an empty ArrayList.
        val list by voList(::OCRWord){
            default{arrayListOf()}
        }
    }
    /**
     * One recognised text element together with its bounding box, as filled in by
     * `extractCharacters`.
     */
    class OCRWord:VO(){
        // Zero-based position among the entries added by one extractCharacters call.
        var index by int
        // Recognised text with surrounding whitespace trimmed.
        // NOTE(review): presumably longer than one character for levels coarser than SYMBOL.
        var char by string
        // Bounding box copied from the Tess4J result's boundingBox (x, y, width, height).
        var x by int
        var y by int
        var width by int
        var height by int
        // Not written by extractCharacters; defaults to "". NOTE(review): purpose not visible here.
        var data by string("")
        // Confidence value reported by Tess4J for this element; defaults to 0f.
        var accuracy by float(0f)
    }
    /**
     * Granularity of the elements returned by `extractCharacters`.
     *
     * The constant's `ordinal` is passed straight to `Tesseract.getWords` as the level
     * argument, so the declaration order must not change.
     * NOTE(review): the order presumably mirrors Tess4J's `TessPageIteratorLevel`
     * (RIL_BLOCK = 0 … RIL_SYMBOL = 4) — confirm against the Tess4J version in use.
     */
    enum class OCRLevel{
        BLOCK, PARA, TEXTLINE, WORD, SYMBOL
    }
    companion object{
        /**
         * Companion `invoke`, so `Tess(path, lang)` works as a factory call; the primary
         * constructor itself is `internal`.
         *
         * @param path forwarded to the constructor, which uses it as the Tesseract data path
         * @param lang Tesseract language specification; defaults to `"eng+kor"`
         * @return a new [Tess] instance
         */
        operator fun invoke(path: String, lang: String = "eng+kor"): Tess {
            return Tess(path, lang)
        }
        /**
         * Prepares an image for OCR: converts it to 8-bit grayscale, sharpens it with a 3x3
         * kernel, then binarises the result with [otsuThreshold].
         *
         * @param originalImage image to preprocess; it is only read, never modified
         * @return a new `TYPE_BYTE_BINARY` image with the same dimensions as [originalImage]
         */
        fun preprocessImage(originalImage: BufferedImage): BufferedImage {
            // Drawing onto a TYPE_BYTE_GRAY canvas performs the colour-to-gray conversion.
            val gray = BufferedImage(originalImage.width, originalImage.height, BufferedImage.TYPE_BYTE_GRAY)
            gray.createGraphics().also { canvas ->
                canvas.drawImage(originalImage, 0, 0, null)
                canvas.dispose()
            }

            // Sharpening weights: centre 5, the four direct neighbours -1, corners 0.
            val weights = floatArrayOf(
                0f, -1f, 0f,
                -1f, 5f, -1f,
                0f, -1f, 0f
            )
            // EDGE_NO_OP copies border pixels through unchanged; passing null as the
            // destination lets filter() allocate the output image.
            val sharpen = ConvolveOp(Kernel(3, 3, weights), ConvolveOp.EDGE_NO_OP, null)
            return otsuThreshold(sharpen.filter(gray, null))
        }
        /**
         * Binarises a grayscale image with Otsu's method: picks the threshold that maximises
         * the between-class variance of the band-0 sample histogram, then maps every sample
         * above the threshold to white and every other sample to black.
         *
         * @param image source image; must be `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY`. Not modified.
         * @return a new `TYPE_BYTE_BINARY` image of the same width and height
         * @throws IllegalArgumentException if [image] has any other type
         */
        fun otsuThreshold(image: BufferedImage): BufferedImage {
            require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
                "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
            }
            val width = image.width
            val height = image.height
            val raster = image.raster

            // Histogram of band-0 samples, read one row at a time.
            val histogram = IntArray(256)
            val row = IntArray(width)
            for (y in 0 until height) {
                raster.getSamples(0, y, width, 1, 0, row)
                for (sample in row) histogram[sample]++
            }

            val totalPixels = width * height
            // Multiply in Double: `level * count` as Int wraps once a single level holds more
            // than about 8.4 million pixels (255 * count > Int.MAX_VALUE), e.g. a mostly
            // white high-resolution scan.
            var sumAll = 0.0
            for (level in 0..255) {
                sumAll += level.toDouble() * histogram[level]
            }

            var sumBackground = 0.0
            var wBackground = 0
            var maxVariance = 0.0
            var threshold = 0
            for (t in 0..255) {
                wBackground += histogram[t]
                if (wBackground == 0) continue // no samples at or below t yet
                val wForeground = totalPixels - wBackground
                if (wForeground == 0) break // every sample is at or below t

                sumBackground += t.toDouble() * histogram[t]
                val meanBackground = sumBackground / wBackground
                val meanForeground = (sumAll - sumBackground) / wForeground
                val betweenVariance = wBackground.toDouble() * wForeground.toDouble() *
                    (meanBackground - meanForeground) * (meanBackground - meanForeground)
                // Strict '>' keeps the lowest threshold when several give the same variance.
                if (betweenVariance > maxVariance) {
                    maxVariance = betweenVariance
                    threshold = t
                }
            }

            // The default TYPE_BYTE_BINARY image is 1 bit per pixel: sample 0 is black, 1 is white.
            val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
            val binaryRaster = binary.raster
            for (y in 0 until height) {
                raster.getSamples(0, y, width, 1, 0, row)
                for (x in 0 until width) {
                    row[x] = if (row[x] > threshold) 1 else 0
                }
                binaryRaster.setSamples(0, y, width, 1, 0, row)
            }
            return binary
        }
    }


    // Tess4J engine used by extractCharacters, configured once with the constructor's
    // data path and language.
    private val tess: Tesseract = Tesseract().also { engine ->
        engine.setDatapath(path)
        engine.setLanguage(lang)
    }
    /**
     * Runs OCR on [image] and returns every non-blank result together with its bounding box.
     *
     * @param image image to recognise; it is handed to Tess4J as is (no preprocessing here)
     * @param level granularity of the returned entries; its ordinal is passed to
     *   `Tesseract.getWords` as the level argument. Defaults to [OCRLevel.SYMBOL].
     * @return an [OCRList] whose `list` holds one [OCRWord] per result whose trimmed text is
     *   not empty, indexed from 0 in the order Tess4J returned them. The list's `width` and
     *   `height` are left at their defaults.
     */
    fun extractCharacters(image: BufferedImage, level: OCRLevel = OCRLevel.SYMBOL): OCRList {
        val result = OCRList()
        var nextIndex = 0
        for (word in tess.getWords(image, level.ordinal)) {
            // Test the trimmed text: a whitespace-only result would otherwise be stored as "".
            val text = word.text.trim()
            if (text.isEmpty()) continue
            val box = word.boundingBox
            result.list.add(
                OCRWord().apply {
                    index = nextIndex++
                    char = text
                    x = box.x
                    y = box.y
                    width = box.width
                    height = box.height
                    accuracy = word.confidence
                }
            )
        }
        return result
    }
 }
	package kore.ocr

	import kore.vo.VO
	import kore.vo.field.value.float
	import kore.vo.field.value.int
	import kore.vo.field.value.string
	import kore.vo.field.voList
	import net.sourceforge.tess4j.Tesseract
	import java.awt.image.BufferedImage
	import java.awt.image.ConvolveOp
	import java.awt.image.Kernel

	class Tess internal constructor(path:String, lang:String) {
	/**
	 * Value object returned by `extractCharacters`: a list of [OCRWord] entries plus two
	 * integer fields, `width` and `height`, that default to 0.
	 */
	class OCRList:VO(){
	// NOTE(review): presumably the dimensions of the recognised image; extractCharacters
	// does not assign them, so they stay 0 unless a caller sets them — confirm intended use.
	var width by int(0)
	var height by int(0)
	// Recognised entries; the default block yields an empty ArrayList.
	val list by voList(::OCRWord){
	default{arrayListOf()}
	}
	}
	/**
	 * One recognised text element together with its bounding box, as filled in by
	 * `extractCharacters`.
	 */
	class OCRWord:VO(){
	// Zero-based position among the entries added by one extractCharacters call.
	var index by int
	// Recognised text with surrounding whitespace trimmed.
	// NOTE(review): presumably longer than one character for levels coarser than SYMBOL.
	var char by string
	// Bounding box copied from the Tess4J result's boundingBox (x, y, width, height).
	var x by int
	var y by int
	var width by int
	var height by int
	// Not written by extractCharacters; defaults to "". NOTE(review): purpose not visible here.
	var data by string("")
	// Confidence value reported by Tess4J for this element; defaults to 0f.
	var accuracy by float(0f)
	}
	/**
	 * Granularity of the elements returned by `extractCharacters`.
	 *
	 * The constant's `ordinal` is passed straight to `Tesseract.getWords` as the level
	 * argument, so the declaration order must not change.
	 * NOTE(review): the order presumably mirrors Tess4J's `TessPageIteratorLevel`
	 * (RIL_BLOCK = 0 … RIL_SYMBOL = 4) — confirm against the Tess4J version in use.
	 */
	enum class OCRLevel{
	BLOCK, PARA, TEXTLINE, WORD, SYMBOL
	}
	companion object{
	/**
	 * Companion `invoke`, so `Tess(path, lang)` works as a factory call; the primary
	 * constructor itself is `internal`.
	 *
	 * @param path forwarded to the constructor, which uses it as the Tesseract data path
	 * @param lang Tesseract language specification; defaults to `"eng+kor"`
	 * @return a new [Tess] instance
	 */
	operator fun invoke(path: String, lang: String = "eng+kor"): Tess {
	    return Tess(path, lang)
	}
	/**
	 * Prepares an image for OCR: converts it to 8-bit grayscale, sharpens it with a 3x3
	 * kernel, then binarises the result with [otsuThreshold].
	 *
	 * @param originalImage image to preprocess; it is only read, never modified
	 * @return a new `TYPE_BYTE_BINARY` image with the same dimensions as [originalImage]
	 */
	fun preprocessImage(originalImage: BufferedImage): BufferedImage {
	    // Drawing onto a TYPE_BYTE_GRAY canvas performs the colour-to-gray conversion.
	    val gray = BufferedImage(originalImage.width, originalImage.height, BufferedImage.TYPE_BYTE_GRAY)
	    gray.createGraphics().also { canvas ->
	        canvas.drawImage(originalImage, 0, 0, null)
	        canvas.dispose()
	    }

	    // Sharpening weights: centre 5, the four direct neighbours -1, corners 0.
	    val weights = floatArrayOf(
	        0f, -1f, 0f,
	        -1f, 5f, -1f,
	        0f, -1f, 0f
	    )
	    // EDGE_NO_OP copies border pixels through unchanged; passing null as the
	    // destination lets filter() allocate the output image.
	    val sharpen = ConvolveOp(Kernel(3, 3, weights), ConvolveOp.EDGE_NO_OP, null)
	    return otsuThreshold(sharpen.filter(gray, null))
	}
	/**
	 * Binarises a grayscale image with Otsu's method: picks the threshold that maximises
	 * the between-class variance of the band-0 sample histogram, then maps every sample
	 * above the threshold to white and every other sample to black.
	 *
	 * @param image source image; must be `TYPE_BYTE_GRAY` or `TYPE_BYTE_BINARY`. Not modified.
	 * @return a new `TYPE_BYTE_BINARY` image of the same width and height
	 * @throws IllegalArgumentException if [image] has any other type
	 */
	fun otsuThreshold(image: BufferedImage): BufferedImage {
	    require(image.type == BufferedImage.TYPE_BYTE_GRAY || image.type == BufferedImage.TYPE_BYTE_BINARY) {
	        "이미지는 TYPE_BYTE_GRAY 또는 TYPE_BYTE_BINARY 형식이어야 합니다."
	    }
	    val width = image.width
	    val height = image.height
	    val raster = image.raster

	    // Histogram of band-0 samples, read one row at a time.
	    val histogram = IntArray(256)
	    val row = IntArray(width)
	    for (y in 0 until height) {
	        raster.getSamples(0, y, width, 1, 0, row)
	        for (sample in row) histogram[sample]++
	    }

	    val totalPixels = width * height
	    // Multiply in Double: `level * count` as Int wraps once a single level holds more
	    // than about 8.4 million pixels (255 * count > Int.MAX_VALUE), e.g. a mostly
	    // white high-resolution scan.
	    var sumAll = 0.0
	    for (level in 0..255) {
	        sumAll += level.toDouble() * histogram[level]
	    }

	    var sumBackground = 0.0
	    var wBackground = 0
	    var maxVariance = 0.0
	    var threshold = 0
	    for (t in 0..255) {
	        wBackground += histogram[t]
	        if (wBackground == 0) continue // no samples at or below t yet
	        val wForeground = totalPixels - wBackground
	        if (wForeground == 0) break // every sample is at or below t

	        sumBackground += t.toDouble() * histogram[t]
	        val meanBackground = sumBackground / wBackground
	        val meanForeground = (sumAll - sumBackground) / wForeground
	        val betweenVariance = wBackground.toDouble() * wForeground.toDouble() *
	            (meanBackground - meanForeground) * (meanBackground - meanForeground)
	        // Strict '>' keeps the lowest threshold when several give the same variance.
	        if (betweenVariance > maxVariance) {
	            maxVariance = betweenVariance
	            threshold = t
	        }
	    }

	    // The default TYPE_BYTE_BINARY image is 1 bit per pixel: sample 0 is black, 1 is white.
	    val binary = BufferedImage(width, height, BufferedImage.TYPE_BYTE_BINARY)
	    val binaryRaster = binary.raster
	    for (y in 0 until height) {
	        raster.getSamples(0, y, width, 1, 0, row)
	        for (x in 0 until width) {
	            row[x] = if (row[x] > threshold) 1 else 0
	        }
	        binaryRaster.setSamples(0, y, width, 1, 0, row)
	    }
	    return binary
	}
	}


	// Tess4J engine used by extractCharacters, configured once with the constructor's
	// data path and language.
	private val tess: Tesseract = Tesseract().also { engine ->
	    engine.setDatapath(path)
	    engine.setLanguage(lang)
	}
	/**
	 * Runs OCR on [image] and returns every non-blank result together with its bounding box.
	 *
	 * @param image image to recognise; it is handed to Tess4J as is (no preprocessing here)
	 * @param level granularity of the returned entries; its ordinal is passed to
	 *   `Tesseract.getWords` as the level argument. Defaults to [OCRLevel.SYMBOL].
	 * @return an [OCRList] whose `list` holds one [OCRWord] per result whose trimmed text is
	 *   not empty, indexed from 0 in the order Tess4J returned them. The list's `width` and
	 *   `height` are left at their defaults.
	 */
	fun extractCharacters(image: BufferedImage, level: OCRLevel = OCRLevel.SYMBOL): OCRList {
	    val result = OCRList()
	    var nextIndex = 0
	    for (word in tess.getWords(image, level.ordinal)) {
	        // Test the trimmed text: a whitespace-only result would otherwise be stored as "".
	        val text = word.text.trim()
	        if (text.isEmpty()) continue
	        val box = word.boundingBox
	        result.list.add(
	            OCRWord().apply {
	                index = nextIndex++
	                char = text
	                x = box.x
	                y = box.y
	                width = box.width
	                height = box.height
	                accuracy = word.confidence
	            }
	        )
	    }
	    return result
	}
	}