All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.hildan.ocr.reference.UniqueImageStore.kt Maven / Gradle / Ivy

package org.hildan.ocr.reference

import org.hildan.ocr.toByteArray
import java.awt.image.BufferedImage
import java.nio.file.Path
import java.security.MessageDigest
import java.util.*
import javax.imageio.ImageIO
import kotlin.collections.HashMap
import kotlin.io.path.createDirectories
import kotlin.io.path.listDirectoryEntries
import kotlin.io.path.readBytes
import kotlin.io.path.writeBytes

/**
 * Stores images in the given [imagesDir], detecting and preventing duplicates.
 *
 * The [imageFormat] is used to write [BufferedImage]s to files, and for the file extension of the images.
 * It can be any format in [ImageIO.getWriterFormatNames] (at least "png", "jpeg", "gif", "bmp").
 */
class UniqueImageStore(
    private val imagesDir: Path,
    private val imageFormat: String = "png",
) {
    init {
        imagesDir.createDirectories()
    }

    private val imagesPathByHash = imagesDir.listDirectoryEntries()
        .groupBy { it.hash() }
        .mapValuesTo(HashMap()) { (_, list) -> list.toMutableList() }

    /**
     * Ensures the given [image] is saved in this store and returns its [Path].
     *
     * If an identical image already exists, the existing file is kept and its path is returned.
     * If not, the given image is written as a new file and its path is returned.
     */
    fun saveOrGetPath(image: BufferedImage): Path = saveOrGetPath(image.toByteArray(imageFormat))

    /**
     * Ensures the given image is saved in this store and returns its [Path].
     * The [imageBytes] must be provided in the correct [imageFormat] for this store.
     *
     * If an identical image already exists, the existing file is kept and its path is returned.
     * If not, the given image is written as a new file and its path is returned.
     */
    fun saveOrGetPath(imageBytes: ByteArray): Path {
        val hash = imageBytes.hash()
        return findExistingImage(imageBytes, hash) ?: writeImage(imageBytes, hash)
    }

    private fun findExistingImage(imageBytes: ByteArray, hash: String): Path? {
        val similarImages = imagesPathByHash[hash]
        return similarImages?.firstOrNull { it.readBytes().contentEquals(imageBytes) }
    }

    private fun writeImage(imageBytes: ByteArray, hash: String): Path {
        val path = imagesDir.resolve("${UUID.randomUUID()}.$imageFormat")
        path.writeBytes(imageBytes)
        imagesPathByHash.getOrPut(hash) { mutableListOf() }.add(path)
        return path
    }
}

private fun Path.hash(): String = readBytes().hash()

private fun ByteArray.hash(): String = sha256().encodeBase64()

private fun ByteArray.sha256(): ByteArray = MessageDigest.getInstance("SHA-256").digest(this)

private fun ByteArray.encodeBase64(): String = Base64.getEncoder().encodeToString(this)




© 2015 - 2025 Weber Informatics LLC | Privacy Policy