All downloads are free. Search and download functionality uses the official Maven repository.

net.dankito.text.extraction.pdf.pdfimagesImagesFromPdfExtractor.kt Maven / Gradle / Ivy

Go to download

A framework for extracting text from different types of files, e.g. PDFs, images, office documents, text files, ...

There is a newer version: 0.6.0
Show newest version
package net.dankito.text.extraction.pdf

import net.dankito.text.extraction.model.ExtractedImages
import net.dankito.utils.process.*
import java.io.File


/**
 * [IImagesFromPdfExtractor] that extracts the images of a PDF by running the `pdfimages`
 * command line program.
 *
 * The images are written as TIFF files into a newly created temporary directory. That directory
 * and the files found in it are registered for deletion on JVM exit.
 *
 * @property commandExecutor executes the assembled `pdfimages` command; it is also handed to
 * [commandlineProgram].
 */
open class pdfimagesImagesFromPdfExtractor(
    protected val commandExecutor: ICommandExecutor = CommandExecutor()
) : IImagesFromPdfExtractor {

    /** Handle for the `pdfimages` program; supplies its availability and executable path. */
    protected val commandlineProgram = CommandlineProgram("pdfimages", commandExecutor)

    /** Delegates to [commandlineProgram]'s `isAvailable`. */
    override val isAvailable: Boolean
        get() = commandlineProgram.isAvailable


    /**
     * Runs `pdfimages` on [pdfFile] and returns the image files it produced.
     *
     * @param pdfFile the PDF to extract images from.
     * @return the extracted image files, or an empty list plus an error if the command failed.
     */
    override fun extractImages(pdfFile: File): ExtractedImages {
        val tmpDir = createTempImagesDestinationDirectory(pdfFile)

        val config = createCommandConfig(pdfFile, tmpDir)

        val result = commandExecutor.executeCommand(config)

        return mapResult(result, tmpDir)
    }

    /**
     * Suspending variant of [extractImages]; differs only in calling
     * `executeCommandSuspendable` on the command executor.
     *
     * @param pdfFile the PDF to extract images from.
     * @return the extracted image files, or an empty list plus an error if the command failed.
     */
    override suspend fun extractImagesSuspendable(pdfFile: File): ExtractedImages {
        val tmpDir = createTempImagesDestinationDirectory(pdfFile)

        val config = createCommandConfig(pdfFile, tmpDir)

        val result = commandExecutor.executeCommandSuspendable(config)

        return mapResult(result, tmpDir)
    }


    /**
     * Creates the temporary directory `pdfimages` writes its output to and registers it for
     * deletion on JVM exit.
     *
     * @param pdfFile the PDF being processed; its name without extension becomes part of the
     * directory name prefix.
     * @return the newly created directory.
     */
    protected open fun createTempImagesDestinationDirectory(pdfFile: File): File {
        // Files.createTempDirectory replaces the deprecated kotlin.io.createTempDir, which creates
        // the directory with overly wide permissions. Fully qualified so no new import is needed.
        val tmpDir = java.nio.file.Files
            .createTempDirectory("ExtractImagesFrom${pdfFile.nameWithoutExtension}")
            .toFile()
        tmpDir.deleteOnExit()

        return tmpDir
    }

    /**
     * Assembles the `pdfimages` command line for extracting the images of [pdfFile] into [tmpDir].
     *
     * @param pdfFile the PDF to read.
     * @param tmpDir the directory the image files are written to.
     * @return the command configuration to pass to the command executor.
     */
    protected open fun createCommandConfig(pdfFile: File, tmpDir: File): CommandConfig {
        val commandArgs = listOf(
            commandlineProgram.programExecutablePath,
            "-p", // add page number to file name
            "-tiff", // change the default output format to TIFF
            pdfFile.absolutePath,
            // last argument: tmpDir path joined with the PDF's name without extension
            File(tmpDir, pdfFile.nameWithoutExtension).absolutePath
        )

        return CommandConfig(commandArgs)
    }

    /**
     * Converts the command result and the content of [tmpDir] into an [ExtractedImages].
     *
     * All files found in [tmpDir] are registered for deletion on JVM exit, whether or not the
     * command succeeded.
     *
     * @param result the result of the executed `pdfimages` command.
     * @param tmpDir the directory the images were written to.
     * @return the files of [tmpDir] ordered by last-modified time (ties broken by file name), or
     * an empty list together with an [Exception] wrapping `result.errors` if the command failed.
     */
    protected open fun mapResult(result: ExecuteCommandResult, tmpDir: File): ExtractedImages {
        // listFiles() returns null when tmpDir cannot be listed (I/O error or not a directory);
        // treat that as "no files" instead of failing with a NullPointerException.
        // The file name tiebreaker keeps the order deterministic for equal timestamps.
        val extractedImages = tmpDir.listFiles().orEmpty()
            .sortedWith(compareBy<File>({ it.lastModified() }, { it.name }))

        extractedImages.forEach { it.deleteOnExit() }

        if (result.successful == false) {
            return ExtractedImages(listOf(), Exception(result.errors))
        }

        return ExtractedImages(extractedImages)
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy