All downloads are FREE. The search and download functionality uses the official Maven repository.

net.dankito.text.extraction.pdf.PdfBoxPdfMetadataExtractor.kt Maven / Gradle / Ivy

package net.dankito.text.extraction.pdf

import net.dankito.text.extraction.model.Metadata
import org.apache.pdfbox.pdmodel.PDDocument
import org.slf4j.LoggerFactory
import java.io.File


/**
 * [IPdfMetadataExtractor] implementation that reads PDF metadata with Apache PDFBox.
 *
 * Both overloads signal failure the same way: the exception is logged and `null` is returned.
 */
open class PdfBoxPdfMetadataExtractor : IPdfMetadataExtractor {

    /**
     * Loads [file] as a PDF document and extracts its metadata.
     *
     * The loaded document is closed again before this method returns.
     *
     * @param file the PDF file to read.
     * @return the extracted [Metadata], or `null` if the file could not be loaded or the
     * extraction failed.
     */
    override fun extractMetadata(file: File): Metadata? =
        try {
            PDDocument.load(file).use { document -> extractMetadata(document, file) }
        } catch (e: Exception) {
            // PDDocument.load() (and closing the document) may throw; treat that exactly like an
            // extraction failure in the overload below instead of letting it escape to the caller.
            log.error("Could not extract metadata of file $file", e)
            null
        }

    /**
     * Extracts title, author, keywords and page count from an already loaded [document].
     *
     * A missing title or author is replaced by an empty string; keywords are passed on as
     * returned by PDFBox. This method does not close [document].
     *
     * @param document the loaded PDF document to read the metadata from.
     * @param file the file [document] was loaded from; only used for the log message.
     * @return the extracted [Metadata], or `null` if reading the document information failed.
     */
    open fun extractMetadata(document: PDDocument, file: File): Metadata? =
        try {
            val info = document.documentInformation

            val title = info.title ?: ""
            val author = info.author ?: ""
            val keywords = info.keywords

            Metadata(title, author, document.numberOfPages, keywords = keywords)
        } catch (e: Exception) {
            log.error("Could not extract metadata of file $file", e)
            null
        }


    companion object {
        private val log = LoggerFactory.getLogger(PdfBoxPdfMetadataExtractor::class.java)
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy