All downloads are free. Search and download functionality uses the official Maven repository.

tri.util.poi.WordDocUtils.kt Maven / Gradle / Ivy

/*-
 * #%L
 * tri.promptfx:promptkt
 * %%
 * Copyright (C) 2023 - 2025 Johns Hopkins University Applied Physics Laboratory
 * %%
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * 
 *      http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 * #L%
 */
package tri.util.poi

import org.apache.poi.hwpf.extractor.WordExtractor
import org.apache.poi.xwpf.extractor.XWPFWordExtractor
import org.apache.poi.xwpf.usermodel.XWPFDocument
import java.io.File

/**
 * Utilities for extracting text and metadata from Word documents.
 *
 * Every function opens the given [File] itself and wraps the resulting file stream in its own
 * `use` block, so the file handle is released before the function returns — including when the
 * POI extractor/document constructor throws (for example on a corrupt or non-Word file).
 */
object WordDocUtils {

    /**
     * Read text from a DOC file.
     *
     * @param file the DOC file to read
     * @return the text reported by [WordExtractor] for the document
     */
    fun readDoc(file: File) = file.inputStream().use { stream ->
        WordExtractor(stream).use { it.text }
    }

    /**
     * Read metadata from a DOC file.
     *
     * The values are taken from the extractor's summary information and returned under
     * `doc.`-prefixed keys.
     *
     * @param file the DOC file to read
     * @return a map from metadata key to the value read from the summary information
     */
    fun readDocMetadata(file: File) = file.inputStream().use { stream ->
        WordExtractor(stream).use { extractor ->
            // NOTE(review): summaryInformation comes from Java and is dereferenced without a null
            // check (as before); confirm whether it can be absent for documents without a summary.
            val summary = extractor.summaryInformation
            mapOf(
                "doc.title" to summary.title,
                "doc.author" to summary.author,
                "doc.subject" to summary.subject,
                "doc.keywords" to summary.keywords,
                "doc.comments" to summary.comments,
                "doc.template" to summary.template,
                "doc.lastAuthor" to summary.lastAuthor,
                "doc.revNumber" to summary.revNumber,
                "doc.createTime" to summary.createDateTime,
                "doc.editTime" to summary.editTime
            )
        }
    }

    /**
     * Read text from a DOCX file.
     *
     * @param file the DOCX file to read
     * @return the text reported by [XWPFWordExtractor] for the document
     */
    fun readDocx(file: File) = file.inputStream().use { stream ->
        // The extractor is closed by the inner `use`; the outer `use` closes the file stream,
        // which is never closed explicitly otherwise.
        XWPFWordExtractor(XWPFDocument(stream)).use { it.text }
    }

    /**
     * Read metadata from a DOCX file.
     *
     * The values are taken from the document's core properties and returned under
     * `docx.`-prefixed keys.
     *
     * @param file the DOCX file to read
     * @return a map from metadata key to the value read from the core properties
     */
    fun readDocxMetadata(file: File) = file.inputStream().use { stream ->
        XWPFDocument(stream).use { document ->
            val core = document.properties.coreProperties
            mapOf(
                "docx.title" to core.title,
                "docx.author" to core.creator,
                "docx.subject" to core.subject,
                "docx.category" to core.category,
                "docx.keywords" to core.keywords,
                "docx.description" to core.description,
                "docx.created" to core.created,
                "docx.modified" to core.modified,
                "docx.modifiedBy" to core.lastModifiedByUser,
                "docx.contentStatus" to core.contentStatus,
                "docx.contentType" to core.contentType,
                "docx.version" to core.version,
                "docx.revision" to core.revision,
            )
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy