io.quarkus.tika.runtime.TikaConfiguration Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of quarkus-tika Show documentation
Show all versions of quarkus-tika Show documentation
Extract data from your documents with Apache Tika
package io.quarkus.tika.runtime;
import java.util.Map;
import java.util.Optional;
import io.quarkus.runtime.annotations.ConfigItem;
import io.quarkus.runtime.annotations.ConfigPhase;
import io.quarkus.runtime.annotations.ConfigRoot;
/**
* Tika parser configuration
*/
@ConfigRoot(phase = ConfigPhase.BUILD_AND_RUN_TIME_FIXED)
public class TikaConfiguration {
/**
* The resource path within the application artifact to the {@code tika-config.xml} file.
*/
@ConfigItem
public Optional tikaConfigPath;
/**
* Comma separated list of the parsers which must be supported.
*
* Most of the document formats recognized by Apache Tika are supported by default but it affects
* the application memory and native executable sizes. One can list only the required parsers in
* {@code tika-config.xml} to minimize a number of parsers loaded into the memory, but using this
* property is recommended to achieve both optimizations.
*
* Either the abbreviated or full parser class names can be used.
* Only PDF and OpenDocument format parsers can be listed using the reserved 'pdf' and 'odf' abbreviations.
* Custom class name abbreviations have to be used for all other parsers.
* For example:
*
*
* // Only PDF parser is required:
* quarkus.tika.parsers = pdf
* // Only PDF and OpenDocument parsers are required:
* quarkus.tika.parsers = pdf,odf
*
*
* This property will have no effect if the `tikaConfigPath' property has been set.
*/
@ConfigItem
public Optional parsers;
/**
* Configuration of the individual parsers.
* For example:
*
*
* quarkus.tika.parsers = pdf,odf
* quarkus.tika.parser-options.pdf.sort-by-position = true
*/
@ConfigItem
public Map> parserOptions;
/**
* Full parser class name for a given parser abbreviation.
* For example:
*
*
* quarkus.tika.parsers = classparser
* quarkus.tika.parser.classparser = org.apache.tika.parser.asm.ClassParser
*/
@ConfigItem
public Map parser;
/**
* Controls how the content of the embedded documents is parsed.
* By default it is appended to the master document content.
* Setting this property to false makes the content of each of the embedded documents
* available separately.
*/
@ConfigItem(defaultValue = "true")
public boolean appendEmbeddedContent;
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy