// org.apache.jackrabbit.oak.plugins.tika.TikaCommandOptions Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.tika;
import java.io.File;
import java.util.Set;
import org.apache.jackrabbit.guava.common.collect.ImmutableSet;
import joptsimple.OptionParser;
import joptsimple.OptionSet;
import joptsimple.OptionSpec;
import org.apache.jackrabbit.oak.run.cli.OptionsBean;
import org.apache.jackrabbit.oak.run.cli.OptionsBeanFactory;
/**
 * Command line options of the {@code tika} command.
 *
 * <p>The constructor registers every supported option on the supplied
 * {@link OptionParser}. Once the command line has been parsed, the resulting
 * {@link OptionSet} must be handed over through {@link #configure(OptionSet)};
 * all value accessors and action checks read from that option set and will
 * fail with a {@link NullPointerException} if it has not been supplied yet.
 *
 * <p>Each action ({@code report}, {@code generate}, {@code populate},
 * {@code extract}) can be requested either as an option ({@code --report})
 * or as a plain non-option argument ({@code report}).
 */
public class TikaCommandOptions implements OptionsBean {
    /** Identifier of this options bean. */
    public static final String NAME = "tika";

    /** Factory creating a new instance bound to a given {@link OptionParser}. */
    public static final OptionsBeanFactory FACTORY = TikaCommandOptions::new;

    // Action names, shared by the option registration, the non-option
    // lookup and the set returned from operationNames().
    private static final String REPORT = "report";
    private static final String GENERATE = "generate";
    private static final String POPULATE = "populate";
    private static final String EXTRACT = "extract";

    private final OptionSpec<String> pathOpt;
    private final OptionSpec<File> dataFileSpecOpt;
    private final OptionSpec<File> tikaConfigSpecOpt;
    private final OptionSpec<File> storeDirSpecOpt;
    private final OptionSpec<File> indexDirSpecOpt;
    private final OptionSpec<Integer> poolSizeOpt;

    private final OptionSpec<Void> reportAction;
    private final OptionSpec<Void> generateAction;
    private final OptionSpec<Void> populateAction;
    private final OptionSpec<Void> extractAction;

    private final Set<String> operationNames;

    /** Parsed command line; {@code null} until {@link #configure(OptionSet)} is called. */
    private OptionSet options;

    /**
     * Registers all options of the tika command on the given parser.
     *
     * @param parser parser on which the options are declared
     */
    public TikaCommandOptions(OptionParser parser) {
        pathOpt = parser
                .accepts("path", "Path in repository under which the binaries would be searched")
                .withRequiredArg()
                .ofType(String.class)
                .defaultsTo("/");
        dataFileSpecOpt = parser
                .accepts("data-file", "Data file in csv format containing the binary metadata")
                .withRequiredArg()
                .ofType(File.class)
                .defaultsTo(new File("oak-binary-stats.csv"));
        tikaConfigSpecOpt = parser
                .accepts("tika-config", "Tika config file path")
                .withRequiredArg()
                .ofType(File.class);
        storeDirSpecOpt = parser
                .accepts("store-path", "Path of directory used to store extracted text content")
                .withRequiredArg()
                .ofType(File.class);
        indexDirSpecOpt = parser
                .accepts("index-dir", "Path of directory which stores lucene index containing extracted data")
                .withRequiredArg()
                .ofType(File.class);
        poolSizeOpt = parser
                .accepts("pool-size", "Size of the thread pool used to perform text extraction. Defaults " +
                        "to number of cores on the system")
                .withRequiredArg()
                .ofType(Integer.class);

        reportAction = parser.accepts(REPORT, "Generates a summary report based on the csv file");
        generateAction = parser.accepts(GENERATE, "Generates the CSV file required for 'extract' and 'report' actions");
        populateAction = parser.accepts(POPULATE, "Populates extraction store based on supplied indexed data and csv file");
        extractAction = parser.accepts(EXTRACT, "Performs the text extraction based on the csv file");

        operationNames = ImmutableSet.of(REPORT, GENERATE, POPULATE, EXTRACT);
    }

    /**
     * Stores the parsed command line that all accessors of this bean read from.
     *
     * @param options parsed option set
     */
    @Override
    public void configure(OptionSet options) {
        this.options = options;
    }

    /** @return an empty title */
    @Override
    public String title() {
        return "";
    }

    /** @return help text describing the command and its actions */
    @Override
    public String description() {
        return "The tika command supports following operations. All operations connect to repository in read only mode. \n" +
                "Use of one of the supported actions like --report, --generate, --populate, --extract etc. ";
    }

    /** @return the fixed order value {@code 50} */
    @Override
    public int order() {
        return 50;
    }

    /** @return immutable set with the names of the four supported actions */
    @Override
    public Set<String> operationNames() {
        return operationNames;
    }

    /** @return value of {@code --path}; {@code "/"} when not given */
    public String getPath() {
        return pathOpt.value(options);
    }

    /** @return value of {@code --data-file}; {@code oak-binary-stats.csv} when not given */
    public File getDataFile() {
        return dataFileSpecOpt.value(options);
    }

    /** @return value of {@code --tika-config}, or {@code null} when not given (no default registered) */
    public File getTikaConfig() {
        return tikaConfigSpecOpt.value(options);
    }

    /** @return value of {@code --store-path}, or {@code null} when not given (no default registered) */
    public File getStoreDir() {
        return storeDirSpecOpt.value(options);
    }

    /** @return value of {@code --index-dir}, or {@code null} when not given (no default registered) */
    public File getIndexDir() {
        return indexDirSpecOpt.value(options);
    }

    /** @return {@code true} if {@code --pool-size} was present on the command line */
    public boolean isPoolSizeDefined() {
        return options.has(poolSizeOpt);
    }

    /**
     * Returns the value of {@code --pool-size}.
     *
     * <p>No default is registered for this option, so callers should check
     * {@link #isPoolSizeDefined()} first: unboxing the absent value throws a
     * {@link NullPointerException}.
     *
     * @return configured thread pool size
     */
    public int getPoolSize() {
        return poolSizeOpt.value(options);
    }

    /** @return {@code true} if the report action was requested */
    public boolean report() {
        // The non-option mode is kept for compatibility with previous versions
        return options.has(reportAction) || hasNonOption(REPORT);
    }

    /** @return {@code true} if the generate action was requested */
    public boolean generate() {
        return options.has(generateAction) || hasNonOption(GENERATE);
    }

    /** @return {@code true} if the populate action was requested */
    public boolean populate() {
        return options.has(populateAction) || hasNonOption(POPULATE);
    }

    /** @return {@code true} if the extract action was requested */
    public boolean extract() {
        return options.has(extractAction) || hasNonOption(EXTRACT);
    }

    /** @return the option spec backing {@code --data-file} */
    public OptionSpec<File> getDataFileSpecOpt() {
        return dataFileSpecOpt;
    }

    /** @return the option spec backing {@code --index-dir} */
    public OptionSpec<File> getIndexDirSpecOpt() {
        return indexDirSpecOpt;
    }

    /** @return the option spec backing {@code --store-path} */
    public OptionSpec<File> getStoreDirSpecOpt() {
        return storeDirSpecOpt;
    }

    /** Checks whether {@code name} was passed as a plain (non-option) argument. */
    private boolean hasNonOption(String name) {
        return options.nonOptionArguments().contains(name);
    }
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy