// org.apache.tika.batch.fs.FSBatchProcessCLI (the newest version; available via Maven / Gradle / Ivy)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.batch.fs;
import java.io.IOException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MarkerFactory;
import org.apache.tika.batch.BatchProcess;
import org.apache.tika.batch.BatchProcessDriverCLI;
import org.apache.tika.batch.ParallelFileProcessingResult;
import org.apache.tika.batch.builders.BatchProcessBuilder;
import org.apache.tika.batch.builders.CommandLineParserBuilder;
import org.apache.tika.io.TikaInputStream;
public class FSBatchProcessCLI {
private static final Logger LOG = LoggerFactory.getLogger(FSBatchProcessCLI.class);
public static String FINISHED_STRING = "Main thread in TikaFSBatchCLI has finished processing.";
private final Options options;
public FSBatchProcessCLI(String[] args) throws IOException {
TikaInputStream configIs = null;
try {
configIs = getConfigInputStream(args, true);
CommandLineParserBuilder builder = new CommandLineParserBuilder();
options = builder.build(configIs);
} finally {
IOUtils.closeQuietly(configIs);
}
}
public static void main(String[] args) throws Exception {
try {
FSBatchProcessCLI cli = new FSBatchProcessCLI(args);
cli.execute(args);
} catch (Throwable t) {
t.printStackTrace();
LOG.error(MarkerFactory.getMarker("FATAL"), "Fatal exception from FSBatchProcessCLI: {}", t.getMessage(), t);
System.exit(BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE);
}
}
public void usage() {
HelpFormatter helpFormatter = new HelpFormatter();
helpFormatter.printHelp("tika filesystem batch", options);
}
private TikaInputStream getConfigInputStream(String[] args, boolean logDefault) throws IOException {
TikaInputStream is = null;
Path batchConfigFile = getConfigFile(args);
if (batchConfigFile != null) {
//this will throw IOException if it can't find a specified config file
//better to throw an exception than silently back off to default.
is = TikaInputStream.get(batchConfigFile);
} else {
if (logDefault) {
LOG.info("No config file set via -bc, relying on tika-app-batch-config.xml " + "or default-tika-batch-config.xml");
}
//test to see if there's a tika-app-batch-config.xml on the path
URL config = FSBatchProcessCLI.class.getResource("/tika-app-batch-config.xml");
if (config != null) {
is = TikaInputStream.get(FSBatchProcessCLI.class.getResourceAsStream("/tika-app-batch-config.xml"));
} else {
is = TikaInputStream.get(FSBatchProcessCLI.class.getResourceAsStream("default-tika-batch-config.xml"));
}
}
return is;
}
private void execute(String[] args) throws Exception {
CommandLineParser cliParser = new DefaultParser();
CommandLine line = cliParser.parse(options, args);
if (line.hasOption("help")) {
usage();
System.exit(BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE);
}
Map mapArgs = new HashMap<>();
for (Option option : line.getOptions()) {
String v = option.getValue();
if (v == null || v.equals("")) {
v = "true";
}
mapArgs.put(option.getOpt(), v);
}
BatchProcessBuilder b = new BatchProcessBuilder();
TikaInputStream is = null;
BatchProcess process = null;
try {
is = getConfigInputStream(args, false);
process = b.build(is, mapArgs);
} finally {
IOUtils.closeQuietly(is);
}
final Thread mainThread = Thread.currentThread();
ExecutorService executor = Executors.newFixedThreadPool(1);
Future futureResult = executor.submit(process);
ParallelFileProcessingResult result = futureResult.get();
System.out.println(FINISHED_STRING);
System.out.println("\n");
System.out.println(result.toString());
System.exit(result.getExitStatus());
}
private Path getConfigFile(String[] args) {
Path configFile = null;
for (int i = 0; i < args.length; i++) {
if (args[i].equals("-bc") || args[i].equals("-batch-config")) {
if (i < args.length - 1) {
configFile = Paths.get(args[i + 1]);
}
}
}
return configFile;
}
}