All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.batch.fs.FSBatchProcessCLI Maven / Gradle / Ivy

There is a newer version: 3.0.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.batch.fs;

import java.io.IOException;
import java.net.URL;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Future;

import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.DefaultParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.io.IOUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.slf4j.MarkerFactory;

import org.apache.tika.batch.BatchProcess;
import org.apache.tika.batch.BatchProcessDriverCLI;
import org.apache.tika.batch.ParallelFileProcessingResult;
import org.apache.tika.batch.builders.BatchProcessBuilder;
import org.apache.tika.batch.builders.CommandLineParserBuilder;
import org.apache.tika.io.TikaInputStream;

/**
 * Command line entry point that builds a file-system {@link BatchProcess} from a
 * batch config file plus command line overrides, runs it on a worker thread and
 * exits the JVM with the exit status reported by the process.
 * <p>
 * The batch config is taken from the file named after {@code -bc} /
 * {@code -batch-config} if present; otherwise from
 * {@code /tika-app-batch-config.xml} on the classpath if it exists, otherwise from
 * {@code default-tika-batch-config.xml} resolved relative to this class.
 */
public class FSBatchProcessCLI {
    private static final Logger LOG = LoggerFactory.getLogger(FSBatchProcessCLI.class);

    /**
     * Line written to stdout once the batch process has completed.
     * Left non-final to keep the public interface unchanged.
     */
    public static String FINISHED_STRING = "Main thread in TikaFSBatchCLI has finished processing.";

    /** Command line options derived from the batch config file. */
    private final Options options;

    /**
     * Builds the set of accepted command line options from the batch config.
     *
     * @param args raw command line arguments; only scanned for the config file switch here
     * @throws IOException if the config cannot be located or read
     */
    public FSBatchProcessCLI(String[] args) throws IOException {
        TikaInputStream configIs = null;
        try {
            configIs = getConfigInputStream(args, true);
            CommandLineParserBuilder builder = new CommandLineParserBuilder();
            options = builder.build(configIs);
        } finally {
            IOUtils.closeQuietly(configIs);
        }
    }

    /**
     * Runs the CLI. Any throwable is printed, logged with a {@code FATAL} marker and
     * converted into {@link BatchProcessDriverCLI#PROCESS_NO_RESTART_EXIT_CODE}.
     *
     * @param args command line arguments
     * @throws Exception declared for interface compatibility; failures are handled internally
     */
    public static void main(String[] args) throws Exception {
        try {
            FSBatchProcessCLI cli = new FSBatchProcessCLI(args);
            cli.execute(args);
        } catch (Throwable t) {
            t.printStackTrace();
            LOG.error(MarkerFactory.getMarker("FATAL"),
                    "Fatal exception from FSBatchProcessCLI: {}", t.getMessage(), t);
            System.exit(BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE);
        }
    }

    /**
     * Prints the help text for the options built from the batch config.
     */
    public void usage() {
        HelpFormatter helpFormatter = new HelpFormatter();
        helpFormatter.printHelp("tika filesystem batch", options);
    }

    /**
     * Opens the batch config as a stream. The caller is responsible for closing it.
     *
     * @param args       raw command line arguments, scanned for the config file switch
     * @param logDefault whether to log that no config file was given and a classpath
     *                   config is being used instead
     * @return an open stream over the selected config; never {@code null}
     * @throws IOException if a specified config file cannot be opened, or if no
     *                     classpath config can be found
     */
    private TikaInputStream getConfigInputStream(String[] args, boolean logDefault)
            throws IOException {
        Path batchConfigFile = getConfigFile(args);
        if (batchConfigFile != null) {
            //this will throw IOException if it can't find a specified config file
            //better to throw an exception than silently back off to default.
            return TikaInputStream.get(batchConfigFile);
        }

        if (logDefault) {
            LOG.info("No config file set via -bc, relying on tika-app-batch-config.xml " +
                    "or default-tika-batch-config.xml");
        }
        //prefer a tika-app-batch-config.xml at the classpath root; otherwise
        //fall back to the default config that sits next to this class
        URL config = FSBatchProcessCLI.class.getResource("/tika-app-batch-config.xml");
        if (config == null) {
            config = FSBatchProcessCLI.class.getResource("default-tika-batch-config.xml");
        }
        if (config == null) {
            //fail with a clear message instead of handing a null stream downstream
            throw new IOException("Could not find tika-app-batch-config.xml or " +
                    "default-tika-batch-config.xml on the classpath");
        }
        return TikaInputStream.get(config.openStream());
    }

    /**
     * Parses the command line, builds the batch process, runs it to completion on a
     * single worker thread, prints the result and exits the JVM with the exit status
     * of the result. When {@code help} is requested, prints usage and exits instead.
     *
     * @param args command line arguments
     * @throws Exception if parsing, building or running the batch process fails
     */
    private void execute(String[] args) throws Exception {

        CommandLineParser cliParser = new DefaultParser();
        CommandLine line = cliParser.parse(options, args);

        if (line.hasOption("help")) {
            usage();
            System.exit(BatchProcessDriverCLI.PROCESS_NO_RESTART_EXIT_CODE);
        }

        //options given without a value are treated as boolean flags
        Map<String, String> mapArgs = new HashMap<>();
        for (Option option : line.getOptions()) {
            String v = option.getValue();
            if (v == null || v.equals("")) {
                v = "true";
            }
            mapArgs.put(option.getOpt(), v);
        }

        BatchProcessBuilder b = new BatchProcessBuilder();
        TikaInputStream is = null;
        BatchProcess process = null;
        try {
            is = getConfigInputStream(args, false);
            process = b.build(is, mapArgs);
        } finally {
            IOUtils.closeQuietly(is);
        }

        ExecutorService executor = Executors.newFixedThreadPool(1);
        Future<ParallelFileProcessingResult> futureResult = executor.submit(process);
        //no further tasks will be submitted; this does not interrupt the running
        //task, it only lets the worker thread end once the task has finished
        executor.shutdown();

        ParallelFileProcessingResult result = futureResult.get();
        System.out.println(FINISHED_STRING);
        System.out.println("\n");
        System.out.println(result.toString());
        System.exit(result.getExitStatus());
    }

    /**
     * Scans the raw arguments for {@code -bc} or {@code -batch-config} and returns the
     * argument that follows it as a path. If the switch appears more than once, the
     * last occurrence wins; a switch that is the final argument (no value) is ignored.
     *
     * @param args raw command line arguments
     * @return the config file path, or {@code null} if none was specified
     */
    private Path getConfigFile(String[] args) {
        Path configFile = null;
        //stop one short of the end: a switch in the last slot has no value to read
        for (int i = 0; i < args.length - 1; i++) {
            if (args[i].equals("-bc") || args[i].equals("-batch-config")) {
                configFile = Paths.get(args[i + 1]);
            }
        }
        return configFile;
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy