All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.batch.builders.AppParserFactoryBuilder Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.tika.batch.builders;

import java.util.Locale;
import java.util.Map;

import org.apache.tika.batch.DigestingAutoDetectParserFactory;
import org.apache.tika.batch.ParserFactory;
import org.apache.tika.parser.DigestingParser;
import org.apache.tika.parser.utils.CommonsDigester;
import org.apache.tika.util.ClassLoaderUtil;
import org.apache.tika.util.XMLDOMUtil;
import org.w3c.dom.Node;

/**
 * Builds a {@link ParserFactory} from an XML configuration node.
 *
 * <p>Attributes read from the attribute map produced by
 * {@link XMLDOMUtil#mapifyAttrs}:
 * <ul>
 *   <li>{@code class} - name of the {@link ParserFactory} implementation to
 *       instantiate;</li>
 *   <li>{@code parseRecursively} - optional; if present it must be
 *       {@code true} or {@code false} (compared case-insensitively);</li>
 *   <li>{@code digest} and {@code digestMarkLimit} - read only when the
 *       instantiated factory is a {@link DigestingAutoDetectParserFactory};
 *       {@code digestMarkLimit} is then mandatory and must be an int.</li>
 * </ul>
 */
public class AppParserFactoryBuilder implements IParserFactoryBuilder {

    /**
     * Instantiates and configures the parser factory described by {@code node}.
     *
     * @param node         configuration node whose attributes describe the factory
     * @param runtimeAttrs attributes handed to {@link XMLDOMUtil#mapifyAttrs}
     *                     together with {@code node}
     *                     (NOTE(review): presumably these override the node's
     *                     own attributes - confirm against XMLDOMUtil)
     * @return the configured parser factory
     * @throws RuntimeException         if {@code parseRecursively} is present but
     *                                  is neither "true" nor "false"
     * @throws IllegalArgumentException if a digesting factory is requested and
     *                                  {@code digestMarkLimit} is missing or is
     *                                  not a parseable int
     */
    @Override
    public ParserFactory build(Node node, Map<String, String> runtimeAttrs) {
        Map<String, String> localAttrs = XMLDOMUtil.mapifyAttrs(node, runtimeAttrs);
        String className = localAttrs.get("class");
        ParserFactory pf = ClassLoaderUtil.buildClass(ParserFactory.class, className);

        if (localAttrs.containsKey("parseRecursively")) {
            // Lower-case with a fixed locale so the comparison does not depend
            // on the JVM's default locale.
            String bString = localAttrs.get("parseRecursively").toLowerCase(Locale.ENGLISH);
            if (bString.equals("true")) {
                pf.setParseRecursively(true);
            } else if (bString.equals("false")) {
                pf.setParseRecursively(false);
            } else {
                throw new RuntimeException("parseRecursively must have value of \"true\" or \"false\": "+
                        bString);
            }
        }
        // Only the digesting factory takes a digester; other factories ignore
        // the digest-related attributes entirely.
        if (pf instanceof DigestingAutoDetectParserFactory) {
            DigestingParser.Digester d = buildDigester(localAttrs);
            ((DigestingAutoDetectParserFactory)pf).setDigester(d);
        }
        return pf;
    }

    /**
     * Creates the digester for a {@link DigestingAutoDetectParserFactory} from
     * the {@code digest} and {@code digestMarkLimit} attributes.
     *
     * @param localAttrs attribute map for the parser factory element
     * @return a {@link CommonsDigester} built from the parsed algorithms and
     *         the mark limit
     * @throws IllegalArgumentException if {@code digestMarkLimit} is missing or
     *                                  is not a parseable int
     */
    private DigestingParser.Digester buildDigester(Map<String, String> localAttrs) {
        String digestString = localAttrs.get("digest");
        CommonsDigester.DigestAlgorithm[] algos = CommonsDigester.parse(digestString);

        String readLimitString = localAttrs.get("digestMarkLimit");
        if (readLimitString == null) {
            throw new IllegalArgumentException("Must specify \"digestMarkLimit\" for "+
            "the DigestingAutoDetectParserFactory");
        }

        final int readLimit;
        try {
            readLimit = Integer.parseInt(readLimitString);
        } catch (NumberFormatException e) {
            // Keep the original exception as the cause so the stack trace
            // shows where parsing failed.
            throw new IllegalArgumentException("Parameter \"digestMarkLimit\" must be a parseable int: "+
            readLimitString, e);
        }
        return new CommonsDigester(readLimit, algos);
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy