All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.tika.fork.ForkParser Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.fork;

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.Queue;
import java.util.Set;

import org.apache.tika.exception.TikaException;
import org.apache.tika.metadata.Metadata;
import org.apache.tika.mime.MediaType;
import org.apache.tika.parser.AbstractParser;
import org.apache.tika.parser.AutoDetectParser;
import org.apache.tika.parser.ParseContext;
import org.apache.tika.parser.Parser;
import org.apache.tika.sax.TeeContentHandler;
import org.xml.sax.ContentHandler;
import org.xml.sax.SAXException;

public class ForkParser extends AbstractParser {

    /** Serial version UID */
    private static final long serialVersionUID = -4962742892274663950L;

    private final ClassLoader loader;

    private final Parser parser;

    /** Java command line */
    private List java = Arrays.asList("java", "-Xmx32m");

    /** Process pool size */
    private int poolSize = 5;

    private int currentlyInUse = 0;

    private final Queue pool =
        new LinkedList();

    /**
     * @param loader The ClassLoader to use 
     * @param parser the parser to delegate to. This one cannot be another ForkParser
     */
    public ForkParser(ClassLoader loader, Parser parser) {
        if (parser instanceof ForkParser) {
            throw new IllegalArgumentException("The underlying parser of a ForkParser should not be a ForkParser, but a specific implementation.");
        }
        this.loader = loader;
        this.parser = parser;
    }

    public ForkParser(ClassLoader loader) {
        this(loader, new AutoDetectParser());
    }

    public ForkParser() {
        this(ForkParser.class.getClassLoader());
    }

    /**
     * Returns the size of the process pool.
     *
     * @return process pool size
     */
    public synchronized int getPoolSize() {
        return poolSize;
    }

    /**
     * Sets the size of the process pool.
     *
     * @param poolSize process pool size
     */
    public synchronized void setPoolSize(int poolSize) {
        this.poolSize = poolSize;
    }

    /**
     * Returns the command used to start the forked server process.
     *
     * @return java command line
     * @deprecated since 1.8
     * @see ForkParser#getJavaCommandAsList()
     */
    @Deprecated
    public String getJavaCommand() {
        StringBuilder sb = new StringBuilder();
        for (String part : getJavaCommandAsList()) {
            sb.append(part).append(' ');
        }
        sb.deleteCharAt(sb.length() - 1);
        return sb.toString();
    }

    /**
     * Returns the command used to start the forked server process.
     * 

* Returned list is unmodifiable. * @return java command line args */ public List getJavaCommandAsList() { return Collections.unmodifiableList(java); } /** * Sets the command used to start the forked server process. * The arguments "-jar" and "/path/to/bootstrap.jar" are * appended to the given command when starting the process. * The default setting is {"java", "-Xmx32m"}. *

* Creates a defensive copy. * @param java java command line */ public void setJavaCommand(List java) { this.java = new ArrayList(java); } /** * Sets the command used to start the forked server process. * The given command line is split on whitespace and the arguments 2 * "-jar" and "/path/to/bootstrap.jar" are appended to it when starting 2 * the process. The default setting is "java -Xmx32m". * * @param java java command line * @deprecated since 1.8 * @see ForkParser#setJavaCommand(List) */ @Deprecated public void setJavaCommand(String java) { setJavaCommand(Arrays.asList(java.split(" "))); } public Set getSupportedTypes(ParseContext context) { return parser.getSupportedTypes(context); } public void parse( InputStream stream, ContentHandler handler, Metadata metadata, ParseContext context) throws IOException, SAXException, TikaException { if (stream == null) { throw new NullPointerException("null stream"); } Throwable t; boolean alive = false; ForkClient client = acquireClient(); try { ContentHandler tee = new TeeContentHandler( handler, new MetadataContentHandler(metadata)); t = client.call("parse", stream, tee, metadata, context); alive = true; } catch (TikaException te) { // Problem occurred on our side alive = true; throw te; } catch (IOException e) { // Problem occurred on the other side throw new TikaException( "Failed to communicate with a forked parser process." + " The process has most likely crashed due to some error" + " like running out of memory. 
A new process will be" + " started for the next parsing request.", e); } finally { releaseClient(client, alive); } if (t instanceof IOException) { throw (IOException) t; } else if (t instanceof SAXException) { throw (SAXException) t; } else if (t instanceof TikaException) { throw (TikaException) t; } else if (t != null) { throw new TikaException( "Unexpected error in forked server process", t); } } public synchronized void close() { for (ForkClient client : pool) { client.close(); } pool.clear(); poolSize = 0; } private synchronized ForkClient acquireClient() throws IOException, TikaException { while (true) { ForkClient client = pool.poll(); // Create a new process if there's room in the pool if (client == null && currentlyInUse < poolSize) { client = new ForkClient(loader, parser, java); } // Ping the process, and get rid of it if it's inactive if (client != null && !client.ping()) { client.close(); client = null; } if (client != null) { currentlyInUse++; return client; } else if (currentlyInUse >= poolSize) { try { wait(); } catch (InterruptedException e) { throw new TikaException( "Interrupted while waiting for a fork parser", e); } } } } private synchronized void releaseClient(ForkClient client, boolean alive) { currentlyInUse--; if (currentlyInUse + pool.size() < poolSize && alive) { pool.offer(client); notifyAll(); } else { client.close(); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy