All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.tika.fork.ForkClient Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.tika.fork;

import static java.nio.charset.StandardCharsets.UTF_8;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.NotSerializableException;
import java.util.ArrayList;
import java.util.List;
import java.util.jar.JarEntry;
import java.util.jar.JarOutputStream;
import java.util.zip.ZipEntry;

import org.apache.tika.exception.TikaException;
import org.apache.tika.io.IOUtils;
import org.xml.sax.ContentHandler;

/**
 * Client side of the fork protocol. An instance launches a child process
 * from a temporary bootstrap jar whose main class is {@link ForkServer},
 * and then exchanges single-byte message types and serialized objects
 * with it over the child's standard input and output streams.
 * <p>
 * The public operations ({@link #ping()}, {@link #call(String, Object...)}
 * and {@link #close()}) are synchronized on this instance.
 */
class ForkClient {

    /**
     * Resources registered during construction. Each {@link #call} works
     * on a copy of this list, so per-call resources do not accumulate here.
     */
    private final List<ForkResource> resources = new ArrayList<>();

    /** Class loader used when deserializing exceptions sent by the server. */
    private final ClassLoader loader;

    /** Temporary bootstrap jar; deleted in {@link #close()}. */
    private final File jar;

    /** The forked server process. */
    private final Process process;

    /** Stream connected to the standard input of the forked process. */
    private final DataOutputStream output;

    /** Stream connected to the standard output of the forked process. */
    private final DataInputStream input;

    /** Stream connected to the standard error of the forked process. */
    private final InputStream error;

    /**
     * Starts the forked server process and sends it the given class loader
     * and object. If anything fails, whatever was already set up is
     * released through {@link #close()} before the exception propagates.
     *
     * @param loader class loader that is proxied to the server and used
     *               locally to deserialize exceptions
     * @param object object sent to the server after the class loader
     * @param java command line elements that precede the {@code -jar}
     *             option when launching the server process
     * @throws IOException if the bootstrap jar could not be created, the
     *                     process could not be started, or the connection
     *                     to the process was lost
     * @throws TikaException if one of the objects could not be serialized
     */
    public ForkClient(ClassLoader loader, Object object, List<String> java)
            throws IOException, TikaException {
        boolean ok = false;
        try {
            this.loader = loader;
            this.jar = createBootstrapJar();

            List<String> command = new ArrayList<>(java);
            command.add("-jar");
            command.add(jar.getPath());
            ProcessBuilder builder = new ProcessBuilder();
            builder.command(command);
            this.process = builder.start();

            this.output = new DataOutputStream(process.getOutputStream());
            this.input = new DataInputStream(process.getInputStream());
            this.error = process.getErrorStream();

            waitForStartBeacon();

            sendObject(loader, resources);
            sendObject(object, resources);

            ok = true;
        } finally {
            if (!ok) {
                close();
            }
        }
    }

    /**
     * Blocks until the server sends the {@link ForkServer#READY} byte.
     * Any other byte received in the meantime is discarded.
     *
     * @throws IOException if the stream ends before the beacon arrives
     *                     (for example because the process exited), or
     *                     if reading from the process fails
     */
    private void waitForStartBeacon() throws IOException {
        while (true) {
            consumeErrorStream();
            int type = input.read();
            if (type == -1) {
                // End of stream: the process is gone and will never send
                // the beacon. Without this check the loop would spin
                // forever, since read() keeps returning -1.
                consumeErrorStream();
                throw new IOException(
                        "Lost connection to a forked server process");
            }
            if ((byte) type == ForkServer.READY) {
                consumeErrorStream();
                return;
            }
        }
    }

    /**
     * Sends a {@link ForkServer#PING} byte to the server and checks that
     * the next byte read back is also {@link ForkServer#PING}.
     *
     * @return {@code true} if the server echoed the ping; {@code false}
     *         if it answered with anything else, the stream ended, or an
     *         I/O error occurred
     */
    public synchronized boolean ping() {
        try {
            output.writeByte(ForkServer.PING);
            output.flush();
            consumeErrorStream();
            int type = input.read();
            if (type == ForkServer.PING) {
                consumeErrorStream();
                return true;
            }
            return false;
        } catch (IOException e) {
            return false;
        }
    }

    /**
     * Asks the server to invoke the named method, sending each argument
     * in order, and waits for the final response.
     *
     * @param method name of the method, written with
     *               {@link DataOutputStream#writeUTF(String)}
     * @param args arguments to send; input streams, content handlers and
     *             class loaders are replaced by proxies (see
     *             {@link #sendObject(Object, List)})
     * @return the {@link Throwable} reported by the server, or
     *         {@code null} if the server did not report an error
     * @throws IOException if the connection to the server was lost or
     *                     the reported exception could not be deserialized
     * @throws TikaException if an argument could not be serialized
     */
    public synchronized Throwable call(String method, Object... args)
            throws IOException, TikaException {
        // Work on a copy so that resources registered for this call do
        // not leak into later calls.
        List<ForkResource> r = new ArrayList<>(resources);
        output.writeByte(ForkServer.CALL);
        output.writeUTF(method);
        for (Object arg : args) {
            sendObject(arg, r);
        }
        return waitForResponse(r);
    }

    /**
     * Sends the given object to the server through
     * {@link ForkObjectInputStream#sendObject} and waits for the server's
     * response. Input streams, content handlers and class loaders are not
     * sent themselves: a matching resource is appended to the given list
     * and a proxy carrying the resource's index is sent instead.
     *
     * @param object object to be serialized
     * @param resources list of fork resources, used when adding proxies
     * @throws IOException if the object could not be sent or the
     *                     connection to the server was lost
     * @throws TikaException if the object is not serializable
     */
    private void sendObject(Object object, List<ForkResource> resources)
            throws IOException, TikaException {
        int n = resources.size();
        if (object instanceof InputStream) {
            resources.add(new InputStreamResource((InputStream) object));
            object = new InputStreamProxy(n);
        } else if (object instanceof ContentHandler) {
            resources.add(new ContentHandlerResource((ContentHandler) object));
            object = new ContentHandlerProxy(n);
        } else if (object instanceof ClassLoader) {
            resources.add(new ClassLoaderResource((ClassLoader) object));
            object = new ClassLoaderProxy(n);
        }

        try {
            ForkObjectInputStream.sendObject(object, output);
        } catch (NotSerializableException nse) {
            // Build a more friendly error message for this
            throw new TikaException(
                    "Unable to serialize " + object.getClass().getSimpleName() +
                    " to pass to the Forked Parser", nse);
        }

        waitForResponse(resources);
    }

    /**
     * Releases everything held by this client: closes the three process
     * streams, destroys the process and waits for it to exit, and deletes
     * the bootstrap jar. Members that were never initialized are skipped,
     * so this is safe to call from a partially completed constructor.
     * <p>
     * If the calling thread is interrupted while waiting for the process
     * to exit, the wait is abandoned and the interrupt flag is restored.
     */
    public synchronized void close() {
        // Close each stream on its own so that a failure on one does not
        // prevent the others from being closed.
        closeQuietly(output);
        closeQuietly(input);
        closeQuietly(error);
        if (process != null) {
            process.destroy();
            try {
                //TIKA-1933
                process.waitFor();
            } catch (InterruptedException e) {
                // Preserve the interrupt for the caller instead of
                // silently dropping it.
                Thread.currentThread().interrupt();
            }
        }
        if (jar != null) {
            jar.delete();
        }
    }

    /**
     * Closes the given stream, ignoring a {@code null} argument and any
     * {@link IOException} raised by the close.
     *
     * @param closeable stream to close, possibly {@code null}
     */
    private static void closeQuietly(java.io.Closeable closeable) {
        if (closeable == null) {
            return;
        }
        try {
            closeable.close();
        } catch (IOException ignored) {
            // Best effort: there is nothing useful to do if close fails.
        }
    }

    /**
     * Flushes pending output and then reads message types from the server
     * until a terminal one arrives. A {@link ForkServer#RESOURCE} message
     * is followed by an unsigned byte that indexes into the given list;
     * that resource is asked to process the request and the loop goes on.
     *
     * @param resources resources that the server may refer to by index
     * @return the deserialized {@link Throwable} if the server sent
     *         {@link ForkServer#ERROR}, or {@code null} for any other
     *         terminal message type
     * @throws IOException if the stream ended, reading failed, or the
     *                     reported exception could not be deserialized
     */
    private Throwable waitForResponse(List<ForkResource> resources)
            throws IOException {
        output.flush();
        while (true) {
            consumeErrorStream();
            int type = input.read();
            if (type == -1) {
                consumeErrorStream();
                throw new IOException(
                        "Lost connection to a forked server process");
            } else if (type == ForkServer.RESOURCE) {
                ForkResource resource =
                        resources.get(input.readUnsignedByte());
                resource.process(input, output);
            } else if ((byte) type == ForkServer.ERROR) {
                try {
                    return (Throwable) ForkObjectInputStream.readObject(
                            input, loader);
                } catch (ClassNotFoundException e) {
                    throw new IOException(
                            "Unable to deserialize an exception", e);
                }
            } else {
                return null;
            }
        }
    }

    /**
     * Consumes all pending bytes from the standard error stream of the
     * forked server process, and prints them out to the standard error
     * stream of this process. This method should be called always before
     * expecting some output from the server, to prevent the server from
     * blocking due to a filled up pipe buffer of the error stream.
     *
     * @throws IOException if the error stream could not be read
     */
    private void consumeErrorStream() throws IOException {
        int n;
        while ((n = error.available()) > 0) {
            byte[] b = new byte[n];
            n = error.read(b);
            if (n > 0) {
                System.err.write(b, 0, n);
            }
        }
    }

    /**
     * Creates a temporary jar file that can be used to bootstrap the forked
     * server process. Remember to remove the file when no longer used.
     * If filling the jar fails, the temporary file is deleted before the
     * exception propagates.
     *
     * @return the created jar file
     * @throws IOException if the bootstrap archive could not be created
     */
    private static File createBootstrapJar() throws IOException {
        File file = File.createTempFile("apache-tika-fork-", ".jar");
        boolean ok = false;
        try {
            fillBootstrapJar(file);
            ok = true;
        } finally {
            if (!ok) {
                file.delete();
            }
        }
        return file;
    }

    /**
     * Fills in the jar file used to bootstrap the forked server process.
     * All the required <code>.class</code> files and a manifest with a
     * <code>Main-Class</code> entry are written into the archive.
     *
     * @param file file to hold the bootstrap archive
     * @throws IOException if the bootstrap archive could not be created,
     *                     or if one of the required class files cannot be
     *                     found through the class loader
     */
    private static void fillBootstrapJar(File file) throws IOException {
        try (JarOutputStream jar =
                new JarOutputStream(new FileOutputStream(file))) {
            String manifest =
                    "Main-Class: " + ForkServer.class.getName() + "\n";
            jar.putNextEntry(new ZipEntry("META-INF/MANIFEST.MF"));
            jar.write(manifest.getBytes(UTF_8));

            Class<?>[] bootstrap = {
                    ForkServer.class, ForkObjectInputStream.class,
                    ForkProxy.class, ClassLoaderProxy.class,
                    MemoryURLConnection.class,
                    MemoryURLStreamHandler.class,
                    MemoryURLStreamHandlerFactory.class,
                    MemoryURLStreamRecord.class
            };
            ClassLoader loader = ForkServer.class.getClassLoader();
            for (Class<?> klass : bootstrap) {
                String path = klass.getName().replace('.', '/') + ".class";
                try (InputStream input = loader.getResourceAsStream(path)) {
                    if (input == null) {
                        // getResourceAsStream signals "not found" with
                        // null; fail clearly instead of with an NPE.
                        throw new IOException(
                                "Unable to find bootstrap class resource: "
                                + path);
                    }
                    jar.putNextEntry(new JarEntry(path));
                    IOUtils.copy(input, jar);
                }
            }
        }
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy