All downloads are free. Search and download functionality uses the official Maven repository.

org.jwat.common.Payload Maven / Gradle / Ivy

/**
 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
 * and GZip files. (http://jwat.org/)
 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.jwat.common;

import java.io.BufferedInputStream;
import java.io.Closeable;
import java.io.IOException;
import java.io.InputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;

/**
 * This class makes the archived payload of an ARC/WARC record accessible
 * through a stream. The stream is fixed length so only the payload is
 * available.
 *
 * @author lbihanic, selghissassi, nicl
 */
public class Payload implements Closeable {

    /** Size, in bytes, of the buffer handed to the buffered payload stream. */
    private static final int BUFFER_SIZE = 8192;

    /** Has this payload been closed before. */
    protected boolean bClosed;

    /** Payload length. */
    protected long length;

    /** Base stream used to limit payload access to only the payload and
     * not any record data beyond that. Also detects unexpected EOF. */
    protected FixedLengthInputStream in_fl;

    /** Message digest object, or null when no digest is being computed. */
    protected MessageDigest md;

    /** Digest bytes, cached by the first call to {@link #getDigest()}. */
    protected byte[] digest;

    /** Automatic digesting of payload input stream, only set when a
     * message digest object is available. */
    protected DigestInputStream in_digest;

    /** Boolean indicating no such algorithm exception under initialization. */
    protected boolean bNoSuchAlgorithmException;

    /** Buffered stream layered on top of the digest stream, or directly on
     * top of the fixed length stream when no digest is computed. */
    protected BufferedInputStream in_buffered;

    /** Pushback input stream exposed to the outside, usable by payload processors. */
    protected ByteCountingPushBackInputStream in_pb_exposed;

    /** Pushback size. */
    protected int pushback_size;

    /** Header wrapped payload, if present. */
    protected PayloadWithHeaderAbstract payloadHeaderWrapped;

    /** Handler called when this payload is closed. */
    protected PayloadOnClosedHandler onClosedHandler;

    /**
     * Non public constructor. Instances are normally obtained through
     * {@link #processPayload(InputStream, long, int, String)}.
     */
    protected Payload() {
    }

    /**
     * Creates a new <code>Payload</code> instance reading from the supplied
     * input stream.
     * <p>
     * The input stream is wrapped in a fixed length stream, optionally a
     * digesting stream, a buffered stream and finally the pushback stream
     * exposed to consumers. If the requested digest algorithm is not
     * available no digest is computed and the condition is recorded in
     * <code>bNoSuchAlgorithmException</code> instead of being thrown.
     * @param in the input stream to parse.
     * @param length payload length.
     * @param pushback_size   pushback size
     * @param digestAlgorithm digest algorithm to use on payload or null
     * @return the payload
     * @throws IOException if an i/o error occurs while initializing
     * @throws IllegalArgumentException if <code>in</code> is null,
     * <code>length</code> is negative or <code>pushback_size</code> is not
     * positive
     */
    public static Payload processPayload(InputStream in, long length,
            int pushback_size, String digestAlgorithm) throws IOException {
        if (in == null) {
            throw new IllegalArgumentException(
                    "The inputstream 'in' is null");
        }
        if (length < 0) {
            throw new IllegalArgumentException(
                    "The 'length' is less than zero: " + length);
        }
        if (pushback_size <= 0) {
            throw new IllegalArgumentException(
                    "The 'pushback_size' is less than or equal to zero: " +
                            pushback_size);
        }
        Payload pl = new Payload();
        pl.length = length;
        pl.pushback_size = pushback_size;
        pl.in_fl = new FixedLengthInputStream(in, length);
        /*
         * Block Digest.
         */
        if (digestAlgorithm != null) {
            try {
                pl.md = MessageDigest.getInstance(digestAlgorithm);
            } catch (NoSuchAlgorithmException e) {
                // Deliberately not rethrown: the payload stays readable
                // without a digest and the flag records what happened.
                pl.bNoSuchAlgorithmException = true;
            }
        }
        if (pl.md != null) {
            pl.in_digest = new DigestInputStreamNoSkip(pl.in_fl, pl.md);
            pl.in_buffered = new BufferedInputStream(pl.in_digest, BUFFER_SIZE);
        } else {
            pl.in_buffered = new BufferedInputStream(pl.in_fl, BUFFER_SIZE);
        }
        /*
         * Ensure close() is not called on the payload stream!
         */
        pl.in_pb_exposed = new ByteCountingPushBackInputStream(pl.in_buffered, pushback_size) {
            @Override
            public void close() throws IOException {
                // Intentionally empty so a consumer closing the exposed
                // stream does not close the underlying payload stream.
                // The payload itself is closed through Payload.close().
            }
        };
        return pl;
    }

    /**
     * Set optional handler to be called when payload is closed.
     * This method should not be called by the payload consumer.
     * @param onClosedHandler on closed handler implementation
     */
    public void setOnClosedHandler(PayloadOnClosedHandler onClosedHandler) {
        this.onClosedHandler = onClosedHandler;
    }

    /**
     * Returns the calculated digest.
     * <p>
     * The digest is finalized on the first call and the resulting array is
     * cached and returned by all later calls. Since finalizing resets the
     * message digest object, a call made before the payload has been
     * completely read or closed yields a digest of only the bytes digested
     * up to that point.
     * @return the calculated digest or null if no digest is being computed
     */
    public byte[] getDigest() {
        if (digest == null && md != null) {
            digest = md.digest();
        }
        return digest;
    }

    /**
     * Get payload total length.
     * @return payload total length
     */
    public long getTotalLength() {
        return length;
    }

    /**
     * Get the number of unavailable bytes missing due to unexpected EOF.
     * The value is whatever the fixed length stream reports through its
     * <code>available()</code> method.
     * NOTE(review): earlier documentation stated that this method always
     * returns 0 as long as the stream is open; confirm against
     * FixedLengthInputStream before relying on that.
     * @return number of unavailable bytes missing due to unexpected EOF
     * @throws IOException if an i/o error occurs calling available method on stream
     */
    public long getUnavailable() throws IOException {
        return in_fl.available();
    }

    /**
     * Get pushback buffer size.
     * @return pushback buffer size
     */
    public int getPushbackSize() {
        return pushback_size;
    }

    /**
     * Set PayloadHeaderWrapper object in case of recognized payload content.
     * This method should not be called by the payload consumer.
     * @param payloadHeaderWrapped PayloadHeaderWrapper object
     */
    public void setPayloadHeaderWrapped(PayloadWithHeaderAbstract payloadHeaderWrapped) {
        this.payloadHeaderWrapped = payloadHeaderWrapped;
    }

    /**
     * Get the PayloadHeaderWrapper object associated with this payload.
     * @return PayloadHeaderWrapper object or null
     */
    public PayloadWithHeaderAbstract getPayloadHeaderWrapped() {
        return payloadHeaderWrapped;
    }

    /**
     * Get InputStream to read the complete payload even though
     * a http response header may have been read.
     * When a header wrapped payload is present the request is delegated to
     * it, otherwise the exposed pushback stream is returned.
     * @return InputStream to read payload data (in)directly.
     */
    public InputStream getInputStreamComplete() {
        if (payloadHeaderWrapped != null) {
            return payloadHeaderWrapped.getInputStreamComplete();
        } else {
            return in_pb_exposed;
        }
    }

    /**
     * Get InputStream to read the payload directly from the
     * source bypassing any existing HttpResponse object.
     * A parsed http response header will not be accessible through this
     * stream.
     * When a header wrapped payload is present its payload stream is
     * returned, otherwise the exposed pushback stream is returned.
     * @return InputStream to read payload data.
     */
    public ByteCountingPushBackInputStream getInputStream() {
        if (payloadHeaderWrapped != null) {
            return payloadHeaderWrapped.getPayloadInputStream();
        } else {
            return in_pb_exposed;
        }
    }

    /**
     * Get payload remaining length.
     * The value is obtained from the <code>available()</code> method of the
     * stream returned by {@link #getInputStream()}. That method returns an
     * <code>int</code>, so the result never exceeds
     * <code>Integer.MAX_VALUE</code> even though it is returned as a long.
     * @return payload remaining length
     * @throws IOException if an i/o error occurs calling available method on stream
     */
    public long getRemaining() throws IOException {
        if (payloadHeaderWrapped != null) {
            return payloadHeaderWrapped.getPayloadInputStream().available();
        } else {
            return in_pb_exposed.available();
        }
    }

    /**
     * Check to see if the payload has been closed.
     * @return boolean indicating whether this payload is closed or not
     */
    public boolean isClosed() {
        return bClosed;
    }

    /**
     * Closes this payload stream, skipping unread bytes in the process.
     * <p>
     * In order: the header wrapped payload (if any) is closed, remaining
     * bytes are skipped through the digest stream (if any), the buffered
     * stream is closed, the on closed handler (if any) is notified and
     * released, and the payload is marked as closed. Calling this method
     * on a payload already marked as closed has no effect.
     * <p>
     * The payload is only marked as closed when every step completes; if a
     * step throws, a later call runs the sequence again.
     * @throws IOException if an i/o error occurs in the closing process
     */
    @Override
    public void close() throws IOException {
        if (!bClosed) {
            if (payloadHeaderWrapped != null) {
                payloadHeaderWrapped.close();
            }
            if (md != null) {
                // Skip remaining unread bytes to ensure payload is completely
                // digested. Skipping because the DigestInputStreamNoSkip
                // has been altered to read when skipping.
                while (in_digest.skip(length) > 0) {
                }
            }
            if (in_buffered != null) {
                in_buffered.close();
                in_buffered = null;
            }
            if (onClosedHandler != null) {
                onClosedHandler.payloadClosed();
                onClosedHandler = null;
            }
            bClosed = true;
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy