All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.jwat.common.PayloadWithHeaderAbstract Maven / Gradle / Ivy

/**
 * Java Web Archive Toolkit - Software to read and validate ARC, WARC
 * and GZip files. (http://jwat.org/)
 * Copyright 2011-2012 Netarkivet.dk (http://netarkivet.dk/)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.jwat.common;

import java.io.ByteArrayInputStream;
import java.io.Closeable;
import java.io.FilterInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.security.DigestInputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.List;

/**
 * Abstract base class for payload types that include a header. The header
 * is parsed and validated. If the header is valid, the raw header is exposed
 * as a byte array (subclasses may additionally expose parsed fields) and the
 * rest of the stream is exposed as the payload of this object.
 * If on the other hand the header is not valid, the bytes read while parsing
 * are pushed back onto the input stream so the complete payload can be read
 * by the caller instead.
 * A stream is also available to read the complete payload (header followed
 * by payload) even though the header has been identified and parsed.
 *
 * @author nicl
 */
public abstract class PayloadWithHeaderAbstract implements Closeable {

    /** Has record been closed before. */
    protected boolean bClosed;

    /** InputStream to read payload. */
    protected ByteCountingPushBackInputStream in_pb;

    /** Payload total length, header and payload. */
    protected long totalLength;

    /** Name of the digest algorithm to apply to the payload, or null for
     *  no digesting. Passed to {@link MessageDigest#getInstance(String)}. */
    protected String digestAlgorithm;

    /** Could the header be validated. */
    protected boolean bIsValid;

    /** Stream used to ensure we don't use more than the pushback buffer on
     *  headers and at the same time store everything read in an array. */
    protected MaxLengthRecordingInputStream in_flr;

    /** The raw header read as bytes. */
    protected byte[] header;

    /** Message digest object. */
    protected MessageDigest md;

    /** Digest bytes. */
    protected byte[] digest;

    /** Automatic digesting of payload input stream. */
    protected DigestInputStream in_digest;

    /** Boolean indicating no such algorithm exception under initialization. */
    protected boolean bNoSuchAlgorithmException;

    /** Payload stream returned to user. */
    protected InputStream in_payload;

    /** Pushback input stream exposed to the outside. */
    protected ByteCountingPushBackInputStream in_pb_exposed;

    /** Sequence of the header as a stream combined with the payload stream. */
    protected SequenceInputStream in_complete;

    /** Object size, in bytes. */
    public long payloadLength = 0L;

    /** Validation errors and warnings. */
    public Diagnostics diagnostics;

    /** Value of {@code totalLength - consumed} captured when the payload was
     *  closed, so it can still be reported after {@link #in_pb} is released. */
    private long remainingOnClose;

    /**
     * Reads and validates the header from {@link #in_pb} and sets up the
     * streams exposed by this object.
     * <p>
     * On a valid header the payload stream is optionally wrapped in a
     * digesting stream (when {@link #digestAlgorithm} is set and supported)
     * and then in a wrapper whose {@code close()} does nothing.
     * On an invalid header the bytes read so far are unread back into
     * {@link #in_pb} and this object is marked as closed.
     * @throws IOException i/o exception while reading or unreading the header
     */
    protected void initProcess() throws IOException {
        // Record everything read while parsing, limited to the pushback size
        // so an invalid header can always be unread again.
        in_flr = new MaxLengthRecordingInputStream(
                in_pb, in_pb.getPushbackSize());
        bIsValid = readHeader(in_flr, totalLength);
        if (bIsValid) {
            /*
             * Payload Digest.
             */
            if (digestAlgorithm != null) {
                try {
                    md = MessageDigest.getInstance(digestAlgorithm);
                } catch (NoSuchAlgorithmException e) {
                    // Not fatal: the payload is simply exposed undigested
                    // and the condition is reported through this flag.
                    bNoSuchAlgorithmException = true;
                }
            }
            if (md != null) {
                in_digest = new DigestInputStreamNoSkip(in_pb, md);
                in_payload = in_digest;
            } else {
                in_payload = in_pb;
            }
            /*
             * Ensure close() is not called on the payload stream!
             */
            in_payload = new FilterInputStream(in_payload) {
                @Override
                public void close() throws IOException {
                }
            };
            header = in_flr.getRecording();
            in_pb_exposed = new ByteCountingPushBackInputStream(in_payload, in_pb.getPushbackSize());
            in_complete = new SequenceInputStream(new ByteArrayInputStream(header), in_payload);
            in_flr = null;
        } else {
            // Undo read and leave callers input stream in original state.
            header = in_flr.getRecording();
            in_pb.unread(header);
            in_flr = null;
            bClosed = true;
        }
    }

    /**
     * Reads the header and updates the payloadLength field if the header is valid.
     * @param in the input stream to parse.
     * @param payloadLength the record length ({@link #initProcess()} passes
     * the total length, header and payload).
     * @return boolean indicating whether the header could be read
     * @throws IOException i/o exception while reading headers
     */
    protected abstract boolean readHeader(MaxLengthRecordingInputStream in,
            long payloadLength) throws IOException;

    /**
     * Get a List of all the headers found during parsing.
     * This base implementation is unsupported; subclasses that parse header
     * lines are expected to override it.
     * @return List of HeaderLine
     * @throws UnsupportedOperationException always, unless overridden
     */
    public List<HeaderLine> getHeaderList() {
        throw new UnsupportedOperationException();
    }

    /**
     * Get a header line structure or null, if no header line structure is
     * stored with the given header name.
     * This base implementation is unsupported; subclasses that parse header
     * lines are expected to override it.
     * @param field header name
     * @return HeaderLine structure or null
     * @throws UnsupportedOperationException always, unless overridden
     */
    public HeaderLine getHeader(String field) {
        throw new UnsupportedOperationException();
    }

    /**
     * Returns the result of the header validation.
     * @return the result of the header validation
     */
    public boolean isValid() {
        return bIsValid;
    }

    /**
     * Get the raw header as bytes.
     * @return raw header as bytes
     */
    public byte[] getHeader() {
        return header;
    }

    /**
     * Returns the digest bytes computed over the payload stream, or null if
     * no digest is being computed.
     * The digest is finalized on the first call and the result is cached,
     * so this should be called after the payload has been read or closed.
     * @return digest bytes of the payload stream or null
     */
    public byte[] getDigest() {
        if (digest == null && md != null) {
            digest = md.digest();
        }
        return digest;
    }

    /**
     * Get HTTP payload length.
     * @return HTTP payload length
     * @throws IllegalStateException if the header is not valid
     */
    public long getPayloadLength() {
        ensureValid();
        return payloadLength;
    }

    /**
     * Get payload total length, header and payload.
     * @return payload total length, header and payload
     * @throws IllegalStateException if the header is not valid
     */
    public long getTotalLength() {
        ensureValid();
        return totalLength;
    }

    /**
     * Get the number of bytes of the total length that have not been
     * consumed from the underlying stream. After {@link #close()} the value
     * captured at close time is returned.
     * @return total length minus the number of bytes consumed
     * @throws IOException if errors occur querying the stream
     * @throws IllegalStateException if the header is not valid
     */
    public long getUnavailable() throws IOException {
        ensureValid();
        return unconsumedLength();
    }

    /**
     * Get an InputStream containing both the header and the
     * payload.
     * @return InputStream containing both the header and the
     * payload.
     * @throws IllegalStateException if the header is not valid
     */
    public InputStream getInputStreamComplete() {
        ensureValid();
        return in_complete;
    }

    /**
     * Get an InputStream containing only the payload.
     * @return InputStream containing only the payload.
     * @throws IllegalStateException if the header is not valid
     */
    public ByteCountingPushBackInputStream getPayloadInputStream() {
        ensureValid();
        return in_pb_exposed;
    }

    /**
     * Get payload remaining length. After {@link #close()} the value
     * captured at close time is returned.
     * @return payload remaining length
     * @throws IOException if errors occur querying the stream
     * @throws IllegalStateException if the header is not valid
     */
    public long getRemaining() throws IOException {
        ensureValid();
        return unconsumedLength();
    }

    /**
     * Check to see if the header has been closed.
     * @return boolean indicating whether this header is closed or not
     */
    public boolean isClosed() {
        return bClosed;
    }

    /**
     * Closes this payload stream. When a digest is being computed the
     * unread bytes are skipped first so the payload is completely digested.
     * The underlying stream is released and this object is marked as closed
     * even if skipping or closing fails; the failure is still propagated.
     * Calling this method on an already closed object has no effect.
     * @throws IOException i/o exception in closing process
     */
    @Override
    public void close() throws IOException {
        if (bClosed) {
            return;
        }
        try {
            if (md != null && in_digest != null) {
                // Skip remaining unread bytes to ensure payload is completely
                // digested. Skipping because the DigestInputStreamNoSkip
                // has been altered to read when skipping.
                while (in_digest.skip(totalLength) > 0) {
                }
            }
        } finally {
            try {
                if (in_pb != null) {
                    // Remember the count so getRemaining()/getUnavailable()
                    // keep working once the stream reference is dropped.
                    remainingOnClose = totalLength - in_pb.getConsumed();
                    in_pb.close();
                }
            } finally {
                in_pb = null;
                bClosed = true;
            }
        }
    }

    /**
     * Throws if the header could not be validated.
     * @throws IllegalStateException if the header is not valid
     */
    private void ensureValid() {
        if (!bIsValid) {
            throw new IllegalStateException("HttpHeader not valid");
        }
    }

    /**
     * Computes the total length minus the bytes consumed from the underlying
     * stream, falling back to the value captured by {@link #close()} once
     * the stream has been released.
     * @return number of bytes of the total length not consumed
     * @throws IOException if errors occur querying the stream
     */
    private long unconsumedLength() throws IOException {
        ByteCountingPushBackInputStream pb = in_pb;
        if (pb == null) {
            return remainingOnClose;
        }
        return totalLength - pb.getConsumed();
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy