All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.commons.compress.archivers.tar.TarUtils Maven / Gradle / Ivy

Go to download

Apache Commons Compress software defines an API for working with compression and archive formats. These include: bzip2, gzip, pack200, lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.

There is a newer version: 1.27.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.tar;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.UncheckedIOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.compress.utils.IOUtils;

/**
 * This class provides static utility methods to work with byte streams.
 *
 * @Immutable
 */
// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
public class TarUtils {

    /** Mask that keeps only the low eight bits, i.e. reads a signed {@code byte} as its unsigned value. */
    private static final int BYTE_MASK = 255;

    /**
     * Encoding obtained from {@code ZipEncodingHelper.getZipEncoding(null)}; the name
     * handling overloads that take no explicit encoding try this one first.
     * NOTE(review): presumably {@code null} selects the platform default encoding - confirm.
     */
    static final ZipEncoding DEFAULT_ENCODING =
        ZipEncodingHelper.getZipEncoding(null);

    /**
     * ZipEncoding wrapping the name conversion used up to Commons Compress 1.3:
     * each byte becomes the char with the same unsigned value, and each char is
     * narrowed to its low-order byte.
     */
    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
            /** Every name is accepted; chars are simply narrowed when encoding. */
            @Override
            public boolean canEncode(final String name) {
                return true;
            }

            /** Converts bytes to chars one-to-one, stopping at the first NUL byte. */
            @Override
            public String decode(final byte[] buffer) {
                final StringBuilder chars = new StringBuilder(buffer.length);
                int index = 0;
                while (index < buffer.length && buffer[index] != 0) {
                    // mask so that bytes >= 0x80 are not sign-extended
                    chars.append((char) (buffer[index] & 0xFF));
                    index++;
                }
                return chars.toString();
            }

            /** Narrows every char of the name to a single byte. */
            @Override
            public ByteBuffer encode(final String name) {
                final byte[] bytes = new byte[name.length()];
                int index = 0;
                while (index < bytes.length) {
                    bytes[index] = (byte) name.charAt(index);
                    index++;
                }
                return ByteBuffer.wrap(bytes);
            }
        };

    /**
     * Sums all bytes of a tar entry header, treating each byte as unsigned.
     *
     * @param buf The tar entry's header buffer.
     * @return The sum of the unsigned byte values.
     */
    public static long computeCheckSum(final byte[] buf) {
        long total = 0;
        for (int i = 0; i < buf.length; i++) {
            // masking turns the signed byte into its unsigned value
            total += buf[i] & BYTE_MASK;
        }
        return total;
    }

    /**
     * Builds the message used when a numeric field contains an invalid byte.
     *
     * The field is decoded with the platform default charset, which is good
     * enough for a diagnostic: using the archive's ZipEncoding would require
     * changing the signatures of parseOctal and parseOctalOrBinary and dealing
     * with the IOException that ZipEncoding#decode may throw.
     */
    private static String exceptionMessage(final byte[] buffer, final int offset,
            final int length, final int current, final byte currentByte) {
        // NULs are spelled out so that the field remains printable
        final String field = new String(buffer, offset, length, Charset.defaultCharset())
            .replace("\0", "{NUL}");
        final StringBuilder message = new StringBuilder("Invalid byte ");
        message.append(currentByte);
        message.append(" at offset ").append(current - offset);
        message.append(" in '").append(field);
        message.append("' len=").append(length);
        return message.toString();
    }

    /**
     * Stores the two's-complement big-endian form of the value right-aligned in
     * the field and pads the bytes between the first byte and the number with
     * 0xff (negative) or 0. The first byte of the field is left untouched.
     *
     * @throws IllegalArgumentException if the number needs more than length - 1 bytes
     */
    private static void formatBigIntegerBinary(final long value, final byte[] buf,
                                               final int offset,
                                               final int length,
                                               final boolean negative) {
        final byte[] digits = BigInteger.valueOf(value).toByteArray();
        if (length - 1 < digits.length) {
            throw new IllegalArgumentException("Value " + value +
                " is too large for " + length + " byte field.");
        }
        final int start = offset + length - digits.length;
        System.arraycopy(digits, 0, buf, start, digits.length);
        final byte padding = negative ? (byte) 0xff : (byte) 0;
        int pos = offset + 1; // buf[offset] is reserved for the marker byte
        while (pos < start) {
            buf[pos++] = padding;
        }
    }

    /**
     * Writes a checksum as octal digits into a buffer.
     *
     * The digits are produced by {@link #formatUnsignedOctalString}
     * (zero padded on the left) and occupy all but the last two bytes
     * of the field; those hold a NUL followed by a space.
     *
     * @param value The value to convert
     * @param buf The destination buffer
     * @param offset The starting offset into the buffer.
     * @param length The size of the field, including the two trailer bytes.
     * @return The updated value of offset, i.e. offset+length
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatCheckSumOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
        final int digits = length - 2; // room left once NUL and space are reserved
        formatUnsignedOctalString(value, buf, offset, digits);

        final int trailer = offset + digits;
        buf[trailer] = 0; // Trailing null
        buf[trailer + 1] = (byte) ' '; // Trailing space

        return offset + length;
    }

    /**
     * Writes the value big-endian into all {@code length} bytes of the field,
     * using two's complement (with the first byte set to 0xff) for negative
     * numbers.
     *
     * @throws IllegalArgumentException if the magnitude does not fit into length - 1 bytes
     */
    private static void formatLongBinary(final long value, final byte[] buf,
                                         final int offset, final int length,
                                         final boolean negative) {
        final int payloadBits = 8 * (length - 1);
        final long limit = 1L << payloadBits;
        final long magnitude = Math.abs(value); // Long.MIN_VALUE stays Long.MIN_VALUE
        if (magnitude < 0 || magnitude >= limit) {
            throw new IllegalArgumentException("Value " + value +
                " is too large for " + length + " byte field.");
        }
        long encoded = magnitude;
        if (negative) {
            // two's complement within the payload bits, leading byte all ones
            encoded = ((magnitude ^ (limit - 1)) + 1) | (0xffL << payloadBits);
        }
        int index = offset + length;
        while (--index >= offset) {
            buf[index] = (byte) encoded;
            encoded >>= 8;
        }
    }

    /**
     * Writes a long value as octal digits into a buffer.
     *
     * The digits are produced by {@link #formatUnsignedOctalString}
     * (zero padded on the left) and fill the field except for its last
     * byte, which receives a space.
     *
     * @param value The value to write as octal
     * @param buf The destination buffer.
     * @param offset The starting offset into the buffer.
     * @param length The length of the field, including the trailing space
     * @return The updated offset, i.e. offset + length
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatLongOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
        final int digits = length - 1; // the last byte is reserved for the space
        formatUnsignedOctalString(value, buf, offset, digits);

        final int trailer = offset + digits;
        buf[trailer] = (byte) ' '; // Trailing space

        return offset + length;
    }

    /**
     * Writes a long value into a buffer, as an octal string when it is
     * non-negative and small enough, and as a binary number otherwise.
     *
     * The octal form is written by {@link #formatLongOctalBytes}. In the
     * binary form the first byte of the field is a marker: 0xff for
     * negative values and 0x80 otherwise.
     *
     * @param value The value to write into the buffer.
     * @param buf The destination buffer.
     * @param offset The starting offset into the buffer.
     * @param length The length of the buffer.
     * @return The updated offset.
     * @throws IllegalArgumentException if the value (and trailer)
     * will not fit in the buffer.
     * @since 1.4
     */
    public static int formatLongOctalOrBinaryBytes(
        final long value, final byte[] buf, final int offset, final int length) {

        // The octal limit depends on whether this is a UID/GID or a SIZE field
        final long octalLimit;
        if (length == TarConstants.UIDLEN) {
            octalLimit = TarConstants.MAXID;
        } else {
            octalLimit = TarConstants.MAXSIZE;
        }

        final boolean negative = value < 0;
        if (value >= 0 && value <= octalLimit) { // OK to store as octal chars
            return formatLongOctalBytes(value, buf, offset, length);
        }

        final boolean fitsIntoLong = length < 9;
        if (fitsIntoLong) {
            formatLongBinary(value, buf, offset, length, negative);
        } else {
            formatBigIntegerBinary(value, buf, offset, length, negative);
        }

        // marker byte telling readers that the field is binary
        buf[offset] = negative ? (byte) 0xff : (byte) 0x80;
        return offset + length;
    }

    /**
     * Copies a name into a fixed-size field of a buffer.
     *
     * The name is encoded with the default encoding first; if that
     * fails with an IOException the fallback encoding is used instead.
     * Shorter names are padded with trailing NULs, longer names are
     * truncated.
     *
     * @param name The header name from which to copy the characters.
     * @param buf The buffer where the name is to be stored.
     * @param offset The starting offset into the buffer
     * @param length The maximum number of header bytes to copy.
     * @return The updated offset, i.e. offset + length
     */
    public static int formatNameBytes(final String name, final byte[] buf, final int offset, final int length) {
        try {
            return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING);
        } catch (final IOException ignored) { // NOSONAR
            // the default encoding failed, retry below with the fallback
        }
        try {
            return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING);
        } catch (final IOException impossible) {
            // impossible
            throw new UncheckedIOException(impossible); //NOSONAR
        }
    }

    /**
     * Copy a name into a buffer.
     * Copies the encoded form of the name into the buffer
     * starting at the specified offset.
     * If the field is longer than the encoded name, the rest of the
     * field is filled with trailing NULs.
     * If the encoded name is longer than the field, characters are
     * dropped from the end of the name until its encoded form fits.
     *
     * @param name The header name from which to copy the characters.
     * @param buf The buffer where the name is to be stored.
     * @param offset The starting offset into the buffer
     * @param length The maximum number of header bytes to copy.
     * @param encoding the encoding to use for file names
     * @since 1.4
     * @return The updated offset, i.e. offset + length
     * @throws IOException on error
     */
    public static int formatNameBytes(final String name, final byte[] buf, final int offset,
                                      final int length,
                                      final ZipEncoding encoding)
        throws IOException {
        int len = name.length();
        ByteBuffer b = encoding.encode(name);
        // A buffer's content is the range position..limit, so measure it with
        // remaining() rather than limit() alone.
        while (b.remaining() > length && len > 0) {
            b = encoding.encode(name.substring(0, --len));
        }
        final int limit = b.remaining();
        // The element at the buffer's position lives at arrayOffset() + position()
        // in the backing array.
        System.arraycopy(b.array(), b.arrayOffset() + b.position(), buf, offset, limit);

        // Pad any remaining output bytes with NUL
        for (int i = limit; i < length; ++i) {
            buf[offset + i] = 0;
        }

        return offset + length;
    }

    /**
     * Writes a value as octal digits into a buffer.
     *
     * The digits are produced by {@link #formatUnsignedOctalString}
     * (zero padded on the left) and occupy all but the last two bytes
     * of the field; those hold a space followed by a NUL.
     *
     * @param value The value to write
     * @param buf The buffer to receive the output
     * @param offset The starting offset into the buffer
     * @param length The size of the field, including the two trailer bytes
     * @return The updated offset, i.e. offset+length
     * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer
     */
    public static int formatOctalBytes(final long value, final byte[] buf, final int offset, final int length) {
        final int digits = length - 2; // room left once space and NUL are reserved
        formatUnsignedOctalString(value, buf, offset, digits);

        final int trailer = offset + digits;
        buf[trailer] = (byte) ' '; // Trailing space
        buf[trailer + 1] = 0; // Trailing null

        return offset + length;
    }

    /**
     * Fill buffer with unsigned octal number, padded with leading zeroes.
     *
     * Exactly {@code length} bytes starting at {@code offset} are written.
     *
     * @param value number to convert to octal - treated as unsigned
     * @param buffer destination buffer
     * @param offset starting offset in buffer
     * @param length length of buffer to fill
     * @throws IllegalArgumentException if the value will not fit in the buffer,
     *         which includes any field of length zero or less
     */
    public static void formatUnsignedOctalString(final long value, final byte[] buffer,
            final int offset, final int length) {
        if (length < 1) {
            // Not even a single digit fits. Without this check a zero value
            // would be written to buffer[offset - 1], outside of the field.
            throw new IllegalArgumentException
            (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
        }
        int remaining = length - 1; // index of the least significant digit
        if (value == 0) {
            buffer[offset + remaining--] = (byte) '0';
        } else {
            long val = value;
            for (; remaining >= 0 && val != 0; --remaining) {
                // CheckStyle:MagicNumber OFF
                buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7));
                val = val >>> 3; // unsigned shift: the value is treated as unsigned
                // CheckStyle:MagicNumber ON
            }
            if (val != 0){
                throw new IllegalArgumentException
                (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length);
            }
        }

        for (; remaining >= 0; --remaining) { // leading zeros
            buffer[offset + remaining] = (byte) '0';
        }
    }

    /**
     * Reads a binary number from all bytes of the field except the first
     * (marker) byte, using BigInteger so fields of nine or more bytes can be
     * handled.
     *
     * @throws IllegalArgumentException if the number does not fit into a signed long
     */
    private static long parseBinaryBigInteger(final byte[] buffer,
                                              final int offset,
                                              final int length,
                                              final boolean negative) {
        final int payloadLength = length - 1;
        final byte[] payload = new byte[payloadLength];
        System.arraycopy(buffer, offset + 1, payload, 0, payloadLength);

        BigInteger magnitude = new BigInteger(payload);
        if (negative) {
            // undo the 2's complement: subtract one, then flip all bits
            magnitude = magnitude.subtract(BigInteger.ONE).not();
        }
        if (magnitude.bitLength() > 63) {
            throw new IllegalArgumentException("At offset " + offset + ", "
                                               + length + " byte binary number"
                                               + " exceeds maximum signed long"
                                               + " value");
        }
        final long result = magnitude.longValue();
        return negative ? -result : result;
    }

    /**
     * Reads a big-endian binary number from the bytes following the first
     * (marker) byte of a field that is shorter than nine bytes.
     *
     * @throws IllegalArgumentException if the field is nine bytes or longer
     */
    private static long parseBinaryLong(final byte[] buffer, final int offset,
                                        final int length,
                                        final boolean negative) {
        if (length >= 9) {
            throw new IllegalArgumentException("At offset " + offset + ", "
                                               + length + " byte binary number"
                                               + " exceeds maximum signed long"
                                               + " value");
        }
        long accumulated = 0;
        int i = 0;
        while (++i < length) { // index 0 holds the marker byte and is skipped
            accumulated = (accumulated << 8) + (buffer[offset + i] & 0xff);
        }
        if (!negative) {
            return accumulated;
        }
        // undo the 2's complement: subtract one, flip the payload bits, negate
        final long payloadMask = (long) Math.pow(2.0, (length - 1) * 8.0) - 1;
        return -((accumulated - 1) ^ payloadMask);
    }

    /**
     * Parse a boolean byte from a buffer.
     * Only the single byte at the given offset is inspected: the result is
     * {@code true} exactly when that byte has the value 1.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @return The boolean value of the byte.
     */
    public static boolean parseBoolean(final byte[] buffer, final int offset) {
        final byte flag = buffer[offset];
        return flag == 1;
    }

    /**
     * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map
     * GNU.sparse.map
     *    Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     *
     * <p>The values must come in pairs and every value must be a non-negative
     * decimal number; anything else is reported as a corrupted archive.</p>
     *
     * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]"
     * @return unmodifiable list of sparse headers parsed from sparse map
     * @throws IOException Corrupted TAR archive.
     * @since 1.21
     */
    protected static List<TarArchiveStructSparse> parseFromPAX01SparseHeaders(final String sparseMap)
        throws IOException {
        final List<TarArchiveStructSparse> sparseHeaders = new ArrayList<>();
        final String[] sparseHeaderStrings = sparseMap.split(",");
        if (sparseHeaderStrings.length % 2 == 1) {
            // an odd number of values means an offset without its size
            throw new IOException("Corrupted TAR archive. Bad format in GNU.sparse.map PAX Header");
        }

        for (int i = 0; i < sparseHeaderStrings.length; i += 2) {
            long sparseOffset;
            try {
                sparseOffset = Long.parseLong(sparseHeaderStrings[i]);
            } catch (final NumberFormatException ex) {
                // keep the parse failure as the cause for diagnostics
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct offset contains a non-numeric value", ex);
            }
            if (sparseOffset < 0) {
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct offset contains negative value");
            }
            long sparseNumbytes;
            try {
                sparseNumbytes = Long.parseLong(sparseHeaderStrings[i + 1]);
            } catch (final NumberFormatException ex) {
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct numbytes contains a non-numeric value", ex);
            }
            if (sparseNumbytes < 0) {
                throw new IOException("Corrupted TAR archive."
                    + " Sparse struct numbytes contains negative value");
            }
            sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes));
        }

        return Collections.unmodifiableList(sparseHeaders);
    }

    /**
     * Parses an entry name from a fixed-size field of a buffer.
     *
     * The bytes are decoded with the default encoding first; if that
     * fails with an IOException the fallback encoding is used instead.
     * Parsing stops at the first NUL or once the field length is reached.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @return The entry name.
     */
    public static String parseName(final byte[] buffer, final int offset, final int length) {
        try {
            return parseName(buffer, offset, length, DEFAULT_ENCODING);
        } catch (final IOException ignored) { // NOSONAR
            // the default encoding failed, retry below with the fallback
        }
        try {
            return parseName(buffer, offset, length, FALLBACK_ENCODING);
        } catch (final IOException impossible) {
            // impossible
            throw new UncheckedIOException(impossible); //NOSONAR
        }
    }

    /**
     * Parses an entry name from a fixed-size field of a buffer.
     *
     * The name consists of the bytes from the offset up to (but not
     * including) the first NUL, or of the whole field if it contains
     * no NUL. An empty name yields the empty string without consulting
     * the encoding.
     *
     * @param buffer The buffer from which to parse.
     * @param offset The offset into the buffer from which to parse.
     * @param length The maximum number of bytes to parse.
     * @param encoding the encoding to use for file names
     * @since 1.4
     * @return The entry name.
     * @throws IOException on error
     */
    public static String parseName(final byte[] buffer, final int offset,
                                   final int length,
                                   final ZipEncoding encoding)
        throws IOException {

        // count the bytes in front of the first NUL, at most length of them
        int nameLength = 0;
        while (nameLength < length && buffer[offset + nameLength] != 0) {
            nameLength++;
        }
        if (nameLength <= 0) {
            return "";
        }
        final byte[] raw = new byte[nameLength];
        System.arraycopy(buffer, offset, raw, 0, nameLength);
        return encoding.decode(raw);
    }

    /**
     * Parse an octal string from a buffer.
     *
     * 

Leading spaces are ignored. * The buffer must contain a trailing space or NUL, * and may contain an additional trailing space or NUL.

* *

The input buffer is allowed to contain all NULs, * in which case the method returns 0L * (this allows for missing fields).

* *

To work-around some tar implementations that insert a * leading NUL this method returns 0 if it detects a leading NUL * since Commons Compress 1.4.

* * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @param length The maximum number of bytes to parse - must be at least 2 bytes. * @return The long value of the octal string. * @throws IllegalArgumentException if the trailing space/NUL is missing or if an invalid byte is detected. */ public static long parseOctal(final byte[] buffer, final int offset, final int length) { long result = 0; int end = offset + length; int start = offset; if (length < 2) { throw new IllegalArgumentException("Length " + length + " must be at least 2"); } if (buffer[start] == 0) { return 0L; } // Skip leading spaces while (start < end) { if (buffer[start] != ' ') { break; } start++; } // Trim all trailing NULs and spaces. // The ustar and POSIX tar specs require a trailing NUL or // space but some implementations use the extra digit for big // sizes/uids/gids ... byte trailer = buffer[end - 1]; while (start < end && (trailer == 0 || trailer == ' ')) { end--; trailer = buffer[end - 1]; } for (; start < end; start++) { final byte currentByte = buffer[start]; // CheckStyle:MagicNumber OFF if (currentByte < '0' || currentByte > '7') { throw new IllegalArgumentException(exceptionMessage(buffer, offset, length, start, currentByte)); } result = (result << 3) + (currentByte - '0'); // convert from ASCII // CheckStyle:MagicNumber ON } return result; } /** * Compute the value contained in a byte buffer. If the most * significant bit of the first byte in the buffer is set, this * bit is ignored and the rest of the buffer is interpreted as a * binary number. Otherwise, the buffer is interpreted as an * octal number as per the parseOctal function above. * * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @param length The maximum number of bytes to parse. * @return The long value of the octal or binary string. 
* @throws IllegalArgumentException if the trailing space/NUL is * missing or an invalid byte is detected in an octal number, or * if a binary number would exceed the size of a signed long * 64-bit integer. * @since 1.4 */ public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) { if ((buffer[offset] & 0x80) == 0) { return parseOctal(buffer, offset, length); } final boolean negative = buffer[offset] == (byte) 0xff; if (length < 9) { return parseBinaryLong(buffer, offset, length, negative); } return parseBinaryBigInteger(buffer, offset, length, negative); } /** * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map * GNU.sparse.map * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" * *

Will internally invoke {@link #parseFromPAX01SparseHeaders} and map IOExceptions to a RzuntimeException, You * should use {@link #parseFromPAX01SparseHeaders} directly instead. * * @param sparseMap the sparse map string consisting of comma-separated values "offset,size[,offset-1,size-1...]" * @return sparse headers parsed from sparse map * @deprecated use #parseFromPAX01SparseHeaders instead */ @Deprecated protected static List parsePAX01SparseHeaders(final String sparseMap) { try { return parseFromPAX01SparseHeaders(sparseMap); } catch (final IOException ex) { throw new UncheckedIOException(ex.getMessage(), ex); } } /** * For PAX Format 1.X: * The sparse map itself is stored in the file data block, preceding the actual file data. * It consists of a series of decimal numbers delimited by newlines. The map is padded with nulls to the nearest block boundary. * The first number gives the number of entries in the map. Following are map entries, each one consisting of two numbers * giving the offset and size of the data block it describes. * @param inputStream parsing source. * @param recordSize The size the TAR header * @return sparse headers * @throws IOException if an I/O error occurs. */ protected static List parsePAX1XSparseHeaders(final InputStream inputStream, final int recordSize) throws IOException { // for 1.X PAX Headers final List sparseHeaders = new ArrayList<>(); long bytesRead = 0; long[] readResult = readLineOfNumberForPax1X(inputStream); long sparseHeadersCount = readResult[0]; if (sparseHeadersCount < 0) { // overflow while reading number? throw new IOException("Corrupted TAR archive. Negative value in sparse headers block"); } bytesRead += readResult[1]; while (sparseHeadersCount-- > 0) { readResult = readLineOfNumberForPax1X(inputStream); final long sparseOffset = readResult[0]; if (sparseOffset < 0) { throw new IOException("Corrupted TAR archive." 
+ " Sparse header block offset contains negative value"); } bytesRead += readResult[1]; readResult = readLineOfNumberForPax1X(inputStream); final long sparseNumbytes = readResult[0]; if (sparseNumbytes < 0) { throw new IOException("Corrupted TAR archive." + " Sparse header block numbytes contains negative value"); } bytesRead += readResult[1]; sparseHeaders.add(new TarArchiveStructSparse(sparseOffset, sparseNumbytes)); } // skip the rest of this record data final long bytesToSkip = recordSize - bytesRead % recordSize; IOUtils.skip(inputStream, bytesToSkip); return sparseHeaders; } /** * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) * may appear multi times, and they look like: * * GNU.sparse.size=size * GNU.sparse.numblocks=numblocks * repeat numblocks times * GNU.sparse.offset=offset * GNU.sparse.numbytes=numbytes * end repeat * * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map * * GNU.sparse.map * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" * * @param inputStream input stream to read keys and values * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, * the sparse headers need to be stored in an array, not a map * @param globalPaxHeaders global PAX headers of the tar archive * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. * @throws IOException if an I/O error occurs. 
* @deprecated use the four-arg version instead */ @Deprecated protected static Map parsePaxHeaders(final InputStream inputStream, final List sparseHeaders, final Map globalPaxHeaders) throws IOException { return parsePaxHeaders(inputStream, sparseHeaders, globalPaxHeaders, -1); } /** * For PAX Format 0.0, the sparse headers(GNU.sparse.offset and GNU.sparse.numbytes) * may appear multi times, and they look like: * * GNU.sparse.size=size * GNU.sparse.numblocks=numblocks * repeat numblocks times * GNU.sparse.offset=offset * GNU.sparse.numbytes=numbytes * end repeat * * For PAX Format 0.1, the sparse headers are stored in a single variable : GNU.sparse.map * * GNU.sparse.map * Map of non-null data chunks. It is a string consisting of comma-separated values "offset,size[,offset-1,size-1...]" * * @param inputStream input stream to read keys and values * @param sparseHeaders used in PAX Format 0.0 & 0.1, as it may appear multiple times, * the sparse headers need to be stored in an array, not a map * @param globalPaxHeaders global PAX headers of the tar archive * @param headerSize total size of the PAX header, will be ignored if negative * @return map of PAX headers values found inside the current (local or global) PAX headers tar entry. * @throws IOException if an I/O error occurs. 
* @since 1.21 */ protected static Map parsePaxHeaders(final InputStream inputStream, final List sparseHeaders, final Map globalPaxHeaders, final long headerSize) throws IOException { final Map headers = new HashMap<>(globalPaxHeaders); Long offset = null; // Format is "length keyword=value\n"; int totalRead = 0; while(true) { // get length int ch; int len = 0; int read = 0; while((ch = inputStream.read()) != -1) { read++; totalRead++; if (ch == '\n') { // blank line in header break; } if (ch == ' '){ // End of length string // Get keyword final ByteArrayOutputStream coll = new ByteArrayOutputStream(); while((ch = inputStream.read()) != -1) { read++; totalRead++; if (totalRead < 0 || (headerSize >= 0 && totalRead >= headerSize)) { break; } if (ch == '='){ // end of keyword final String keyword = coll.toString(CharsetNames.UTF_8); // Get rest of entry final int restLen = len - read; if (restLen <= 1) { // only NL headers.remove(keyword); } else if (headerSize >= 0 && restLen > headerSize - totalRead) { throw new IOException("Paxheader value size " + restLen + " exceeds size of header record"); } else { final byte[] rest = IOUtils.readRange(inputStream, restLen); final int got = rest.length; if (got != restLen) { throw new IOException("Failed to read " + "Paxheader. Expected " + restLen + " bytes, read " + got); } totalRead += restLen; // Drop trailing NL if (rest[restLen - 1] != '\n') { throw new IOException("Failed to read Paxheader." + "Value should end with a newline"); } final String value = new String(rest, 0, restLen - 1, StandardCharsets.UTF_8); headers.put(keyword, value); // for 0.0 PAX Headers if (keyword.equals(TarGnuSparseKeys.OFFSET)) { if (offset != null) { // previous GNU.sparse.offset header but no numBytes sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); } try { offset = Long.valueOf(value); } catch (final NumberFormatException ex) { throw new IOException("Failed to read Paxheader." 
+ TarGnuSparseKeys.OFFSET + " contains a non-numeric value"); } if (offset < 0) { throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " contains negative value"); } } // for 0.0 PAX Headers if (keyword.equals(TarGnuSparseKeys.NUMBYTES)) { if (offset == null) { throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.OFFSET + " is expected before GNU.sparse.numbytes shows up."); } long numbytes; try { numbytes = Long.parseLong(value); } catch (final NumberFormatException ex) { throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains a non-numeric value."); } if (numbytes < 0) { throw new IOException("Failed to read Paxheader." + TarGnuSparseKeys.NUMBYTES + " contains negative value"); } sparseHeaders.add(new TarArchiveStructSparse(offset, numbytes)); offset = null; } } break; } coll.write((byte) ch); } break; // Processed single header } // COMPRESS-530 : throw if we encounter a non-number while reading length if (ch < '0' || ch > '9') { throw new IOException("Failed to read Paxheader. Encountered a non-number while reading length"); } len *= 10; len += ch - '0'; } if (ch == -1){ // EOF break; } } if (offset != null) { // offset but no numBytes sparseHeaders.add(new TarArchiveStructSparse(offset, 0)); } return headers; } /** * Parses the content of a PAX 1.0 sparse block. * @since 1.20 * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. 
* @return a parsed sparse struct */ public static TarArchiveStructSparse parseSparse(final byte[] buffer, final int offset) { final long sparseOffset = parseOctalOrBinary(buffer, offset, TarConstants.SPARSE_OFFSET_LEN); final long sparseNumbytes = parseOctalOrBinary(buffer, offset + TarConstants.SPARSE_OFFSET_LEN, TarConstants.SPARSE_NUMBYTES_LEN); return new TarArchiveStructSparse(sparseOffset, sparseNumbytes); } /** * For 1.X PAX Format, the sparse headers are stored in the file data block, preceding the actual file data. * It consists of a series of decimal numbers delimited by newlines. * * @param inputStream the input stream of the tar file * @return the decimal number delimited by '\n', and the bytes read from input stream * @throws IOException */ private static long[] readLineOfNumberForPax1X(final InputStream inputStream) throws IOException { int number; long result = 0; long bytesRead = 0; while ((number = inputStream.read()) != '\n') { bytesRead += 1; if (number == -1) { throw new IOException("Unexpected EOF when reading parse information of 1.X PAX format"); } if (number < '0' || number > '9') { throw new IOException("Corrupted TAR archive. 
Non-numeric value in sparse headers block"); } result = result * 10 + (number - '0'); } bytesRead += 1; return new long[]{result, bytesRead}; } /** * @since 1.21 */ static List readSparseStructs(final byte[] buffer, final int offset, final int entries) throws IOException { final List sparseHeaders = new ArrayList<>(); for (int i = 0; i < entries; i++) { try { final TarArchiveStructSparse sparseHeader = parseSparse(buffer, offset + i * (TarConstants.SPARSE_OFFSET_LEN + TarConstants.SPARSE_NUMBYTES_LEN)); if (sparseHeader.getOffset() < 0) { throw new IOException("Corrupted TAR archive, sparse entry with negative offset"); } if (sparseHeader.getNumbytes() < 0) { throw new IOException("Corrupted TAR archive, sparse entry with negative numbytes"); } sparseHeaders.add(sparseHeader); } catch (final IllegalArgumentException ex) { // thrown internally by parseOctalOrBinary throw new IOException("Corrupted TAR archive, sparse entry is invalid", ex); } } return Collections.unmodifiableList(sparseHeaders); } /** * Wikipedia says: *

* <blockquote>
* The checksum is calculated by taking the sum of the unsigned byte values
* of the header block with the eight checksum bytes taken to be ascii
* spaces (decimal value 32). It is stored as a six digit octal number with
* leading zeroes followed by a NUL and then a space. Various
* implementations do not adhere to this format. For better compatibility,
* ignore leading and trailing whitespace, and get the first six digits. In
* addition, some historic tar implementations treated bytes as signed.
* Implementations typically calculate the checksum both ways, and treat it
* as good if either the signed or unsigned sum matches the included
* checksum.
* </blockquote>
* <p>

* The return value of this method should be treated as a best-effort
     * heuristic rather than an absolute and final truth. The checksum
     * verification logic may well evolve over time as more special cases
     * are encountered.
     *
     * @param header the bytes of the tar header to check
     * @return {@code true} if the checksum stored in the header equals either the
     *         unsigned or the signed byte sum computed over {@code header}
     * @see <a href="https://issues.apache.org/jira/browse/COMPRESS-191">COMPRESS-191</a>
     * @since 1.5
     */
    public static boolean verifyCheckSum(final byte[] header) {
        final long expected = parseOctal(header, TarConstants.CHKSUM_OFFSET, TarConstants.CHKSUMLEN);
        final int checksumFieldEnd = TarConstants.CHKSUM_OFFSET + TarConstants.CHKSUMLEN;

        long sumOfUnsigned = 0;
        long sumOfSigned = 0;
        int index = 0;
        for (final byte raw : header) {
            // Bytes of the checksum field itself are counted as ASCII spaces.
            final boolean insideChecksumField = index >= TarConstants.CHKSUM_OFFSET && index < checksumFieldEnd;
            final byte counted = insideChecksumField ? (byte) ' ' : raw;
            sumOfUnsigned += counted & 0xff;
            sumOfSigned += counted;
            index++;
        }

        // Accept either interpretation of the header bytes.
        return expected == sumOfUnsigned || expected == sumOfSigned;
    }

    /** Private constructor to prevent instantiation of this utility class. */
    private TarUtils() {
        // utility class, not meant to be instantiated
    }
}





© 2015 - 2024 Weber Informatics LLC | Privacy Policy