All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.compress.archivers.tar.TarUtils Maven / Gradle / Ivy

Go to download

Apache Commons Compress software defines an API for working with compression and archive formats. These include: bzip2, gzip, pack200, lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.

There is a newer version: 1.27.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.commons.compress.archivers.tar;

import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUMLEN;
import static org.apache.commons.compress.archivers.tar.TarConstants.CHKSUM_OFFSET;

import java.io.IOException;
import java.math.BigInteger;
import java.nio.ByteBuffer;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;

/**
 * This class provides static utility methods to work with byte streams.
 *
 * @Immutable
 */
// CheckStyle:HideUtilityClassConstructorCheck OFF (bc)
public class TarUtils {

    private static final int BYTE_MASK = 255;

    static final ZipEncoding DEFAULT_ENCODING =
        ZipEncodingHelper.getZipEncoding(null);

    /**
     * Encapsulates the algorithms used up to Commons Compress 1.3 as
     * ZipEncoding.
     */
    static final ZipEncoding FALLBACK_ENCODING = new ZipEncoding() {
            public boolean canEncode(String name) { return true; }

            public ByteBuffer encode(String name) {
                final int length = name.length();
                byte[] buf = new byte[length];

                // copy until end of input or output is reached.
                for (int i = 0; i < length; ++i) {
                    buf[i] = (byte) name.charAt(i);
                }
                return ByteBuffer.wrap(buf);
            }

            public String decode(byte[] buffer) {
                final int length = buffer.length;
                StringBuilder result = new StringBuilder(length);

                for (int i = 0; i < length; ++i) {
                    byte b = buffer[i];
                    if (b == 0) { // Trailing null
                        break;
                    }
                    result.append((char) (b & 0xFF)); // Allow for sign-extension
                }

                return result.toString();
            }
        };

    /** Private constructor to prevent instantiation of this utility class. */
    private TarUtils(){
    }

    /**
     * Parse an octal string from a buffer.
     *
     * 

Leading spaces are ignored. * The buffer must contain a trailing space or NUL, * and may contain an additional trailing space or NUL.

* *

The input buffer is allowed to contain all NULs, * in which case the method returns 0L * (this allows for missing fields).

* *

To work-around some tar implementations that insert a * leading NUL this method returns 0 if it detects a leading NUL * since Commons Compress 1.4.

* * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @param length The maximum number of bytes to parse - must be at least 2 bytes. * @return The long value of the octal string. * @throws IllegalArgumentException if the trailing space/NUL is missing or if a invalid byte is detected. */ public static long parseOctal(final byte[] buffer, final int offset, final int length) { long result = 0; int end = offset + length; int start = offset; if (length < 2){ throw new IllegalArgumentException("Length "+length+" must be at least 2"); } if (buffer[start] == 0) { return 0L; } // Skip leading spaces while (start < end){ if (buffer[start] == ' '){ start++; } else { break; } } // Trim all trailing NULs and spaces. // The ustar and POSIX tar specs require a trailing NUL or // space but some implementations use the extra digit for big // sizes/uids/gids ... byte trailer = buffer[end - 1]; while (start < end && (trailer == 0 || trailer == ' ')) { end--; trailer = buffer[end - 1]; } if (start == end) { throw new IllegalArgumentException( exceptionMessage(buffer, offset, length, start, trailer)); } for ( ;start < end; start++) { final byte currentByte = buffer[start]; // CheckStyle:MagicNumber OFF if (currentByte < '0' || currentByte > '7'){ throw new IllegalArgumentException( exceptionMessage(buffer, offset, length, start, currentByte)); } result = (result << 3) + (currentByte - '0'); // convert from ASCII // CheckStyle:MagicNumber ON } return result; } /** * Compute the value contained in a byte buffer. If the most * significant bit of the first byte in the buffer is set, this * bit is ignored and the rest of the buffer is interpreted as a * binary number. Otherwise, the buffer is interpreted as an * octal number as per the parseOctal function above. * * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @param length The maximum number of bytes to parse. * @return The long value of the octal or binary string. * @throws IllegalArgumentException if the trailing space/NUL is * missing or an invalid byte is detected in an octal number, or * if a binary number would exceed the size of a signed long * 64-bit integer. * @since 1.4 */ public static long parseOctalOrBinary(final byte[] buffer, final int offset, final int length) { if ((buffer[offset] & 0x80) == 0) { return parseOctal(buffer, offset, length); } final boolean negative = buffer[offset] == (byte) 0xff; if (length < 9) { return parseBinaryLong(buffer, offset, length, negative); } return parseBinaryBigInteger(buffer, offset, length, negative); } private static long parseBinaryLong(final byte[] buffer, final int offset, final int length, final boolean negative) { if (length >= 9) { throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number" + " exceeds maximum signed long" + " value"); } long val = 0; for (int i = 1; i < length; i++) { val = (val << 8) + (buffer[offset + i] & 0xff); } if (negative) { // 2's complement val--; val ^= (long) Math.pow(2, (length - 1) * 8) - 1; } return negative ? -val : val; } private static long parseBinaryBigInteger(final byte[] buffer, final int offset, final int length, final boolean negative) { byte[] remainder = new byte[length - 1]; System.arraycopy(buffer, offset + 1, remainder, 0, length - 1); BigInteger val = new BigInteger(remainder); if (negative) { // 2's complement val = val.add(BigInteger.valueOf(-1)).not(); } if (val.bitLength() > 63) { throw new IllegalArgumentException("At offset " + offset + ", " + length + " byte binary number" + " exceeds maximum signed long" + " value"); } return negative ? -val.longValue() : val.longValue(); } /** * Parse a boolean byte from a buffer. * Leading spaces and NUL are ignored. * The buffer may contain trailing spaces or NULs. * * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @return The boolean value of the bytes. * @throws IllegalArgumentException if an invalid byte is detected. */ public static boolean parseBoolean(final byte[] buffer, final int offset) { return buffer[offset] == 1; } // Helper method to generate the exception message private static String exceptionMessage(byte[] buffer, final int offset, final int length, int current, final byte currentByte) { // default charset is good enough for an exception message, // // the alternative was to modify parseOctal and // parseOctalOrBinary to receive the ZipEncoding of the // archive (deprecating the existing public methods, of // course) and dealing with the fact that ZipEncoding#decode // can throw an IOException which parseOctal* doesn't declare String string = new String(buffer, offset, length); string=string.replaceAll("\0", "{NUL}"); // Replace NULs to allow string to be printed final String s = "Invalid byte "+currentByte+" at offset "+(current-offset)+" in '"+string+"' len="+length; return s; } /** * Parse an entry name from a buffer. * Parsing stops when a NUL is found * or the buffer length is reached. * * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @param length The maximum number of bytes to parse. * @return The entry name. */ public static String parseName(byte[] buffer, final int offset, final int length) { try { return parseName(buffer, offset, length, DEFAULT_ENCODING); } catch (IOException ex) { try { return parseName(buffer, offset, length, FALLBACK_ENCODING); } catch (IOException ex2) { // impossible throw new RuntimeException(ex2); } } } /** * Parse an entry name from a buffer. * Parsing stops when a NUL is found * or the buffer length is reached. * * @param buffer The buffer from which to parse. * @param offset The offset into the buffer from which to parse. * @param length The maximum number of bytes to parse. * @param encoding name of the encoding to use for file names * @since 1.4 * @return The entry name. */ public static String parseName(byte[] buffer, final int offset, final int length, final ZipEncoding encoding) throws IOException { int len = length; for (; len > 0; len--) { if (buffer[offset + len - 1] != 0) { break; } } if (len > 0) { byte[] b = new byte[len]; System.arraycopy(buffer, offset, b, 0, len); return encoding.decode(b); } return ""; } /** * Copy a name into a buffer. * Copies characters from the name into the buffer * starting at the specified offset. * If the buffer is longer than the name, the buffer * is filled with trailing NULs. * If the name is longer than the buffer, * the output is truncated. * * @param name The header name from which to copy the characters. * @param buf The buffer where the name is to be stored. * @param offset The starting offset into the buffer * @param length The maximum number of header bytes to copy. * @return The updated offset, i.e. offset + length */ public static int formatNameBytes(String name, byte[] buf, final int offset, final int length) { try { return formatNameBytes(name, buf, offset, length, DEFAULT_ENCODING); } catch (IOException ex) { try { return formatNameBytes(name, buf, offset, length, FALLBACK_ENCODING); } catch (IOException ex2) { // impossible throw new RuntimeException(ex2); } } } /** * Copy a name into a buffer. * Copies characters from the name into the buffer * starting at the specified offset. * If the buffer is longer than the name, the buffer * is filled with trailing NULs. * If the name is longer than the buffer, * the output is truncated. * * @param name The header name from which to copy the characters. * @param buf The buffer where the name is to be stored. * @param offset The starting offset into the buffer * @param length The maximum number of header bytes to copy. * @param encoding name of the encoding to use for file names * @since 1.4 * @return The updated offset, i.e. offset + length */ public static int formatNameBytes(String name, byte[] buf, final int offset, final int length, final ZipEncoding encoding) throws IOException { int len = name.length(); ByteBuffer b = encoding.encode(name); while (b.limit() > length && len > 0) { b = encoding.encode(name.substring(0, --len)); } final int limit = b.limit() - b.position(); System.arraycopy(b.array(), b.arrayOffset(), buf, offset, limit); // Pad any remaining output bytes with NUL for (int i = limit; i < length; ++i) { buf[offset + i] = 0; } return offset + length; } /** * Fill buffer with unsigned octal number, padded with leading zeroes. * * @param value number to convert to octal - treated as unsigned * @param buffer destination buffer * @param offset starting offset in buffer * @param length length of buffer to fill * @throws IllegalArgumentException if the value will not fit in the buffer */ public static void formatUnsignedOctalString(final long value, byte[] buffer, final int offset, final int length) { int remaining = length; remaining--; if (value == 0) { buffer[offset + remaining--] = (byte) '0'; } else { long val = value; for (; remaining >= 0 && val != 0; --remaining) { // CheckStyle:MagicNumber OFF buffer[offset + remaining] = (byte) ((byte) '0' + (byte) (val & 7)); val = val >>> 3; // CheckStyle:MagicNumber ON } if (val != 0){ throw new IllegalArgumentException (value+"="+Long.toOctalString(value)+ " will not fit in octal number buffer of length "+length); } } for (; remaining >= 0; --remaining) { // leading zeros buffer[offset + remaining] = (byte) '0'; } } /** * Write an octal integer into a buffer. * * Uses {@link #formatUnsignedOctalString} to format * the value as an octal string with leading zeros. * The converted number is followed by space and NUL * * @param value The value to write * @param buf The buffer to receive the output * @param offset The starting offset into the buffer * @param length The size of the output buffer * @return The updated offset, i.e offset+length * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer */ public static int formatOctalBytes(final long value, byte[] buf, final int offset, final int length) { int idx=length-2; // For space and trailing null formatUnsignedOctalString(value, buf, offset, idx); buf[offset + idx++] = (byte) ' '; // Trailing space buf[offset + idx] = 0; // Trailing null return offset + length; } /** * Write an octal long integer into a buffer. * * Uses {@link #formatUnsignedOctalString} to format * the value as an octal string with leading zeros. * The converted number is followed by a space. * * @param value The value to write as octal * @param buf The destinationbuffer. * @param offset The starting offset into the buffer. * @param length The length of the buffer * @return The updated offset * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer */ public static int formatLongOctalBytes(final long value, byte[] buf, final int offset, final int length) { int idx=length-1; // For space formatUnsignedOctalString(value, buf, offset, idx); buf[offset + idx] = (byte) ' '; // Trailing space return offset + length; } /** * Write an long integer into a buffer as an octal string if this * will fit, or as a binary number otherwise. * * Uses {@link #formatUnsignedOctalString} to format * the value as an octal string with leading zeros. * The converted number is followed by a space. * * @param value The value to write into the buffer. * @param buf The destination buffer. * @param offset The starting offset into the buffer. * @param length The length of the buffer. * @return The updated offset. * @throws IllegalArgumentException if the value (and trailer) * will not fit in the buffer. * @since 1.4 */ public static int formatLongOctalOrBinaryBytes( final long value, byte[] buf, final int offset, final int length) { // Check whether we are dealing with UID/GID or SIZE field final long maxAsOctalChar = length == TarConstants.UIDLEN ? TarConstants.MAXID : TarConstants.MAXSIZE; final boolean negative = value < 0; if (!negative && value <= maxAsOctalChar) { // OK to store as octal chars return formatLongOctalBytes(value, buf, offset, length); } if (length < 9) { formatLongBinary(value, buf, offset, length, negative); } formatBigIntegerBinary(value, buf, offset, length, negative); buf[offset] = (byte) (negative ? 0xff : 0x80); return offset + length; } private static void formatLongBinary(final long value, byte[] buf, final int offset, final int length, final boolean negative) { final int bits = (length - 1) * 8; final long max = 1l << bits; long val = Math.abs(value); if (val >= max) { throw new IllegalArgumentException("Value " + value + " is too large for " + length + " byte field."); } if (negative) { val ^= max - 1; val |= 0xff << bits; val++; } for (int i = offset + length - 1; i >= offset; i--) { buf[i] = (byte) val; val >>= 8; } } private static void formatBigIntegerBinary(final long value, byte[] buf, final int offset, final int length, final boolean negative) { BigInteger val = BigInteger.valueOf(value); final byte[] b = val.toByteArray(); final int len = b.length; final int off = offset + length - len; System.arraycopy(b, 0, buf, off, len); final byte fill = (byte) (negative ? 0xff : 0); for (int i = offset + 1; i < off; i++) { buf[i] = fill; } } /** * Writes an octal value into a buffer. * * Uses {@link #formatUnsignedOctalString} to format * the value as an octal string with leading zeros. * The converted number is followed by NUL and then space. * * @param value The value to convert * @param buf The destination buffer * @param offset The starting offset into the buffer. * @param length The size of the buffer. * @return The updated value of offset, i.e. offset+length * @throws IllegalArgumentException if the value (and trailer) will not fit in the buffer */ public static int formatCheckSumOctalBytes(final long value, byte[] buf, final int offset, final int length) { int idx=length-2; // for NUL and space formatUnsignedOctalString(value, buf, offset, idx); buf[offset + idx++] = 0; // Trailing null buf[offset + idx] = (byte) ' '; // Trailing space return offset + length; } /** * Compute the checksum of a tar entry header. * * @param buf The tar entry's header buffer. * @return The computed checksum. */ public static long computeCheckSum(final byte[] buf) { long sum = 0; for (byte element : buf) { sum += BYTE_MASK & element; } return sum; } /** * Wikipedia says: *
* The checksum is calculated by taking the sum of the unsigned byte values * of the header block with the eight checksum bytes taken to be ascii * spaces (decimal value 32). It is stored as a six digit octal number with * leading zeroes followed by a NUL and then a space. Various * implementations do not adhere to this format. For better compatibility, * ignore leading and trailing whitespace, and get the first six digits. In * addition, some historic tar implementations treated bytes as signed. * Implementations typically calculate the checksum both ways, and treat it * as good if either the signed or unsigned sum matches the included * checksum. *
*

* In addition there are * some tar files * that seem to have parts of their header cleared to zero (no detectable * magic bytes, etc.) but still have a reasonable-looking checksum field * present. It looks like we can detect such cases reasonably well by * checking whether the stored checksum is greater than the * computed unsigned checksum. That check is unlikely to pass on some * random file header, as it would need to have a valid sequence of * octal digits in just the right place. *

* The return value of this method should be treated as a best-effort * heuristic rather than an absolute and final truth. The checksum * verification logic may well evolve over time as more special cases * are encountered. * * @param header tar header * @return whether the checksum is reasonably good * @see COMPRESS-191 * @since 1.5 */ public static boolean verifyCheckSum(byte[] header) { long storedSum = 0; long unsignedSum = 0; long signedSum = 0; int digits = 0; for (int i = 0; i < header.length; i++) { byte b = header[i]; if (CHKSUM_OFFSET <= i && i < CHKSUM_OFFSET + CHKSUMLEN) { if ('0' <= b && b <= '7' && digits++ < 6) { storedSum = storedSum * 8 + b - '0'; } else if (digits > 0) { digits = 6; // only look at the first octal digit sequence } b = ' '; } unsignedSum += 0xff & b; signedSum += b; } return storedSum == unsignedSum || storedSum == signedSum || storedSum > unsignedSum; // COMPRESS-177 } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy