org.apache.commons.compress.archivers.tar.TarArchiveOutputStream Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-compress Show documentation
Show all versions of commons-compress Show documentation
Apache Commons Compress software defines an API for working with
compression and archive formats. These include: bzip2, gzip, pack200,
lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4,
Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.archivers.tar;
import java.io.File;
import java.io.IOException;
import java.io.OutputStream;
import java.io.StringWriter;
import java.io.UnsupportedEncodingException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;
import org.apache.commons.compress.archivers.ArchiveEntry;
import org.apache.commons.compress.archivers.ArchiveOutputStream;
import org.apache.commons.compress.archivers.zip.ZipEncoding;
import org.apache.commons.compress.archivers.zip.ZipEncodingHelper;
import org.apache.commons.compress.utils.CharsetNames;
import org.apache.commons.compress.utils.CountingOutputStream;
import org.apache.commons.compress.utils.FixedLengthBlockOutputStream;
/**
 * The TarArchiveOutputStream writes a UNIX tar archive as an OutputStream. Methods are provided
 * to put entries, and then write their contents by writing to this stream using write().
 *
 * <p>tar archives consist of a sequence of records of 512 bytes each
 * that are grouped into blocks. Prior to Apache Commons Compress 1.14
 * it has been possible to configure a record size different from 512
 * bytes and arbitrary block sizes. Starting with Compress 1.15 512 is
 * the only valid option for the record size and the block size must
 * be a multiple of 512. Also the default block size changed from
 * 10240 bytes prior to Compress 1.15 to 512 bytes with Compress
 * 1.15.</p>
 *
 * @NotThreadSafe
 */
public class TarArchiveOutputStream extends ArchiveOutputStream {

    /**
     * Fail if a long file name is required in the archive.
     */
    public static final int LONGFILE_ERROR = 0;

    /**
     * Long paths will be truncated in the archive.
     */
    public static final int LONGFILE_TRUNCATE = 1;

    /**
     * GNU tar extensions are used to store long file names in the archive.
     */
    public static final int LONGFILE_GNU = 2;

    /**
     * POSIX/PAX extensions are used to store long file names in the archive.
     */
    public static final int LONGFILE_POSIX = 3;

    /**
     * Fail if a big number (e.g. size &gt; 8GiB) is required in the archive.
     */
    public static final int BIGNUMBER_ERROR = 0;

    /**
     * star/GNU tar/BSD tar extensions are used to store big number in the archive.
     */
    public static final int BIGNUMBER_STAR = 1;

    /**
     * POSIX/PAX extensions are used to store big numbers in the archive.
     */
    public static final int BIGNUMBER_POSIX = 2;

    /** The only record size supported; every header and EOF record has this length. */
    private static final int RECORD_SIZE = 512;

    /** Sentinel block size used by the constructors that take no block size. */
    private static final int BLOCK_SIZE_UNSPECIFIED = -511;

    private static final ZipEncoding ASCII =
        ZipEncodingHelper.getZipEncoding("ASCII");

    /** Size declared in the header of the entry currently being written. */
    private long currSize;

    /** Name of the entry currently being written; only used in error messages. */
    private String currName;

    /** Number of content bytes written so far for the current entry. */
    private long currBytes;

    /** Scratch buffer of RECORD_SIZE bytes reused for header and EOF records. */
    private final byte[] recordBuf;

    private int longFileMode = LONGFILE_ERROR;

    private int bigNumberMode = BIGNUMBER_ERROR;

    /**
     * Number of records written so far. Kept as a long so that the count cannot wrap around
     * for very large archives, which would corrupt the padding computed in padAsNeeded().
     */
    private long recordsWritten;

    /** Number of records that make up one block; used to pad the end of the archive. */
    private final int recordsPerBlock;

    private boolean closed = false;

    /**
     * Indicates if putArchiveEntry has been called without closeArchiveEntry
     */
    private boolean haveUnclosedEntry = false;

    /**
     * indicates if this archive is finished
     */
    private boolean finished = false;

    private final FixedLengthBlockOutputStream out;

    private final CountingOutputStream countingOut;

    private final ZipEncoding zipEncoding;

    // the provided encoding (for unit tests)
    final String encoding;

    private boolean addPaxHeadersForNonAsciiNames = false;

    /**
     * Constructor for TarArchiveOutputStream.
     *
     * <p>Uses a block size of 512 bytes.</p>
     *
     * @param os the output stream to use
     */
    public TarArchiveOutputStream(final OutputStream os) {
        this(os, BLOCK_SIZE_UNSPECIFIED);
    }

    /**
     * Constructor for TarArchiveOutputStream.
     *
     * <p>Uses a block size of 512 bytes.</p>
     *
     * @param os the output stream to use
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     */
    public TarArchiveOutputStream(final OutputStream os, final String encoding) {
        this(os, BLOCK_SIZE_UNSPECIFIED, encoding);
    }

    /**
     * Constructor for TarArchiveOutputStream.
     *
     * @param os the output stream to use
     * @param blockSize the block size to use. Must be a multiple of 512 bytes.
     */
    public TarArchiveOutputStream(final OutputStream os, final int blockSize) {
        this(os, blockSize, null);
    }

    /**
     * Constructor for TarArchiveOutputStream.
     *
     * @param os the output stream to use
     * @param blockSize the block size to use
     * @param recordSize the record size to use. Must be 512 bytes.
     * @deprecated recordSize must always be 512 bytes. An IllegalArgumentException will be thrown
     * if any other value is used
     */
    @Deprecated
    public TarArchiveOutputStream(final OutputStream os, final int blockSize,
        final int recordSize) {
        this(os, blockSize, recordSize, null);
    }

    /**
     * Constructor for TarArchiveOutputStream.
     *
     * @param os the output stream to use
     * @param blockSize the block size to use. Must be a multiple of 512 bytes.
     * @param recordSize the record size to use. Must be 512 bytes.
     * @param encoding name of the encoding to use for file names
     * @since 1.4
     * @deprecated recordSize must always be 512 bytes. An IllegalArgumentException will be thrown
     * if any other value is used.
     */
    @Deprecated
    public TarArchiveOutputStream(final OutputStream os, final int blockSize,
        final int recordSize, final String encoding) {
        // The delegating call has to come first, so the record size can only be checked afterwards.
        this(os, blockSize, encoding);
        if (recordSize != RECORD_SIZE) {
            throw new IllegalArgumentException(
                "Tar record size must always be 512 bytes. Attempt to set size of " + recordSize);
        }
    }

    /**
     * Constructor for TarArchiveOutputStream.
     *
     * @param os the output stream to use
     * @param blockSize the block size to use. Must be a multiple of 512 bytes.
     * @param encoding name of the encoding to use for file names
     * @throws IllegalArgumentException if the block size is not a positive multiple of 512
     * @since 1.4
     */
    public TarArchiveOutputStream(final OutputStream os, final int blockSize,
        final String encoding) {
        final int realBlockSize =
            BLOCK_SIZE_UNSPECIFIED == blockSize ? RECORD_SIZE : blockSize;
        if (realBlockSize <= 0 || realBlockSize % RECORD_SIZE != 0) {
            throw new IllegalArgumentException("Block size must be a multiple of 512 bytes. Attempt to use set size of " + blockSize);
        }
        countingOut = new CountingOutputStream(os);
        out = new FixedLengthBlockOutputStream(countingOut, RECORD_SIZE);
        this.encoding = encoding;
        this.zipEncoding = ZipEncodingHelper.getZipEncoding(encoding);
        this.recordBuf = new byte[RECORD_SIZE];
        // The block size only determines how much padding finish() appends.
        this.recordsPerBlock = realBlockSize / RECORD_SIZE;
    }

    /**
     * Set the long file mode. This can be LONGFILE_ERROR(0), LONGFILE_TRUNCATE(1),
     * LONGFILE_GNU(2) or LONGFILE_POSIX(3). This specifies the treatment of long file names
     * (names &gt;= TarConstants.NAMELEN). Default is LONGFILE_ERROR.
     *
     * @param longFileMode the mode to use
     */
    public void setLongFileMode(final int longFileMode) {
        this.longFileMode = longFileMode;
    }

    /**
     * Set the big number mode. This can be BIGNUMBER_ERROR(0), BIGNUMBER_STAR(1) or
     * BIGNUMBER_POSIX(2). This specifies the treatment of big files (sizes &gt;
     * TarConstants.MAXSIZE) and other numeric values too big to fit into a traditional tar
     * header. Default is BIGNUMBER_ERROR.
     *
     * @param bigNumberMode the mode to use
     * @since 1.4
     */
    public void setBigNumberMode(final int bigNumberMode) {
        this.bigNumberMode = bigNumberMode;
    }

    /**
     * Whether to add a PAX extension header for non-ASCII file names.
     *
     * @param b whether to add a PAX extension header for non-ASCII file names.
     * @since 1.4
     */
    public void setAddPaxHeadersForNonAsciiNames(final boolean b) {
        addPaxHeadersForNonAsciiNames = b;
    }

    /**
     * Returns the value of {@link #getBytesWritten()} narrowed to an int.
     *
     * @return the number of bytes written, truncated to int
     * @deprecated the narrowing cast loses information for large archives; use
     * {@link #getBytesWritten()} instead
     */
    @Deprecated
    @Override
    public int getCount() {
        return (int) getBytesWritten();
    }

    /**
     * Returns the number of bytes counted by the counting stream that wraps the output stream
     * given to the constructor.
     *
     * @return the number of bytes written
     */
    @Override
    public long getBytesWritten() {
        return countingOut.getBytesWritten();
    }

    /**
     * Ends the TAR archive without closing the underlying OutputStream.
     *
     * <p>An archive consists of a series of file entries terminated by an
     * end-of-archive entry, which consists of two 512 blocks of zero bytes.
     * POSIX.1 requires two EOF records, like some other implementations.</p>
     *
     * @throws IOException if the archive has already been finished, an entry is still open,
     * or writing fails
     */
    @Override
    public void finish() throws IOException {
        if (finished) {
            throw new IOException("This archive has already been finished");
        }
        if (haveUnclosedEntry) {
            throw new IOException("This archive contains unclosed entries.");
        }
        writeEOFRecord();
        writeEOFRecord();
        padAsNeeded();
        out.flush();
        finished = true;
    }

    /**
     * Finishes the archive if that has not happened yet and closes the underlying OutputStream.
     *
     * <p>The underlying stream is closed even if finishing the archive fails.</p>
     *
     * @throws IOException on error
     */
    @Override
    public void close() throws IOException {
        try {
            if (!finished) {
                finish();
            }
        } finally {
            if (!closed) {
                out.close();
                closed = true;
            }
        }
    }

    /**
     * Get the record size being used by this stream's TarBuffer.
     *
     * @return The TarBuffer record size, which is always 512.
     * @deprecated
     */
    @Deprecated
    public int getRecordSize() {
        return RECORD_SIZE;
    }

    /**
     * Put an entry on the output stream. This writes the entry's header record and positions the
     * output stream for writing the contents of the entry. Once this method is called, the stream
     * is ready for calls to write() to write the entry's contents. Once the contents are written,
     * closeArchiveEntry() <B>MUST</B> be called to ensure that all buffered data is completely
     * written to the output stream.
     *
     * <p>A global PAX header entry is written and closed completely by this method; its content
     * is generated from the entry's extra PAX headers.</p>
     *
     * @param archiveEntry The TarEntry to be written to the archive.
     * @throws IOException on error
     * @throws ClassCastException if archiveEntry is not an instance of TarArchiveEntry
     */
    @Override
    public void putArchiveEntry(final ArchiveEntry archiveEntry) throws IOException {
        if (finished) {
            throw new IOException("Stream has already been finished");
        }
        final TarArchiveEntry entry = (TarArchiveEntry) archiveEntry;
        if (entry.isGlobalPaxHeader()) {
            final byte[] data = encodeExtendedPaxHeadersContents(entry.getExtraPaxHeaders());
            entry.setSize(data.length);
            entry.writeEntryHeader(recordBuf, zipEncoding, bigNumberMode == BIGNUMBER_STAR);
            writeRecord(recordBuf);
            currSize = entry.getSize();
            currBytes = 0;
            this.haveUnclosedEntry = true;
            write(data);
            closeArchiveEntry();
            return;
        }

        final Map<String, String> paxHeaders = new HashMap<>();
        final String entryName = entry.getName();
        final boolean paxHeaderContainsPath = handleLongName(entry, entryName, paxHeaders, "path",
            TarConstants.LF_GNUTYPE_LONGNAME, "file name");

        final String linkName = entry.getLinkName();
        final boolean paxHeaderContainsLinkPath = linkName != null && !linkName.isEmpty()
            && handleLongName(entry, linkName, paxHeaders, "linkpath",
            TarConstants.LF_GNUTYPE_LONGLINK, "link name");

        if (bigNumberMode == BIGNUMBER_POSIX) {
            addPaxHeadersForBigNumbers(paxHeaders, entry);
        } else if (bigNumberMode != BIGNUMBER_STAR) {
            failForBigNumbers(entry);
        }

        if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsPath
            && !ASCII.canEncode(entryName)) {
            paxHeaders.put("path", entryName);
        }

        if (addPaxHeadersForNonAsciiNames && !paxHeaderContainsLinkPath
            && (entry.isLink() || entry.isSymbolicLink())
            && !ASCII.canEncode(linkName)) {
            paxHeaders.put("linkpath", linkName);
        }
        // Headers supplied explicitly on the entry are added last and so override computed ones.
        paxHeaders.putAll(entry.getExtraPaxHeaders());

        if (!paxHeaders.isEmpty()) {
            writePaxHeaders(entry, entryName, paxHeaders);
        }

        entry.writeEntryHeader(recordBuf, zipEncoding, bigNumberMode == BIGNUMBER_STAR);
        writeRecord(recordBuf);

        currBytes = 0;
        // Directories carry no content regardless of the size stored in the entry.
        currSize = entry.isDirectory() ? 0 : entry.getSize();
        currName = entryName;
        haveUnclosedEntry = true;
    }

    /**
     * Close an entry. This method MUST be called for all file entries that contain data. The reason
     * is that we must buffer data written to the stream in order to satisfy the buffer's record
     * based writes. Thus, there may be data fragments still being assembled that must be written to
     * the output stream before this entry is closed and the next entry written.
     *
     * @throws IOException if the archive is already finished, no entry is open, fewer bytes
     * than declared in the entry's header have been written, or writing fails
     */
    @Override
    public void closeArchiveEntry() throws IOException {
        if (finished) {
            throw new IOException("Stream has already been finished");
        }
        if (!haveUnclosedEntry) {
            throw new IOException("No current entry to close");
        }
        out.flushBlock();
        if (currBytes < currSize) {
            throw new IOException("Entry '" + currName + "' closed at '"
                + currBytes
                + "' before the '" + currSize
                + "' bytes specified in the header were written");
        }
        // Account for the content records, rounding a partial trailing record up.
        recordsWritten += currSize / RECORD_SIZE;
        if (0 != currSize % RECORD_SIZE) {
            recordsWritten++;
        }
        haveUnclosedEntry = false;
    }

    /**
     * Writes bytes to the current tar archive entry. This method is aware of the current entry and
     * will throw an exception if you attempt to write bytes past the length specified for the
     * current entry.
     *
     * @param wBuf The buffer to write to the archive.
     * @param wOffset The offset in the buffer from which to get bytes.
     * @param numToWrite The number of bytes to write.
     * @throws IOException on error, or if the write would exceed the size declared in the header
     * @throws IllegalStateException if there is no open entry
     */
    @Override
    public void write(final byte[] wBuf, int wOffset, int numToWrite) throws IOException {
        if (!haveUnclosedEntry) {
            throw new IllegalStateException("No current tar entry");
        }
        if (currBytes + numToWrite > currSize) {
            throw new IOException("Request to write '" + numToWrite
                + "' bytes exceeds size in header of '"
                + currSize + "' bytes for entry '"
                + currName + "'");
        }
        out.write(wBuf, wOffset, numToWrite);
        currBytes += numToWrite;
    }

    /**
     * Writes a PAX extended header with the given map as contents.
     *
     * <p>The header is written as a separate entry named "./PaxHeaders.X/" followed by the
     * 7-bit-stripped entry name, shortened so that it stays below TarConstants.NAMELEN.</p>
     *
     * @param entry the entry the headers belong to; its modification time is copied
     * @param entryName name of that entry
     * @param headers the PAX headers to write
     * @throws IOException on error
     * @since 1.4
     */
    void writePaxHeaders(final TarArchiveEntry entry,
        final String entryName,
        final Map<String, String> headers) throws IOException {
        String name = "./PaxHeaders.X/" + stripTo7Bits(entryName);
        if (name.length() >= TarConstants.NAMELEN) {
            name = name.substring(0, TarConstants.NAMELEN - 1);
        }
        final TarArchiveEntry pex = new TarArchiveEntry(name,
            TarConstants.LF_PAX_EXTENDED_HEADER_LC);
        transferModTime(entry, pex);

        final byte[] data = encodeExtendedPaxHeadersContents(headers);
        pex.setSize(data.length);
        putArchiveEntry(pex);
        write(data);
        closeArchiveEntry();
    }

    /**
     * Serializes the given headers as lines of the form "length key=value\n" encoded as UTF-8,
     * where length is the number of bytes of the whole line including the length field itself.
     *
     * @param headers the headers to encode
     * @return the UTF-8 encoded header lines
     * @throws UnsupportedEncodingException if UTF-8 is not supported
     */
    private byte[] encodeExtendedPaxHeadersContents(final Map<String, String> headers)
        throws UnsupportedEncodingException {
        final StringWriter w = new StringWriter();
        for (final Map.Entry<String, String> h : headers.entrySet()) {
            final String key = h.getKey();
            final String value = h.getValue();
            int len = key.length() + value.length()
                + 3 /* blank, equals and newline */
                + 2 /* guess 9 < actual length < 100 */;
            String line = len + " " + key + "=" + value + "\n";
            int actualLength = line.getBytes(CharsetNames.UTF_8).length;
            while (len != actualLength) {
                // Adjust for cases where length < 10 or > 100
                // or where UTF-8 encoding isn't a single octet
                // per character.
                // Must be in loop as size may go from 99 to 100 in
                // first pass so we'd need a second.
                len = actualLength;
                line = len + " " + key + "=" + value + "\n";
                actualLength = line.getBytes(CharsetNames.UTF_8).length;
            }
            w.write(line);
        }
        return w.toString().getBytes(CharsetNames.UTF_8);
    }

    /**
     * Masks every character of the name down to its lowest seven bits and replaces characters
     * for which {@link #shouldBeReplaced} returns true with an underscore.
     *
     * @param name the name to strip
     * @return the stripped name, of the same length as the input
     */
    private String stripTo7Bits(final String name) {
        final int length = name.length();
        final StringBuilder result = new StringBuilder(length);
        for (int i = 0; i < length; i++) {
            final char stripped = (char) (name.charAt(i) & 0x7F);
            if (shouldBeReplaced(stripped)) {
                result.append("_");
            } else {
                result.append(stripped);
            }
        }
        return result.toString();
    }

    /**
     * @return true if the character could lead to problems when used inside a TarArchiveEntry name
     * for a PAX header.
     */
    private boolean shouldBeReplaced(final char c) {
        return c == 0 // would be read as Trailing null
            || c == '/' // when used as last character TAE will consider the PAX header a directory
            || c == '\\'; // same as '/' as slashes get "normalized" on Windows
    }

    /**
     * Write an EOF (end of archive) record to the tar archive. An EOF record consists of a record
     * of all zeros.
     *
     * @throws IOException on error
     */
    private void writeEOFRecord() throws IOException {
        Arrays.fill(recordBuf, (byte) 0);
        writeRecord(recordBuf);
    }

    /**
     * Flushes the wrapped block output stream.
     *
     * @throws IOException on error
     */
    @Override
    public void flush() throws IOException {
        out.flush();
    }

    /**
     * Creates a TarArchiveEntry for the given file and entry name.
     *
     * @param inputFile the file to create the entry from
     * @param entryName the name to use for the entry
     * @return the new entry
     * @throws IOException if the archive has already been finished
     */
    @Override
    public ArchiveEntry createArchiveEntry(final File inputFile, final String entryName)
        throws IOException {
        if (finished) {
            throw new IOException("Stream has already been finished");
        }
        return new TarArchiveEntry(inputFile, entryName);
    }

    /**
     * Write an archive record to the archive.
     *
     * @param record The record data to write to the archive.
     * @throws IOException on error, or if the record is not exactly RECORD_SIZE bytes long
     */
    private void writeRecord(final byte[] record) throws IOException {
        if (record.length != RECORD_SIZE) {
            throw new IOException("Record to write has length '"
                + record.length
                + "' which is not the record size of '"
                + RECORD_SIZE + "'");
        }

        out.write(record);
        recordsWritten++;
    }

    /**
     * Appends EOF records until the number of records written is a multiple of the number of
     * records per block.
     *
     * @throws IOException on error
     */
    private void padAsNeeded() throws IOException {
        // recordsWritten is non-negative and recordsPerBlock is a positive int, so the remainder
        // always fits into an int.
        final int start = (int) (recordsWritten % recordsPerBlock);
        if (start != 0) {
            for (int i = start; i < recordsPerBlock; i++) {
                writeEOFRecord();
            }
        }
    }

    /**
     * Adds a PAX header for every numeric field of the entry that is negative or exceeds the
     * limit of the traditional header, and fails if the mode is out of range.
     */
    private void addPaxHeadersForBigNumbers(final Map<String, String> paxHeaders,
        final TarArchiveEntry entry) {
        addPaxHeaderForBigNumber(paxHeaders, "size", entry.getSize(),
            TarConstants.MAXSIZE);
        addPaxHeaderForBigNumber(paxHeaders, "gid", entry.getLongGroupId(),
            TarConstants.MAXID);
        addPaxHeaderForBigNumber(paxHeaders, "mtime",
            entry.getModTime().getTime() / 1000,
            TarConstants.MAXSIZE);
        addPaxHeaderForBigNumber(paxHeaders, "uid", entry.getLongUserId(),
            TarConstants.MAXID);
        // star extensions by J\u00f6rg Schilling
        addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devmajor",
            entry.getDevMajor(), TarConstants.MAXID);
        addPaxHeaderForBigNumber(paxHeaders, "SCHILY.devminor",
            entry.getDevMinor(), TarConstants.MAXID);
        // there is no PAX header for file mode
        failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
    }

    /**
     * Stores the value under the given header name if it is negative or greater than maxValue.
     */
    private void addPaxHeaderForBigNumber(final Map<String, String> paxHeaders,
        final String header, final long value,
        final long maxValue) {
        if (value < 0 || value > maxValue) {
            paxHeaders.put(header, String.valueOf(value));
        }
    }

    /**
     * Throws a RuntimeException if any numeric field of the entry is negative or exceeds the
     * limit of the traditional header.
     */
    private void failForBigNumbers(final TarArchiveEntry entry) {
        failForBigNumber("entry size", entry.getSize(), TarConstants.MAXSIZE);
        failForBigNumberWithPosixMessage("group id", entry.getLongGroupId(), TarConstants.MAXID);
        failForBigNumber("last modification time",
            entry.getModTime().getTime() / 1000,
            TarConstants.MAXSIZE);
        failForBigNumber("user id", entry.getLongUserId(), TarConstants.MAXID);
        failForBigNumber("mode", entry.getMode(), TarConstants.MAXID);
        failForBigNumber("major device number", entry.getDevMajor(),
            TarConstants.MAXID);
        failForBigNumber("minor device number", entry.getDevMinor(),
            TarConstants.MAXID);
    }

    private void failForBigNumber(final String field, final long value, final long maxValue) {
        failForBigNumber(field, value, maxValue, "");
    }

    private void failForBigNumberWithPosixMessage(final String field, final long value,
        final long maxValue) {
        failForBigNumber(field, value, maxValue,
            " Use STAR or POSIX extensions to overcome this limit");
    }

    /**
     * Throws a RuntimeException naming the field if the value is negative or greater than
     * maxValue.
     */
    private void failForBigNumber(final String field, final long value, final long maxValue,
        final String additionalMsg) {
        if (value < 0 || value > maxValue) {
            throw new RuntimeException(field + " '" + value //NOSONAR
                + "' is too big ( > "
                + maxValue + " )." + additionalMsg);
        }
    }

    /**
     * Handles long file or link names according to the longFileMode setting.
     *
     * <p>I.e. if the given name is too long to be written to a plain tar header then</p>
     * <ul>
     *   <li>it creates a pax header whose name is given by the paxHeaderName parameter if
     *       longFileMode is POSIX</li>
     *   <li>it creates a GNU longlink entry whose type is given by the linkType parameter
     *       if longFileMode is GNU</li>
     *   <li>it throws an exception if longFileMode is ERROR</li>
     *   <li>it truncates the name if longFileMode is TRUNCATE</li>
     * </ul>
     *
     * @param entry entry the name belongs to
     * @param name the name to write
     * @param paxHeaders current map of pax headers
     * @param paxHeaderName name of the pax header to write
     * @param linkType type of the GNU entry to write
     * @param fieldName the name of the field
     * @return whether the name has been added to the map of pax headers.
     * @throws IOException on error
     */
    private boolean handleLongName(final TarArchiveEntry entry, final String name,
        final Map<String, String> paxHeaders,
        final String paxHeaderName, final byte linkType, final String fieldName)
        throws IOException {
        final ByteBuffer encodedName = zipEncoding.encode(name);
        final int len = encodedName.limit() - encodedName.position();
        if (len >= TarConstants.NAMELEN) {

            if (longFileMode == LONGFILE_POSIX) {
                paxHeaders.put(paxHeaderName, name);
                return true;
            } else if (longFileMode == LONGFILE_GNU) {
                // create a TarEntry for the LongLink, the contents
                // of which are the link's name
                final TarArchiveEntry longLinkEntry = new TarArchiveEntry(TarConstants.GNU_LONGLINK,
                    linkType);

                longLinkEntry.setSize(len + 1L); // +1 for NUL
                transferModTime(entry, longLinkEntry);
                putArchiveEntry(longLinkEntry);
                // The encoded bytes start at the buffer's position, so the position has to be
                // added to the offset of the backing array.
                write(encodedName.array(),
                    encodedName.arrayOffset() + encodedName.position(), len);
                write(0); // NUL terminator
                closeArchiveEntry();
            } else if (longFileMode != LONGFILE_TRUNCATE) {
                throw new RuntimeException(fieldName + " '" + name //NOSONAR
                    + "' is too long ( > "
                    + TarConstants.NAMELEN + " bytes)");
            }
        }
        return false;
    }

    /**
     * Copies the modification time of one entry to another, substituting the epoch if the time
     * in seconds is negative or greater than TarConstants.MAXSIZE.
     */
    private void transferModTime(final TarArchiveEntry from, final TarArchiveEntry to) {
        Date fromModTime = from.getModTime();
        final long fromModTimeSeconds = fromModTime.getTime() / 1000;
        if (fromModTimeSeconds < 0 || fromModTimeSeconds > TarConstants.MAXSIZE) {
            fromModTime = new Date(0);
        }
        to.setModTime(fromModTime);
    }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy