org.apache.commons.compress.archivers.zip.NioZipEncoding Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of commons-compress Show documentation
Show all versions of commons-compress Show documentation
Apache Commons Compress software defines an API for working with
compression and archive formats. These include: bzip2, gzip, pack200,
lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4,
Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.commons.compress.archivers.zip;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
/**
 * A ZipEncoding, which uses a {@link Charset} to encode names.
 * <p>
 * The methods of this class are reentrant: the only instance state is a final charset and a final flag, and a fresh encoder or decoder is created for every
 * call.
 * </p>
 */
final class NioZipEncoding implements ZipEncoding, CharsetAccessor {

    private static final char REPLACEMENT = '?';
    private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };
    private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);
    private static final char[] HEX_CHARS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

    /** Number of characters in a {@code %Uxxxx} escape sequence written for a character that cannot be encoded. */
    private static final int ESCAPE_LENGTH = 6;

    /**
     * Encodes all remaining characters of {@code cb}, growing the output buffer whenever the encoder reports an overflow.
     *
     * @param enc the encoder to use
     * @param cb  the characters to encode
     * @param out the buffer to write to
     * @return the buffer holding the output, which is a different instance than {@code out} if the buffer had to grow
     */
    private static ByteBuffer encodeFully(final CharsetEncoder enc, final CharBuffer cb, final ByteBuffer out) {
        ByteBuffer o = out;
        while (cb.hasRemaining()) {
            final CoderResult result = enc.encode(cb, o, false);
            if (result.isOverflow()) {
                final int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
                // presumably returns a larger buffer that holds the bytes written so far - see ZipEncodingHelper
                o = ZipEncodingHelper.growBufferBy(o, increment);
            } else if (result.isError()) {
                // The encoder rejected the input without consuming it. Skip the offending characters, otherwise
                // the position would never advance and this loop would never terminate.
                cb.position(Math.min(cb.limit(), cb.position() + result.length()));
            }
        }
        return o;
    }

    /**
     * Fills the given buffer with the {@code %Uxxxx} escape for a UTF-16 code unit, where {@code xxxx} is the upper case hexadecimal value of the code unit.
     *
     * @param cb a buffer with a capacity of at least six characters; its previous content is overwritten
     * @param c  the code unit to escape
     * @return {@code cb}, flipped and ready to be read
     */
    private static CharBuffer encodeSurrogate(final CharBuffer cb, final char c) {
        cb.position(0).limit(ESCAPE_LENGTH);
        cb.put('%');
        cb.put('U');
        cb.put(HEX_CHARS[c >> 12 & 0x0f]);
        cb.put(HEX_CHARS[c >> 8 & 0x0f]);
        cb.put(HEX_CHARS[c >> 4 & 0x0f]);
        cb.put(HEX_CHARS[c & 0x0f]);
        cb.flip();
        return cb;
    }

    /**
     * Estimates the size needed for the remaining characters.
     *
     * @param enc       encoder to use for estimates
     * @param charCount number of characters remaining
     * @return estimated size in bytes.
     */
    private static int estimateIncrementalEncodingSize(final CharsetEncoder enc, final int charCount) {
        return (int) Math.ceil(charCount * enc.averageBytesPerChar());
    }

    /**
     * Estimates the initial encoded size (in bytes) for a character buffer.
     * <p>
     * The estimate assumes that one character uses the maximum length encoding, whilst the rest use an average size encoding. This accounts for any BOM for
     * UTF-16, at the expense of a couple of extra bytes for UTF-8 encoded ASCII.
     * </p>
     *
     * @param enc       encoder to use for estimates
     * @param charCount number of characters in string
     * @return estimated size in bytes.
     */
    private static int estimateInitialBufferSize(final CharsetEncoder enc, final int charCount) {
        final float first = enc.maxBytesPerChar();
        final float rest = (charCount - 1) * enc.averageBytesPerChar();
        return (int) Math.ceil(first + rest);
    }

    private final Charset charset;
    private final boolean useReplacement;

    /**
     * Constructs an NioZipEncoding using the given charset.
     *
     * @param charset        The character set to use.
     * @param useReplacement should invalid characters be replaced, or reported.
     */
    NioZipEncoding(final Charset charset, final boolean useReplacement) {
        this.charset = charset;
        this.useReplacement = useReplacement;
    }

    /**
     * @see ZipEncoding#canEncode(String)
     */
    @Override
    public boolean canEncode(final String name) {
        return newEncoder().canEncode(name);
    }

    /**
     * @see ZipEncoding#decode(byte[])
     */
    @Override
    public String decode(final byte[] data) throws IOException {
        return newDecoder().decode(ByteBuffer.wrap(data)).toString();
    }

    /**
     * Encodes the given name with this instance's charset.
     * <p>
     * Whenever the encoder reports characters as unmappable or malformed, each affected UTF-16 code unit is written as a {@code %Uxxxx} escape instead.
     * </p>
     *
     * @param name the name to encode
     * @return a buffer positioned at zero whose limit marks the end of the encoded bytes
     * @see ZipEncoding#encode(String)
     */
    @Override
    public ByteBuffer encode(final String name) {
        final CharsetEncoder enc = newEncoder();
        final CharBuffer cb = CharBuffer.wrap(name);
        CharBuffer tmp = null;
        // Separate encoder that is only used for canEncode(char) look-ups, created on demand.
        CharsetEncoder probe = null;
        ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));

        while (cb.hasRemaining()) {
            final CoderResult res = enc.encode(cb, out, false);

            if (res.isUnmappable() || res.isMalformed()) {
                // write the unmappable characters in utf-16
                // pseudo-URL encoding style to ByteBuffer.
                final int spaceForSurrogate = estimateIncrementalEncodingSize(enc, ESCAPE_LENGTH * res.length());
                if (spaceForSurrogate > out.remaining()) {
                    // if the destination buffer isn't oversized, assume that the presence of one
                    // unmappable character makes it likely that there will be more. Find all the
                    // un-encoded characters and allocate space based on those estimates.
                    //
                    // "enc" is in the middle of an encoding operation here and CharsetEncoder.canEncode
                    // is specified to throw IllegalStateException in that state, so a second encoder
                    // answers the question instead.
                    if (probe == null) {
                        probe = newEncoder();
                    }
                    int charCount = 0;
                    for (int i = cb.position(); i < cb.limit(); i++) {
                        charCount += probe.canEncode(cb.get(i)) ? 1 : ESCAPE_LENGTH;
                    }
                    // never ask for less than the pending escape needs; this keeps the increment positive
                    final int totalExtraSpace = Math.max(spaceForSurrogate, estimateIncrementalEncodingSize(enc, charCount));
                    out = ZipEncodingHelper.growBufferBy(out, totalExtraSpace - out.remaining());
                }
                if (tmp == null) {
                    tmp = CharBuffer.allocate(ESCAPE_LENGTH);
                }
                // consume every rejected code unit and write its escape sequence
                for (int i = 0; i < res.length(); ++i) {
                    out = encodeFully(enc, encodeSurrogate(tmp, cb.get()), out);
                }
            } else if (res.isOverflow()) {
                final int increment = estimateIncrementalEncodingSize(enc, cb.remaining());
                out = ZipEncodingHelper.growBufferBy(out, increment);
            } else if (res.isUnderflow() || res.isError()) {
                break;
            }
        }
        // tell the encoder we are done
        enc.encode(cb, out, true);
        // may have caused underflow, but that's been ignored traditionally

        out.limit(out.position());
        out.rewind();
        return out;
    }

    /**
     * Gets the charset this encoding was constructed with.
     *
     * @return the charset used to encode and decode names
     */
    @Override
    public Charset getCharset() {
        return charset;
    }

    /**
     * Creates a decoder that either reports malformed and unmappable input or replaces it with {@code ?}, depending on {@code useReplacement}.
     *
     * @return a new decoder for this instance's charset
     */
    private CharsetDecoder newDecoder() {
        if (!useReplacement) {
            return this.charset.newDecoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
        }
        return charset.newDecoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE)
                .replaceWith(REPLACEMENT_STRING);
    }

    /**
     * Creates an encoder that either replaces malformed and unmappable input with {@code ?} or reports it, depending on {@code useReplacement}.
     *
     * @return a new encoder for this instance's charset
     */
    private CharsetEncoder newEncoder() {
        if (useReplacement) {
            return charset.newEncoder().onMalformedInput(CodingErrorAction.REPLACE).onUnmappableCharacter(CodingErrorAction.REPLACE)
                    .replaceWith(REPLACEMENT_BYTES);
        }
        return charset.newEncoder().onMalformedInput(CodingErrorAction.REPORT).onUnmappableCharacter(CodingErrorAction.REPORT);
    }
}