com.feilong.lib.compress.archivers.zip.NioZipEncoding Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of feilong Show documentation
Show all versions of feilong Show documentation
feilong is a suite of core and expanded libraries that includes utility classes, HTTP, Excel, CSV, and I/O classes, and much more.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package com.feilong.lib.compress.archivers.zip;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CoderResult;
import java.nio.charset.CodingErrorAction;
/**
 * A ZipEncoding, which uses a java.nio {@link
 * java.nio.charset.Charset Charset} to encode names.
 *
 * <p>
 * Every call obtains a fresh encoder or decoder from the configured charset and
 * both instance fields are final, so the methods of this class are reentrant.
 * </p>
 *
 * @Immutable
 */
class NioZipEncoding implements ZipEncoding, CharsetAccessor {

    /** Character substituted for input that cannot be converted when replacement is enabled. */
    private static final char REPLACEMENT = '?';

    private static final byte[] REPLACEMENT_BYTES = { (byte) REPLACEMENT };

    private static final String REPLACEMENT_STRING = String.valueOf(REPLACEMENT);

    private static final char[] HEX_CHARS = {
        '0', '1', '2', '3', '4', '5', '6', '7',
        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };

    /** Number of characters in one {@code %Uxxxx} escape sequence. */
    private static final int ESCAPE_LENGTH = 6;

    private final Charset charset;

    private final boolean useReplacement;

    /**
     * Construct an NioZipEncoding using the given charset.
     *
     * @param charset
     *            The character set to use.
     * @param useReplacement
     *            should invalid characters be replaced, or reported.
     */
    NioZipEncoding(final Charset charset, boolean useReplacement) {
        this.charset = charset;
        this.useReplacement = useReplacement;
    }

    /**
     * @return the charset this encoding was constructed with.
     */
    @Override
    public Charset getCharset() {
        return charset;
    }

    /**
     * @see ZipEncoding#canEncode(java.lang.String)
     */
    @Override
    public boolean canEncode(final String name) {
        final CharsetEncoder enc = newEncoder();
        return enc.canEncode(name);
    }

    /**
     * Encodes {@code name} into bytes of the configured charset.
     *
     * <p>
     * When the encoder reports a character as malformed or unmappable (which only
     * happens when replacement is disabled), each offending UTF-16 code unit is
     * written as the six-character escape {@code %Uxxxx} instead.
     * </p>
     *
     * <p>
     * The encoder is driven through the complete sequence documented by
     * {@link CharsetEncoder}: encode without end-of-input, encode with end-of-input,
     * then flush. The result of the end-of-input step is processed rather than
     * ignored, so a lone high surrogate at the very end of the name is escaped (or
     * replaced) like any other invalid character instead of being dropped.
     * </p>
     *
     * @see ZipEncoding#encode(java.lang.String)
     */
    @Override
    public ByteBuffer encode(final String name) {
        final CharsetEncoder enc = newEncoder();
        final CharBuffer cb = CharBuffer.wrap(name);
        ByteBuffer out = ByteBuffer.allocate(estimateInitialBufferSize(enc, cb.remaining()));

        if (cb.hasRemaining()) {
            out = encodeRemaining(enc, cb, out, false);
        }
        // tell the encoder we are done; this is the only point at which a dangling
        // high surrogate at the end of the input is reported as malformed
        out = encodeRemaining(enc, cb, out, true);

        // let stateful encoders emit whatever they need to terminate the sequence
        CoderResult flushed = enc.flush(out);
        while (flushed.isOverflow()) {
            out = ZipEncodingHelper.growBufferBy(out, Math.max(1, (int) Math.ceil(enc.maxBytesPerChar())));
            flushed = enc.flush(out);
        }

        out.limit(out.position());
        out.rewind();
        return out;
    }

    /**
     * Decodes {@code data} using the configured charset.
     *
     * @throws IOException
     *             propagated from {@link CharsetDecoder#decode(ByteBuffer)}
     * @see ZipEncoding#decode(byte[])
     */
    @Override
    public String decode(final byte[] data) throws IOException {
        return newDecoder().decode(ByteBuffer.wrap(data)).toString();
    }

    /**
     * Runs the encoder over everything left in {@code cb} until it reports underflow,
     * enlarging the output buffer on overflow and escaping characters the encoder
     * rejects.
     *
     * @param enc
     *            encoder performing the current encoding operation
     * @param cb
     *            remaining input
     * @param out
     *            current output buffer
     * @param endOfInput
     *            value passed through to {@link CharsetEncoder#encode(CharBuffer, ByteBuffer, boolean)}
     * @return the buffer holding the output so far; may be a different instance than {@code out}
     */
    private ByteBuffer encodeRemaining(final CharsetEncoder enc, final CharBuffer cb, final ByteBuffer out,
                    final boolean endOfInput) {
        ByteBuffer o = out;
        CharBuffer escape = null;
        while (true) {
            final CoderResult res = enc.encode(cb, o, endOfInput);
            if (res.isUnderflow()) {
                return o;
            }
            if (res.isOverflow()) {
                final int increment = Math.max(1, estimateIncrementalEncodingSize(enc, cb.remaining()));
                o = ZipEncodingHelper.growBufferBy(o, increment);
                continue;
            }

            // malformed or unmappable input: write the offending characters in utf-16
            // pseudo-URL encoding style to the ByteBuffer.
            final int spaceForSurrogate = estimateIncrementalEncodingSize(enc, ESCAPE_LENGTH * res.length());
            if (spaceForSurrogate > o.remaining()) {
                // if the destination buffer isn't over sized, assume that the presence of one
                // unmappable character makes it likely that there will be more. Find all the
                // un-encoded characters and allocate space based on those estimates.
                // A separate encoder is used for probing because canEncode is documented to
                // throw IllegalStateException while an encoding operation is in progress.
                final CharsetEncoder probe = charset.newEncoder();
                int charCount = 0;
                for (int i = cb.position(); i < cb.limit(); i++) {
                    charCount += probe.canEncode(cb.get(i)) ? 1 : ESCAPE_LENGTH;
                }
                final int totalExtraSpace = Math.max(spaceForSurrogate, estimateIncrementalEncodingSize(enc, charCount));
                o = ZipEncodingHelper.growBufferBy(o, totalExtraSpace - o.remaining());
            }
            if (escape == null) {
                escape = CharBuffer.allocate(ESCAPE_LENGTH);
            }
            for (int i = 0; i < res.length(); ++i) {
                o = encodeFully(enc, encodeSurrogate(escape, cb.get()), o, endOfInput);
            }
        }
    }

    /**
     * Encodes all of {@code cb}, enlarging the output buffer whenever the encoder
     * reports overflow.
     *
     * @param enc
     *            encoder performing the current encoding operation
     * @param cb
     *            characters to encode completely
     * @param out
     *            current output buffer
     * @param endOfInput
     *            must match the phase the encoder is in; once the encoder has seen
     *            end-of-input it rejects further calls made without it
     * @return the buffer holding the output so far; may be a different instance than {@code out}
     */
    private static ByteBuffer encodeFully(final CharsetEncoder enc, final CharBuffer cb, final ByteBuffer out,
                    final boolean endOfInput) {
        ByteBuffer o = out;
        while (cb.hasRemaining()) {
            final CoderResult result = enc.encode(cb, o, endOfInput);
            if (result.isOverflow()) {
                final int increment = Math.max(1, estimateIncrementalEncodingSize(enc, cb.remaining()));
                o = ZipEncodingHelper.growBufferBy(o, increment);
            } else if (result.isError()) {
                // The encoder does not advance past input it reports as an error, so
                // step over it here; otherwise this loop would never terminate.
                final int skip = Math.min(Math.max(1, result.length()), cb.remaining());
                cb.position(cb.position() + skip);
            }
        }
        return o;
    }

    /**
     * Fills {@code cb} with the escape {@code %U} followed by the four upper-case
     * hexadecimal digits of {@code c}, and flips it so it is ready to be read.
     *
     * @param cb
     *            scratch buffer with a capacity of at least six characters
     * @param c
     *            UTF-16 code unit to escape
     * @return {@code cb}
     */
    private static CharBuffer encodeSurrogate(final CharBuffer cb, final char c) {
        cb.position(0).limit(ESCAPE_LENGTH);
        cb.put('%');
        cb.put('U');
        cb.put(HEX_CHARS[(c >> 12) & 0x0f]);
        cb.put(HEX_CHARS[(c >> 8) & 0x0f]);
        cb.put(HEX_CHARS[(c >> 4) & 0x0f]);
        cb.put(HEX_CHARS[c & 0x0f]);
        cb.flip();
        return cb;
    }

    /**
     * Creates an encoder that either substitutes {@code '?'} for invalid input or
     * reports it, depending on {@code useReplacement}.
     */
    private CharsetEncoder newEncoder() {
        if (useReplacement) {
            return charset.newEncoder()
                            .onMalformedInput(CodingErrorAction.REPLACE)
                            .onUnmappableCharacter(CodingErrorAction.REPLACE)
                            .replaceWith(REPLACEMENT_BYTES);
        }
        return charset.newEncoder()
                        .onMalformedInput(CodingErrorAction.REPORT)
                        .onUnmappableCharacter(CodingErrorAction.REPORT);
    }

    /**
     * Creates a decoder that either substitutes {@code "?"} for invalid input or
     * reports it, depending on {@code useReplacement}.
     */
    private CharsetDecoder newDecoder() {
        if (useReplacement) {
            return charset.newDecoder()
                            .onMalformedInput(CodingErrorAction.REPLACE)
                            .onUnmappableCharacter(CodingErrorAction.REPLACE)
                            .replaceWith(REPLACEMENT_STRING);
        }
        return charset.newDecoder()
                        .onMalformedInput(CodingErrorAction.REPORT)
                        .onUnmappableCharacter(CodingErrorAction.REPORT);
    }

    /**
     * Estimate the initial encoded size (in bytes) for a character buffer.
     *
     * The estimate assumes that one character uses the maximum length encoding,
     * whilst the rest use an average size encoding. This accounts for any BOM for UTF-16, at
     * the expense of a couple of extra bytes for UTF-8 encoded ASCII.
     *
     * @param enc
     *            encoder to use for estimates
     * @param charCount
     *            number of characters in string
     * @return estimated size in bytes.
     */
    private static int estimateInitialBufferSize(final CharsetEncoder enc, final int charCount) {
        final float first = enc.maxBytesPerChar();
        final float rest = (charCount - 1) * enc.averageBytesPerChar();
        return (int) Math.ceil(first + rest);
    }

    /**
     * Estimate the size needed for remaining characters
     *
     * @param enc
     *            encoder to use for estimates
     * @param charCount
     *            number of characters remaining
     * @return estimated size in bytes.
     */
    private static int estimateIncrementalEncodingSize(final CharsetEncoder enc, final int charCount) {
        return (int) Math.ceil(charCount * enc.averageBytesPerChar());
    }
}