All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.commons.compress.archivers.zip.Simple8BitZipEncoding Maven / Gradle / Ivy

Go to download

Apache Commons Compress software defines an API for working with compression and archive formats. These include: bzip2, gzip, pack200, lzma, xz, Snappy, traditional Unix Compress, DEFLATE, DEFLATE64, LZ4, Brotli, Zstandard and ar, cpio, jar, tar, zip, dump, 7z, arj.

There is a newer version: 1.27.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.commons.compress.archivers.zip;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

/**
 * This ZipEncoding implementation implements a simple 8bit character
 * set, which mets the following restrictions:
 * 
 * 
    *
  • Characters 0x0000 to 0x007f are encoded as the corresponding * byte values 0x00 to 0x7f.
  • *
  • All byte codes from 0x80 to 0xff are mapped to a unique unicode * character in the range 0x0080 to 0x7fff. (No support for * UTF-16 surrogates) *
* *

These restrictions most notably apply to the most prominent * omissions of java-1.4's {@link java.nio.charset.Charset Charset} * implementation, Cp437 and Cp850.

* *

The methods of this class are reentrant.

* @Immutable */ class Simple8BitZipEncoding implements ZipEncoding { /** * A character entity, which is put to the reverse mapping table * of a simple encoding. */ private static final class Simple8BitChar implements Comparable { public final char unicode; public final byte code; Simple8BitChar(final byte code, final char unicode) { this.code = code; this.unicode = unicode; } @Override public int compareTo(final Simple8BitChar a) { return this.unicode - a.unicode; } @Override public String toString() { return "0x" + Integer.toHexString(0xffff & unicode) + "->0x" + Integer.toHexString(0xff & code); } @Override public boolean equals(final Object o) { if (o instanceof Simple8BitChar) { final Simple8BitChar other = (Simple8BitChar) o; return unicode == other.unicode && code == other.code; } return false; } @Override public int hashCode() { return unicode; } } /** * The characters for byte values of 128 to 255 stored as an array of * 128 chars. */ private final char[] highChars; /** * A list of {@link Simple8BitChar} objects sorted by the unicode * field. This list is used to binary search reverse mapping of * unicode characters with a character code greater than 127. */ private final List reverseMapping; /** * @param highChars The characters for byte values of 128 to 255 * stored as an array of 128 chars. */ public Simple8BitZipEncoding(final char[] highChars) { this.highChars = highChars.clone(); final List temp = new ArrayList<>(this.highChars.length); byte code = 127; for (final char highChar : this.highChars) { temp.add(new Simple8BitChar(++code, highChar)); } Collections.sort(temp); this.reverseMapping = Collections.unmodifiableList(temp); } /** * Return the character code for a given encoded byte. * * @param b The byte to decode. * @return The associated character value. */ public char decodeByte(final byte b) { // code 0-127 if (b >= 0) { return (char) b; } // byte is signed, so 128 == -128 and 255 == -1 return this.highChars[128 + b]; } /** * @param c The character to encode. * @return Whether the given unicode character is covered by this encoding. */ public boolean canEncodeChar(final char c) { if (c >= 0 && c < 128) { return true; } final Simple8BitChar r = this.encodeHighChar(c); return r != null; } /** * Pushes the encoded form of the given character to the given byte buffer. * * @param bb The byte buffer to write to. * @param c The character to encode. * @return Whether the given unicode character is covered by this encoding. * If {@code false} is returned, nothing is pushed to the * byte buffer. */ public boolean pushEncodedChar(final ByteBuffer bb, final char c) { if (c >= 0 && c < 128) { bb.put((byte) c); return true; } final Simple8BitChar r = this.encodeHighChar(c); if (r == null) { return false; } bb.put(r.code); return true; } /** * @param c A unicode character in the range from 0x0080 to 0x7f00 * @return A Simple8BitChar, if this character is covered by this encoding. * A {@code null} value is returned, if this character is not * covered by this encoding. */ private Simple8BitChar encodeHighChar(final char c) { // for performance an simplicity, yet another reincarnation of // binary search... int i0 = 0; int i1 = this.reverseMapping.size(); while (i1 > i0) { final int i = i0 + (i1 - i0) / 2; final Simple8BitChar m = this.reverseMapping.get(i); if (m.unicode == c) { return m; } if (m.unicode < c) { i0 = i + 1; } else { i1 = i; } } if (i0 >= this.reverseMapping.size()) { return null; } final Simple8BitChar r = this.reverseMapping.get(i0); if (r.unicode != c) { return null; } return r; } /** * @see * org.apache.commons.compress.archivers.zip.ZipEncoding#canEncode(java.lang.String) */ @Override public boolean canEncode(final String name) { for (int i=0;i




© 2015 - 2024 Weber Informatics LLC | Privacy Policy