org.robolectric.res.android.ResourceString Maven / Gradle / Ivy
/*
* Copyright 2016 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.robolectric.res.android;
import static java.nio.charset.StandardCharsets.UTF_16LE;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.io.ByteArrayDataOutput;
import com.google.common.io.ByteStreams;
import com.google.common.primitives.UnsignedBytes;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
/** Provides utilities to decode/encode a String packed in an arsc resource file. */
public final class ResourceString {
/** Type of {@link ResourceString} to encode / decode. */
public enum Type {
UTF8(UTF_8),
CESU8(Charset.forName("CESU8")),
UTF16(UTF_16LE);
private final Charset charset;
Type(Charset charset) {
this.charset = charset;
}
public Charset charset() {
return charset;
}
public CharsetDecoder decoder() {
return charset.newDecoder();
}
}
private ResourceString() {} // Private constructor
/**
* Given a buffer and an offset into the buffer, returns a String. The {@code offset} is the
* 0-based byte offset from the start of the buffer where the string resides. This should be the
* location in memory where the string's character count, followed by its byte count, and then
* followed by the actual string is located.
*
* Here's an example UTF-8-encoded string of ab©:
*
* 03 04 61 62 C2 A9 00
* ^ Offset should be here
*
*
* @param buffer The buffer containing the string to decode.
* @param offset Offset into the buffer where the string resides.
* @param type The encoding type that the {@link ResourceString} is encoded in.
* @return The decoded string.
*/
public static String decodeString(ByteBuffer buffer, int offset, Type type) {
int length;
int characterCount = decodeLength(buffer, offset, type);
offset += computeLengthOffset(characterCount, type);
// UTF-8 strings have 2 lengths: the number of characters, and then the encoding length.
// UTF-16 strings, however, only have 1 length: the number of characters.
if (type == Type.UTF8) {
length = decodeLength(buffer, offset, type);
offset += computeLengthOffset(length, type);
} else {
length = characterCount * 2;
}
ByteBuffer stringBuffer = ByteBuffer.wrap(buffer.array(), offset, length);
// Use normal UTF-8 and UTF-16 decoder to decode string
try {
return type.decoder().decode(stringBuffer).toString();
} catch (CharacterCodingException e) {
if (type == Type.UTF16) {
return null;
}
}
stringBuffer = ByteBuffer.wrap(buffer.array(), offset, length);
// Use CESU8 decoder to try decode failed UTF-8 string, especially modified UTF-8.
// See
// https://source.android.com/devices/tech/dalvik/dex-format?hl=hr-HR&skip_cache=true#mutf-8.
try {
return Type.CESU8.decoder().decode(stringBuffer).toString();
} catch (CharacterCodingException e) {
return null;
}
}
/**
* Encodes a string in either UTF-8 or UTF-16 and returns the bytes of the encoded string.
* Strings are prefixed by 2 values. The first is the number of characters in the string.
* The second is the encoding length (number of bytes in the string).
*
* Here's an example UTF-8-encoded string of ab©:
*
03 04 61 62 C2 A9 00
*
* @param str The string to be encoded.
* @param type The encoding type that the {@link ResourceString} should be encoded in.
* @return The encoded string.
*/
public static byte[] encodeString(String str, Type type) {
byte[] bytes = str.getBytes(type.charset());
// The extra 5 bytes is for metadata (character count + byte count) and the NULL terminator.
ByteArrayDataOutput output = ByteStreams.newDataOutput(bytes.length + 5);
encodeLength(output, str.length(), type);
if (type == Type.UTF8) { // Only UTF-8 strings have the encoding length.
encodeLength(output, bytes.length, type);
}
output.write(bytes);
// NULL-terminate the string
if (type == Type.UTF8) {
output.write(0);
} else {
output.writeShort(0);
}
return output.toByteArray();
}
/**
* Builds a string from a null-terminated char data.
*/
public static String buildString(char[] data) {
int count = 0;
for (count=0; count < data.length; count++) {
if (data[count] == 0) {
break;
}
}
return new String(data, 0, count);
}
private static void encodeLength(ByteArrayDataOutput output, int length, Type type) {
if (length < 0) {
output.write(0);
return;
}
if (type == Type.UTF8) {
if (length > 0x7F) {
output.write(((length & 0x7F00) >> 8) | 0x80);
}
output.write(length & 0xFF);
} else { // UTF-16
// TODO(acornwall): Replace output with a little-endian output.
if (length > 0x7FFF) {
int highBytes = ((length & 0x7FFF0000) >> 16) | 0x8000;
output.write(highBytes & 0xFF);
output.write((highBytes & 0xFF00) >> 8);
}
int lowBytes = length & 0xFFFF;
output.write(lowBytes & 0xFF);
output.write((lowBytes & 0xFF00) >> 8);
}
}
static int computeLengthOffset(int length, Type type) {
return (type == Type.UTF8 ? 1 : 2) * (length >= (type == Type.UTF8 ? 0x80 : 0x8000) ? 2 : 1);
}
static int decodeLength(ByteBuffer buffer, int offset, Type type) {
return type == Type.UTF8 ? decodeLengthUTF8(buffer, offset) : decodeLengthUTF16(buffer, offset);
}
static int decodeLengthUTF8(ByteBuffer buffer, int offset) {
// UTF-8 strings use a clever variant of the 7-bit integer for packing the string length.
// If the first byte is >= 0x80, then a second byte follows. For these values, the length
// is WORD-length in big-endian & 0x7FFF.
int length = UnsignedBytes.toInt(buffer.get(offset));
if ((length & 0x80) != 0) {
length = ((length & 0x7F) << 8) | UnsignedBytes.toInt(buffer.get(offset + 1));
}
return length;
}
static int decodeLengthUTF16(ByteBuffer buffer, int offset) {
// UTF-16 strings use a clever variant of the 7-bit integer for packing the string length.
// If the first word is >= 0x8000, then a second word follows. For these values, the length
// is DWORD-length in big-endian & 0x7FFFFFFF.
int length = (buffer.getShort(offset) & 0xFFFF);
if ((length & 0x8000) != 0) {
length = ((length & 0x7FFF) << 16) | (buffer.getShort(offset + 2) & 0xFFFF);
}
return length;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy