org.robolectric.res.android.ResourceString Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of resources Show documentation
Show all versions of resources Show documentation
An alternative Android testing framework.
/*
* Copyright 2016 Google Inc. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.robolectric.res.android;
import static java.nio.charset.StandardCharsets.UTF_16LE;
import static java.nio.charset.StandardCharsets.UTF_8;
import com.google.common.io.ByteArrayDataOutput;
import com.google.common.io.ByteStreams;
import com.google.common.primitives.UnsignedBytes;
import java.io.ByteArrayOutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
/**
 * Provides utilities to decode/encode a String packed in an arsc resource file.
 *
 * <p>Length prefixes are always read and written as little-endian words, independent of the byte
 * order configured on the {@link ByteBuffer} handed to the decode methods.
 */
public final class ResourceString {

  /** Type of {@link ResourceString} to encode / decode. */
  public enum Type {
    UTF8(UTF_8),
    /**
     * Used by {@link ResourceString#decodeString} as a fallback decoder when strict UTF-8 decoding
     * of a {@link #UTF8} string fails.
     *
     * <p>NOTE(review): when passed directly to {@code encodeString}/{@code decodeString} this
     * type takes the non-UTF-8 branch (16-bit length prefix, no byte count) - confirm whether
     * direct use is intended.
     */
    CESU8(Charset.forName("CESU8")),
    UTF16(UTF_16LE);

    private final Charset charset;

    Type(Charset charset) {
      this.charset = charset;
    }

    /** Returns the charset used for the string payload of this type. */
    public Charset charset() {
      return charset;
    }

    /** Returns a fresh decoder for {@link #charset()}; it reports malformed input by default. */
    public CharsetDecoder decoder() {
      return charset.newDecoder();
    }
  }

  private ResourceString() {} // Utility class; not instantiable.

  /**
   * Given a buffer and an offset into the buffer, returns a String. The {@code offset} is the
   * 0-based byte offset from the start of the buffer where the string resides. This should be the
   * location in memory where the string's character count, followed by its byte count (UTF-8
   * only), and then followed by the actual string is located.
   *
   * <p>Here's an example UTF-8-encoded string of ab©:
   *
   * <pre>
   * 03 04 61 62 C2 A9 00
   * ^ Offset should be here
   * </pre>
   *
   * <p>The buffer's position, limit and mark are left untouched. Any kind of buffer is accepted
   * (heap, sliced, read-only or direct); {@code offset} is always relative to the start of the
   * buffer itself, not to a backing array.
   *
   * @param buffer The buffer containing the string to decode.
   * @param offset Offset into the buffer where the string resides.
   * @param type The encoding type that the {@link ResourceString} is encoded in.
   * @return The decoded string, or {@code null} if the bytes cannot be decoded.
   * @throws IndexOutOfBoundsException if the length prefix or the payload it announces lies
   *     outside the buffer.
   */
  public static String decodeString(ByteBuffer buffer, int offset, Type type) {
    int characterCount = decodeLength(buffer, offset, type);
    offset += computeLengthOffset(characterCount, type);

    // UTF-8 strings have 2 lengths: the number of characters, and then the encoding length.
    // UTF-16 strings, however, only have 1 length: the number of characters.
    int length;
    if (type == Type.UTF8) {
      length = decodeLength(buffer, offset, type);
      offset += computeLengthOffset(length, type);
    } else {
      length = characterCount * 2;
    }

    // First attempt: the strict decoder of the requested type.
    String decoded = decodeOrNull(type.decoder(), payloadView(buffer, offset, length));
    if (decoded != null || type == Type.UTF16) {
      return decoded;
    }

    // Use CESU8 decoder to try decode failed UTF-8 string, especially modified UTF-8.
    // See
    // https://source.android.com/devices/tech/dalvik/dex-format?hl=hr-HR&skip_cache=true#mutf-8.
    // A fresh view is required because the failed attempt may have advanced the previous one.
    return decodeOrNull(Type.CESU8.decoder(), payloadView(buffer, offset, length));
  }

  /**
   * Returns an independent view of {@code buffer} whose remaining bytes are exactly
   * {@code [offset, offset + length)}. Works for any buffer kind because it never touches the
   * backing array, and therefore honours slices with a non-zero array offset.
   */
  private static ByteBuffer payloadView(ByteBuffer buffer, int offset, int length) {
    if (offset < 0 || length < 0 || length > buffer.capacity() - offset) {
      throw new IndexOutOfBoundsException(
          "String payload (offset=" + offset + ", length=" + length
              + ") exceeds buffer capacity " + buffer.capacity());
    }
    ByteBuffer view = buffer.duplicate();
    // Limit first: setting it may pull the duplicated position back, never past the new limit.
    view.limit(offset + length);
    view.position(offset);
    return view;
  }

  /** Decodes all remaining bytes of {@code bytes}, returning {@code null} on malformed input. */
  private static String decodeOrNull(CharsetDecoder decoder, ByteBuffer bytes) {
    try {
      return decoder.decode(bytes).toString();
    } catch (CharacterCodingException ignored) {
      // Undecodable input is signalled to callers with null rather than an exception.
      return null;
    }
  }

  /**
   * Encodes a string in either UTF-8 or UTF-16 and returns the bytes of the encoded string.
   * UTF-8 strings are prefixed by 2 values: the number of characters ({@code str.length()}) and
   * the encoding length (number of bytes in the string). Other types are prefixed by the number
   * of characters only. The result is NULL-terminated (one byte for UTF-8, two bytes otherwise).
   *
   * <p>Here's an example UTF-8-encoded string of ab©:
   *
   * <pre>
   * 03 04 61 62 C2 A9 00
   * </pre>
   *
   * @param str The string to be encoded.
   * @param type The encoding type that the {@link ResourceString} should be encoded in.
   * @return The encoded string.
   */
  public static byte[] encodeString(String str, Type type) {
    byte[] bytes = str.getBytes(type.charset());
    // The extra 5 bytes is for metadata (character count + byte count) and the NULL terminator.
    // It is only an initial capacity; the stream grows if more is needed.
    ByteArrayOutputStream output = new ByteArrayOutputStream(bytes.length + 5);
    encodeLength(output, str.length(), type);
    if (type == Type.UTF8) { // Only UTF-8 strings have the encoding length.
      encodeLength(output, bytes.length, type);
    }
    output.write(bytes, 0, bytes.length);
    // NULL-terminate the string: a single byte for UTF-8, a 16-bit zero otherwise.
    output.write(0);
    if (type != Type.UTF8) {
      output.write(0);
    }
    return output.toByteArray();
  }

  /**
   * Builds a string from a null-terminated char data.
   *
   * @param data characters, optionally terminated by a {@code '\0'} element.
   * @return the characters before the first {@code '\0'}, or all of {@code data} if none exists.
   */
  public static String buildString(char[] data) {
    int count = 0;
    while (count < data.length && data[count] != 0) {
      count++;
    }
    return new String(data, 0, count);
  }

  /**
   * Writes a length prefix. UTF-8 uses one byte, or two (high byte first, top bit set) when the
   * value exceeds 0x7F. Other types use one little-endian word, or two (high word first, top bit
   * set) when the value exceeds 0x7FFF. Bits that do not fit are dropped; a negative length is
   * written as a single zero byte.
   */
  private static void encodeLength(ByteArrayOutputStream output, int length, Type type) {
    if (length < 0) {
      output.write(0);
      return;
    }
    if (type == Type.UTF8) {
      if (length > 0x7F) {
        output.write(((length & 0x7F00) >> 8) | 0x80);
      }
      output.write(length & 0xFF);
    } else { // UTF-16
      if (length > 0x7FFF) {
        writeWordLittleEndian(output, ((length & 0x7FFF0000) >> 16) | 0x8000);
      }
      writeWordLittleEndian(output, length & 0xFFFF);
    }
  }

  /** Writes the low 16 bits of {@code word}, least significant byte first. */
  private static void writeWordLittleEndian(ByteArrayOutputStream output, int word) {
    output.write(word & 0xFF);
    output.write((word & 0xFF00) >> 8);
  }

  /** Reads an unsigned 16-bit little-endian word, ignoring the buffer's configured byte order. */
  private static int readWordLittleEndian(ByteBuffer buffer, int offset) {
    return (buffer.get(offset) & 0xFF) | ((buffer.get(offset + 1) & 0xFF) << 8);
  }

  /**
   * Returns the number of bytes occupied by the length prefix that encodes {@code length}:
   * 1 or 2 for UTF-8, 2 or 4 for any other type.
   */
  static int computeLengthOffset(int length, Type type) {
    boolean utf8 = type == Type.UTF8;
    int unitSize = utf8 ? 1 : 2;
    int extendedThreshold = utf8 ? 0x80 : 0x8000;
    return length >= extendedThreshold ? unitSize * 2 : unitSize;
  }

  /** Decodes the length prefix at {@code offset}, using the UTF-8 form only for UTF8. */
  static int decodeLength(ByteBuffer buffer, int offset, Type type) {
    return type == Type.UTF8 ? decodeLengthUTF8(buffer, offset) : decodeLengthUTF16(buffer, offset);
  }

  static int decodeLengthUTF8(ByteBuffer buffer, int offset) {
    // UTF-8 strings use a clever variant of the 7-bit integer for packing the string length.
    // If the first byte is >= 0x80, then a second byte follows. For these values, the length
    // is WORD-length in big-endian & 0x7FFF.
    int length = buffer.get(offset) & 0xFF;
    if ((length & 0x80) != 0) {
      length = ((length & 0x7F) << 8) | (buffer.get(offset + 1) & 0xFF);
    }
    return length;
  }

  static int decodeLengthUTF16(ByteBuffer buffer, int offset) {
    // UTF-16 strings use a clever variant of the 7-bit integer for packing the string length.
    // If the first word is >= 0x8000, then a second word follows. For these values, the length
    // is (high word & 0x7FFF) << 16 | low word, with the high word stored first. Each word is
    // itself little-endian, matching what encodeLength writes.
    int length = readWordLittleEndian(buffer, offset);
    if ((length & 0x8000) != 0) {
      length = ((length & 0x7FFF) << 16) | readWordLittleEndian(buffer, offset + 2);
    }
    return length;
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy