// cdc.util.pstrings.PackedString (Maven / Gradle / Ivy)
package cdc.util.pstrings;
import java.lang.ref.WeakReference;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.WeakHashMap;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
/**
 * Base abstract class of packed strings.
 * <p>
 * A packed string holds the UTF-8 encoding of a {@link String}.
 * This class provides the factory methods ({@link #convert(String)}) that choose
 * a concrete implementation depending on the encoded length, and a shared pool
 * used to intern instances.
 *
 * @author Damien Carbonne
 *
 */
public abstract class PackedString {
    private static final Logger LOGGER = LogManager.getLogger(PackedString.class);

    /**
     * Pool of interned instances.
     * <p>
     * Values are wrapped in weak references: a {@link WeakHashMap} holds its values
     * strongly, so storing the key itself as value would prevent the entry from
     * ever being collected.
     * <p>
     * All accesses must be done while holding the monitor of this map.
     */
    private static final Map<PackedString, WeakReference<PackedString>> POOL = new WeakHashMap<>();

    /**
     * Retrieve the index-th byte as an int.
     * <p>
     * Conversion is done by setting low byte of the resulting int to the extracted value,
     * and setting other bytes to 0.
     * If {@code index} is greater than or equal to the array length, 0 is returned.
     *
     * @param bytes The bytes array.
     * @param index The byte index.
     * @return The index-th byte, in range [0, 255], or 0 if {@code index} is past the end of {@code bytes}.
     */
    protected static int get(byte[] bytes,
                             int index) {
        return index < bytes.length ? bytes[index] & 0xFF : 0;
    }

    /**
     * @return The bytes array of encoded chars.
     */
    protected abstract byte[] toBytes();

    /**
     * Decodes an UTF-8 bytes array, ignoring zero bytes located at its tail.
     * <p>
     * The first byte of a non-empty array is always kept, even when it is zero.
     *
     * @param bytes The UTF-8 encoded bytes, possibly padded with trailing zeros.
     * @return The decoded string.
     */
    private static String toString(byte[] bytes) {
        // Skip zero bytes at the tail of the string
        int last = bytes.length - 1;
        while (last > 0 && bytes[last] == 0) {
            --last;
        }
        return new String(bytes, 0, last + 1, StandardCharsets.UTF_8);
    }

    /**
     * Returns the pooled instance that is equal to this one, adding this
     * instance to the pool if there is none.
     * <p>
     * Access to the pool is synchronized, so this method can be called
     * from {@link #convert(String)} without any additional locking.
     *
     * @return The pooled instance equal to this one (possibly this instance).
     */
    PackedString intern() {
        synchronized (POOL) {
            final WeakReference<PackedString> ref = POOL.get(this);
            final PackedString pooled = ref == null ? null : ref.get();
            if (pooled != null) {
                return pooled;
            }
            POOL.put(this, new WeakReference<>(this));
            return this;
        }
    }

    /**
     * Converts a (valid) object to an intern version.
     *
     * @param o The object. Must be null or a String or a PackedString.
     * @return The interned version of {@code o}.
     * @throws IllegalArgumentException When {@code o} is neither null, nor a String nor a PackedString.
     */
    public static synchronized Object intern(Object o) {
        if (o == null) {
            return null;
        } else if (o instanceof PackedString) {
            return ((PackedString) o).intern();
        } else if (o instanceof String) {
            return ((String) o).intern();
        } else {
            throw new IllegalArgumentException("Unexpected object class " + o.getClass().getCanonicalName());
        }
    }

    /**
     * Converts a String to a PackedString, or a String.
     * <ul>
     * <li>If {@code s} is null returns null.</li>
     * <li>If {@code s} is empty, returns "".</li>
     * <li>Otherwise, tries to encode s as an UTF-8 byte array.<br>
     * If this succeeds, stores that array in the more appropriate class.<br>
     * If this fails (encoding issue), returns {@code s}.</li>
     * </ul>
     * Normally, if string is legal, UTF-8 encoding should always succeed.
     * <p>
     * Results whose encoding uses at most 8 bytes are always interned.
     *
     * @param s The String to convert.
     * @return The conversion of {@code s} to a packed equivalent.
     */
    public static Object convert(String s) {
        if (s == null) {
            return null;
        }
        if (s.isEmpty()) {
            return "";
        }

        final byte[] bytes;
        try {
            // A CharsetEncoder is not safe for concurrent use: use a fresh one for each call.
            final CharsetEncoder encoder = StandardCharsets.UTF_8.newEncoder();
            final ByteBuffer bbuffer = encoder.encode(CharBuffer.wrap(s));
            // Copy exactly the encoded bytes: the backing array of the buffer
            // may be larger than the encoded content.
            bytes = new byte[bbuffer.remaining()];
            bbuffer.get(bytes);
        } catch (final CharacterCodingException e) {
            // Some chars don't fit encoding
            LOGGER.error("Failed to encode '{}'", s, e);
            return s;
        }
        return pack(bytes);
    }

    /**
     * Wraps encoded bytes into the smallest fitting implementation.
     *
     * @param bytes The UTF-8 encoded bytes, without padding.
     * @return The packed string holding {@code bytes}. It is interned when {@code bytes} has at most 8 bytes.
     */
    private static PackedString pack(byte[] bytes) {
        // NOTE(review): toString() strips zero bytes at the tail, so a string ending with
        // NUL characters may not be restored identically, whatever the chosen class.
        final int length = bytes.length;
        if (length > 64) {
            return new PackedStringN(bytes);
        } else if (length <= 8) {
            // Always intern small strings
            return new PackedString8(bytes).intern();
        } else if (length <= 16) {
            return new PackedString16(bytes);
        } else if (length <= 24) {
            return new PackedString24(bytes);
        } else if (length <= 32) {
            return new PackedString32(bytes);
        } else if (length <= 40) {
            return new PackedString40(bytes);
        } else if (length <= 48) {
            return new PackedString48(bytes);
        } else if (length <= 56) {
            return new PackedString56(bytes);
        } else {
            return new PackedString64(bytes);
        }
    }

    /**
     * Converts a String to a PackedString, or a String, and optionally interns the result.
     *
     * @param s The String to convert.
     * @param intern If {@code true}, the result of {@link #convert(String)} is passed to {@link #intern(Object)}.
     * @return The conversion of {@code s}, interned if {@code intern} is {@code true}.
     */
    public static Object convert(String s,
                                 boolean intern) {
        final Object o = convert(s);
        return intern ? intern(o) : o;
    }

    /**
     * @return The decoding, as UTF-8, of {@link #toBytes()}, ignoring trailing zero bytes.
     */
    @Override
    public final String toString() {
        return toString(toBytes());
    }
}