All Downloads are FREE. Search and download functionalities are using the official Maven repository.

cdc.util.pstrings.PackedString Maven / Gradle / Ivy

There is a newer version: 0.6.0
Show newest version
package cdc.util.pstrings;

import java.lang.ref.WeakReference;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.util.Map;
import java.util.WeakHashMap;

import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;

/**
 * Base abstract class of packed strings.
 *
 * @author Damien Carbonne
 *
 */
public abstract class PackedString {
    private static final Logger LOGGER = LogManager.getLogger(PackedString.class);
    private static final CharsetEncoder ENCODER = StandardCharsets.UTF_8.newEncoder();
    private static final Map POOL = new WeakHashMap<>();

    /**
     * Retrieve the index-th byte as an int.
     * 

* Conversion is done by setting low byte of the resulting int to the extracted value, * and setting other bytes to 0. * * @param bytes The bytes array. * @param index The byte index. * @return The inex-th byte. */ protected static int get(byte[] bytes, int index) { return index < bytes.length ? bytes[index] & 0xFF : 0; } /** * @return The bytes array of encoded chars. */ protected abstract byte[] toBytes(); private static String toString(byte[] bytes) { // Skip zero bytes at the tail of the string int last = bytes.length - 1; while (last > 0 && bytes[last] == 0) { --last; } return new String(bytes, 0, last + 1, StandardCharsets.UTF_8); } PackedString intern() { final PackedString value = POOL.get(this); if (value == null) { POOL.put(this, this); return this; } else { return value; } } /** * Converts a (valid) object to an intern version. * * @param o The object. Must be null or a String or a PackedString. * @return The interned version of {@code o}. * @throws IllegalArgumentException When {@code o} is neither null, nor a String nor a PackedString. */ public static synchronized Object intern(Object o) { if (o == null) { return null; } else if (o instanceof PackedString) { return ((PackedString) o).intern(); } else if (o instanceof String) { return ((String) o).intern(); } else { throw new IllegalArgumentException("Unexpected object class " + o.getClass().getCanonicalName()); } } /** * Converts a String to a PackedString, or a String. *

    *
  • If {@code s} is null returns null. *
  • If {@code s} is empty, returns "". *
  • Otherwise, tries to encode s as an UTF-8 byte array.
    * If this succeeds, stores that array in the more appropriate class.
    * If this fails (encoding issue), returns {@code s}. *
* Normally, if string is legal, UTF-8 encoding should always succeed. * * @param s The String to convert. * @return The conversion of {@code s} to a packed equivalent. */ public static Object convert(String s) { if (s == null) { return null; } else if (s.length() == 0) { return ""; } final CharBuffer cbuffer = CharBuffer.wrap(s); try { final ByteBuffer bbuffer = ENCODER.encode(cbuffer); final byte[] bytes = bbuffer.array(); final int length = bbuffer.limit(); if (/* s.indexOf('\0') != -1 || */ length > 64) { return new PackedStringN(bytes); } if (length <= 8) { // Always intern small strings return new PackedString8(bytes).intern(); } else if (length <= 16) { return new PackedString16(bytes); } else if (length <= 24) { return new PackedString24(bytes); } else if (length <= 32) { return new PackedString32(bytes); } else if (length <= 40) { return new PackedString40(bytes); } else if (length <= 48) { return new PackedString48(bytes); } else if (length <= 56) { return new PackedString56(bytes); } else { assert length <= 64; return new PackedString64(bytes); } } catch (final CharacterCodingException e) { // Some chars don't fit encoding LOGGER.error("Failed to encode '" + s + "'", e); return s; } } public static Object convert(String s, boolean intern) { final Object o = convert(s); return intern ? intern(o) : o; } @Override public final String toString() { return toString(toBytes()); } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy