All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.permazen.encoding.Encoding Maven / Gradle / Ivy

The newest version!

/*
 * Copyright (C) 2015 Archie L. Cobbs. All rights reserved.
 */

package io.permazen.encoding;

import com.google.common.base.Preconditions;
import com.google.common.reflect.TypeToken;

import io.permazen.kv.KeyRange;
import io.permazen.util.BoundType;
import io.permazen.util.Bounds;
import io.permazen.util.ByteReader;
import io.permazen.util.ByteUtil;
import io.permazen.util.ByteWriter;
import io.permazen.util.NaturalSortAware;
import io.permazen.util.XMLUtil;

import java.io.Serializable;
import java.util.Comparator;
import java.util.OptionalInt;

/**
 * A range of values of some Java type, along with string and binary encodings and a total ordering of those values.
 *
 * 

* {@link Encoding}'s are used to map between instances of some Java type and the {@code byte[]} encodings of those instances * stored in a Permazen database. The {@code byte[]} encoding defines the database sort order (via unsigned lexicographical * ordering), and this same ordering is reflected in Java via {@link #compare compare()}. * *

* An {@link Encoding} also defines a mapping between Java instances and {@link String} values. * *

* Instances may have an associated {@link EncodingId}, which is a globally unique URN-style identifier that allows the encoding * to be referred to by name (e.g., in an {@link EncodingRegistry}). Encodings with no {@link EncodingId} are called anonymous. * *

* {@link Encoding}s must satsify these requirements: *

    *
  • Instances have an associated Java type which can represent any of the encoding's supported values (see * {@link #getTypeToken}). However, an encoding is not required to support every value of the Java type. * For example, there could be an encoding of type {@link Integer} that only supports non-negative values. *
  • Instances totally order their supported Java values (see {@link #compare compare()}). If the associated Java type itself * implements {@link Comparable}, then the two orderings do not necessarily have to agree, but they should if possible. * In that case, {@link #sortsNaturally sortsNaturally()} should return true. *
  • {@code null} may or may not be a supported value (see {@link #supportsNull}). If so, it must be fully supported value * just like any other; for example, it must be handled by {@link #compare compare()} (typically null values sort last). * Note that this is an additional requirement beyond what {@link Comparator} strictly requires. *
  • There is a {@linkplain #getDefaultValue default value}. For types that support null, the default value must be null, * and for types that don't support null, obviously the default value must not be null; however, an exception can be made * for encodings that don't support null but also don't need default values, e.g., anonymous encodings that are always * wrapped within a {@link NullSafeEncoding}; for such encodings, {@link #getDefaultValue} should throw an * {@link UnsupportedOperationException}. *
  • All non-null values can be encoded/decoded into a {@link String} without losing information (see * {@link #toString(Object) toString()} and {@link #fromString fromString()}). These strings must contain characters * that are valid in an XML document only. *
  • All values, including null if supported, can be encoded/decoded into a self-delimiting binary string (i.e., {@code byte[]} * array) without losing information (see {@link #read read()} and {@link #write write()}). Moreover, these binary strings, * when sorted lexicographically as unsigned values, sort consistently with {@link #compare compare()}. *
  • An {@link Encoding}'s string and binary encodings and sort ordering is guaranteed to never change, unless the * {@link EncodingId} is also changed, which effectively defines a new encoding (in such scenarios, automatic schema * migration is possible by adding the appropriate logic to {@link #convert convert()}). *
* *

* Two {@link Encoding} instances should be equal according to {@link #equals equals()} only when they behave identically * with respect to all of the above. * *

* Instances must be stateless (and therefore also thread safe). * * @param The associated Java type * @see EncodingRegistry */ public interface Encoding extends Comparator, NaturalSortAware, Serializable { /** * The maximum number of supported array dimensions ({@value #MAX_ARRAY_DIMENSIONS}). */ int MAX_ARRAY_DIMENSIONS = 255; /** * Get the globally unique encoding ID that identifies this encoding, if any. * *

* Once associated with a specific encoding, an encoding ID must never be changed or reused. If an {@link Encoding}'s * behavior changes in any way, then its encoding ID must also change. This applies only to the encoding itself, * and not the {@linkplain #getTypeToken associated Java type}. For example, an {@link Encoding}'s associated Java type * can change over time, e.g., if its class or package name changes. * * @return this encoding's unique ID, or null if this encoding is anonymous */ EncodingId getEncodingId(); /** * Get the Java type corresponding to this encoding's values. * * @return the Java type used to represent this encoding's values */ TypeToken getTypeToken(); /** * Read a value from the given input. * * @param reader byte input * @return decoded value (possibly null) * @throws IllegalArgumentException if invalid input is encountered * @throws IndexOutOfBoundsException if input is truncated * @throws IllegalArgumentException if {@code reader} is null */ T read(ByteReader reader); /** * Write a value to the given output. * * @param writer byte output * @param value value to write (possibly null) * @throws IllegalArgumentException if {@code value} is null and this encoding does not support null * @throws IllegalArgumentException if {@code writer} is null */ void write(ByteWriter writer, T value); /** * Get the default value for this encoding encoded as a {@code byte[]} array. * *

* The implementation in {@link Encoding} returns the binary encoding of the value returned by * {@link #getDefaultValue}. * * @return encoded default value * @throws UnsupportedOperationException if this encoding does not have a default value */ default byte[] getDefaultValueBytes() { final ByteWriter writer = new ByteWriter(); try { this.write(writer, this.getDefaultValue()); } catch (IllegalArgumentException e) { throw new UnsupportedOperationException(String.format("%s does not have a default value", this)); } return writer.getBytes(); } /** * Get the default value for this encoding. * *

* If this encoding {@linkplain #supportsNull supports null values}, then this must return null. * * @return default value * @throws UnsupportedOperationException if this encoding does not have a default value */ T getDefaultValue(); /** * Read and discard a {@code byte[]} encoded value from the given input. * *

* If the value skipped over is invalid, this method may, but is not required to, throw {@link IllegalArgumentException}. * *

* If the value skipped over is truncated, this method must throw {@link IndexOutOfBoundsException}. * * @param reader byte input * @throws IllegalArgumentException if invalid input is encountered * @throws IndexOutOfBoundsException if input is truncated * @throws IllegalArgumentException if {@code reader} is null */ void skip(ByteReader reader); /** * Encode a non-null value as a {@link String} for later decoding by {@link #fromString fromString()}. * *

* Each of the characters in the returned {@link String}, when decoded as 32-bit Unicode codepoints, * must contain only valid XML characters (see {@link XMLUtil#isValidChar}). * * @param value actual value, never null * @return string encoding of {@code value} acceptable to {@link #fromString fromString()} * @throws IllegalArgumentException if {@code value} is null * @see The XML 1.0 Specification */ String toString(T value); /** * Parse a non-null value previously encoded by {@link #toString(Object) toString(T)}. * * @param string non-null value previously encoded as a {@link String} by {@link #toString(Object) toString(T)} * @return actual value * @throws IllegalArgumentException if the input is invalid * @throws IllegalArgumentException if {@code string} is null */ T fromString(String string); /** * Attempt to convert a value from the given {@link Encoding} into a value of this {@link Encoding}. * *

* For a non-null {@code value}, the implementation in {@link Encoding} first checks whether the {@code value} is already * a valid value for this encoding; if so, the value is returned. Otherwise, it invokes * {@code encoding.}{@link #toString(Object) toString(value)} to convert {@code value} into a {@link String}, and then * attempts to parse that string via {@code this.}{@link #fromString fromString()}; if the parse fails, * an {@link IllegalArgumentException} is thrown. Note this means that any value will convert successfully * to a {@link String}, as long as it doesn't contain an invalid escape sequence (see {@link StringEncoding#toString}). * *

* If {@code value} is null, the implementation in {@link Encoding} returns null, unless this encoding does not support * null values, in which case an {@link IllegalArgumentException} is thrown. * *

* Permazen's built-in encodings include the following conversions: *

    *
  • Non-boolean Primitive types: *
      *
    • Convert from other non-boolean primitive types as if by the corresponding Java cast *
    • Convert from boolean by converting to zero (if false) or one (if true) *
    *
  • Boolean: converts from other primitive types as if by {@code value != 0} *
  • A {@code char[]} array and a {@link String} are convertible to each other *
  • A {@code char} and a {@link String} of length one are convertible to each other (other {@link String}s are not) *
  • Arrays: converted by converting each array element individually (if possible) *
* * @param encoding the {@link Encoding} of {@code value} * @param value the value to convert * @param source encoding * @return {@code value} converted to this instance's type * @throws IllegalArgumentException if the conversion fails */ default T convert(Encoding encoding, S value) { Preconditions.checkArgument(encoding != null, "null encoding"); try { return this.validate(value); } catch (IllegalArgumentException e) { if (value == null) throw e; } return this.fromString(encoding.toString(value)); } /** * Verify the given object is a valid instance of this {@link Encoding}'s Java type and cast it to that type. * *

* Note that this method must throw {@link IllegalArgumentException}, not {@link ClassCastException} * or {@link NullPointerException}, if {@code obj} does not have the correct type, or is an unsupported value * - including null if null is not supported. * *

* This method is allowed to perform widening conversions of the object that lose no information, e.g., * from {@link Integer} to {@link Long}. * *

* The implementation in {@link Encoding} first verifies the value is not null if this instance * {@linkplain #supportsNull does not allow null values}, and then attempts to cast the value using * this instance's raw Java type. Subclasses should override this method to implement any other restrictions. * * @param obj object to validate * @return {@code obj} cast to this encoding's type * @throws IllegalArgumentException if {@code obj} in not of type T * @throws IllegalArgumentException if {@code obj} is null and this encoding does not support null values * @throws IllegalArgumentException if {@code obj} is in any other way not supported by this {@link Encoding} */ @SuppressWarnings("unchecked") default T validate(Object obj) { Preconditions.checkArgument(obj != null || this.supportsNull(), "invalid null value"); try { return (T)this.getTypeToken().getRawType().cast(obj); } catch (ClassCastException e) { throw new IllegalArgumentException(String.format( "%s does not support values of type %s", this, obj.getClass().getName())); } } /** * Order two values. * *

* This method must provide a total ordering of all supported Java values that is consistent with the database ordering, * i.e., the unsigned lexicographical ordering of the corresponding {@code byte[]} encoded values. * *

* If null is a supported Java value, then the this method must accept null parameters without throwing an exception * (note, this is a stronger requirement than the {@link Comparator} interface normally requires). * *

* Note: by convention, null values usually sort last. * * @throws IllegalArgumentException if {@code value1} or {@code value2} is null and this encoding does not support null */ @Override int compare(T value1, T value2); /** * Determine whether this encoding supports null values. * * @return true if null is a valid value, otherwise false */ boolean supportsNull(); /** * Determine whether any of this encoding's encoded values start with a {@code 0x00} byte. * Certain optimizations are possible when this is not the case. It is safe for this method to always return true. * *

* Note: changing the result of this method may result in an incompatible encoding if this encoding * is wrapped in another class. * * @return true if an encoded value starting with {@code 0x00} exists */ boolean hasPrefix0x00(); /** * Determine whether any of this encoding's encoded values start with a {@code 0xff} byte. * Certain optimizations are possible when this is not the case. It is safe for this method to always return true. * *

* Note: changing the result of this method may result in an incompatible encoding if this encoding * is wrapped in another class. * * @return true if an encoded value starting with {@code 0xff} exists */ boolean hasPrefix0xff(); /** * Get the fixed width of this encoding, if any. * *

* Some encodings encode every value into the same number of bytes. For such encodings, this method returns * that number. For variable width encodings, this method must return empty. * * @return the number of bytes of every encoded value, or empty if the encoding length varies */ OptionalInt getFixedWidth(); /** * Convenience method that both validates and encodes a value. * *

* Equivalent to: *

     * this.write(writer, this.validate(obj))
     * 
* * @param writer byte output * @param obj object to validate * @throws IllegalArgumentException if {@code obj} in not of type T * @throws IllegalArgumentException if {@code obj} is null and this encoding does not support null values * @throws IllegalArgumentException if {@code obj} is in any other way not supported by this {@link Encoding} * @throws IllegalArgumentException if {@code writer} is null */ default void validateAndWrite(ByteWriter writer, Object obj) { this.write(writer, this.validate(obj)); } /** * Calculate the {@link KeyRange} that includes exactly those encoded values that lie within the given bounds. * * @param bounds bounds to impose * @return {@link KeyRange} corresponding to {@code bounds} * @throws IllegalArgumentException if {@code bounds} is null */ default KeyRange getKeyRange(Bounds bounds) { // Sanity check Preconditions.checkArgument(bounds != null); // Get inclusive byte[] lower bound byte[] lowerBound = ByteUtil.EMPTY; final BoundType lowerBoundType = bounds.getLowerBoundType(); if (!BoundType.NONE.equals(lowerBoundType)) { final ByteWriter writer = new ByteWriter(); try { this.write(writer, bounds.getLowerBound()); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(String.format("invalid lower bound %s for %s", bounds.getLowerBound(), this), e); } lowerBound = writer.getBytes(); if (!lowerBoundType.isInclusive()) lowerBound = ByteUtil.getNextKey(lowerBound); } // Get exclusive byte[] upper bound byte[] upperBound = null; final BoundType upperBoundType = bounds.getUpperBoundType(); if (!BoundType.NONE.equals(upperBoundType)) { final ByteWriter writer = new ByteWriter(); try { this.write(writer, bounds.getUpperBound()); } catch (IllegalArgumentException e) { throw new IllegalArgumentException(String.format("invalid upper bound %s for %s", bounds.getUpperBound(), this), e); } upperBound = writer.getBytes(); if (upperBoundType.isInclusive()) upperBound = ByteUtil.getNextKey(upperBound); } // Done return new KeyRange(lowerBound, upperBound); } /** * Encode the given value into a {@code byte[]} array. * *

* The implementation in {@link Encoding} creates a temporary {@link ByteWriter} * and then delegates to {@link #write write()}. * * @param value value to encode, possibly null * @return encoded value * @throws IllegalArgumentException if {@code obj} is invalid */ default byte[] encode(T value) { final ByteWriter writer = new ByteWriter(); this.write(writer, value); return writer.getBytes(); } /** * Decode a valid from the given {@code byte[]} array. * *

* The implementation in {@link Encoding} creates a temporary {@link ByteReader} * and then delegates to {@link #read read()}. * * @param bytes encoded value * @return decoded value, possibly null * @throws IllegalArgumentException if {@code bytes} is null, invalid, or contains trailing garbage */ default T decode(byte[] bytes) { final ByteReader reader = new ByteReader(bytes); final T value = this.read(reader); if (reader.remain() > 0) throw new IllegalArgumentException("trailing garbage"); return value; } }