// com.google.gwtorm.nosql.IndexKeyBuilder Maven / Gradle / Ivy
// Copyright 2010 Google Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.gwtorm.nosql;
import java.io.ByteArrayOutputStream;
import java.io.UnsupportedEncodingException;
/**
* Encoder support for {@link IndexFunction} computed strings.
*
* This class provides a string that may contain multiple values, using
* delimiters between fields and big-endian encoded numerics. Sorting the
* resulting strings using unsigned byte orderings produces a stable sorting.
*
* The encoding used by this class relies on having 258 tokens. To get the extra
* 2 tokens within a 256 byte range, escapes are used according to the following
* simple table:
*
* <ul>
* <li>delimiter = \x00\x01</li>
* <li>byte \x00 = \x00\xff</li>
* <li>byte \xff = \xff\x00</li>
* <li>infinity = \xff\xff</li>
* </ul>
*
*
* Integers are encoded as variable length big-endian values, skipping leading
* zero bytes, prefixed by the number of bytes used to encode them. Therefore 0
* is encoded as "\x00", and 256 is encoded as "\x02\x01\x00". Negative values
* are encoded in their twos complement encoding and therefore sort after the
* maximum positive value.
*
* Strings and byte arrays supplied by the caller have their \x00 and \xff
* values escaped according to the table above, but are otherwise written as-is
* without a length prefix.
*
* Callers are responsible for inserting {@link #delimiter()} markers at the
* appropriate positions in the sequence.
*/
public class IndexKeyBuilder {
  /** Accumulates the encoded key bytes in the order they are added. */
  private final ByteArrayOutputStream buf = new ByteArrayOutputStream();

  /**
   * Add a delimiter marker to the string.
   *
   * The marker is the two byte sequence 0x00 0x01.
   */
  public void delimiter() {
    buf.write(0x00);
    buf.write(0x01);
  }

  /**
   * Add the special infinity symbol to the string.
   *
   * The infinity symbol sorts after all other values in the same position.
   */
  public void infinity() {
    buf.write(0xff);
    buf.write(0xff);
  }

  /**
   * Add \0 to the string.
   *
   * \0 can be used during searches to enforce greater than or less than
   * clauses in a query. The byte is written as-is, without escaping.
   */
  public void nul() {
    buf.write(0x00);
  }

  /**
   * Add a raw sequence of bytes.
   *
   * The bytes 0x00 and 0xff are escaped by this method according to the
   * encoding table described in the class documentation.
   *
   * @param bin array to copy from.
   * @param pos first index to copy.
   * @param cnt number of bytes to copy; nothing is written if not positive.
   */
  public void add(byte[] bin, int pos, int cnt) {
    while (0 < cnt--) {
      writeEscaped(bin[pos++] & 0xff);
    }
  }

  /**
   * Add a raw sequence of bytes with every byte inverted.
   *
   * Each input byte {@code b} is replaced by {@code 0xff - b} (as an unsigned
   * value) so that larger input bytes produce smaller output bytes. The
   * inverted bytes 0x00 and 0xff are then escaped exactly as in
   * {@link #add(byte[], int, int)}.
   *
   * @param bin array to copy from.
   * @param pos first index to copy.
   * @param cnt number of bytes to copy; nothing is written if not positive.
   */
  public void desc(byte[] bin, int pos, int cnt) {
    while (0 < cnt--) {
      writeEscaped(0xff - (bin[pos++] & 0xff));
    }
  }

  /**
   * Add a raw sequence of bytes.
   *
   * The bytes 0x00 and 0xff are escaped by this method according to the
   * encoding table described in the class documentation.
   *
   * @param bin the complete array to copy.
   */
  public void add(byte[] bin) {
    add(bin, 0, bin.length);
  }

  /**
   * Add a complete byte array with every byte inverted.
   *
   * Equivalent to {@code desc(bin, 0, bin.length)}.
   *
   * @param bin the complete array to invert and copy.
   */
  public void desc(byte[] bin) {
    desc(bin, 0, bin.length);
  }

  /**
   * Encode a string into UTF-8 and append as a sequence of bytes.
   *
   * @param str the string to encode and append.
   */
  public void add(String str) {
    add(utf8(str));
  }

  /**
   * Encode a string into UTF-8 and append its bytes inverted.
   *
   * @param str the string to encode, invert and append.
   * @see #desc(byte[])
   */
  public void desc(String str) {
    desc(utf8(str));
  }

  /**
   * Add a single character as though it were part of a UTF-8 string.
   *
   * The result is byte-for-byte the same as passing a one character string to
   * {@link #add(String)}.
   *
   * @param ch the character to encode and append.
   */
  public void add(char ch) {
    if (ch == 0x00) {
      buf.write(0x00);
      buf.write(0xff);

    } else if (ch < 0x80) {
      // Only ASCII is a single byte in UTF-8. Characters 0x80..0xff must take
      // the general path below: writing them as one raw byte would disagree
      // with add(String) and would emit an unescaped 0xff for U+00FF.
      buf.write(ch);

    } else {
      add(Character.toString(ch));
    }
  }

  /**
   * Add a single character, UTF-8 encoded, with its bytes inverted.
   *
   * @param ch the character to encode, invert and append.
   * @see #desc(String)
   */
  public void desc(char ch) {
    desc(Character.toString(ch));
  }

  /**
   * Add an integer value as a big-endian variable length integer.
   *
   * The value is written as a one byte length (0 through 8) followed by that
   * many bytes of the value, most significant byte first, with leading zero
   * bytes skipped. These bytes are not escaped.
   *
   * @param val the value to add.
   */
  public void add(long val) {
    // One slot for the length prefix plus up to eight value bytes.
    final byte[] t = new byte[9];
    int i = t.length;
    while (val != 0) {
      t[--i] = (byte) (val & 0xff);
      // Unsigned shift, so negative values terminate after eight bytes.
      val >>>= 8;
    }
    t[i - 1] = (byte) (t.length - i);
    buf.write(t, i - 1, t.length - i + 1);
  }

  /**
   * Add the bitwise complement of an integer value.
   *
   * Equivalent to {@code add(~val)}.
   *
   * @param val the value whose complement is added.
   */
  public void desc(long val) {
    add(~val);
  }

  /**
   * Add a byte array as-is, without escaping.
   *
   * This should only be used when the byte array came from a prior index key
   * and the caller is trying to create a new key with this key embedded at
   * the end.
   *
   * @param bin the binary to append as-is, without further escaping.
   */
  public void addRaw(byte[] bin) {
    buf.write(bin, 0, bin.length);
  }

  /**
   * Obtain a copy of the internal storage array.
   *
   * @return the current state of this, converted into a flat byte array.
   */
  public byte[] toByteArray() {
    return buf.toByteArray();
  }

  /** Write one unsigned byte value (0..255), escaping 0x00 and 0xff. */
  private void writeEscaped(int b) {
    if (b == 0x00) {
      buf.write(0x00);
      buf.write(0xff);

    } else if (b == 0xff) {
      buf.write(0xff);
      buf.write(0x00);

    } else {
      buf.write(b);
    }
  }

  /** Encode a string as UTF-8, failing hard if the JVM lacks the charset. */
  private static byte[] utf8(String str) {
    try {
      return str.getBytes("UTF-8");
    } catch (UnsupportedEncodingException e) {
      throw new RuntimeException("JVM does not support UTF-8", e);
    }
  }
}