// ![JAR search and dependency download from the Maven repository](/logo.png)
// com.bigdata.btree.raba.codec.FrontCodedRabaCoder Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
package com.bigdata.btree.raba.codec;
import it.unimi.dsi.fastutil.bytes.ByteArrayFrontCodedList;
import it.unimi.dsi.fastutil.bytes.custom.CustomByteArrayFrontCodedList;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.OutputStream;
import java.util.Iterator;
import org.apache.log4j.Logger;
import com.bigdata.btree.raba.IRaba;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.util.Bytes;
/**
 * Class provides (de-)compression for logical byte[][]s based on front coding.
 * The data MUST be ordered. <code>null</code> values are not allowed.
 *
 * @author Bryan Thompson
 */
public class FrontCodedRabaCoder implements IRabaCoder, Externalizable {
/**
*
*/
private static final long serialVersionUID = 4943035649252818747L;
protected static final Logger log = Logger
.getLogger(FrontCodedRabaCoder.class);
private int ratio;
@Override
public String toString() {
return super.toString() + "{ratio=" + ratio + "}";
}
/**
* A pre-parameterized version of the {@link FrontCodedRabaCoder} which is
* used as the default {@link IRabaCoder} for B+Tree keys for both nodes and
* leaves.
*
* @author Bryan
* Thompson
*/
public static class DefaultFrontCodedRabaCoder extends FrontCodedRabaCoder {
/**
*
*/
private static final long serialVersionUID = 7300378339686013560L;
public static final transient DefaultFrontCodedRabaCoder INSTANCE = new DefaultFrontCodedRabaCoder();
protected transient static final int DEFAULT_RATIO = 8;
public DefaultFrontCodedRabaCoder() {
super(DEFAULT_RATIO);
}
@Override
public void readExternal(ObjectInput in) throws IOException,
ClassNotFoundException {
// NOP
}
@Override
public void writeExternal(ObjectOutput out) throws IOException {
// NOP
}
}
/**
* De-serialization ctor.
*/
public FrontCodedRabaCoder() {
}
/**
* @param ratio
* The ratio as defined by {@link ByteArrayFrontCodedList}. For
* front-coding, compression trades directly for search
* performance. Every ratio byte[]s is fully coded. Binary search
* is used on the fully coded byte[]s and will identify a bucket
* ratio front-coded values. Linear search is then
* performed within the bucket of front-coded values in which the
* key would be found if it is present. Therefore the ratio is
* also the maximum of steps in the linear scan.
*
* Let m := n / ratio
, where n is the #of
* entries in the byte[][]
(the size of the total
* search problem), m is the size of the binary search
* problem and ratio is the size of the linear search problem.
* Solving for ratio, we have: ratio := n / m
. Some
* examples:
*
*
* m = n(64)/ratio(16) = 4
*
* m = n(64)/ratio(8) = 8
*
* m = n(64)/ratio(6) ˜ 11
*
* m = n(64)/ratio(4) = 16
*
*/
public FrontCodedRabaCoder(final int ratio) {
this.ratio = ratio;
}
@Override
final public boolean isKeyCoder() {
return true;
}
@Override
final public boolean isValueCoder() {
return false;
}
@Override
public boolean isDuplicateKeys() {
return false;
}
@Override
public void readExternal(ObjectInput in) throws IOException,
ClassNotFoundException {
ratio = in.readInt();
}
@Override
public void writeExternal(ObjectOutput out) throws IOException {
out.writeInt(ratio);
}
private static final byte VERSION0 = 0x00;
private static final int SIZEOF_VERSION = Bytes.SIZEOF_BYTE;
private static final int SIZEOF_SIZE = Bytes.SIZEOF_INT;
private static final int SIZEOF_RATIO = Bytes.SIZEOF_INT;
/** The byte offset of the version identifier. */
private static final int O_VERSION = 0;
/**
* The byte offset of the field coding the #of entries in the logical
* byte[][].
*/
private static final int O_SIZE = O_VERSION + SIZEOF_VERSION;
/** The byte offset of the field coding the ratio. */
private static final int O_RATIO = O_SIZE + SIZEOF_SIZE;
/** The byte offset of the start of the front-coded representation. */
private static final int O_DATA = O_RATIO + SIZEOF_RATIO;
@Override
public ICodedRaba encodeLive(final IRaba raba, final DataOutputBuffer buf) {
if (raba == null)
throw new IllegalArgumentException();
if (!raba.isKeys())
throw new UnsupportedOperationException("Must be keys.");
if (buf == null)
throw new IllegalArgumentException();
final int size = raba.size();
if (log.isInfoEnabled())
log.info("n=" + raba.size() + ", capacity=" + raba.capacity()
+ ", ratio=" + ratio);
// The byte offset of the origin of the coded record into the buffer.
final int O_origin = buf.pos();
// front-code the byte[][].
final CustomByteArrayFrontCodedList decoder = new CustomByteArrayFrontCodedList(
raba.iterator(), ratio, isDuplicateKeys());
try {
// The record version identifier.
buf.write(VERSION0);
// #of entries (zero length indicates NO data)
buf.writeInt(size);
// The ratio used to front code the data.
buf.writeInt(ratio);
decoder.getBackingBuffer().writeOn(buf);
buf.flush();
} catch (IOException ex) {
throw new RuntimeException(ex);
}
final AbstractFixedByteArrayBuffer slice = buf.slice(//
O_origin, buf.pos() - O_origin);
return new CodedRabaImpl(slice, decoder);
}
@Override
public AbstractFixedByteArrayBuffer encode(final IRaba raba,
final DataOutputBuffer buf) {
/*
* Note: there is nearly zero overhead associated with this code path.
* The only unnecessary action is wrapping the CodedRabaImpl with the
* (slice,decoder), which is pretty near zero cost.
*/
return encodeLive(raba, buf).data();
}
@Override
public ICodedRaba decode(final AbstractFixedByteArrayBuffer data) {
return new CodedRabaImpl(data, isDuplicateKeys());
}
/**
* Decoder for an ordered logical byte[][] without null
s.
*
* @author Bryan
* Thompson
* @version $Id$
*/
private static class CodedRabaImpl extends AbstractCodedRaba {
private final AbstractFixedByteArrayBuffer data;
private final CustomByteArrayFrontCodedList decoder;
/**
*
* @param data
* The record containing the coded data.
* @param hasDups
* true
iff the {@link IRabaCoder} supports
* duplicate keys.
*/
public CodedRabaImpl(final AbstractFixedByteArrayBuffer data,
final boolean hasDups) {
final byte version = data.getByte(O_VERSION);
if (version != VERSION0) {
throw new RuntimeException("Unknown version: " + version);
}
// The #of entries in the logical byte[][].
final int size = data.getInt(O_SIZE);
// The ratio.
final int ratio = data.getInt(O_RATIO);
// wrap slice with decoder.
this.decoder = new CustomByteArrayFrontCodedList(size, ratio, data
.array(), data.off() + O_DATA, data.len(), hasDups);
this.data = data;
}
/**
* Alternative constructor avoids the cost of constructing the decoder
* when it is already available.
*
* @param data
* The record containing the coded data.
* @param decoder
* The decoder object.
*/
public CodedRabaImpl(final AbstractFixedByteArrayBuffer data,
final CustomByteArrayFrontCodedList decoder) {
this.data = data;
this.decoder = decoder;
}
@Override
public AbstractFixedByteArrayBuffer data() {
return data;
}
/**
* Represents B+Tree keys.
*/
@Override
final public boolean isKeys() {
return true;
}
@Override
final public int size() {
return decoder.size();
}
@Override
final public int capacity() {
return decoder.size();
}
@Override
final public boolean isEmpty() {
return size() == 0;
}
/**
* Always returns true
since the front-coded representation
* is dense.
*/
@Override
final public boolean isFull() {
return true;
}
/**
* Always returns false
(null
s are not
* allowed).
*/
@Override
final public boolean isNull(final int index) {
return false;
}
@Override
final public byte[] get(final int index) {
return decoder.get(index);
}
@Override
final public int length(final int index) {
return decoder.arrayLength(index);
}
@Override
public int copy(final int index, final OutputStream os) {
try {
return decoder.writeOn(os, index);
} catch (IOException ex) {
throw new RuntimeException(ex);
}
}
@Override
public Iterator iterator() {
return decoder.iterator();
}
@Override
public int search(final byte[] searchKey) {
// optimization: always keys.
// if(isKeys()) {
return decoder.search(searchKey);
// }
//
// throw new UnsupportedOperationException();
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy