All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.spo.FastRDFValueCoder2 Maven / Gradle / Ivy

package com.bigdata.rdf.spo;

import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.io.OutputStream;

import org.apache.log4j.Logger;

import com.bigdata.btree.raba.IRaba;
import com.bigdata.btree.raba.codec.AbstractCodedRaba;
import com.bigdata.btree.raba.codec.ICodedRaba;
import com.bigdata.btree.raba.codec.IRabaCoder;
import com.bigdata.io.AbstractFixedByteArrayBuffer;
import com.bigdata.io.DataOutputBuffer;
import com.bigdata.rdf.model.StatementEnum;
import com.bigdata.rdf.store.AbstractTripleStore;
import com.bigdata.util.Bytes;
import com.bigdata.util.BytesUtil;

/**
 * Coder for values in statement index with inference enabled but without SIDS.
 * We encode the value in 4 bits per statement. The 1st bit is the override
 * flag. The remaining next two bits are the statement type {inferred, explicit,
 * or axiom}. The last bit is not used. The bit sequence 0111 is
 * used as a place holder for a null value and de-serializes to a
 * [null]. This is just the low nibble of the {@link StatementEnum#code()}. This
 * "nibble" encoding makes it fast and easy to extract the value from the coded
 * record. The first value is stored in the low nibble, the next in the high
 * nibble, then it is on to the low nibble of the next byte.
 * 

* Note: the 'override' flag is NOT stored in the statement indices, but it is * passed by the procedure that writes on the statement indices so that we can * decide whether or not to override the type when the statement is pre-existing * in the index. *

* Note: this procedure can not be used if * {@link AbstractTripleStore.Options#STATEMENT_IDENTIFIERS} are enabled. * * @see StatementEnum * * @author Bryan Thompson * * @todo A mutable coded value raba could be implemented for the statement * indices. With a fixed bit length per value, we can represent the data * in m/2 bytes. This is also true for things like TERM2ID where the * values could be represented as a long[]. */ public class FastRDFValueCoder2 implements Externalizable, IRabaCoder { protected static final Logger log = Logger .getLogger(FastRDFValueCoder2.class); /** * */ private static final long serialVersionUID = 1933430721504168533L; /** * The only version defined so far. */ private static transient final byte VERSION0 = 0x00; /** * The byte offset of the start of the coded values. Each value is 4 bits. */ private static transient final int O_values = (1/* version */+ Bytes.SIZEOF_INT/* size */); /** * No. */ @Override final public boolean isKeyCoder() { return false; } /** * Yes. */ @Override final public boolean isValueCoder() { return true; } @Override public boolean isDuplicateKeys() { return false; } /** * Sole constructor (handles de-serialization also). */ public FastRDFValueCoder2() { } @Override public void writeExternal(ObjectOutput out) throws IOException { // NOP } @Override public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { // NOP } @Override public AbstractFixedByteArrayBuffer encode(final IRaba raba, final DataOutputBuffer buf) { /* * Note: This code path has nearly zero overhead when compared to * encodeLive(). */ return encodeLive(raba, buf).data(); } @Override public ICodedRaba encodeLive(final IRaba raba, final DataOutputBuffer buf) { if (raba == null) throw new UnsupportedOperationException(); if (buf == null) throw new UnsupportedOperationException(); final int n = raba.size(); // This is the exact capacity required to code the data. final int initialCapacity = 1 + Bytes.SIZEOF_INT + BytesUtil.bitFlagByteLength(4 * n); buf.ensureCapacity(initialCapacity); // The byte offset of the start of the coded record in the buffer. final int O_origin = buf.pos(); final int size = raba.size(); buf.putByte(VERSION0); buf.putInt(size); for (int i = 0; i < size; i += 2) { final byte lowNibble = (raba.isNull(i) ? 7 : raba.get(i)[0]); final byte highNibble = (i + 1 == size ? 0 : ((raba.isNull(i + 1) ? 7 : raba.get(i + 1)[0]))); final byte b = (byte) (0xff & (highNibble << 4 | lowNibble)); buf.putByte(b); } // slice on just the coded data record. final AbstractFixedByteArrayBuffer slice = buf.slice(// O_origin, buf.pos() - O_origin); return new CodedRabaImpl(slice, size); } @Override public ICodedRaba decode(final AbstractFixedByteArrayBuffer data) { return new CodedRabaImpl(data); } /** * Decoder. * * @author Bryan Thompson */ private static class CodedRabaImpl extends AbstractCodedRaba { private final AbstractFixedByteArrayBuffer data; /** * Cached. */ private final int size; @Override final public AbstractFixedByteArrayBuffer data() { return data; } /** * No. */ @Override final public boolean isKeys() { return false; } @Override final public int size() { return size; } @Override final public int capacity() { return size; } @Override final public boolean isEmpty() { return size == 0; } /** * Always true. */ @Override final public boolean isFull() { return true; } /** * Constructor used when encoding a data record. * * @param data * The coded data record. * @param size * The size of the coded {@link IRaba}. */ public CodedRabaImpl(final AbstractFixedByteArrayBuffer data, final int size) { this.data = data; this.size = size; } /** * Constructor used when decoding a data record. * * @param data * The coded data record. */ public CodedRabaImpl(final AbstractFixedByteArrayBuffer data) { if (data == null) throw new IllegalArgumentException(); this.data = data; final byte version = data.getByte(0/* offset */); if (version != VERSION0) { throw new RuntimeException("Unknown version=" + version); } size = data.getInt(1/* offset */); } /** * Thread-safe extract of the bits coded value for the specified index. * * @param index * The specified index. * * @return The bit coded value. * * @throws IndexOutOfBoundsException * unless the index is in [0:size-1]. */ final protected byte getBits(final int index) { if (index < 0 || index >= size) throw new IndexOutOfBoundsException(); final byte b = data.getByte(O_values + index / 2); final int t = (index % 2 == 0) ? (b & 0x0f) : (b >> 4); return (byte) (0xff & t); } @Override final public int copy(final int index, final OutputStream os) { final byte bits = getBits(index); if (bits == 7) { // A null. throw new NullPointerException(); } else { try { os.write(bits); } catch (IOException e) { throw new RuntimeException(e); } } return 1; } @Override final public byte[] get(final int index) { final byte bits = getBits(index); if (bits == 7) { // A null. return null; } else { // return new byte[] { bits }; return RDFValueFactory.getValue(bits); } } @Override final public boolean isNull(final int index) { return getBits(index) == 7; } /** * Returns ONE (1) unless the value is a null. * * {@inheritDoc} */ @Override final public int length(final int index) { if (isNull(index)) throw new NullPointerException(); return 1; } /** * Not supported. */ @Override final public int search(final byte[] searchKey) { throw new UnsupportedOperationException(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy