![JAR search and dependency download from the Maven repository](/logo.png)
com.bigdata.rdf.lexicon.Term2IdTupleSerializer Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Jul 7, 2008
*/
package com.bigdata.rdf.lexicon;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Properties;
import org.openrdf.model.Value;
import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.keys.DefaultKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilderFactory;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;
import com.bigdata.rdf.internal.impl.TermId;
/**
* Handles the term:id index (forward mapping for the lexicon). The keys are
* unsigned byte[]s representing a total order for the RDF {@link Value} space.
* The index assigns term identifiers, and those term identifiers are stored in
* the values of the index.
*
* @author Bryan Thompson
* @version $Id: Term2IdTupleSerializer.java 4818 2011-06-29 20:01:56Z
* thompsonbry $
*/
public class Term2IdTupleSerializer extends DefaultTupleSerializer {
/**
*
*/
private static final long serialVersionUID = 1486882823994548034L;
/**
* De-serialization ctor.
*/
public Term2IdTupleSerializer() {
super();
}
/**
* Configures the {@link IKeyBuilderFactory} from the caller's properties.
*
* @param properties
*/
public Term2IdTupleSerializer(final Properties properties) {
this(new DefaultKeyBuilderFactory(properties));
}
/**
* Uses the caller's {@link IKeyBuilderFactory}.
*
* @param keyBuilderFactory
*/
public Term2IdTupleSerializer(final IKeyBuilderFactory keyBuilderFactory) {
/*
* The FixedLengthValueRabaCoder is MUCH faster than the
* CannonicalHuffman coder and takes the same space on the disk. An all
* around win.
*
* @see https://sourceforge.net/apps/trac/bigdata/ticket/506 (Load,
* closure and query performance in 1.1.x versus 1.0.x)
*/
// super(keyBuilderFactory);
super(keyBuilderFactory, getDefaultLeafKeysCoder(),
IVUtility.PACK_TIDS //
? new com.bigdata.btree.raba.codec.SimpleRabaCoder()//
: new com.bigdata.btree.raba.codec.FixedLengthValueRabaCoder(9)//
);
}
/**
* Thread-local object for constructing keys for the lexicon.
*/
public LexiconKeyBuilder getLexiconKeyBuilder() {
/*
* FIXME We should save off a reference to this to reduce heap churn
* and then use that reference in this class.
*/
return new LexiconKeyBuilder(getKeyBuilder());
}
/**
* Return a {@link LexiconKeyBuilder} that is setup with collation strength
* PRIMARY.
*
* @see
* Name2Addr.indexNameScan(prefix) uses scan + filter
*/
public LexiconKeyBuilder getLexiconPrimaryKeyBuilder() {
/*
* FIXME We should save off a reference to this to reduce heap churn
* and then use that reference in this class.
*/
return new LexiconKeyBuilder(getPrimaryKeyBuilder());
}
/**
* You can not decode the term:id keys since they include Unicode sort keys
* and that is a lossy transform.
*
* @throws UnsupportedOperationException
* always
*/
@Override
public Object deserializeKey(ITuple tuple) {
throw new UnsupportedOperationException();
}
/**
* Return the unsigned byte[] key for an RDF {@link Value}.
*
* @param obj
* The RDF {@link Value}.
*/
@Override
public byte[] serializeKey(Object obj) {
return getLexiconKeyBuilder().value2Key((Value)obj);
}
/**
* Return the byte[] value, which is a term identifier written as a packed
* long integer.
*
* @param obj
* A term identifier expressed as a {@link TermId}.
*/
@Override
public byte[] serializeVal(final Object obj) {
final IV,?> iv = (IV,?>) obj;
/*
* Note: reusing the same KeyBuilder as the keys, but that is Ok since
* the IV encoding does not rely on the Unicode properties and the
* KeyBuilder is a thread-local instance so there is no contention for
* it.
*/
final byte[] key = iv.encode(getKeyBuilder()).getKey();
return key;
}
/**
* De-serializes the {@link ITuple} as a {@link IV} whose value is the
* term identifier associated with the key. The key itself is not decodable.
*/
@Override
public IV deserialize(final ITuple tuple) {
final ByteArrayBuffer b = tuple.getValueBuffer();
return IVUtility.decodeFromOffset(b.array(), 0);
}
/**
* The initial version (no additional persistent state).
*/
private final static transient byte VERSION0 = 0;
/**
* The current version.
*/
private final static transient byte VERSION = VERSION0;
@Override
public void readExternal(final ObjectInput in) throws IOException,
ClassNotFoundException {
super.readExternal(in);
final byte version = in.readByte();
switch (version) {
case VERSION0:
break;
default:
throw new UnsupportedOperationException("Unknown version: "
+ version);
}
}
@Override
public void writeExternal(final ObjectOutput out) throws IOException {
super.writeExternal(out);
out.writeByte(VERSION);
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy