// com.bigdata.rdf.vocab.BaseVocabulary (Maven / Gradle / Ivy)
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
[email protected]
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Aug 26, 2008
*/
package com.bigdata.rdf.vocab;
import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Map;
import org.apache.log4j.Logger;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import com.bigdata.bop.Constant;
import com.bigdata.bop.IConstant;
import com.bigdata.io.LongPacker;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.impl.uri.VocabURIByteIV;
import com.bigdata.rdf.internal.impl.uri.VocabURIShortIV;
import com.bigdata.rdf.model.BigdataURI;
import com.bigdata.rdf.model.BigdataValue;
import com.bigdata.rdf.model.BigdataValueFactory;
import com.bigdata.rdf.model.BigdataValueFactoryImpl;
import com.bigdata.rdf.store.AbstractTripleStore;
/**
* Base class for {@link Vocabulary} implementations.
*
* @author Bryan Thompson
* @version $Id$
*/
abstract public class BaseVocabulary implements Vocabulary, Externalizable {
/**
 * Logger for this class.
 */
final static private transient Logger log = Logger
.getLogger(BaseVocabulary.class);
/**
 * The serialVersionUID as reported by the trunk on Oct 6, 2010.
 */
private static final long serialVersionUID = 1560142397515291331L;
/**
 * The {@link BigdataValueFactory} for the namespace associated with the KB
 * instance. It is assigned either by the namespace constructor or, during
 * de-serialization, once the namespace has been read back from the stream.
 */
private transient BigdataValueFactory valueFactory;
/**
 * An ordered set of the declared vocabulary classes in the order in which
 * they were declared. It is <code>null</code> until the vocabulary has been
 * initialized and is populated through {@link #addDecl(VocabularyDecl)}.
 */
private transient LinkedHashSet decls;
/**
 * The {@link Value}s together with their assigned {@link IV}s, in the order
 * in which the values were declared. It is <code>null</code> until the
 * vocabulary has been initialized.
 *
 * Note: The {@link IV} is permanently attached to each {@link BigdataValue}.
 *
 * Note: A {@link Map} is used for O(1) lookup of a {@link BigdataValue}
 * from a {@link Value}, but the keys and values for a given entry are
 * always the same reference.
 */
private transient LinkedHashMap val2iv;
/**
 * Reverse lookup from {@link IV} to {@link Value}. It is <code>null</code>
 * until the vocabulary has been initialized and is filled in when the
 * {@link IV}s are generated.
 */
@SuppressWarnings("rawtypes")
private transient Map iv2val;
/**
 * De-serialization constructor.
 * <p>
 * Leaves every field unset; the state is restored afterwards by
 * {@link #readExternal(ObjectInput)}.
 */
protected BaseVocabulary() {
/*
 * Note: The value factory (and hence the namespace) is set by
 * readExternal().
 */
}
/**
* Ctor used by {@link AbstractTripleStore#create()}.
*
* @param database
* The database.
*/
protected BaseVocabulary(final String namespace) {
if (namespace == null)
throw new IllegalArgumentException();
this.valueFactory = BigdataValueFactoryImpl
.getInstance(namespace);
}
/**
 * Invoked by {@link AbstractTripleStore#create()} to initialize the
 * {@link Vocabulary}. The internal collections are allocated with a default
 * initial capacity because nothing better is known on this code path.
 *
 * @throws IllegalStateException
 *             if {@link #init()} has already been invoked.
 */
synchronized final public void init() {
    // Same value as the default initial capacity of a hash map.
    final int defaultCapacity = 16;
    init(defaultCapacity/* ndecls */, defaultCapacity/* nvalues */);
}
/**
 * Core initialization shared by {@link #init()} and the de-serialization
 * path. Allocates the internal collections, lets the subclass register its
 * declarations, collects the values of those declarations and finally
 * assigns an {@link IV} to each value.
 *
 * @param declsInitialCapacity
 *            The initial capacity of the set of declarations.
 * @param valuesInitialCapacity
 *            The initial capacity of the forward (value to value) map.
 *
 * @throws IllegalStateException
 *             if the value factory has not been set.
 * @throws IllegalStateException
 *             if the vocabulary has already been initialized.
 */
synchronized final private void init(final int declsInitialCapacity,
        final int valuesInitialCapacity) {
    if (valueFactory == null)
        throw new IllegalStateException();
    if (val2iv != null)
        throw new IllegalStateException();
    if (iv2val != null)
        throw new IllegalStateException();
    // Setup declarations set.
    this.decls = new LinkedHashSet(declsInitialCapacity);
    // Setup forward map.
    val2iv = new LinkedHashMap(valuesInitialCapacity);
    // Hook for subclass to provide its vocabulary decls.
    addValues();
    // Collect the values of every declaration into the forward map.
    addAllDecls();
    /*
     * Setup reverse map now that we know the exact size. This must come
     * after addAllDecls(): before that call the forward map is still empty
     * and the reverse map would be allocated with a capacity of zero.
     */
    iv2val = new LinkedHashMap(val2iv.size());
    // Make stable assignment of IVs to each Value, populating maps.
    generateIVs();
}
/**
 * Hook for subclasses to provide their {@link VocabularyDecl}s using
 * {@link #addDecl(VocabularyDecl)}.
 * <p>
 * This is invoked during initialization, after the set of declarations has
 * been allocated and before the values of those declarations are collected.
 */
abstract protected void addValues();
/**
 * Register a vocabulary declaration. Declarations are kept in the order in
 * which they are added.
 *
 * @param decl
 *            The vocabulary declaration.
 *
 * @throws IllegalArgumentException
 *             if <i>decl</i> is <code>null</code>.
 */
final protected void addDecl(final VocabularyDecl decl) {
    if (decl == null) {
        throw new IllegalArgumentException();
    }
    if (log.isInfoEnabled()) {
        // Log the name of the declaring class.
        final String declClassName = decl.getClass().getName();
        log.info(declClassName);
    }
    decls.add(decl);
}
// /**
// * Adds a {@link Value} into the internal collection.
// *
// * @param value
// * The value.
// *
// * @throws IllegalArgumentException
// * if the value is null.
// */
// final protected void add(final URI value) {
//
// if (value == null)
// throw new IllegalArgumentException();
//
// // convert to BigdataValues when adding to the collection.
// val2iv.add(valueFactory.asValue(value));
//
// }
/**
 * Visit every registered declaration, in declaration order, and enter each
 * of its items into the forward map. A value which was already entered is
 * reported with a warning.
 */
private void addAllDecls() {
    for (VocabularyDecl decl : decls) {
        final Iterator itr = decl.values();
        while (itr.hasNext()) {
            // The value factory converts each item into a BigdataValue.
            final BigdataValue value = valueFactory.asValue(itr.next());
            // put() returns null iff the value was not present before.
            final boolean firstDeclaration = val2iv.put(value, value) == null;
            if (firstDeclaration) {
                if (log.isDebugEnabled())
                    log.debug(decl.getClass().getName() + ":" + value);
                continue;
            }
            /*
             * Some vocabulary has declared this value already. That is
             * harmless, but the vocabularies are supposed to be distinct.
             */
            log.warn("Duplicate declaration: " + value);
        }
    }
}
/**
 * Make a stable assignment of {@link IV}s to declared {@link Value}s.
 * <p>
 * The values are visited in the order in which they were entered into the
 * forward map and each one is given the next ordinal, starting at zero. The
 * first 256 values receive a {@link VocabURIByteIV} and all later values
 * receive a {@link VocabURIShortIV}. The assignment therefore depends only
 * on the order of the declared values.
 *
 * @throws UnsupportedOperationException
 *             if there are more than {@link #MAX_ITEMS} declared values.
 */
private void generateIVs() {
    // Refuse to proceed if there are more values than can be assigned.
    final int n = size();
    if (n > MAX_ITEMS) {
        throw new UnsupportedOperationException(
                "Too many vocabulary items: n=" + n + ", but maximum is "
                        + MAX_ITEMS);
    }
    // The ordinal of the next value (also the #of IVs generated so far).
    int ordinal = 0;
    // The Values in the order in which they were declared.
    for (Map.Entry e : val2iv.entrySet()) {
        final BigdataValue value = e.getValue();
        // A byte for the first 256 ordinals, a short for the rest.
        @SuppressWarnings("rawtypes")
        final IV iv = (ordinal < 256)
                ? new VocabURIByteIV((byte) ordinal)
                : new VocabURIShortIV((short) ordinal);
        // Cache the IV on the Value.
        value.setIV(iv);
        /*
         * Attach the Value to the IV as well.
         *
         * NOTE(review): the comment which used to sit here said "Do not
         * cache the Value on the IV", which contradicts this call. Confirm
         * which of the two is intended.
         */
        iv.setValue(value);
        iv2val.put(iv, value);
        ordinal++;
    }
    assert iv2val.size() == val2iv.size();
}
/**
 * The maximum #of vocabulary items: 256 {@link VocabURIByteIV}s plus
 * {@link Short#MAX_VALUE} {@link VocabURIShortIV}s.
 *
 * NOTE(review): this evaluates to 256 + 32767 = 33023. The documentation
 * used to say "64k" {@link VocabURIShortIV}s, which does not match the
 * expression below. Confirm which of the two is intended.
 */
static private final int MAX_ITEMS = Short.MAX_VALUE + 256;
/**
 * The namespace of the KB instance, as reported by the
 * {@link BigdataValueFactory} used by this vocabulary.
 */
final public String getNamespace() {
return valueFactory.getNamespace();
}
/**
 * The #of declared values.
 *
 * @throws IllegalStateException
 *             if the vocabulary has not been initialized.
 */
final public int size() {
    if (val2iv != null) {
        return val2iv.size();
    }
    throw new IllegalStateException();
}
/**
 * Visit the declared values in the order in which they were declared. The
 * iterator is obtained from an unmodifiable view of the forward map, so it
 * can not be used to remove values.
 *
 * @throws IllegalStateException
 *             if the vocabulary has not been initialized. This matches the
 *             other accessors; without this check the method would fail
 *             with a {@link NullPointerException} instead.
 */
final public Iterator values() {
    if (val2iv == null)
        throw new IllegalStateException();
    return Collections.unmodifiableMap(val2iv).values().iterator();
}
/**
 * Reverse lookup of the declared value for an {@link IV}.
 *
 * @param iv
 *            The {@link IV}.
 *
 * @return The value to which that {@link IV} was assigned -or-
 *         <code>null</code> if the {@link IV} is not known to this
 *         vocabulary.
 *
 * @throws IllegalStateException
 *             if the vocabulary has not been (fully) initialized.
 * @throws IllegalArgumentException
 *             if <i>iv</i> is <code>null</code>.
 */
@SuppressWarnings("rawtypes")
final public BigdataValue asValue(final IV iv) {
    /*
     * Note: The lookup below reads the reverse map, so that is the map
     * which must exist. The forward map is allocated first during
     * initialization; checking only the forward map would allow a
     * NullPointerException while initialization is still in progress.
     */
    if (val2iv == null || iv2val == null)
        throw new IllegalStateException();
    if (iv == null)
        throw new IllegalArgumentException();
    return iv2val.get(iv);
}
/**
 * Lookup the {@link IV} assigned to a declared value.
 *
 * @param value
 *            The value.
 *
 * @return The {@link IV} cached on the matching declared value -or-
 *         <code>null</code> if the value was not declared.
 *
 * @throws IllegalStateException
 *             if the vocabulary has not been initialized.
 * @throws IllegalArgumentException
 *             if <i>value</i> is <code>null</code>.
 */
@SuppressWarnings("rawtypes")
final public IV get(final Value value) {
    if (val2iv == null) {
        throw new IllegalStateException();
    }
    if (value == null) {
        throw new IllegalArgumentException();
    }
    // The declared value carries its IV.
    final BigdataValue declared = val2iv.get(value);
    return declared == null ? null : declared.getIV();
}
/**
 * Wrap the {@link IV} assigned to a declared value as a {@link Constant}.
 *
 * @param value
 *            The value.
 *
 * @return A new {@link Constant} for the {@link IV} of that value.
 *
 * @throws IllegalArgumentException
 *             if the value was not declared by this vocabulary.
 */
@SuppressWarnings("rawtypes")
final public IConstant getConstant(final Value value) {
    final IV iv = get(value);
    if (iv != null) {
        return new Constant(iv);
    }
    throw new IllegalArgumentException("Not defined: " + value);
}
// /**
// * The initial version. This version is no longer supported. The manner in
// * which the lexicon is encoded has fundamentally changed with the
// * replacement of the TERM2ID and ID2TERM indices with a single TERMS index
// * and additional inlining of values into the statement indices.
// */
// private static final transient short VERSION0 = 0;
//
// /**
// * This version modified the serialization to include the namespace of the
// * KB instance and to pack the byte length values (this version was never
// * deployed).
// */
// private static final transient short VERSION1 = 1;
/**
 * This version modified the serialization to include the namespace of the
 * KB instance and a list of the {@link VocabularyDecl} classes to be
 * instantiated. The names of those classes are given in the order in which
 * they were declared. When the vocabulary is deserialized, the
 * {@link BigdataValue}s and {@link IV}s are simply reconstructed from those
 * classes.
 *
 * Note: VERSION ZERO (0) was the initial version. That version is no longer
 * supported. The manner in which the lexicon is encoded has fundamentally
 * changed with the replacement of the TERM2ID and ID2TERM indices with a
 * single TERMS index and additional inlining of values into the statement
 * indices.
 */
private static final transient short VERSION2 = 2;
/**
 * The current version. This is the version identifier which is written at
 * the start of the serialized record.
 */
private static final transient short currentVersion = VERSION2;
/**
 * De-serialize the vocabulary. The record starts with a version identifier
 * which selects the reader for the remainder of the record. Only
 * {@link #VERSION2} is understood.
 *
 * @throws IllegalStateException
 *             if this instance has already been initialized.
 * @throws UnsupportedOperationException
 *             if the version identifier is not recognized.
 */
@Override
public void readExternal(final ObjectInput in) throws IOException,
        ClassNotFoundException {
    // Neither map may exist yet.
    if (val2iv != null || iv2val != null) {
        throw new IllegalStateException();
    }
    final short version = in.readShort();
    if (version == VERSION2) {
        readVersion2(in);
        return;
    }
    // Note: The readers for the older versions (0 and 1) were retired.
    throw new UnsupportedOperationException("Unknown version: "
            + version);
}
// /**
// * The old code for {@link #VERSION0}. This is here for historical purposes
// * only.
// *
// * @param in
// * @throws IOException
// */
// private void readVersion0(final ObjectInput in) throws IOException {
//
// /*
// * Note: VERSION0 was not able to provide the correct
// * BigdataValueFactory since it did not have access to the KB namespace.
// */
// final ValueFactory valueFactory = new ValueFactoryImpl();
//
// final BigdataValueSerializer valueSer = new BigdataValueSerializer(
// valueFactory);
//
// // read in the #of values.
// final int nvalues = in.readInt();
//
// if (nvalues < 0)
// throw new IOException();
//
// // allocate the map with sufficient capacity.
// val2iv = new LinkedHashMap(nvalues);
// iv2val = new LinkedHashMap(nvalues);
//
// for (int i = 0; i < nvalues; i++) {
//
// // #of bytes in the serialized value.
// int nbytes = in.readInt();
//
// // allocate array of that many bytes.
// byte[] b = new byte[nbytes];
//
// // read the data for the serialized value.
// in.readFully(b);
//
// // de-serialize the value.
// final Value value = valueSer.deserialize(b);
//
// // #of bytes in the serialized IV.
// nbytes = in.readInt();
//
// // allocate array for that many bytes.
// b = new byte[nbytes];
//
// // read the data for the serialized IV.
// in.readFully(b);
//
// // decode the IV.
// final IV iv = IVUtility.decode(b);
//
// // stuff in the map.
// val2iv.put(value, iv);
// iv2val.put(iv, value);
//
// }
//
// }
// private void readVersion1(final ObjectInput in) throws IOException {
//
// // read in the #of values.
// final int nvalues = LongPacker.unpackInt(in);
//
// // The namespace of the KB instance.
// final String namespace = in.readUTF();
//
// // Note: The value factory uses the namespace of the KB instance!
// valueFactory = BigdataValueFactoryImpl.getInstance(namespace);
//
// // ValueSerializer using the namespace of the KB instance!
// final BigdataValueSerializer valueSer = new BigdataValueSerializer(
// valueFactory);
//
// // allocate the map with sufficient capacity.
// val2iv = new LinkedHashMap(nvalues);
// iv2val = new LinkedHashMap(nvalues);
//
// // buffer reused for each Value/IV.
// final ByteArrayBuffer buf = new ByteArrayBuffer();
//
// // buffer reused for each Value.
// final StringBuilder tmp = new StringBuilder();
//
// for (int i = 0; i < nvalues; i++) {
//
// // #of bytes in the serialized value.
// int nbytes = LongPacker.unpackInt(in);
//
// buf.reset();
// buf.ensureCapacity(nbytes);
//
// // read the data for the serialized value.
// in.readFully(buf.array(), 0/* off */, nbytes/* len */);
//
// // de-serialize the value.
// final BigdataValue value = valueSer
// .deserialize(//
// new DataInputBuffer(buf.array(), 0/* off */, nbytes/* len */), //
// tmp//
// );
//
// // #of bytes in the serialized IV.
// nbytes = LongPacker.unpackInt(in);
//
// buf.reset();
// buf.ensureCapacity(nbytes);
//
// // read the data for the serialized IV.
// in.readFully(buf.array(), 0/* off */, nbytes/* len */);
//
// // decode the IV.
// final IV iv = IVUtility.decode(buf.array());
//
// // stuff in the map.
// val2iv.put(value, value);
// iv2val.put(iv, value);
// value.setIV(iv); // cache the IV
//// iv.setValue(value); // but do not cache the Value.
//
// }
//
// }
/**
 * Reader for {@link #VERSION2}.
 * <p>
 * The record holds, in order: the packed #of declarations, the packed #of
 * values, a checksum (the sum of the hash codes of the values) and the
 * namespace of the KB instance. The values themselves are not part of the
 * record. They are regenerated from the declarations provided by the
 * subclass and then verified against the counts and the checksum.
 *
 * @param in
 *            The input stream, positioned after the version identifier.
 *
 * @throws VocabularyVersioningException
 *             if the #of declarations, the #of values or the checksum of
 *             the regenerated vocabulary differs from the record.
 * @throws IOException
 *             if the record can not be read.
 */
private void readVersion2(final ObjectInput in) throws IOException {
    // read in the #of declarations.
    final int ndecls = LongPacker.unpackInt(in);
    // read in the #of values.
    final int nvalues = LongPacker.unpackInt(in);
    // read in the checksum (as recorded when the vocabulary was written).
    final long checksumActual = in.readLong();
    // The namespace of the KB instance.
    final String namespace = in.readUTF();
    // Note: The value factory uses the namespace of the KB instance!
    valueFactory = BigdataValueFactoryImpl.getInstance(namespace);
    /*
     * Initialize the vocabulary. The names of the declaration classes are
     * not part of the record, so the declarations come from the subclass
     * hook which is invoked by init(). init() also collects the values and
     * assigns the IVs.
     */
    init(ndecls, nvalues);
    if (ndecls != decls.size()) {
        /*
         * This indicates a versioning problem with the vocabulary
         * declaration classes. It is reported with the same exception as
         * the other two versioning checks below (which is an IOException).
         */
        throw new VocabularyVersioningException();
    }
    if (nvalues != val2iv.size()) {
        /*
         * This indicates a versioning problem with the vocabulary
         * declaration classes.
         */
        throw new VocabularyVersioningException();
    }
    // compute a checksum on the hash codes of the URIs.
    long checksum = 0;
    for(Value value : val2iv.keySet()) {
        checksum += value.hashCode();
    }
    if (checksum != checksumActual) {
        /*
         * This indicates a versioning problem with the vocabulary
         * declaration classes.
         */
        throw new VocabularyVersioningException();
    }
    /*
     * Note: The IVs are NOT generated again here. init() has already done
     * that, and a second pass would only allocate a duplicate IV for every
     * value.
     */
}
/**
 * Serialize the vocabulary. The version identifier is written first,
 * followed by the record for that version.
 *
 * @throws IllegalStateException
 *             if the vocabulary has not been initialized.
 */
@Override
public void writeExternal(final ObjectOutput out) throws IOException {
    // Both maps must exist.
    if (val2iv == null || iv2val == null) {
        throw new IllegalStateException();
    }
    out.writeShort(currentVersion);
    // Note: The writers for the older versions (0 and 1) were retired.
    if (currentVersion != VERSION2) {
        throw new AssertionError();
    }
    writeVersion2(out);
}
// /**
// * The old code for {@link #VERSION0}. This is here for historical purposes
// * only.
// *
// * @param out
// * @throws IOException
// */
// private void writeVersion0(final ObjectOutput out) throws IOException {
//
// final int nvalues = val2iv.size();
//
// // write on the #of values.
// out.writeInt(nvalues);
//
// // reused for each serialized term.
// final DataOutputBuffer buf = new DataOutputBuffer();
// final ByteArrayBuffer tbuf = new ByteArrayBuffer();
//
// final BigdataValueSerializer valueSer = new BigdataValueSerializer(
// new ValueFactoryImpl());
//
// final IKeyBuilder keyBuilder = KeyBuilder.newInstance();
//
// final Iterator> itr = val2iv.entrySet()
// .iterator();
//
// while (itr.hasNext()) {
//
// final Map.Entry entry = itr.next();
//
// final BigdataValue value = entry.getKey();
//
// final IV iv = entry.getValue();
//
// assert value != null;
//
// assert iv != null;
//
// // reset the buffer.
// buf.reset();
//
// // serialize the Value onto the buffer.
// valueSer.serialize(value, buf, tbuf);
//
// // #of bytes in the serialized value.
// final int nbytes = buf.limit();
//
// // write #of bytes on the output stream.
// out.writeInt(nbytes);
//
// // copy serialized value onto the output stream.
// out.write(buf.array(), 0, buf.limit());
//
// final byte[] b = iv.encode(keyBuilder.reset()).getKey();
//
// out.writeInt(b.length);
//
// out.write(b);
//
// }
//
// }
// private void writeVersion1(final ObjectOutput out) throws IOException {
//
// final int nvalues = val2iv.size();
// assert iv2val.size() == nvalues;
//
// // write on the #of values.
// LongPacker.packLong(out, nvalues);
//
// // The namespace of the KB instance.
// out.writeUTF(valueFactory.getNamespace());
//
// // reused for each serialized term.
// final DataOutputBuffer buf = new DataOutputBuffer();
// final ByteArrayBuffer tbuf = new ByteArrayBuffer();
// final IKeyBuilder keyBuilder = KeyBuilder.newInstance();
//
// final BigdataValueSerializer valueSer = new BigdataValueSerializer(
// valueFactory);
//
// final Iterator> itr = val2iv.entrySet()
// .iterator();
//
// while (itr.hasNext()) {
//
// final Map.Entry entry = itr.next();
//
// final BigdataValue value = entry.getValue();
//
// final IV iv = value.getIV();
//
// assert value != null;
//
// assert iv != null;
//
// // reset the buffer.
// buf.reset();
//
// // serialize the Value onto the buffer.
// valueSer.serialize(value, buf, tbuf);
//
// // #of bytes in the serialized value.
// final int nbytes = buf.limit();
//
// // write #of bytes on the output stream.
// LongPacker.packLong(out, nbytes);
//
// // copy serialized value onto the output stream.
// out.write(buf.array(), 0, buf.limit());
//
// // encode the key.
// iv.encode(keyBuilder.reset());
//
// // write #of bytes in the IV on the output stream.
// LongPacker.packLong(out, keyBuilder.len());
//
// // write the IV on the output stream.
// out.write(keyBuilder.array(), 0/* off */, keyBuilder.len());
//
// }
//
// }
/**
 * Writer for {@link #VERSION2}.
 * <p>
 * The record holds, in order: the packed #of declarations, the packed #of
 * values, a checksum (the sum of the hash codes of the values) and the
 * namespace of the KB instance. Neither the values nor the names of the
 * declaration classes are written.
 *
 * @param out
 *            The output stream, positioned after the version identifier.
 */
private void writeVersion2(final ObjectOutput out) throws IOException {
    assert iv2val.size() == val2iv.size();
    final int ndecls = decls.size();
    final int nvalues = val2iv.size();
    // compute a checksum on the hash codes of the URIs.
    long checksum = 0;
    for(Value value : val2iv.keySet()) {
        checksum += value.hashCode();
    }
    // The #of declarations, then the #of values.
    LongPacker.packLong(out, ndecls);
    LongPacker.packLong(out, nvalues);
    // The checksum.
    out.writeLong(checksum);
    // The namespace of the KB instance.
    out.writeUTF(valueFactory.getNamespace());
}
/**
 * An instance of this class indicates a versioning problem with the
 * {@link VocabularyDecl declaration classes}. If a vocabulary declaration
 * class is modified after it has been used to instantiate a triple store
 * then the mapping of URIs onto IVs might not be stable with the result
 * that encode and decode of statements may be broken.
 *
 * @author Bryan Thompson
 */
public static class VocabularyVersioningException extends IOException {
/**
 * The serialization version identifier for this exception class.
 */
private static final long serialVersionUID = 1L;
}
}