All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.bigdata.rdf.lexicon.Term2IdTupleSerializer Maven / Gradle / Ivy

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Jul 7, 2008
 */

package com.bigdata.rdf.lexicon;

import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;
import java.util.Properties;

import org.openrdf.model.Value;

import com.bigdata.btree.DefaultTupleSerializer;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.keys.DefaultKeyBuilderFactory;
import com.bigdata.btree.keys.IKeyBuilderFactory;
import com.bigdata.io.ByteArrayBuffer;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;
import com.bigdata.rdf.internal.impl.TermId;

/**
 * Tuple serializer for the term:id index, i.e. the forward mapping of the
 * lexicon. Keys are unsigned byte[]s which impose a total order over the RDF
 * {@link Value} space. The index is what assigns term identifiers, and the
 * assigned identifier is what gets stored as the value of each tuple.
 * 
 * @author Bryan Thompson
 * @version $Id: Term2IdTupleSerializer.java 4818 2011-06-29 20:01:56Z
 *          thompsonbry $
 */
public class Term2IdTupleSerializer extends DefaultTupleSerializer {

    private static final long serialVersionUID = 1486882823994548034L;

    /**
     * Version tag for the original externalized form, which carries no
     * persistent state beyond what the superclass writes.
     */
    private static final byte VERSION0 = 0;

    /**
     * Version tag emitted by {@link #writeExternal(ObjectOutput)}.
     */
    private static final byte VERSION = VERSION0;

    /**
     * No-argument constructor required for de-serialization.
     */
    public Term2IdTupleSerializer() {

        super();

    }

    /**
     * Builds a {@link DefaultKeyBuilderFactory} from the caller's properties
     * and delegates to
     * {@link #Term2IdTupleSerializer(IKeyBuilderFactory)}.
     * 
     * @param properties
     *            The properties used to configure the
     *            {@link IKeyBuilderFactory}.
     */
    public Term2IdTupleSerializer(final Properties properties) {

        this(new DefaultKeyBuilderFactory(properties));

    }

    /**
     * Creates a serializer around the caller's {@link IKeyBuilderFactory}.
     * 
     * @param keyBuilderFactory
     *            The factory for the key builders used by this serializer.
     */
    public Term2IdTupleSerializer(final IKeyBuilderFactory keyBuilderFactory) {

        /*
         * The leaf keys and leaf values coders are given explicitly rather
         * than going through the single-argument superclass constructor.
         * 
         * Unless IVUtility.PACK_TIDS is set, the values are coded with a
         * FixedLengthValueRabaCoder over 9 byte values. That coder is MUCH
         * faster than the canonical Huffman coder and takes the same space on
         * the disk, so it is an all around win. When PACK_TIDS is set the
         * SimpleRabaCoder is used instead.
         * 
         * @see https://sourceforge.net/apps/trac/bigdata/ticket/506 (Load,
         * closure and query performance in 1.1.x versus 1.0.x)
         */
        super(keyBuilderFactory, getDefaultLeafKeysCoder(),
                !IVUtility.PACK_TIDS
                        ? new com.bigdata.btree.raba.codec.FixedLengthValueRabaCoder(9)
                        : new com.bigdata.btree.raba.codec.SimpleRabaCoder());

    }

    /**
     * Return an object for constructing keys for the lexicon. It wraps the
     * key builder obtained from {@link #getKeyBuilder()}, which this class
     * treats as a thread-local instance.
     * 
     * @return A new {@link LexiconKeyBuilder}.
     */
    public LexiconKeyBuilder getLexiconKeyBuilder() {

        /*
         * FIXME A new wrapper is allocated on every call. We should save off
         * a reference to it to reduce heap churn and then use that reference
         * in this class.
         */
        return new LexiconKeyBuilder(getKeyBuilder());

    }

    /**
     * Return a {@link LexiconKeyBuilder} that is set up with collation
     * strength PRIMARY. It wraps the key builder obtained from
     * {@link #getPrimaryKeyBuilder()}.
     * 
     * @return A new {@link LexiconKeyBuilder}.
     * 
     * @see "Name2Addr.indexNameScan(prefix) uses scan + filter"
     */
    public LexiconKeyBuilder getLexiconPrimaryKeyBuilder() {

        /*
         * FIXME A new wrapper is allocated on every call. We should save off
         * a reference to it to reduce heap churn and then use that reference
         * in this class.
         */
        return new LexiconKeyBuilder(getPrimaryKeyBuilder());

    }

    /**
     * Not supported. The term:id keys include Unicode sort keys, which are a
     * lossy transform, so a key can not be decoded back into its RDF
     * {@link Value}.
     * 
     * @throws UnsupportedOperationException
     *             always
     */
    @Override
    public Object deserializeKey(final ITuple tuple) {

        throw new UnsupportedOperationException();

    }

    /**
     * Return the unsigned byte[] key for an RDF {@link Value}.
     * 
     * @param obj
     *            The RDF {@link Value}; it is cast to {@link Value} without
     *            any prior type check.
     * 
     * @return The key generated by {@link LexiconKeyBuilder} for that value.
     */
    @Override
    public byte[] serializeKey(final Object obj) {

        // Obtain the builder before casting, as the original code does.
        final LexiconKeyBuilder lexiconKeyBuilder = getLexiconKeyBuilder();

        return lexiconKeyBuilder.value2Key((Value) obj);

    }

    /**
     * Return the byte[] value for a tuple, which is the encoded form of the
     * term identifier.
     * 
     * @param obj
     *            The term identifier. It is cast to {@link IV}; for this
     *            index it is expected to be a {@link TermId}.
     * 
     * @return The bytes produced by encoding the {@link IV}.
     */
    @Override
    public byte[] serializeVal(final Object obj) {

        /*
         * Note: The IV is encoded using the same KeyBuilder that is used for
         * the keys. That is Ok since the IV encoding does not rely on the
         * Unicode properties, and the KeyBuilder is a thread-local instance
         * so there is no contention for it.
         */
        return ((IV) obj).encode(getKeyBuilder()).getKey();

    }

    /**
     * De-serializes the value of the {@link ITuple} as the {@link IV} for the
     * term identifier associated with the key. The key itself is not
     * decodable.
     * 
     * @param tuple
     *            The tuple whose value buffer holds the encoded {@link IV}.
     * 
     * @return The {@link IV} decoded starting at offset zero of the array
     *         backing the value buffer.
     */
    @Override
    public IV deserialize(final ITuple tuple) {

        final ByteArrayBuffer valueBuffer = tuple.getValueBuffer();

        final byte[] encoded = valueBuffer.array();

        return IVUtility.decodeFromOffset(encoded, 0);

    }

    /**
     * Restores the superclass state and then consumes the version byte
     * written by {@link #writeExternal(ObjectOutput)}.
     * 
     * @throws UnsupportedOperationException
     *             if the version byte is not a known version.
     */
    @Override
    public void readExternal(final ObjectInput in) throws IOException,
            ClassNotFoundException {

        super.readExternal(in);

        final byte version = in.readByte();

        // VERSION0 is the only known version and has no additional state.
        if (version != VERSION0) {
            throw new UnsupportedOperationException("Unknown version: "
                    + version);
        }

    }

    /**
     * Writes the superclass state followed by a single byte giving the
     * current version.
     */
    @Override
    public void writeExternal(final ObjectOutput out) throws IOException {

        super.writeExternal(out);

        out.writeByte(VERSION);

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy