// com.bigdata.sparse.AbstractAtomicRowReadOrWrite Maven / Gradle / Ivy
//
// Go to download
/*

 Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

 Contact:
 SYSTAP, LLC DBA Blazegraph
 2501 Calvert ST NW #106
 Washington, DC 20008
 licenses@blazegraph.com

 This program is free software; you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
 the Free Software Foundation; version 2 of the License.

 This program is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.

 You should have received a copy of the GNU General Public License
 along with this program; if not, write to the Free Software
 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA

 */
/*
 * Created on Jul 3, 2008
 */

package com.bigdata.sparse;

import java.io.Externalizable;
import java.io.IOException;
import java.io.ObjectInput;
import java.io.ObjectOutput;

import org.apache.log4j.Logger;

import com.bigdata.btree.IIndex;
import com.bigdata.btree.IRangeQuery;
import com.bigdata.btree.ITuple;
import com.bigdata.btree.ITupleIterator;
import com.bigdata.btree.keys.SuccessorUtil;
import com.bigdata.btree.proc.AbstractIndexProcedure;
import com.bigdata.btree.proc.ISimpleIndexProcedure;
import com.bigdata.util.BytesUtil;

/**
 * Abstract base class implementing the atomic read of a logical row from the
 * index backing a sparse row store. It does NOT declare itself to be a
 * read-only operation since this class is extended by both
 * {@link AtomicRowRead} and {@link AtomicRowWriteRead}.
 * <p>
 * The operation state ({@link #schema}, {@link #primaryKey},
 * {@link #fromTime}, {@link #toTime} and {@link #filter}) is written and read
 * by the {@link Externalizable} methods, preceded by a one byte version
 * header.
 * 
 * @author Bryan Thompson
 */
public abstract class AbstractAtomicRowReadOrWrite extends
        AbstractIndexProcedure implements ISimpleIndexProcedure,
        IRowStoreConstants, Externalizable {

    private static final Logger log = Logger
            .getLogger(AbstractAtomicRowReadOrWrite.class);

    /**
     * The initial serialization version.
     */
    private static final byte VERSION0 = 0;

    /**
     * The current serialization version (the one written by
     * {@link #writeExternal(ObjectOutput)}).
     */
    private static final byte VERSION = VERSION0;

    /** The schema governing the property set. */
    protected Schema schema;

    /** The value of the primary key (identifies the logical row). */
    protected Object primaryKey;

    /**
     * The first timestamp for which timestamped property values will be
     * accepted.
     */
    protected long fromTime;

    /**
     * The first timestamp for which timestamped property values will NOT be
     * accepted -or- {@link IRowStoreConstants#CURRENT_ROW}.
     */
    protected long toTime;

    /** Optional filter restricting the property names (may be null). */
    protected INameFilter filter;

    /**
     * De-serialization ctor. The fields are left unset here and are assigned
     * by {@link #readExternal(ObjectInput)}.
     */
    protected AbstractAtomicRowReadOrWrite() {

        super();

    }

    /**
     * Constructor for an atomic write/read operation. The arguments are
     * checked by
     * {@link SparseRowStore#assertArgs(Schema, Object, long, long)} before
     * they are stored.
     * 
     * @param schema
     *            The schema governing the property set.
     * @param primaryKey
     *            The value of the primary key (identifies the logical row to be
     *            read).
     * @param fromTime
     *            The first timestamp for which timestamped property values will
     *            be accepted.
     * @param toTime
     *            The first timestamp for which timestamped property values will
     *            NOT be accepted -or- {@link IRowStoreConstants#CURRENT_ROW} to
     *            accept only the most current binding whose timestamp is GTE
     *            fromTime.
     * @param filter
     *            An optional filter used to restrict the property values that
     *            will be returned.
     */
    protected AbstractAtomicRowReadOrWrite(final Schema schema,
            final Object primaryKey, final long fromTime, final long toTime,
            final INameFilter filter) {

        SparseRowStore.assertArgs(schema, primaryKey, fromTime, toTime);

        this.schema = schema;
        this.primaryKey = primaryKey;
        this.fromTime = fromTime;
        this.toTime = toTime;
        this.filter = filter;

    }

    /**
     * Atomic read on the index.
     * <p>
     * The key prefix for the logical row is generated from the schema and the
     * primary key using the key builder of the index, and the read is then
     * delegated to
     * {@link #atomicRead(IIndex, byte[], Schema, long, long, INameFilter, TPS)}
     * with a new {@link TPS} created for the given writeTime.
     * 
     * @param ndx
     *            The index on which the data are stored.
     * @param schema
     *            The schema governing the row.
     * @param primaryKey
     *            The primary key identifies the logical row of interest.
     * @param fromTime
     *            The first timestamp for which timestamped property values will
     *            be accepted.
     * @param toTime
     *            The first timestamp for which timestamped property values will
     *            NOT be accepted -or- {@link IRowStoreConstants#CURRENT_ROW} to
     *            accept only the most current binding whose timestamp is GTE
     *            fromTime.
     * @param writeTime
     *            The resolved timestamp for an atomic write operation -or- ZERO
     *            (0L) IFF the operation is NOT a write.
     * @param filter
     *            An optional filter used to select the values for property
     *            names accepted by that filter.
     * 
     * @return The logical row for that primary key -or- {@code null} iff
     *         there is no data for the primaryKey.
     */
    protected static TPS atomicRead(final IIndex ndx, final Schema schema,
            final Object primaryKey, final long fromTime, final long toTime,
            final long writeTime, final INameFilter filter) {

        final byte[] fromKey = schema.getPrefix(ndx.getIndexMetadata()
                .getKeyBuilder(), primaryKey);

        final TPS tps = atomicRead(ndx, fromKey, schema, fromTime, toTime,
                filter, new TPS(schema, writeTime));

        if (tps == null && log.isInfoEnabled()) {

            log.info("No data for primaryKey: " + primaryKey);

        }

        return tps;

    }

    /**
     * Alternative form useful when you have the raw key (unsigned byte[])
     * rather than a primary key (application object).
     * <p>
     * Every tuple in the half-open key range [fromKey, successor(fromKey)) is
     * visited. A tuple contributes a property value to [tps] when its column
     * name is accepted by the filter (if any) and its timestamp lies within
     * the requested time range.
     * 
     * @param ndx
     *            The index on which the data are stored.
     * @param fromKey
     *            The first key of the range to be scanned. The array is not
     *            modified by this method (the toKey is derived from a clone).
     * @param schema
     *            The schema governing the row.
     * @param fromTime
     *            The first timestamp for which timestamped property values will
     *            be accepted.
     * @param toTime
     *            The first timestamp for which timestamped property values will
     *            NOT be accepted -or- {@link IRowStoreConstants#CURRENT_ROW} to
     *            accept only the most current binding whose timestamp is GTE
     *            fromTime.
     * @param filter
     *            An optional filter used to select the values for property
     *            names accepted by that filter.
     * @param tps
     *            The object into which the timestamped property values will be
     *            read.
     * 
     * @return The {@link TPS} -or- {@code null} iff no tuple at all was
     *         found in the key range for the logical row. When toTime is
     *         {@link IRowStoreConstants#CURRENT_ROW} the result of
     *         {@link TPS#currentRow()} is returned rather than [tps] itself.
     */
    protected static TPS atomicRead(final IIndex ndx, final byte[] fromKey,
            final Schema schema, final long fromTime, final long toTime,
            final INameFilter filter, final TPS tps) {

        assert ndx != null;
        assert schema != null;
        assert fromKey != null;
        assert tps != null;

        /*
         * Scan all entries within the fromKey/toKey range populating [tps] as
         * we go. The successor is computed from a clone so that [fromKey]
         * itself is never handed to successor().
         */
        final byte[] toKey = SuccessorUtil.successor(fromKey.clone());

        if (log.isInfoEnabled()) {
            log.info("read: fromKey=" + BytesUtil.toString(fromKey)+"\n"+
                     "read:   toKey=" + BytesUtil.toString(toKey));
        }

        /*
         * Note: The scan is always performed in forward index order. A reverse
         * scan which would stop at the first binding seen for each property
         * when [toTime == CURRENT_ROW] is NOT enabled; instead all accepted
         * values are collected and TPS#currentRow() is applied at the end.
         */
        final int flags = IRangeQuery.DEFAULT | IRangeQuery.READONLY;

        // iterator scanning tuples encoding timestamped property values.
        final ITupleIterator<?> itr = ndx.rangeIterator(fromKey, toKey,
                0/* capacity */, flags, null/* filter */);

        // #of entries scanned for that primary key.
        int nscanned = 0;

        while (itr.hasNext()) {

            final ITuple<?> tuple = itr.next();

            final byte[] key = tuple.getKey();

            nscanned++;

            // Decode the key so that we can get the column name.
            final KeyDecoder keyDecoder = new KeyDecoder(key);

            // The column name.
            final String col = keyDecoder.getColumnName();

            if (filter != null && !filter.accept(col)) {

                // Skip property names that have been filtered out.
                if (log.isDebugEnabled()) {
                    log.debug("Skipping property: name=" + col + " (filtered)");
                }

                continue;

            }

            /*
             * Skip column values whose timestamp lies outside of the specified
             * half-open range.
             */
            final long columnValueTimestamp = keyDecoder.getTimestamp();

            if (!isInTimeRange(col, columnValueTimestamp, fromTime, toTime)) {

                continue;

            }

            /*
             * Decode the value. A [null] indicates a deleted property value.
             */
            final Object v = ValueType.decode(tuple.getValue());

            /*
             * Add this timestamped property value to the collection.
             */
            tps.set(col, columnValueTimestamp, v);

            if (log.isInfoEnabled()) {
                log.info("Accept: name=" + col + ", timestamp="
                        + columnValueTimestamp + ", value=" + v);
            }

        }

        if (nscanned == 0) {

            /*
             * Return null iff there are no column values for that primary key.
             * 
             * Note: this is a stronger criteria than none being matched since
             * tuples rejected by the filter or the time range still count as
             * scanned.
             */
            return null;

        }

        if (toTime == CURRENT_ROW) {

            return tps.currentRow();

        }

        return tps;

    }

    /**
     * Test whether a column value timestamp lies within the half-open range
     * [fromTime, toTime), where a toTime of
     * {@link IRowStoreConstants#CURRENT_ROW} imposes no upper bound. The
     * reason for a rejection is logged at DEBUG.
     * 
     * @param col
     *            The column name (used for logging only).
     * @param timestamp
     *            The timestamp of the column value.
     * @param fromTime
     *            The inclusive lower bound.
     * @param toTime
     *            The exclusive upper bound -or-
     *            {@link IRowStoreConstants#CURRENT_ROW}.
     * 
     * @return {@code true} iff the column value should be accepted.
     */
    private static boolean isInTimeRange(final String col,
            final long timestamp, final long fromTime, final long toTime) {

        if (timestamp < fromTime) {

            if (log.isDebugEnabled()) {
                log.debug("Ignoring earlier revision: col=" + col
                        + ", fromTime=" + fromTime + ", timestamp="
                        + timestamp);
            }

            return false;

        }

        if (toTime != CURRENT_ROW && timestamp >= toTime) {

            if (log.isDebugEnabled()) {
                log.debug("Ignoring later revision: col=" + col
                        + ", toTime=" + toTime + ", timestamp="
                        + timestamp);
            }

            return false;

        }

        return true;

    }

    /**
     * Return the current binding for the named property.
     * 
     * @param ndx
     *            The index on which the data are stored.
     * @param schema
     *            The schema.
     * @param primaryKey
     *            The primary key.
     * @param name
     *            The property name.
     * 
     * @return The current binding -or- {@code null} iff there is no
     *         current binding (the logical row has no data, the property is
     *         not bound, or its current value is a deleted property marker).
     * 
     * @todo this can be optimized by including the encoded column name in the
     *       generated [fromKey] and [toKey] so that we scan less data from the
     *       index and by using a reverse traversal iterator to read the most
     *       recent value in the key range first. This is especially important
     *       if timeseries data are being stored.
     */
    protected static ITPV getCurrentValue(final IIndex ndx,
            final Schema schema, final Object primaryKey, final String name) {

        final TPS tps = atomicRead(ndx, schema, primaryKey, MIN_TIMESTAMP,
                CURRENT_ROW, 0L/* writeTime */, new SingleColumnFilter(name));

        if (tps == null) {

            // never bound.
            return null;

        }

        final ITPV tpv = tps.get(name);

        if (tpv == null || tpv.getValue() == null) {

            // no binding reported for the property -or- deleted property value.
            return null;

        }

        return tpv;

    }

    /**
     * Restores the operation state written by
     * {@link #writeExternal(ObjectOutput)}: a version byte followed by the
     * schema, primary key, fromTime, toTime and the (optional) filter.
     * 
     * @throws UnsupportedOperationException
     *             if the version byte is not a known version.
     */
    @Override
    public void readExternal(final ObjectInput in) throws IOException,
            ClassNotFoundException {

        final byte version = in.readByte();

        switch (version) {
        case VERSION0:
            break;
        default:
            throw new UnsupportedOperationException("Unknown version: "
                    + version);
        }

        schema = (Schema) in.readObject();
        primaryKey = in.readObject();
        fromTime = in.readLong();
        toTime = in.readLong();
        filter = (INameFilter) in.readObject();

    }

    /**
     * Writes the current version byte followed by the schema, primary key,
     * fromTime, toTime and the (optional) filter, in that order.
     */
    @Override
    public void writeExternal(final ObjectOutput out) throws IOException {

        out.writeByte(VERSION);

        out.writeObject(schema);
        out.writeObject(primaryKey);
        out.writeLong(fromTime);
        out.writeLong(toTime);
        out.writeObject(filter);

    }

}