
com.bigdata.bop.solutions.SolutionSetStream

/**

Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016.  All rights reserved.

Contact:
     SYSTAP, LLC DBA Blazegraph
     2501 Calvert ST NW #106
     Washington, DC 20008
     licenses@blazegraph.com

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
*/
/*
 * Created on Apr 9, 2012
 */

package com.bigdata.bop.solutions;

import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;

import org.apache.log4j.Logger;

import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.btree.BaseIndexStats;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.io.SerializerUtil;
import com.bigdata.rawstore.IPSOutputStream;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rdf.internal.encoder.SolutionSetStreamDecoder;
import com.bigdata.rdf.internal.encoder.SolutionSetStreamEncoder;
import com.bigdata.rdf.sparql.ast.ISolutionSetStats;
import com.bigdata.stream.Stream;
import com.bigdata.striterator.Chunkerator;

import cutthecrap.utils.striterators.ArrayIterator;
import cutthecrap.utils.striterators.Expander;
import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.Striterator;

/**
 * A persistence-capable solution set stored using a stream-oriented API. The
 * order of the solutions on playback is their write order. This data structure
 * provides fast read/write performance, but does not provide key-based access
 * into the solution sets.
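 * <p>
 * A minimal usage sketch (hedged: the {@code store}, {@code metadata}, and
 * source iterator {@code src} below are assumptions for illustration; only
 * {@link #create(IRawStore, StreamIndexMetadata)},
 * {@link #put(ICloseableIterator)}, and {@link #get()} are this class's API):
 * 
 * <pre>
 * // Assumed: an IRawStore and a StreamIndexMetadata carrying the set's name.
 * final SolutionSetStream sset = SolutionSetStream.create(store, metadata);
 * 
 * // Write the solutions (src visits IBindingSet[] chunks).
 * sset.put(src);
 * 
 * // Replay the solutions in their write order.
 * final ICloseableIterator&lt;IBindingSet[]&gt; itr = sset.get();
 * </pre>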
 * 
 * TODO Test performance with and without gzip. Extract into the CREATE schema /
 * IndexMetadata so we can do this declaratively.
 */
public final class SolutionSetStream extends Stream implements
        ISolutionSet {

    private static final Logger log = Logger.getLogger(SolutionSetStream.class);

    /**
     * Encapsulates the address and the data.
     * 
     * @author Bryan Thompson
     */
    private final class MySolutionSetStats implements ISolutionSetStats {
        
        private ISolutionSetStats delegate;
        private long addr;

        public MySolutionSetStats(final ISolutionSetStats stats) {

            this.delegate = stats;
            
        }
        
        @Override
        public long getSolutionSetSize() {
            return delegate.getSolutionSetSize();
        }

        @Override
        public Set<IVariable<?>> getUsedVars() {
            return delegate.getUsedVars();
        }

        @Override
        public Set<IVariable<?>> getAlwaysBound() {
            return delegate.getAlwaysBound();
        }

        @Override
        public Set<IVariable<?>> getNotAlwaysBound() {
            return delegate.getNotAlwaysBound();
        }

        @Override
        public Set<IVariable<?>> getMaterialized() {
            return delegate.getMaterialized();
        }

        @Override
        public Map<IVariable<?>, IConstant<?>> getConstants() {
            return delegate.getConstants();
        }
        
    }
    
    /**
     * The {@link ISolutionSetStats} are collected when the solutions are written
     * by {@link #put(ICloseableIterator)}.
     * 
     * @see #needsCheckpoint()
     * @see #flush()
     * 
     *      FIXME GIST : This is hacked into the bloomFilterAddr. It should have
     *      its own address. The Checkpoint class needs a protocol for
     *      populating and reporting fields which are specific to derived
     *      classes, not just BTree, HTree, and Stream. Or we need to add a
     *      general concept of a "summary statistics object" for a persistent
     *      data structure.
     * 
     * @see GIST
     */
    private MySolutionSetStats solutionSetStats;
    
    /**
     * Required constructor. This constructor is used both to create a new named
     * solution set, and to load an existing named solution set from the store
     * using a {@link Checkpoint} record.
     * 
     * @param store
     *            The store.
     * @param checkpoint
     *            The {@link Checkpoint} record.
     * @param metadata
     *            The metadata record.
     * @param readOnly
     *            When true the view will be immutable.
     * 
     * @see #create(IRawStore, StreamIndexMetadata)
     * @see #load(IRawStore, long, boolean)
     */
    public SolutionSetStream(final IRawStore store,
            final Checkpoint checkpoint, final IndexMetadata metadata,
            final boolean readOnly) {

        super(store, checkpoint, metadata, readOnly);
        
        /*
         * Note: The SolutionSetStats will be loaded by setCheckpoint().
         */

    }
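
    // A hedged sketch of the load path referenced above (assumptions for
    // illustration: "checkpointAddr" stands in for the address of a prior
    // Checkpoint record, and load(IRawStore, long, boolean) is the inherited
    // method cited in the constructor's @see tag):
    //
    //     final SolutionSetStream sset = (SolutionSetStream) SolutionSetStream
    //             .load(store, checkpointAddr, true /* readOnly */);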

    /**
     * Create a stream for an ordered solution set.
     * 
     * {@inheritDoc}
     * 
     * FIXME GIST : This is not setting the SolutionSetStream class when
     * invoked by {@link Checkpoint#create(IRawStore, IndexMetadata)} since
     * Stream.create() is being invoked rather than SolutionSetStream.create().
     * 
     * @see GIST
     */
    public static SolutionSetStream create(final IRawStore store,
            final StreamIndexMetadata metadata) {

        /*
         * Must override the implementation class name:
         */
        metadata.setStreamClassName(SolutionSetStream.class.getName());

        return Stream.create(store, metadata);

    }

    /**
     * Return the {@link ISolutionSetStats} for the saved solution set.
     * 
     * @return The {@link ISolutionSetStats}.
     */
    @Override
    public ISolutionSetStats getStats() {

        /*
         * Note: This field is set by setCheckpoint().
         */

        return solutionSetStats;

    }

    /**
     * Return the address of the {@link ISolutionSetStats} to be written into
     * the next {@link Checkpoint} record. The caller must have {@link #flush()
     * flushed} the {@link SolutionSetStream} as a pre-condition (to ensure
     * that the stats have been written out). If the {@link ISolutionSetStats}
     * are not loaded, then the address from the last {@link Checkpoint} record
     * is returned.
     */
    public long getStatsAddr() {

        if (solutionSetStats != null) {

            return solutionSetStats.addr;

        }

        return getCheckpoint().getBloomFilterAddr();

    }

    @Override
    public ICloseableIterator<IBindingSet[]> get() {

        if (rootAddr == IRawStore.NULL)
            throw new IllegalStateException();

        // Open input stream reading decompressed data from store.
        final DataInputStream in = new DataInputStream(
                wrapInputStream(getStore().getInputStream(rootAddr)));

        // Wrap with iterator pattern that will decode solutions.
        final SolutionSetStreamDecoder decoder = new SolutionSetStreamDecoder(
                metadata.getName(), in, rangeCount());

        // Return the iterator to the caller.
        return decoder;

    }

    @Override
    public void put(final ICloseableIterator<IBindingSet[]> src2) {

        if (src2 == null)
            throw new IllegalArgumentException();

        assertNotReadOnly();

        final String name = metadata.getName();

        // Address from which the solutions may be read.
        final long newAddr;

        // Used to encode the solutions on the stream.
        final SolutionSetStreamEncoder encoder = new SolutionSetStreamEncoder(
                name);

        // Stream writes onto the backing store.
        final IPSOutputStream out = getStore().getOutputStream();

        try {

            // Wrap with data output stream and compression.
            final DataOutputStream os = new DataOutputStream(
                    wrapOutputStream(out));

            try {

                // Encode the solutions onto the stream.
                encoder.encode(os, src2);

                // Flush the stream.
                os.flush();

            } finally {
                try {
                    os.close();
                } catch (IOException e) {
                    // Unexpected exception.
                    log.error(e, e);
                }
            }

            // Flush the underlying store-backed stream.
            out.flush();

            // Note the address of the written stream.
            newAddr = out.getAddr();

        } catch (IOException e) {

            throw new RuntimeException(e);

        } finally {

            try {
                out.close();
            } catch (IOException e) {
                // Unexpected exception.
                log.error(e, e);
            }

        }

        if (rootAddr != IRawStore.NULL) {

            /*
             * Release the old solution set.
             */
            recycle(rootAddr);

        }

        if (solutionSetStats != null
                && solutionSetStats.addr != IRawStore.NULL) {

            recycle(solutionSetStats.addr);

        }

        rootAddr = newAddr;

        entryCount = encoder.getSolutionCount();

        solutionSetStats = new MySolutionSetStats(encoder.getStats());

        fireDirtyEvent();

    }

    /*
     * ICheckpointProtocol
     */

    @SuppressWarnings("unchecked")
    @Override
    public ICloseableIterator<IBindingSet> scan() {

        return (ICloseableIterator<IBindingSet>) new Striterator(get())
                .addFilter(new Expander() {

                    private static final long serialVersionUID = 1L;

                    @SuppressWarnings("rawtypes")
                    @Override
                    protected Iterator expand(final Object obj) {

                        return new ArrayIterator((IBindingSet[]) obj);

                    }
                });

    }

    @Override
    public void clear() {

        super.clear();

        solutionSetStats = null;

    }

    @Override
    protected boolean needsCheckpoint() {

        if (super.needsCheckpoint())
            return true;

        if (solutionSetStats != null
                && solutionSetStats.addr != getCheckpoint()
                        .getBloomFilterAddr()) {

            // The statistics field was updated.
            return true;

        }

        if (solutionSetStats == null
                && getCheckpoint().getBloomFilterAddr() != IRawStore.NULL) {

            // The statistics field was cleared.
            return true;

        }

        return false;

    }

    @Override
    protected void flush() {

        super.flush();

        /*
         * If the solutionSetStats are dirty, then write them out and set the
         * addr so it will be propagated to the checkpoint record.
         */
        if (solutionSetStats != null
                && (solutionSetStats.addr == IRawStore.NULL //
                || solutionSetStats.addr != getCheckpoint().getBloomFilterAddr())//
        ) {

            solutionSetStats.addr = getStore()
                    .write(ByteBuffer.wrap(SerializerUtil
                            .serialize(solutionSetStats.delegate)));

        }

    }

    /**
     * {@inheritDoc}
     * 
     * Extended to persist the {@link ISolutionSetStats}.
     */
    @Override
    protected void setCheckpoint(final Checkpoint checkpoint) {

        super.setCheckpoint(checkpoint);

        {

            final long addr = checkpoint.getBloomFilterAddr();

            if (addr != IRawStore.NULL) {

                this.solutionSetStats = new MySolutionSetStats(
                        (ISolutionSetStats) SerializerUtil
                                .deserialize(getStore().read(addr)));

                this.solutionSetStats.addr = addr;

            }

        }

    }

    @SuppressWarnings({ "unchecked", "rawtypes" })
    @Override
    public void write(final ICloseableIterator src) {

        try {

            /*
             * Chunk up the solutions and write them onto the stream.
             */
            put(new Chunkerator(src));

        } finally {

            src.close();

        }

    }

    @Override
    public BaseIndexStats dumpPages(final boolean recursive,
            final boolean visitLeaves) {

        return new BaseIndexStats(this);

    }

    /*
     * I've commented out the AccessPath and Predicate abstractions for now.
     * They were not required to implement the SPARQL CACHE mechanism, but
     * this code might be useful in the future.
     */
//    /**
//     * Return an access path that can be used to scan the solutions.
//     * 
//     * @param pred
//     *            ignored.
//     * 
//     * @return The access path.
//     * 
//     *         TODO FILTERS: (A) Any filter attached to the predicate must be
//     *         applied by the AP to remove solutions which do not satisfy the
//     *         filter;
//     * 
//     *         (B) When the AP has a filter, then an exact range count must
//     *         scan the solutions to decide how many will match.
//     */
//    public SolutionSetAP getAccessPath(final IPredicate pred) {
//
//        return new SolutionSetAP(this, pred);
//
//    }
//
//    /**
//     * Class provides basic access path suitable for full scans.
//     * 
//     * @author Bryan Thompson
//     */
//    public static class SolutionSetAP implements IBindingSetAccessPath {
//
//        private final SolutionSetStream stream;
//        private final IPredicate predicate;
//
//        public SolutionSetAP(final SolutionSetStream stream,
//                final IPredicate pred) {
//
//            this.stream = stream;
//
//            this.predicate = pred;
//
//        }
//
//        public SolutionSetStream getStream() {
//
//            return stream;
//
//        }
//
//        @Override
//        public IPredicate getPredicate() {
//
//            return predicate;
//
//        }
//
//        @Override
//        public boolean isEmpty() {
//
//            /*
//             * Note: If we develop streams which can have non-exact range
//             * counts (due to deleted tuples) then this would have to be
//             * revisited.
//             */
//
//            return stream.entryCount == 0;
//
//        }
//
//        @Override
//        public long rangeCount(boolean exact) {
//
//            return stream.entryCount;
//
//        }
//
//        @Override
//        public ICloseableIterator solutions(final long limit,
//                final BaseJoinStats stats) {
//
//            if (limit != 0L && limit != Long.MAX_VALUE)
//                throw new UnsupportedOperationException();
//
//            final IChunkedIterator itr = new ChunkConsumerIterator(
//                    stream.get());
//
//            return BOpContext.solutions(itr, predicate, stats);
//
//        }
//
//        @Override
//        public long removeAll() {
//
//            final long n = stream.rangeCount();
//
//            stream.removeAll();
//
//            return n;
//
//        }
//
//    }
//
//    /**
//     * A predicate that can be used with an {@link ISolutionSet} without
//     * having to resolve the {@link ISolutionSet} as an {@link IRelation}.
//     * 
//     * @author Bryan Thompson
//     * @param <E>
//     */
//    public static class SolutionSetStreamPredicate<E> extends Predicate<E> {
//
//        /**
//         * 
//         */
//        private static final long serialVersionUID = 1L;
//
//        public SolutionSetStreamPredicate(BOp[] args, Map annotations) {
//            super(args, annotations);
//        }
//
//        public SolutionSetStreamPredicate(BOp[] args, NV... annotations) {
//            super(args, annotations);
//        }
//
//        /** Deep copy constructor. */
//        public SolutionSetStreamPredicate(final SolutionSetStreamPredicate op) {
//            super(op);
//        }
//
//        /**
//         * @param attributeName
//         *            The name of the query attribute that will be used to
//         *            resolve this solution set.
//         * @param timestamp
//         *            The timestamp associated with the view.
//         */
//        public SolutionSetStreamPredicate(/* IVariableOrConstant[] values, */
//                final String attributeName, /* int partitionId, boolean optional, */
//                /* IElementFilter constraint, IAccessPathExpander expander, */
//                final long timestamp) {
//
//            super(EMPTY, attributeName/* relationName */, -1/* partitionId */,
//                    false/* optional */, null/* constraint */,
//                    null/* expander */, timestamp);
//
//        }
//
//    }
//
//    private static transient final IVariableOrConstant[] EMPTY = new IVariableOrConstant[0];

}




