/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Apr 9, 2012
*/
package com.bigdata.bop.solutions;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import org.apache.log4j.Logger;
import com.bigdata.bop.IBindingSet;
import com.bigdata.bop.IConstant;
import com.bigdata.bop.IVariable;
import com.bigdata.btree.BaseIndexStats;
import com.bigdata.btree.Checkpoint;
import com.bigdata.btree.IndexMetadata;
import com.bigdata.io.SerializerUtil;
import com.bigdata.rawstore.IPSOutputStream;
import com.bigdata.rawstore.IRawStore;
import com.bigdata.rdf.internal.encoder.SolutionSetStreamDecoder;
import com.bigdata.rdf.internal.encoder.SolutionSetStreamEncoder;
import com.bigdata.rdf.sparql.ast.ISolutionSetStats;
import com.bigdata.stream.Stream;
import com.bigdata.striterator.Chunkerator;
import cutthecrap.utils.striterators.ArrayIterator;
import cutthecrap.utils.striterators.Expander;
import cutthecrap.utils.striterators.ICloseableIterator;
import cutthecrap.utils.striterators.Striterator;
/**
* A persistence capable solution set stored using a stream oriented API. The
* order of the solutions on playback is their write order. This data structure
* provides fast read/write performance, but does not provide key-based access
* into the solution sets.
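*
* A minimal usage sketch (the {@link IRawStore} and {@link StreamIndexMetadata}
* setup are assumed to be provided by the caller; the variable names below are
* illustrative only):
*
* <pre>{@code
* // Create a named solution set on the backing store.
* final SolutionSetStream sset = SolutionSetStream.create(store, metadata);
*
* // Write the solutions (consumed as IBindingSet[] chunks).
* sset.put(chunks);
*
* // Replay the solutions, chunk at a time, in their write order.
* final ICloseableIterator<IBindingSet[]> itr = sset.get();
*
* // Or visit the individual solutions.
* final ICloseableIterator<IBindingSet> itr2 = sset.scan();
* }</pre>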
*
* TODO Test performance with and without gzip. Extract into the CREATE schema /
* IndexMetadata so we can do this declaratively.
*/
public final class SolutionSetStream extends Stream implements
ISolutionSet {
private static final Logger log = Logger.getLogger(SolutionSetStream.class);
/**
* Encapsulates the address and the data.
*
* @author Bryan Thompson
*/
private final class MySolutionSetStats implements ISolutionSetStats {
private ISolutionSetStats delegate;
private long addr;
public MySolutionSetStats(final ISolutionSetStats stats) {
this.delegate = stats;
}
@Override
public long getSolutionSetSize() {
return delegate.getSolutionSetSize();
}
@Override
public Set<IVariable<?>> getUsedVars() {
return delegate.getUsedVars();
}
@Override
public Set<IVariable<?>> getAlwaysBound() {
return delegate.getAlwaysBound();
}
@Override
public Set<IVariable<?>> getNotAlwaysBound() {
return delegate.getNotAlwaysBound();
}
@Override
public Set<IVariable<?>> getMaterialized() {
return delegate.getMaterialized();
}
@Override
public Map<IVariable<?>, IConstant<?>> getConstants() {
return delegate.getConstants();
}
}
/**
* The {@link ISolutionSetStats} are collected when the solutions are written
* by {@link #put(ICloseableIterator)}.
*
* @see #needsCheckpoint()
* @see #flush()
*
* FIXME GIST : This is hacked into the bloomFilterAddr. It should have
* its own address. The Checkpoint class needs a protocol for
* populating and reporting fields which are specific to derived
* classes, not just BTree, HTree, and Stream. Or we need to add a
* general concept of a "summary statistics object" for a persistent
* data structure.
*
* @see GIST
*
*/
private MySolutionSetStats solutionSetStats;
/**
* Required constructor. This constructor is used both to create a new named
* solution set, and to load an existing named solution set from the store
* using a {@link Checkpoint} record.
*
* @param store
* The store.
* @param checkpoint
* The {@link Checkpoint} record.
* @param metadata
* The metadata record.
* @param readOnly
* When <code>true</code>, the view will be immutable.
*
* @see #create(IRawStore, StreamIndexMetadata)
* @see #load(IRawStore, long, boolean)
*/
public SolutionSetStream(final IRawStore store,
final Checkpoint checkpoint, final IndexMetadata metadata,
final boolean readOnly) {
super(store, checkpoint, metadata, readOnly);
/*
* Note: The SolutionSetStats will be loaded by setCheckpoint().
*/
}
/**
* Create a stream for an ordered solution set.
*
* {@inheritDoc}
*
* FIXME GIST : This is not setting the SolutionSetStream class when invoked
* by {@link Checkpoint#create(IRawStore, IndexMetadata)} since
* Stream.create() is being invoked rather than SolutionSetStream.create().
*
* @see GIST
*/
public static SolutionSetStream create(final IRawStore store,
final StreamIndexMetadata metadata) {
/*
* Must override the implementation class name:
*/
metadata.setStreamClassName(SolutionSetStream.class.getName());
return Stream.create(store, metadata);
}
/**
* Return the {@link ISolutionSetStats} for the saved solution set.
*
* @return The {@link ISolutionSetStats}.
*/
@Override
public ISolutionSetStats getStats() {
/*
* Note: This field is set by setCheckpoint().
*/
return solutionSetStats;
}
/**
* Return the address of the {@link ISolutionSetStats} to be written into the
* next {@link Checkpoint} record. The caller must have {@link #flush() flushed}
* the {@link SolutionSetStream} as a pre-condition (to ensure that the stats
* have been written out). If the {@link ISolutionSetStats} are not loaded,
* then the address from the last {@link Checkpoint} record is returned.
*/
public long getStatsAddr() {
if (solutionSetStats != null) {
return solutionSetStats.addr;
}
return getCheckpoint().getBloomFilterAddr();
}
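/**
* {@inheritDoc}
* <p>
* The solutions are read from the address of the last written stream and
* are visited, chunk at a time, in their original write order.
*
* @throws IllegalStateException
* if no solutions have been written on the stream.
*/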
@Override
public ICloseableIterator<IBindingSet[]> get() {
if (rootAddr == IRawStore.NULL)
throw new IllegalStateException();
// Open input stream reading decompressed data from store.
final DataInputStream in = new DataInputStream(
wrapInputStream(getStore().getInputStream(rootAddr)));
// Wrap with iterator pattern that will decode solutions.
final SolutionSetStreamDecoder decoder = new SolutionSetStreamDecoder(
metadata.getName(), in, rangeCount());
// Return the iterator to the caller.
return decoder;
}
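/**
* {@inheritDoc}
* <p>
* The solutions are encoded onto a compressed stream on the backing store.
* Any previously written solution set (and its statistics record) is
* recycled. The entry count and the {@link ISolutionSetStats} are updated
* from the encoder once the new stream has been written.
*/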
@Override
public void put(final ICloseableIterator<IBindingSet[]> src2) {
if (src2 == null)
throw new IllegalArgumentException();
assertNotReadOnly();
final String name = metadata.getName();
// Address from which the solutions may be read.
final long newAddr;
// Used to encode the solutions on the stream.
final SolutionSetStreamEncoder encoder = new SolutionSetStreamEncoder(
name);
// Stream writes onto the backing store.
final IPSOutputStream out = getStore().getOutputStream();
try {
// Wrap with data output stream and compression.
final DataOutputStream os = new DataOutputStream(
wrapOutputStream(out));
try {
// Encode the solutions onto the stream.
encoder.encode(os, src2);
// Flush the stream.
os.flush();
} finally {
try {
os.close();
} catch (IOException e) {
// Unexpected exception.
log.error(e, e);
}
}
// Flush
out.flush();
// Note address of the stream.
newAddr = out.getAddr();
} catch (IOException e) {
throw new RuntimeException(e);
} finally {
try {
out.close();
} catch (IOException e) {
// Unexpected exception.
log.error(e, e);
}
}
if (rootAddr != IRawStore.NULL) {
/*
* Release the old solution set.
*/
recycle(rootAddr);
}
if (solutionSetStats != null && solutionSetStats.addr != IRawStore.NULL) {
recycle(solutionSetStats.addr);
}
rootAddr = newAddr;
entryCount = encoder.getSolutionCount();
solutionSetStats = new MySolutionSetStats(encoder.getStats());
fireDirtyEvent();
}
/*
* ICheckpointProtocol
*/
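/**
* {@inheritDoc}
* <p>
* The returned iterator flattens the {@link IBindingSet}[] chunks visited
* by {@link #get()} into an iterator visiting the individual solutions in
* their write order.
*/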
@SuppressWarnings("unchecked")
@Override
public ICloseableIterator<IBindingSet> scan() {
return (ICloseableIterator<IBindingSet>) new Striterator(get())
.addFilter(new Expander() {
/**
*
*/
private static final long serialVersionUID = 1L;
@SuppressWarnings("rawtypes")
@Override
protected Iterator expand(final Object obj) {
return new ArrayIterator((IBindingSet[]) obj);
}
});
}
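/**
* {@inheritDoc}
* <p>
* Extended to discard the in-memory {@link ISolutionSetStats}.
*/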
@Override
public void clear() {
super.clear();
solutionSetStats = null;
}
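/**
* {@inheritDoc}
* <p>
* Extended to report <code>true</code> if the {@link ISolutionSetStats}
* have been updated or cleared since the last {@link Checkpoint}, as
* determined by comparing their address against the (re-purposed) bloom
* filter address in the current checkpoint record.
*/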
@Override
protected boolean needsCheckpoint() {
if (super.needsCheckpoint())
return true;
if (solutionSetStats != null
&& solutionSetStats.addr != getCheckpoint()
.getBloomFilterAddr()) {
// The statistics field was updated.
return true;
}
if (solutionSetStats == null
&& getCheckpoint().getBloomFilterAddr() != IRawStore.NULL) {
// The statistics field was cleared.
return true;
}
return false;
}
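/**
* {@inheritDoc}
* <p>
* Extended to write out the {@link ISolutionSetStats} (when dirty) so that
* their address can be propagated into the next {@link Checkpoint} record.
*/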
@Override
protected void flush() {
super.flush();
/*
* If the solutionSetStats are dirty, then write them out and set the
* addr so it will be propagated to the checkpoint record.
*/
if (solutionSetStats != null
&& (solutionSetStats.addr == IRawStore.NULL //
|| solutionSetStats.addr != getCheckpoint().getBloomFilterAddr())//
) {
solutionSetStats.addr = getStore()
.write(ByteBuffer.wrap(SerializerUtil
.serialize(solutionSetStats.delegate)));
}
}
/**
* {@inheritDoc}
*
* Extended to persist the {@link ISolutionSetStats}.
*/
@Override
protected void setCheckpoint(final Checkpoint checkpoint) {
super.setCheckpoint(checkpoint);
{
final long addr = checkpoint.getBloomFilterAddr();
if(addr != IRawStore.NULL) {
this.solutionSetStats = new MySolutionSetStats(
(ISolutionSetStats) SerializerUtil
.deserialize(getStore().read(addr)));
this.solutionSetStats.addr = addr;
}
}
}
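/**
* {@inheritDoc}
* <p>
* The source solutions are chunked up by a {@link Chunkerator} and then
* written onto the stream using {@link #put(ICloseableIterator)}.
*/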
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public void write(final ICloseableIterator<?> src) {
try {
/*
* Chunk up the solutions and write them onto the stream.
*/
put(new Chunkerator(src));
} finally {
src.close();
}
}
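/**
* {@inheritDoc}
* <p>
* Reports the {@link BaseIndexStats} for this {@link Stream}. The
* recursive and visitLeaves arguments are ignored by this implementation.
*/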
@Override
public BaseIndexStats dumpPages(final boolean recursive,
final boolean visitLeaves) {
return new BaseIndexStats(this);
}
/*
* I've commented out the AccessPath and Predicate abstractions for now.
* They were not required to implement the SPARQL CACHE mechanism, but
* this code might be useful in the future.
*/
// /**
// * Return an access path that can be used to scan the solutions.
// *
// * @param pred
// * ignored.
// *
// * @return The access path.
// *
// * TODO FILTERS: (A) Any filter attached to the predicate must be
// * applied by the AP to remove solutions which do not satisfy the
// * filter;
// *
// * (B) When the AP has a filter, then an exact range count must scan
// * the solutions to decide how many will match.
// */
// public SolutionSetAP getAccessPath(final IPredicate pred) {
//
// return new SolutionSetAP(this, pred);
//
// }
//
// /**
// * Class provides basic access path suitable for full scans.
// *
// * @author Bryan Thompson
// */
// public static class SolutionSetAP implements
// IBindingSetAccessPath {
//
// private final SolutionSetStream stream;
// private final IPredicate predicate;
//
// public SolutionSetAP(final SolutionSetStream stream,
// final IPredicate pred) {
//
// this.stream = stream;
//
// this.predicate = pred;
//
// }
//
// public SolutionSetStream getStream() {
//
// return stream;
//
// }
//
// @Override
// public IPredicate getPredicate() {
//
// return predicate;
//
// }
//
// @Override
// public boolean isEmpty() {
//
// /*
// * Note: If we develop streams which can have non-exact range counts
// * (due to deleted tuples) then this would have to be revisited.
// */
//
// return stream.entryCount == 0;
//
// }
//
// @Override
// public long rangeCount(boolean exact) {
//
// return stream.entryCount;
//
// }
//
// @Override
// public ICloseableIterator solutions(final long limit,
// final BaseJoinStats stats) {
//
// if (limit != 0L && limit != Long.MAX_VALUE)
// throw new UnsupportedOperationException();
//
// final IChunkedIterator itr = new ChunkConsumerIterator(
// stream.get());
//
// return BOpContext.solutions(itr, predicate, stats);
//
// }
//
// @Override
// public long removeAll() {
//
// final long n = stream.rangeCount();
//
// stream.removeAll();
//
// return n;
//
// }
//
// }
//
// /**
// * A predicate that can be used with an {@link ISolutionSet} without having
// * to resolve the {@link ISolutionSet} as an {@link IRelation}.
// *
// * @author Bryan Thompson
// * @param <E>
// */
// public static class SolutionSetStreamPredicate<E> extends
// Predicate<E> {
//
// /**
// *
// */
// private static final long serialVersionUID = 1L;
//
// public SolutionSetStreamPredicate(BOp[] args, Map annotations) {
// super(args, annotations);
// }
//
// public SolutionSetStreamPredicate(BOp[] args, NV... annotations) {
// super(args, annotations);
// }
//
// /** Deep copy constructor. */
// public SolutionSetStreamPredicate(final SolutionSetStreamPredicate op) {
// super(op);
// }
//
// /**
// *
// * @param attributeName
// * The name of the query attribute that will be used to
// * resolve this solution set.
// * @param timestamp
// * The timestamp associated with the view.
// */
// public SolutionSetStreamPredicate(/*IVariableOrConstant<?>[] values,*/
// final String attributeName,/* int partitionId, boolean optional, */
// /*
// * IElementFilter constraint, IAccessPathExpander
// * expander,
// */
// final long timestamp) {
//
// super(EMPTY, attributeName/* relationName */, -1/* partitionId */,
// false/* optional */, null/* constraint */,
// null/* expander */, timestamp);
//
// }
//
// }
//
// private static transient final IVariableOrConstant<?>[] EMPTY = new IVariableOrConstant[0];
}