// com.bigdata.rdf.spo.XXXCShardSplitHandler Maven / Gradle / Ivy
/**
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on Oct 4, 2010
*/
package com.bigdata.rdf.spo;
import java.io.Serializable;
import org.apache.log4j.Logger;
import com.bigdata.btree.ILinearList;
import com.bigdata.btree.ISimpleSplitHandler;
import com.bigdata.btree.IndexSegment;
import com.bigdata.btree.keys.IKeyBuilder;
import com.bigdata.btree.keys.KeyBuilder;
import com.bigdata.btree.keys.SuccessorUtil;
import com.bigdata.rdf.internal.IV;
import com.bigdata.rdf.internal.IVUtility;
/**
* A split handler for the xxxC quads indices which ensures that all quads for
* the same "triple" are in the same shard. The use of this split handler allows
* optimizations for default graph queries in scale-out. Since all quads for a
* given triple are known to be on the same shard, we can use the
* {@link ContextAdvancer} to skip to the next possible triple without imposing
* a "DISTINCT" filter.
*
* @author Bryan Thompson
* @version $Id$
*/
public class XXXCShardSplitHandler implements ISimpleSplitHandler, Serializable {

    /**
     * Serialization version.
     */
    private static final long serialVersionUID = 1L;

    private static final Logger log = Logger
            .getLogger(XXXCShardSplitHandler.class);

    /**
     * Return a separator key which does not split quads for the same triple
     * prefix.
     * <p>
     * A "quad" key is formed by an {@code IV[4]} array. For an index whose
     * last key component is "C" (context), the first three key components
     * identify some permutation of a triple. This will return a separator key
     * such that all "quads" for a given triple are on the same shard.
     * <p>
     * On entry, <i>splitAt</i> will be the index of some quads key. From that
     * key, this method generates (a) the key corresponding to that triple from
     * {@code IV[3]}; and (b) the key corresponding to the next triple from
     * {@code successor(IV[3])}.
     * <p>
     * The {@link ILinearList#indexOf(byte[])} positions of the two keys are
     * considered within the index segment. The key whose index position is
     * closest to the given <i>splitAt</i> index is preferred. If the preferred
     * key is not within the allowable range then the other key is used when
     * it is within the allowable range. If neither key lies within the
     * allowable range then an error message is logged and {@code null} is
     * returned, indicating that the {@link IndexSegment} should not be split.
     * <p>
     * The key for the triple prefix is only accepted when its position is
     * strictly greater than <i>fromIndex</i>: a separator key positioned at
     * <i>fromIndex</i> would leave nothing on the left-hand side of the split.
     * The key for the next triple prefix is only accepted when its position is
     * strictly less than <i>toIndex</i>.
     * <p>
     * NOTE(review): earlier documentation described the allowable range as
     * {@code (fromIndex:toIndex]}, i.e. inclusive of <i>toIndex</i>, while the
     * code has always applied a strict upper bound. The stricter check is
     * retained here -- confirm against the caller's contract.
     *
     * @param seg
     *            The index segment whose keys are being split.
     * @param fromIndex
     *            The lower bound (exclusive) on the index position of an
     *            acceptable separator key.
     * @param toIndex
     *            The upper bound (exclusive, as enforced by this
     *            implementation) on the index position of an acceptable
     *            separator key.
     * @param splitAt
     *            The index of the quad key at which a split was recommended.
     *
     * @return The separator key -or- {@code null} if no acceptable separator
     *         key could be found.
     */
    public byte[] getSeparatorKey(final IndexSegment seg, final int fromIndex,
            final int toIndex, final int splitAt) {

        // extract the key.
        final byte[] key = seg.keyAt(splitAt);

        // decode the first three components of the key.
        @SuppressWarnings("unchecked")
        final IV[] terms = IVUtility.decode(key, 3/* nterms */);

        // key builder.
        final IKeyBuilder keyBuilder = KeyBuilder.newInstance(64/* capacity */);

        // encode the first three components of the key.
        IVUtility.encode(keyBuilder, terms[0]);
        IVUtility.encode(keyBuilder, terms[1]);
        IVUtility.encode(keyBuilder, terms[2]);

        // obtain the key for that triple prefix.
        final byte[] fromKey = keyBuilder.getKey();

        /*
         * The index position of [fromKey] in the segment.
         *
         * Note: The [fromKey] is a triple and the index contains quads so
         * indexOf() is expected to return an insertion point which we convert
         * to an index. That index will be a quad actually present in the
         * B+Tree.
         */
        final long fromKeyAt = toIndexPosition(seg.indexOf(fromKey));

        // obtain the successor of the key (next possible triple prefix).
        final byte[] toKey = SuccessorUtil.successor(fromKey.clone());

        /*
         * The index position of [toKey] in the segment.
         *
         * Note: The [toKey] is a triple and the index contains quads so
         * indexOf() is expected to return an insertion point which we convert
         * to an index. That index will be a quad actually present in the
         * B+Tree unless it is beyond the end of the B+Tree.
         */
        final long toKeyAt = toIndexPosition(seg.indexOf(toKey));

        /*
         * [splitAt] is the index of a quad. fromKey must be LTE that quad.
         * toKey must be GT that quad.
         */
        assert fromKeyAt <= splitAt;
        assert splitAt < toKeyAt;

        // distance from each candidate to the recommended split point.
        final long nbefore = splitAt - fromKeyAt;
        final long nafter = toKeyAt - splitAt;

        /*
         * Range checks. The lower bound is exclusive: a separator key at
         * [fromIndex] would produce an empty left-hand split.
         */
        final boolean fromKeyInRange = fromKeyAt > fromIndex;
        final boolean toKeyInRange = toKeyAt < toIndex;

        /*
         * Prefer the closer candidate, but fall back to the other candidate
         * when the closer one is out of range so that we only refuse to split
         * when neither candidate is acceptable.
         */
        final boolean useFromKey = fromKeyInRange
                && (nbefore < nafter || !toKeyInRange);
        final boolean useToKey = !useFromKey && toKeyInRange;

        final byte[] separatorKey;
        if (useFromKey) {
            separatorKey = fromKey;
        } else if (useToKey) {
            separatorKey = toKey;
        } else {
            separatorKey = null;
        }

        if (log.isDebugEnabled()) {
            log.debug("splitAt=" + splitAt + ", fromIndex=" + fromIndex
                    + ", toIndex=" + toIndex + ", fromKeyAt=" + fromKeyAt
                    + ", useFromKey=" + useFromKey + ", toKeyAt=" + toKeyAt
                    + ", useToKey=" + useToKey);
        }

        if (separatorKey == null) {
            log.error("Could not split shard: name="
                    + seg.getIndexMetadata().getName()
                    + ", partitionId="
                    + seg.getIndexMetadata().getPartitionMetadata()
                            .getPartitionId());
        }

        return separatorKey;

    }

    /**
     * Convert the value returned by {@link ILinearList#indexOf(byte[])} into
     * an index position: a negative value encodes an insertion point as
     * {@code -(insertionPoint) - 1}, while a non-negative value is already
     * the index of an exact match.
     */
    private static long toIndexPosition(final long pos) {

        return pos < 0 ? -pos - 1 : pos;

    }

}