com.bigdata.service.ndx.AbstractSplitter Maven / Gradle / Ivy
/*
Copyright (C) SYSTAP, LLC DBA Blazegraph 2006-2016. All rights reserved.
Contact:
SYSTAP, LLC DBA Blazegraph
2501 Calvert ST NW #106
Washington, DC 20008
licenses@blazegraph.com
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; version 2 of the License.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
*/
/*
* Created on May 7, 2009
*/
package com.bigdata.service.ndx;
import java.util.Arrays;
import java.util.LinkedList;
import org.apache.log4j.Logger;
import com.bigdata.btree.keys.KVO;
import com.bigdata.mdi.IMetadataIndex;
import com.bigdata.mdi.PartitionLocator;
import com.bigdata.service.IMetadataService;
import com.bigdata.service.Split;
import com.bigdata.util.BytesUtil;
/**
* Basic implementation - you only need to provide resolution for the
* {@link IMetadataIndex}.
*
* @author Bryan Thompson
* @version $Id$
*/
abstract public class AbstractSplitter implements ISplitter {

    /** Logger shared with subclasses. */
    protected static final Logger log = Logger.getLogger(AbstractSplitter.class);

    /**
     * Return the {@link IMetadataIndex} that will be used to compute the
     * {@link Split}s
     *
     * @param ts
     *            The timestamp of the {@link IMetadataIndex} view.
     *
     * @return The {@link IMetadataIndex}.
     */
    protected abstract IMetadataIndex getMetadataIndex(long ts);

    public AbstractSplitter() {
    }

    /**
     * {@inheritDoc}
     *
     * Find the partition for the first key. If that partition has no right
     * separator key then it absorbs all remaining keys and we are done.
     *
     * Otherwise, perform a binary search on the remaining keys looking for the
     * index of the first key GTE the right separator key for that partition.
     * The batch for this partition is formed from all keys from the first key
     * for that partition up to but excluding the index position identified by
     * the binary search (if there is a match; if there is a miss, then the
     * binary search result is converted into an insertion point and that is
     * the exclusive upper bound for the current partition).
     *
     * Examine the next key and repeat the process until all keys have been
     * allocated to index partitions.
     *
     * Note: Split points MUST respect the "row" identity for a sparse row
     * store, but we get that constraint by maintaining the index partition
     * boundaries in agreement with the split point constraints for the index.
     *
     * Note: Out of order keys are reported with an
     * {@link IllegalArgumentException} rather than an assert. This is done
     * since it is otherwise too easy for applications to produce unordered
     * data which would then quietly violate this expectation if we relied on
     * asserts. The ordering test is applied to the first key of each split,
     * to every key assigned to an index partition without a right separator,
     * and to any split which would otherwise be empty.
     *
     * @param ts
     *            The timestamp passed to {@link #getMetadataIndex(long)}.
     * @param fromIndex
     *            The index of the first key to be split (inclusive).
     * @param toIndex
     *            The index of the last key to be split (exclusive).
     * @param keys
     *            The keys.
     *
     * @return The list of {@link Split}s. Each {@link Split} pairs the keys
     *         in the [fromIndex,toIndex) range of that split with a
     *         {@link PartitionLocator}.
     *
     * @throws IllegalArgumentException
     *             if <i>keys</i> is <code>null</code>, if the index range is
     *             empty or not within <i>keys</i>, if a key examined by the
     *             splitter is <code>null</code>, or if the keys are detected
     *             to be out of order.
     * @throws RuntimeException
     *             if the {@link IMetadataIndex} does not report a
     *             {@link PartitionLocator} for a key.
     *
     * @see Arrays#sort(Object[], int, int, java.util.Comparator)
     *
     * @see BytesUtil#compareBytes(byte[], byte[])
     *
     * @todo Caching? This procedure performs the minimum #of lookups using
     *       {@link IMetadataIndex#find(byte[])} since that operation will be an
     *       RMI in a distributed federation. The find(byte[] key) operation is
     *       difficult to cache since it locates the index partition that would
     *       span the key and many, many different keys could fit into that same
     *       index partition. The only effective cache technique may be an LRU
     *       that scans ~10 cached locators to see if any of them is a match
     *       before reaching out to the remote {@link IMetadataService}. Or
     *       perhaps the locators can be cached in a local data structure and a
     *       miss there would result in a read through to the remote
     *       {@link IMetadataService} but then we have the problem of figuring
     *       out when to release locators if the client is long-lived.
     */
    public LinkedList<Split> splitKeys(final long ts, final int fromIndex,
            final int toIndex, final byte[][] keys) {

        if (keys == null)
            throw new IllegalArgumentException("keys is null");

        if (fromIndex < 0)
            throw new IllegalArgumentException("fromIndex=" + fromIndex);

        if (fromIndex >= toIndex)
            throw new IllegalArgumentException("fromIndex=" + fromIndex
                    + ", toIndex=" + toIndex);

        if (toIndex > keys.length)
            throw new IllegalArgumentException("toIndex=" + toIndex
                    + ", keys.length=" + keys.length);

        final LinkedList<Split> splits = new LinkedList<Split>();

        // start w/ the first key.
        int currentIndex = fromIndex;

        // the first key of the previous split (if any).
        byte[] lastKey = null;

        while (currentIndex < toIndex) {

            final byte[] key = keys[currentIndex];

            if (key == null) {
                throw new IllegalArgumentException("null @ index="
                        + currentIndex);
            }

            if (lastKey != null && BytesUtil.compareBytes(lastKey, key) > 0) {
                /*
                 * Make sure that the keys are ordered.
                 *
                 * Note: We do allow duplicate keys since that is common when
                 * writes are combined on an asynchronous write pipeline but
                 * duplicate detection can not be enabled. E.g., TERM2ID which
                 * uses KVOLatch.
                 */
                throw new IllegalArgumentException("keys out of order @ index="
                        + currentIndex + " : lastKey="
                        + BytesUtil.toString(lastKey) + ", thisKey="
                        + BytesUtil.toString(key));
            }

            // update before we go any further.
            lastKey = key;

            /*
             * This is the partition spanning the current key (RMI)
             *
             * Note: Using the caller's timestamp here!
             */
            final PartitionLocator locator = getMetadataIndex(ts).find(key);

            if (locator == null)
                throw new RuntimeException("No index partitions?");

            final byte[] rightSeparatorKey = locator.getRightSeparatorKey();

            // exclusive upper bound of the split for this partition.
            final int endIndex;

            if (rightSeparatorKey == null) {

                /*
                 * The last index partition does not have an upper bound and
                 * will absorb any keys that order GTE to its left separator
                 * key.
                 *
                 * Note: This is deliberately NOT an assert. The loop ends
                 * with this split, so this is the only place where the
                 * ordering of the remaining keys is verified.
                 */
                isValidSplit(locator, currentIndex, toIndex, keys);

                endIndex = toIndex;

            } else {

                /*
                 * Otherwise this partition has an upper bound, so figure out
                 * the index of the last key that would go into this partition.
                 *
                 * We do this by searching for the rightSeparator of the index
                 * partition itself.
                 */
                endIndex = findExclusiveUpperBound(keys, currentIndex,
                        toIndex, rightSeparatorKey);

                /*
                 * Note: An additional test can be enabled here if you are
                 * having problems with KeyAfterPartition or
                 * KeyBeforePartition. It will go through more effort to
                 * validate the constraints on the split. However, due to the
                 * additional byte[] comparisons, this SHOULD be disabled
                 * except when tracking a bug:
                 *
                 * assert isValidSplit(locator, currentIndex, endIndex, keys);
                 */

            }

            splits.add(new Split(locator, currentIndex, endIndex));

            currentIndex = endIndex;

        }

        return splits;

    }

    /**
     * Locate the exclusive upper bound of the split which begins at
     * <i>currentIndex</i> for an index partition having the given right
     * separator key.
     *
     * @param keys
     *            The keys.
     * @param currentIndex
     *            The index of the first key in the split (inclusive).
     * @param toIndex
     *            The exclusive upper bound of the keys to be considered.
     * @param rightSeparatorKey
     *            The right separator key of the index partition (non-null).
     *
     * @return The exclusive upper bound of the split. This is always GT
     *         <i>currentIndex</i> and LTE <i>toIndex</i>.
     *
     * @throws IllegalArgumentException
     *             if the computed bound would not advance beyond
     *             <i>currentIndex</i> or would exceed <i>toIndex</i>.
     */
    private static int findExclusiveUpperBound(final byte[][] keys,
            final int currentIndex, final int toIndex,
            final byte[] rightSeparatorKey) {

        int pos = BytesUtil.binarySearch(keys, currentIndex, toIndex
                - currentIndex, rightSeparatorKey);

        if (pos >= 0) {

            /*
             * There is a hit on the rightSeparator key. The index returned by
             * the binarySearch is the exclusive upper bound for the split. The
             * key at that index is excluded from the split - it will be the
             * first key in the next split.
             *
             * Note: There is a special case when the keys[] includes
             * duplicates of the key that corresponds to the rightSeparator.
             * The binarySearch returns the index of ONE of the keys that is
             * equal to the rightSeparator key (not necessarily the first one)
             * so we back up until we have found the FIRST ONE.
             */
            while (pos > currentIndex
                    && BytesUtil.bytesEqual(keys[pos - 1], rightSeparatorKey)) {

                // keep backing up.
                pos--;

            }

            if (log.isDebugEnabled())
                log.debug("Exact match on rightSeparator: pos=" + pos
                        + ", key=" + BytesUtil.toString(keys[pos]));

        } else {

            /*
             * There is a miss on the rightSeparator key (it is not present in
             * the keys that are being split). In this case the binary search
             * returns the insertion point. We then compute the exclusive upper
             * bound from the insertion point.
             */
            pos = -pos - 1;

            assert pos > currentIndex && pos <= toIndex : "Expected pos in ("
                    + currentIndex + ":" + toIndex + "] but pos=" + pos;

        }

        /*
         * Guarantee forward progress even when asserts are disabled. If the
         * bound does not advance beyond currentIndex then the caller would add
         * an empty split and then examine the same key again, forever.
         */
        if (pos <= currentIndex || pos > toIndex) {

            throw new IllegalArgumentException(
                    "keys out of order or key not within index partition @ index="
                            + currentIndex + " : pos=" + pos + ", toIndex="
                            + toIndex + ", key="
                            + BytesUtil.toString(keys[currentIndex])
                            + ", rightSeparator="
                            + BytesUtil.toString(rightSeparatorKey));

        }

        return pos;

    }

    /**
     * Reshape the data into an unsigned byte[][] and then invoke
     * {@link #splitKeys(long, int, int, byte[][])}.
     * <p>
     * Only the elements in the [fromIndex,toIndex) range are examined. The
     * byte[][] has the same length as <i>a</i> so that the indices reported by
     * the {@link Split}s are indices into <i>a</i>.
     *
     * @param ts
     *            The timestamp passed to {@link #getMetadataIndex(long)}.
     * @param fromIndex
     *            The index of the first element to be split (inclusive).
     * @param toIndex
     *            The index of the last element to be split (exclusive).
     * @param a
     *            The {@link KVO}s whose keys will be split.
     *
     * @return The list of {@link Split}s.
     *
     * @throws IllegalArgumentException
     *             if <i>a</i> is <code>null</code>, if the index range is
     *             empty or not within <i>a</i>, or if an element in that
     *             range is <code>null</code>.
     */
    public LinkedList<Split> splitKeys(final long ts, final int fromIndex,
            final int toIndex, final KVO[] a) {

        /*
         * Validate before touching the array so that bad arguments are
         * reported as IllegalArgumentException rather than as a
         * NullPointerException or ArrayIndexOutOfBoundsException.
         */
        if (a == null)
            throw new IllegalArgumentException("a is null");

        if (fromIndex < 0)
            throw new IllegalArgumentException("fromIndex=" + fromIndex);

        if (fromIndex >= toIndex)
            throw new IllegalArgumentException("fromIndex=" + fromIndex
                    + ", toIndex=" + toIndex);

        if (toIndex > a.length)
            throw new IllegalArgumentException("toIndex=" + toIndex
                    + ", a.length=" + a.length);

        /*
         * Change the shape of the data so that we can split it.
         */
        final byte[][] keys = new byte[a.length][];

        for (int i = fromIndex; i < toIndex; i++) {

            if (a[i] == null)
                throw new IllegalArgumentException("null @ index=" + i);

            keys[i] = a[i].key;

        }

        return splitKeys(ts, fromIndex, toIndex, keys);

    }

    /**
     * Paranoia testing for generated splits. Verifies that the keys in the
     * split are ordered, beginning from the left separator key of the index
     * partition, and that the last key is strictly LT the right separator key
     * of the index partition (when it has one).
     *
     * @param locator
     *            The locator for the index partition.
     * @param fromIndex
     *            The index of the first key in the split (inclusive).
     * @param toIndex
     *            The index of the last key in the split (exclusive).
     * @param keys
     *            The keys.
     *
     * @return <code>true</code> (so that the method may be used in an
     *         assert).
     *
     * @throws IllegalArgumentException
     *             if the keys violate the constraints for the split.
     */
    private boolean isValidSplit(final PartitionLocator locator,
            final int fromIndex, final int toIndex, final byte[][] keys) {

        assert fromIndex <= toIndex : "fromIndex=" + fromIndex + ", toIndex="
                + toIndex;

        assert fromIndex >= 0 : "fromIndex=" + fromIndex;

        assert toIndex <= keys.length : "toIndex=" + toIndex + ", keys.length="
                + keys.length;

        // begin with the left separator on the index partition.
        byte[] lastKey = locator.getLeftSeparatorKey();

        assert lastKey != null;

        for (int i = fromIndex; i < toIndex; i++) {

            final byte[] key = keys[i];

            assert key != null;

            if (lastKey != null) {

                final int ret = BytesUtil.compareBytes(lastKey, key);

                if (ret > 0)
                    throw new IllegalArgumentException("keys out of order: i="
                            + i + ", lastKey=" + BytesUtil.toString(lastKey)
                            + ", key=" + BytesUtil.toString(key)
                    );

            }

            lastKey = key;

        }

        // Note: Must be strictly LT the rightSeparator key (when present).
        {

            final byte[] key = locator.getRightSeparatorKey();

            if (key != null) {

                final int ret = BytesUtil.compareBytes(lastKey, key);

                if (ret >= 0)
                    throw new IllegalArgumentException(
                            "keys out of order: lastKey="
                                    + BytesUtil.toString(lastKey)
                                    + ", rightSeparator="
                                    + BytesUtil.toString(key)
                    );

            }

        }

        return true;

    }

}