// org.apache.hadoop.hbase.util.byterange.ByteRangeSet Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.hadoop.hbase.util.byterange;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;
import com.google.common.collect.Lists;
/**
 * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
 * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
 *
 * Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and
 * {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a
 * trie-oriented ByteRangeTrieSet, etc
 *
 * Typical lifecycle as implemented here: {@link #add(ByteRange)} any number of times,
 * {@link #compile()} once, query with {@link #getSortedIndexForInsertionId(int)}, then
 * {@link #reset()} before reusing the instance. {@link #compile()} appends to the sorted-index
 * list without clearing it, so it should be called at most once between resets.
 */
@InterfaceAudience.Private
public abstract class ByteRangeSet {

  /******************** fields **********************/

  /** Backing array that every unique range is copied into, one after another. */
  protected byte[] byteAppender;

  /** Number of bytes of {@link #byteAppender} currently in use. */
  protected int numBytes;

  /**
   * Maps each stored unique range to the index it was assigned by {@link #store(ByteRange)}.
   * Not assigned by this class's constructor; subclasses are expected to supply the Map
   * implementation before any other method is called.
   */
  protected Map<ByteRange, Integer> uniqueIndexByUniqueRange;

  /**
   * Pool of range objects indexed by unique index. Not cleared by {@link #reset()}, so the
   * objects are reused by later calls to {@link #store(ByteRange)}.
   */
  protected ArrayList<ByteRange> uniqueRanges;

  /** Number of entries of {@link #uniqueRanges} that are live since the last reset. */
  protected int numUniqueRanges = 0;

  /** For the i-th call to {@link #add(ByteRange)}, the unique index of the range added. */
  protected int[] uniqueRangeIndexByInsertionId;

  /** Number of calls to {@link #add(ByteRange)} since the last reset. */
  protected int numInputs;

  /** Filled by {@link #compile()}: position in {@link #sortedRanges} for each unique index. */
  protected List<Integer> sortedIndexByUniqueIndex;

  /** Filled by {@link #compile()}: position in {@link #sortedRanges} for each insertion id. */
  protected int[] sortedIndexByInsertionId;

  /** Filled by the subclass in {@link #addToSortedRanges()}. */
  protected ArrayList<ByteRange> sortedRanges;


  /****************** construct **********************/

  /**
   * Initializes the arrays and lists owned by this base class. Leaves
   * {@link #uniqueIndexByUniqueRange} unassigned.
   */
  protected ByteRangeSet() {
    this.byteAppender = new byte[0];
    this.uniqueRanges = Lists.newArrayList();
    this.uniqueRangeIndexByInsertionId = new int[0];
    this.sortedIndexByUniqueIndex = Lists.newArrayList();
    this.sortedIndexByInsertionId = new int[0];
    this.sortedRanges = Lists.newArrayList();
  }

  /**
   * Returns the set to its empty state while keeping the already-allocated arrays and the pooled
   * range objects in {@link #uniqueRanges} for reuse.
   */
  public void reset() {
    numBytes = 0;
    uniqueIndexByUniqueRange.clear();
    numUniqueRanges = 0;
    numInputs = 0;
    sortedIndexByUniqueIndex.clear();
    sortedRanges.clear();
  }


  /*************** abstract *************************/

  /**
   * Populates {@link #sortedRanges}. Called by {@link #compile()}, which then looks every entry
   * of {@link #sortedRanges} up in {@link #uniqueIndexByUniqueRange}.
   */
  public abstract void addToSortedRanges();


  /**************** methods *************************/

  /**
   * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and
   * insert it into the tracking Map uniqueIndexByUniqueRange.
   *
   * In both cases the unique index of the range is recorded under the next insertion id.
   *
   * @param bytes the range to add; its bytes are copied, the object itself is not retained
   */
  public void add(ByteRange bytes) {
    Integer index = uniqueIndexByUniqueRange.get(bytes);
    if (index == null) {
      index = store(bytes);
    }
    int minLength = numInputs + 1;
    uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId,
        minLength, 2 * minLength);
    uniqueRangeIndexByInsertionId[numInputs] = index;
    ++numInputs;
  }

  /**
   * Copies a not-yet-seen range to the end of {@link #byteAppender} and registers it under the
   * next unique index.
   *
   * @param bytes the range to copy
   * @return the unique index assigned to the stored range
   */
  protected int store(ByteRange bytes) {
    int indexOfNewElement = numUniqueRanges;
    // reuse a pooled range object left over from before the last reset() when one is available
    if (uniqueRanges.size() <= numUniqueRanges) {
      uniqueRanges.add(new SimpleMutableByteRange());
    }
    ByteRange storedRange = uniqueRanges.get(numUniqueRanges);
    int neededBytes = numBytes + bytes.getLength();
    byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes);
    bytes.deepCopyTo(byteAppender, numBytes);
    storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet
    numBytes += bytes.getLength();
    uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement);
    ++numUniqueRanges;
    return indexOfNewElement;
  }

  /**
   * Builds the lookup from insertion id to sorted position. Asks the subclass to fill
   * {@link #sortedRanges}, inverts that ordering into {@link #sortedIndexByUniqueIndex}, and
   * finally resolves every insertion id through it.
   *
   * @return this instance, for call chaining
   */
  public ByteRangeSet compile() {
    addToSortedRanges();
    for (int i = 0; i < sortedRanges.size(); ++i) {
      sortedIndexByUniqueIndex.add(null);// need to grow the size
    }
    // TODO move this to an invert(int[]) util method
    for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) {
      int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i));
      sortedIndexByUniqueIndex.set(uniqueIndex, i);
    }
    sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs,
        numInputs);
    for (int i = 0; i < numInputs; ++i) {
      int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i];
      int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex);
      sortedIndexByInsertionId[i] = sortedIndex;
    }
    return this;
  }

  /**
   * @param insertionId zero-based ordinal of a previous {@link #add(ByteRange)} call
   * @return the value {@link #compile()} stored for that insertion id
   */
  public int getSortedIndexForInsertionId(int insertionId) {
    return sortedIndexByInsertionId[insertionId];
  }

  /**
   * @return the number of entries in {@link #uniqueIndexByUniqueRange}
   */
  public int size() {
    return uniqueIndexByUniqueRange.size();
  }


  /***************** standard methods ************************/

  /**
   * Renders one line per entry of {@link #sortedRanges} (position, then the binary-safe string
   * form of its bytes), followed by the total and average sizes.
   */
  @Override
  public String toString() {
    StringBuilder sb = new StringBuilder();
    int i = 0;
    for (ByteRange r : sortedRanges) {
      if (i > 0) {
        sb.append("\n");
      }
      sb.append(i).append(" ").append(Bytes.toStringBinary(r.deepCopyToNewArray()));
      ++i;
    }
    sb.append("\ntotalSize:").append(numBytes);
    sb.append("\navgSize:").append(getAvgSize());
    return sb.toString();
  }


  /**************** get/set *****************************/

  /**
   * @return the live internal list of sorted ranges (not a copy)
   */
  public ArrayList<ByteRange> getSortedRanges() {
    return sortedRanges;
  }

  /**
   * @return the average number of stored bytes per unique range, using integer division, or 0
   *         when no unique range has been stored
   */
  public long getAvgSize() {
    // guard the empty / freshly reset set: integer division by zero would throw
    if (numUniqueRanges == 0) {
      return 0;
    }
    return numBytes / numUniqueRanges;
  }
}