All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.hadoop.hbase.util.byterange.ByteRangeSet Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.hadoop.hbase.util.byterange;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.hbase.classification.InterfaceAudience;
import org.apache.hadoop.hbase.util.ArrayUtils;
import org.apache.hadoop.hbase.util.ByteRange;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.hbase.util.SimpleMutableByteRange;

import com.google.common.collect.Lists;

/**
 * Performance oriented class for de-duping and storing arbitrary byte[]'s arriving in non-sorted
 * order. Appends individual byte[]'s to a single big byte[] to avoid overhead and garbage.
 * 

* Current implementations are {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeHashSet} and * {@link org.apache.hadoop.hbase.util.byterange.impl.ByteRangeTreeSet}, but other options might be a * trie-oriented ByteRangeTrieSet, etc */ @InterfaceAudience.Private public abstract class ByteRangeSet { /******************** fields **********************/ protected byte[] byteAppender; protected int numBytes; protected Map uniqueIndexByUniqueRange; protected ArrayList uniqueRanges; protected int numUniqueRanges = 0; protected int[] uniqueRangeIndexByInsertionId; protected int numInputs; protected List sortedIndexByUniqueIndex; protected int[] sortedIndexByInsertionId; protected ArrayList sortedRanges; /****************** construct **********************/ protected ByteRangeSet() { this.byteAppender = new byte[0]; this.uniqueRanges = Lists.newArrayList(); this.uniqueRangeIndexByInsertionId = new int[0]; this.sortedIndexByUniqueIndex = Lists.newArrayList(); this.sortedIndexByInsertionId = new int[0]; this.sortedRanges = Lists.newArrayList(); } public void reset() { numBytes = 0; uniqueIndexByUniqueRange.clear(); numUniqueRanges = 0; numInputs = 0; sortedIndexByUniqueIndex.clear(); sortedRanges.clear(); } /*************** abstract *************************/ public abstract void addToSortedRanges(); /**************** methods *************************/ /** * Check if the incoming byte range exists. If not, add it to the backing byteAppender[] and * insert it into the tracking Map uniqueIndexByUniqueRange. */ public void add(ByteRange bytes) { Integer index = uniqueIndexByUniqueRange.get(bytes); if (index == null) { index = store(bytes); } int minLength = numInputs + 1; uniqueRangeIndexByInsertionId = ArrayUtils.growIfNecessary(uniqueRangeIndexByInsertionId, minLength, 2 * minLength); uniqueRangeIndexByInsertionId[numInputs] = index; ++numInputs; } protected int store(ByteRange bytes) { int indexOfNewElement = numUniqueRanges; if (uniqueRanges.size() <= numUniqueRanges) { uniqueRanges.add(new SimpleMutableByteRange()); } ByteRange storedRange = uniqueRanges.get(numUniqueRanges); int neededBytes = numBytes + bytes.getLength(); byteAppender = ArrayUtils.growIfNecessary(byteAppender, neededBytes, 2 * neededBytes); bytes.deepCopyTo(byteAppender, numBytes); storedRange.set(byteAppender, numBytes, bytes.getLength());// this isn't valid yet numBytes += bytes.getLength(); uniqueIndexByUniqueRange.put(storedRange, indexOfNewElement); int newestUniqueIndex = numUniqueRanges; ++numUniqueRanges; return newestUniqueIndex; } public ByteRangeSet compile() { addToSortedRanges(); for (int i = 0; i < sortedRanges.size(); ++i) { sortedIndexByUniqueIndex.add(null);// need to grow the size } // TODO move this to an invert(int[]) util method for (int i = 0; i < sortedIndexByUniqueIndex.size(); ++i) { int uniqueIndex = uniqueIndexByUniqueRange.get(sortedRanges.get(i)); sortedIndexByUniqueIndex.set(uniqueIndex, i); } sortedIndexByInsertionId = ArrayUtils.growIfNecessary(sortedIndexByInsertionId, numInputs, numInputs); for (int i = 0; i < numInputs; ++i) { int uniqueRangeIndex = uniqueRangeIndexByInsertionId[i]; int sortedIndex = sortedIndexByUniqueIndex.get(uniqueRangeIndex); sortedIndexByInsertionId[i] = sortedIndex; } return this; } public int getSortedIndexForInsertionId(int insertionId) { return sortedIndexByInsertionId[insertionId]; } public int size() { return uniqueIndexByUniqueRange.size(); } /***************** standard methods ************************/ @Override public String toString() { StringBuilder sb = new StringBuilder(); int i = 0; for (ByteRange r : sortedRanges) { if (i > 0) { sb.append("\n"); } sb.append(i + " " + Bytes.toStringBinary(r.deepCopyToNewArray())); ++i; } sb.append("\ntotalSize:" + numBytes); sb.append("\navgSize:" + getAvgSize()); return sb.toString(); } /**************** get/set *****************************/ public ArrayList getSortedRanges() { return sortedRanges; } public long getAvgSize() { return numBytes / numUniqueRanges; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy