// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.page.bmap;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.IntSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.DataPageHashSubPageImpl;
import org.apache.flink.runtime.state.gemini.engine.page.compress.GCompressAlgorithm;
import org.apache.flink.runtime.state.gemini.engine.rm.Allocator;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.runtime.state.gemini.engine.rm.GUnPooledByteBuffer;
import org.apache.flink.util.MathUtils;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import static org.apache.flink.runtime.state.gemini.engine.page.bmap.AbstractGRoutingValue.GROUTING_INFO_HEADER_LENGTH;
import static org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap.EMPTY_G_BINARY_HASHMAP;
/**
* The helper class for hash map split.
*/
public class SplitHashMapValueHelper {
/**
* return a GBinaryHashMap that isn't split, or return a GBinaryHashMap that indicates the routing info of the split maps.
*/
public static GBinaryHashMap trySplit(
DataPage.DataPageType dataPageType,
List>> keyValueList,
TypeSerializer keySerializer,
TypeSerializer valueSerializer,
long version,
int logicPageId,
Allocator allocator,
long compactionCount,
GCompressAlgorithm gCompressAlgorithm,
GBufferAddressMapping mapping,
int mapSplitSubMapSize,
int mapSplitMinKeyNum) {
int totalKeys = keyValueList.size();
if (totalKeys == 0) {
return EMPTY_G_BINARY_HASHMAP;
}
List>>> subMapList = splitGSValueMap(
keyValueList,
keySerializer,
valueSerializer,
mapSplitMinKeyNum,
mapSplitSubMapSize);
if (subMapList.size() == 1) { //no need split
return GBinaryHashMap.of(dataPageType,
keyValueList,
keySerializer,
valueSerializer,
version,
logicPageId,
allocator,
compactionCount,
gCompressAlgorithm);
}
int[] subMapIdList = new int[subMapList.size()];
int subMapMaxSize = 0;
for (int i = 0; i < subMapList.size(); i++) {
GBinaryHashMap subGBinaryHashMap = GBinaryHashMap.of(dataPageType,
subMapList.get(i),
keySerializer,
valueSerializer,
version,
logicPageId,
allocator,
compactionCount,
gCompressAlgorithm);
int subMapId = mapping.putGByteBufferAddress(new DataPageHashSubPageImpl(subGBinaryHashMap));
subMapIdList[i] = subMapId;
subMapMaxSize = Math.max(subGBinaryHashMap.bytesSize(), subMapMaxSize);
}
int splitMapIndexLen = 0;
GHashHeaderImpl pageHelper = GHashHeaderImpl.getPageHelper(splitMapIndexLen);
GByteBuffer gByteBuffer = genRoutingValueForSplitHashMap(subMapIdList, logicPageId, allocator, subMapMaxSize);
return new GBinaryHashMap<>(pageHelper, gByteBuffer, keySerializer);
}
/**
 * Generates the routing buffer of a split hash map, using the position of each sub-map in
 * {@code subMapIdArray} as its routing key.
 *
 * @param subMapIdArray ids of the sub-maps; element {@code i} is routed by index {@code i}
 * @param logicPageId forwarded to {@link #genRoutingBufferForSplitMap}
 * @param allocator allocator used for the resulting buffer
 * @param subMapMaxSize largest size among the sub-maps, written into the routing header
 * @return the routing buffer, or {@code null} if {@code subMapMaxSize <= 0} or there is no sub-map
 */
public static GByteBuffer genRoutingValueForSplitHashMap(
    int[] subMapIdArray, int logicPageId, Allocator allocator, int subMapMaxSize) {
    if (subMapMaxSize <= 0) {
        return null;
    }

    List<Integer> subMapIdList = new ArrayList<>(subMapIdArray.length);
    List<Integer> hashIndexList = new ArrayList<>(subMapIdArray.length);
    for (int i = 0; i < subMapIdArray.length; i++) {
        subMapIdList.add(subMapIdArray[i]);
        hashIndexList.add(i);
    }

    return genRoutingBufferForSplitMap(DataPage.DataPageType.KSplitHashRouting,
        subMapIdList,
        hashIndexList,
        subMapMaxSize,
        IntSerializer.INSTANCE,
        logicPageId,
        allocator);
}
/**
 * Serializes the routing info of a split map into a newly allocated buffer.
 *
 * <p>The buffer is laid out as three consecutive sections:
 * <ol>
 *   <li>a header of {@code GROUTING_INFO_HEADER_LENGTH} bytes holding the routing type, the
 *       sub-map count, the max sub-map size and the offset at which the value section starts;</li>
 *   <li>the key section: {@code subMapCount} ints, the i-th being the end position (relative to
 *       the start of the key section) of the i-th serialized key, followed by the serialized
 *       keys themselves;</li>
 *   <li>the value section: the sub-map ids, each written with {@link IntSerializer}.</li>
 * </ol>
 *
 * @param dataPageType routing type whose code is written into the header
 * @param subMapIdList ids of the sub-maps (the values)
 * @param subMapIndexList routing keys, element {@code i} belongs to {@code subMapIdList.get(i)}
 * @param subMapMaxSize max sub-map size written into the header
 * @param keySerializer serializer for the routing keys
 * @param logicPageId not used by this method
 * @param allocator allocator used for the resulting buffer
 * @param <K> type of the routing keys
 * @return the routing buffer, or {@code null} if {@code subMapIdList} is empty
 * @throws GeminiRuntimeException if serialization, allocation or copying fails
 */
public static <K> GByteBuffer genRoutingBufferForSplitMap(
    DataPage.DataPageType dataPageType,
    List<Integer> subMapIdList,
    List<K> subMapIndexList,
    int subMapMaxSize,
    TypeSerializer<K> keySerializer,
    int logicPageId,
    Allocator allocator) {
    int subMapCount = subMapIdList.size();
    if (subMapCount <= 0) {
        return null;
    }

    byte[] header = new byte[GROUTING_INFO_HEADER_LENGTH];
    ByteBuffer headerBB = ByteBuffer.wrap(header);

    GByteArrayOutputStreamWithPos outputStreamForKey = new GByteArrayOutputStreamWithPos(1024);
    DataOutputViewStreamWrapper outputViewForKey = new DataOutputViewStreamWrapper(outputStreamForKey);
    // The serialized keys start right after the table of end positions (one int per sub-map).
    int lastKeyPosition = subMapCount * Integer.BYTES;

    GByteArrayOutputStreamWithPos outputStreamForValue = new GByteArrayOutputStreamWithPos(1024);
    DataOutputViewStreamWrapper outputViewForValue = new DataOutputViewStreamWrapper(outputStreamForValue);
    int lastValuePosition = 0;

    GByteBuffer gByteBuffer = null;
    try {
        for (int i = 0; i < subMapCount; i++) {
            int subMapId = subMapIdList.get(i);

            //write key and key indicator
            outputStreamForKey.setPosition(lastKeyPosition);
            keySerializer.serialize(subMapIndexList.get(i), outputViewForKey);
            lastKeyPosition = outputStreamForKey.getPosition();
            // Jump back into the position table and record where the i-th key ends.
            outputStreamForKey.setPosition(i * Integer.BYTES);
            IntSerializer.INSTANCE.serialize(lastKeyPosition, outputViewForKey);

            //write value
            IntSerializer.INSTANCE.serialize(subMapId, outputViewForValue);
            lastValuePosition = outputStreamForValue.getPosition();
        }
        outputStreamForKey.setPosition(lastKeyPosition);
        outputStreamForValue.setPosition(lastValuePosition);

        ByteBuffer keyBytes = ByteBuffer.wrap(outputStreamForKey.getBuf(), 0, lastKeyPosition);
        ByteBuffer valueBytes = ByteBuffer.wrap(outputStreamForValue.getBuf(), 0, lastValuePosition);

        AbstractGRoutingValue.writeHeaderRoutingType(headerBB, dataPageType.getCode());
        AbstractGRoutingValue.writeHeaderSubMapCount(headerBB, subMapCount);
        AbstractGRoutingValue.writeHeaderSubMapMaxSize(headerBB, subMapMaxSize);
        AbstractGRoutingValue.writeHeaderBaseValueOffset(headerBB, header.length + lastKeyPosition);

        int newBufferLen = header.length + lastKeyPosition + lastValuePosition;
        gByteBuffer = allocator.allocate(newBufferLen);

        // header | keys | values
        ByteBufferUtils.copyFromArrayToBuffer(gByteBuffer.getByteBuffer(),
            0,
            header,
            0,
            header.length);
        ByteBufferUtils.copyFromBufferToBuffer(keyBytes,
            gByteBuffer.getByteBuffer(),
            0,
            header.length,
            lastKeyPosition);
        ByteBufferUtils.copyFromBufferToBuffer(valueBytes,
            gByteBuffer.getByteBuffer(),
            0,
            header.length + lastKeyPosition,
            lastValuePosition);
        return gByteBuffer;
    } catch (Exception e) {
        // Give the buffer back if it was already allocated when the failure happened.
        if (gByteBuffer != null) {
            gByteBuffer.release();
        }
        throw new GeminiRuntimeException("GBinaryHashMap get exception: " + e.getMessage(), e);
    }
}
/**
 * Produces a copy of the given routing value in which the sub-map ids have been replaced by the
 * ids obtained from merging the value's own page mapping into {@code pageMapping}.
 *
 * <p>The leading part of the routing value (everything before the base value offset) is copied
 * unchanged; only the id section that follows it is rewritten.
 *
 * @param binaryValue routing value whose sub-map ids are to be replaced
 * @param pageMapping mapping into which the value's page mapping is merged
 * @return an unpooled buffer holding the rewritten routing value
 * @throws GeminiRuntimeException if serializing or copying fails
 */
public static GByteBuffer replaceBinaryValueIdList(
    BinaryValueForSplit binaryValue,
    GBufferAddressMapping pageMapping) {
    int[] previousIds = AbstractGRoutingValue.getAllSubMapId(binaryValue);
    int[] remappedIds = pageMapping.mergeMapping(binaryValue.getPageMapping(), previousIds);

    GByteArrayOutputStreamWithPos idStream = new GByteArrayOutputStreamWithPos(1024);
    DataOutputViewStreamWrapper idView = new DataOutputViewStreamWrapper(idStream);
    idStream.setPosition(0);

    try {
        // Serialize the new mapping ids back to back.
        for (int remappedId : remappedIds) {
            IntSerializer.INSTANCE.serialize(remappedId, idView);
        }
        int idSectionLen = idStream.getPosition();
        int prefixLen = AbstractGRoutingValue.getGRoutingBaseValueOffset(binaryValue);

        // The result is a short-lived temporary, so it is backed by a plain heap buffer
        // instead of a pooled one whose reference count would have to be tracked.
        GByteBuffer rewritten = new GUnPooledByteBuffer(ByteBuffer.allocate(prefixLen + idSectionLen));

        // Everything in front of the id section is taken over as is.
        ByteBufferUtils.copyFromBufferToBuffer(
            binaryValue.getBb(),
            rewritten.getByteBuffer(),
            binaryValue.getValueOffset(),
            0,
            prefixLen);

        // Followed by the freshly serialized ids.
        ByteBuffer idBytes = ByteBuffer.wrap(idStream.getBuf(), 0, idSectionLen);
        ByteBufferUtils.copyFromBufferToBuffer(
            idBytes,
            rewritten.getByteBuffer(),
            0,
            prefixLen,
            idSectionLen);

        return rewritten;
    } catch (Exception e) {
        throw new GeminiRuntimeException("replaceBinaryValueIdList get exception: " + e.getMessage(), e);
    }
}
private static List>>> splitGSValueMap(
List>> keyValueList,
TypeSerializer keySerializer,
TypeSerializer valueSerializer,
int mapSplitMinKeyNum,
int mapSplitSubMapSize) {
if (keyValueList.size() <= mapSplitMinKeyNum) {
return Collections.singletonList(keyValueList);
}
int subMapNum = getSplitNumBySampling(keyValueList, keySerializer, valueSerializer, mapSplitMinKeyNum, mapSplitSubMapSize);
int realSubMapNum = MathUtils.roundUpToPowerOfTwo(subMapNum);
if (realSubMapNum == 1) {
return Collections.singletonList(keyValueList);
}
return divideKeyValueList(keyValueList, realSubMapNum);
}
public static int getSplitNumBySampling(
List>> keyValueList,
TypeSerializer keySerializer,
TypeSerializer valueSerializer,
int mapSplitMinKeyNum,
int mapSplitSubMapSize) {
//sampling to compute the average size per entry
GByteArrayOutputStreamWithPos outputStreamForSampling = new GByteArrayOutputStreamWithPos(1024);
DataOutputViewStreamWrapper outputViewForSampling = new DataOutputViewStreamWrapper(outputStreamForSampling);
outputStreamForSampling.setPosition(0);
int index = 0;
int samplingStepSize = mapSplitMinKeyNum;
int samplingNum = 0;
try {
while (index < keyValueList.size()) {
keySerializer.serialize(keyValueList.get(index).f0, outputViewForSampling);
valueSerializer.serialize(keyValueList.get(index).f1.getValue(), outputViewForSampling);
index += samplingStepSize;
samplingNum++;
}
} catch (Exception e) {
throw new GeminiRuntimeException("Exception occur when GBinaryHashMap splitGSValueMap" + e.getMessage(), e);
}
int avgSizePerKey = outputStreamForSampling.getPosition() / samplingNum;
int keyNumPerSubMap = mapSplitSubMapSize < avgSizePerKey ? 1 : mapSplitSubMapSize / avgSizePerKey;
int subMapNum = keyValueList.size() / keyNumPerSubMap + (keyValueList.size() % keyNumPerSubMap == 0 ? 0 : 1);
return subMapNum;
}
private static List>>> divideKeyValueList(
List>> keyValueList, int subMapNum) {
List>>> subMapList = new ArrayList<>(subMapNum);
for (int i = 0; i < subMapNum; i++) {
subMapList.add(new ArrayList<>(keyValueList.size() / subMapNum + 1));
}
keyValueList.forEach(entry -> {
subMapList.get(entry.f0.hashCode() & (subMapNum - 1)).add(entry);
});
return subMapList;
}
}