All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.page.bmap.SplitHashMapValueHelper Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.page.bmap;

import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.IntSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.DataPageHashSubPageImpl;
import org.apache.flink.runtime.state.gemini.engine.page.compress.GCompressAlgorithm;
import org.apache.flink.runtime.state.gemini.engine.rm.Allocator;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.runtime.state.gemini.engine.rm.GUnPooledByteBuffer;
import org.apache.flink.util.MathUtils;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import static org.apache.flink.runtime.state.gemini.engine.page.bmap.AbstractGRoutingValue.GROUTING_INFO_HEADER_LENGTH;
import static org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap.EMPTY_G_BINARY_HASHMAP;

/**
 * The helper class for hash map split.
 */
public class SplitHashMapValueHelper {

	/**
	 * return a GBinaryHashMap that isn't  split, or return a GBinaryHashMap that indicates the routing info of the split maps.
	 */
	public static  GBinaryHashMap trySplit(
		DataPage.DataPageType dataPageType,
		List>> keyValueList,
		TypeSerializer keySerializer,
		TypeSerializer valueSerializer,
		long version,
		int logicPageId,
		Allocator allocator,
		long compactionCount,
		GCompressAlgorithm gCompressAlgorithm,
		GBufferAddressMapping mapping,
		int mapSplitSubMapSize,
		int mapSplitMinKeyNum) {

		int totalKeys = keyValueList.size();
		if (totalKeys == 0) {
			return EMPTY_G_BINARY_HASHMAP;
		}

		List>>> subMapList = splitGSValueMap(
			keyValueList,
			keySerializer,
			valueSerializer,
			mapSplitMinKeyNum,
			mapSplitSubMapSize);
		if (subMapList.size() == 1) { //no need split
			return GBinaryHashMap.of(dataPageType,
				keyValueList,
				keySerializer,
				valueSerializer,
				version,
				logicPageId,
				allocator,
				compactionCount,
				gCompressAlgorithm);
		}

		int[] subMapIdList = new int[subMapList.size()];
		int subMapMaxSize = 0;
		for (int i = 0; i < subMapList.size(); i++) {
			GBinaryHashMap subGBinaryHashMap = GBinaryHashMap.of(dataPageType,
				subMapList.get(i),
				keySerializer,
				valueSerializer,
				version,
				logicPageId,
				allocator,
				compactionCount,
				gCompressAlgorithm);

			int subMapId = mapping.putGByteBufferAddress(new DataPageHashSubPageImpl(subGBinaryHashMap));
			subMapIdList[i] = subMapId;
			subMapMaxSize = Math.max(subGBinaryHashMap.bytesSize(), subMapMaxSize);
		}

		int splitMapIndexLen = 0;
		GHashHeaderImpl pageHelper = GHashHeaderImpl.getPageHelper(splitMapIndexLen);
		GByteBuffer gByteBuffer =  genRoutingValueForSplitHashMap(subMapIdList, logicPageId, allocator, subMapMaxSize);
		return new GBinaryHashMap<>(pageHelper, gByteBuffer, keySerializer);
	}

	/**
	 * Builds the routing buffer of a split hash map from the ids of its sub maps.
	 *
	 * <p>Sub map {@code i} is keyed by its position {@code i} in {@code subMapIdArray}; the keys are
	 * written with {@link IntSerializer} and the routing type is {@code KSplitHashRouting}.
	 *
	 * @param subMapIdArray ids of the sub maps, in hash index order
	 * @param logicPageId logic page id forwarded to {@link #genRoutingBufferForSplitMap}
	 * @param allocator allocator the routing buffer is taken from
	 * @param subMapMaxSize size of the largest sub map, stored in the routing header
	 * @return the routing buffer, or {@code null} if {@code subMapMaxSize} is not positive or
	 *         {@code subMapIdArray} is empty
	 */
	public static GByteBuffer genRoutingValueForSplitHashMap(
		int[] subMapIdArray, int logicPageId, Allocator allocator, int subMapMaxSize) {

		if (subMapMaxSize <= 0) {
			return null;
		}

		List<Integer> subMapIdList = new ArrayList<>(subMapIdArray.length);
		List<Integer> hashIndexList = new ArrayList<>(subMapIdArray.length);
		for (int i = 0; i < subMapIdArray.length; i++) {
			subMapIdList.add(subMapIdArray[i]);
			hashIndexList.add(i);
		}

		return genRoutingBufferForSplitMap(DataPage.DataPageType.KSplitHashRouting,
			subMapIdList,
			hashIndexList,
			subMapMaxSize,
			IntSerializer.INSTANCE,
			logicPageId,
			allocator);
	}

	/**
	 * Serializes the routing information of a split map into a buffer taken from {@code allocator}.
	 *
	 * <p>The buffer consists of three consecutive sections:
	 * <ol>
	 *   <li>the routing header ({@code GROUTING_INFO_HEADER_LENGTH} bytes) holding the routing type, the
	 *       number of sub maps, the maximum sub map size and the offset at which the value section starts;</li>
	 *   <li>the key section: one int per sub map holding the end position (within the key section) of
	 *       that sub map's serialized key, followed by the serialized keys themselves;</li>
	 *   <li>the value section: one int per sub map holding its id.</li>
	 * </ol>
	 *
	 * @param dataPageType routing type whose code is written into the header
	 * @param subMapIdList ids of the sub maps
	 * @param subMapIndexList key of every sub map; element {@code i} belongs to {@code subMapIdList.get(i)}
	 * @param subMapMaxSize size of the largest sub map, written into the header
	 * @param keySerializer serializer for the elements of {@code subMapIndexList}
	 * @param logicPageId not used by this method
	 * @param allocator allocator the resulting buffer is taken from
	 * @param <K> type of the sub map keys
	 * @return the filled buffer, or {@code null} if {@code subMapIdList} is empty
	 * @throws GeminiRuntimeException if serializing, allocating or copying fails; a buffer that was
	 *         already allocated is released before the exception is thrown
	 */
	public static <K> GByteBuffer genRoutingBufferForSplitMap(
		DataPage.DataPageType dataPageType,
		List<Integer> subMapIdList,
		List<K> subMapIndexList,
		int subMapMaxSize,
		TypeSerializer<K> keySerializer,
		int logicPageId,
		Allocator allocator) {

		int subMapCount = subMapIdList.size();

		if (subMapCount <= 0) {
			return null;
		}

		byte[] header = new byte[GROUTING_INFO_HEADER_LENGTH];
		ByteBuffer headerBB = ByteBuffer.wrap(header);

		GByteArrayOutputStreamWithPos outputStreamForKey = new GByteArrayOutputStreamWithPos(1024);
		DataOutputViewStreamWrapper outputViewForKey = new DataOutputViewStreamWrapper(outputStreamForKey);
		// The first subMapCount ints of the key section are reserved for the key end positions,
		// so the first serialized key starts right after them.
		int lastKeyPosition = subMapCount * Integer.BYTES;

		GByteArrayOutputStreamWithPos outputStreamForValue = new GByteArrayOutputStreamWithPos(1024);
		DataOutputViewStreamWrapper outputViewForValue = new DataOutputViewStreamWrapper(outputStreamForValue);
		int lastValuePosition = 0;

		GByteBuffer gByteBuffer = null;
		try {
			for (int i = 0; i < subMapCount; i++) {
				int subMapId = subMapIdList.get(i);
				//write key and key indicator
				outputStreamForKey.setPosition(lastKeyPosition);
				keySerializer.serialize(subMapIndexList.get(i), outputViewForKey);
				lastKeyPosition = outputStreamForKey.getPosition();
				// Jump back to slot i of the reserved area and record where this key ends.
				outputStreamForKey.setPosition(i * Integer.BYTES);
				IntSerializer.INSTANCE.serialize(lastKeyPosition, outputViewForKey);

				//write value
				IntSerializer.INSTANCE.serialize(subMapId, outputViewForValue);
				lastValuePosition = outputStreamForValue.getPosition();
			}
			// Leave both streams positioned at the end of the written data.
			outputStreamForKey.setPosition(lastKeyPosition);
			outputStreamForValue.setPosition(lastValuePosition);
			ByteBuffer keyBytes = ByteBuffer.wrap(outputStreamForKey.getBuf(), 0, lastKeyPosition);
			ByteBuffer valueBytes = ByteBuffer.wrap(outputStreamForValue.getBuf(), 0, lastValuePosition);

			AbstractGRoutingValue.writeHeaderRoutingType(headerBB, dataPageType.getCode());
			AbstractGRoutingValue.writeHeaderSubMapCount(headerBB, subMapCount);
			AbstractGRoutingValue.writeHeaderSubMapMaxSize(headerBB, subMapMaxSize);
			AbstractGRoutingValue.writeHeaderBaseValueOffset(headerBB, header.length + lastKeyPosition);

			int newBufferLen = header.length + lastKeyPosition + lastValuePosition;
			gByteBuffer = allocator.allocate(newBufferLen);
			// header | keys | values
			ByteBufferUtils.copyFromArrayToBuffer(gByteBuffer.getByteBuffer(),
				0,
				header,
				0,
				header.length);
			ByteBufferUtils.copyFromBufferToBuffer(keyBytes,
				gByteBuffer.getByteBuffer(),
				0,
				header.length,
				lastKeyPosition);
			ByteBufferUtils.copyFromBufferToBuffer(valueBytes,
				gByteBuffer.getByteBuffer(),
				0,
				header.length + lastKeyPosition,
				lastValuePosition);

			return gByteBuffer;
		} catch (Exception e) {
			// Do not leak the allocated buffer when filling it failed.
			if (gByteBuffer != null) {
				gByteBuffer.release();
			}
			throw new GeminiRuntimeException("GBinaryHashMap get exception: " + e.getMessage(), e);
		}
	}

	/**
	 * Creates a copy of a routing value in which the sub map ids are replaced by the ids obtained
	 * from merging the value's own page mapping into {@code pageMapping}.
	 *
	 * <p>The header and key section of {@code binaryValue} are copied unchanged; only the value
	 * section (the id list) is rewritten.
	 *
	 * @param binaryValue routing value whose id list is to be replaced
	 * @param pageMapping mapping the old ids are merged into
	 * @return an unpooled, heap backed buffer holding the rewritten routing value
	 * @throws GeminiRuntimeException if serializing the ids or copying the bytes fails
	 */
	public static GByteBuffer replaceBinaryValueIdList(
		BinaryValueForSplit binaryValue,
		GBufferAddressMapping pageMapping) {

		int[] previousIds = AbstractGRoutingValue.getAllSubMapId(binaryValue);
		int[] remappedIds = pageMapping.mergeMapping(binaryValue.getPageMapping(), previousIds);

		GByteArrayOutputStreamWithPos idStream = new GByteArrayOutputStreamWithPos(1024);
		DataOutputViewStreamWrapper idView = new DataOutputViewStreamWrapper(idStream);
		idStream.setPosition(0);

		try {
			// Serialize the new value section: one int (mapping id) per sub map.
			for (int remappedId : remappedIds) {
				IntSerializer.INSTANCE.serialize(remappedId, idView);
			}
			final int idBytesLen = idStream.getPosition();

			// Everything in front of the value section (header and keys) is taken over as is.
			final int prefixLen = AbstractGRoutingValue.getGRoutingBaseValueOffset(binaryValue);

			// Note that this buffer is only for temp use, so it's VERY inconvenient to maintain the reference count.
			// This solution is not elegant and should be replaced by a shared buffer LATER.
			GByteBuffer rewritten = new GUnPooledByteBuffer(ByteBuffer.allocate(prefixLen + idBytesLen));

			ByteBufferUtils.copyFromBufferToBuffer(
				binaryValue.getBb(),
				rewritten.getByteBuffer(),
				binaryValue.getValueOffset(),
				0,
				prefixLen);

			ByteBuffer idBytes = ByteBuffer.wrap(idStream.getBuf(), 0, idBytesLen);
			ByteBufferUtils.copyFromBufferToBuffer(
				idBytes,
				rewritten.getByteBuffer(),
				0,
				prefixLen,
				idBytesLen);

			return rewritten;
		} catch (Exception e) {
			throw new GeminiRuntimeException("replaceBinaryValueIdList get exception: " + e.getMessage(), e);
		}
	}

	private static  List>>> splitGSValueMap(
		List>> keyValueList,
		TypeSerializer keySerializer,
		TypeSerializer valueSerializer,
		int mapSplitMinKeyNum,
		int mapSplitSubMapSize) {

		if (keyValueList.size() <= mapSplitMinKeyNum) {
			return Collections.singletonList(keyValueList);
		}

		int subMapNum = getSplitNumBySampling(keyValueList, keySerializer, valueSerializer, mapSplitMinKeyNum, mapSplitSubMapSize);
		int realSubMapNum = MathUtils.roundUpToPowerOfTwo(subMapNum);
		if (realSubMapNum == 1) {
			return Collections.singletonList(keyValueList);
		}
		return divideKeyValueList(keyValueList, realSubMapNum);
	}

	public static  int getSplitNumBySampling(
		List>> keyValueList,
		TypeSerializer keySerializer,
		TypeSerializer valueSerializer,
		int mapSplitMinKeyNum,
		int mapSplitSubMapSize) {
		//sampling to compute the average size per entry
		GByteArrayOutputStreamWithPos outputStreamForSampling = new GByteArrayOutputStreamWithPos(1024);
		DataOutputViewStreamWrapper outputViewForSampling = new DataOutputViewStreamWrapper(outputStreamForSampling);
		outputStreamForSampling.setPosition(0);

		int index = 0;
		int samplingStepSize = mapSplitMinKeyNum;
		int samplingNum = 0;
		try {
			while (index < keyValueList.size()) {
				keySerializer.serialize(keyValueList.get(index).f0, outputViewForSampling);
				valueSerializer.serialize(keyValueList.get(index).f1.getValue(), outputViewForSampling);
				index += samplingStepSize;
				samplingNum++;
			}
		} catch (Exception e) {
			throw new GeminiRuntimeException("Exception occur when GBinaryHashMap splitGSValueMap" + e.getMessage(), e);
		}

		int avgSizePerKey = outputStreamForSampling.getPosition() / samplingNum;
		int keyNumPerSubMap = mapSplitSubMapSize < avgSizePerKey ? 1 : mapSplitSubMapSize / avgSizePerKey;
		int subMapNum = keyValueList.size() / keyNumPerSubMap + (keyValueList.size() % keyNumPerSubMap == 0 ? 0 : 1);

		return subMapNum;
	}

	private static  List>>> divideKeyValueList(
		List>> keyValueList, int subMapNum) {

		List>>> subMapList = new ArrayList<>(subMapNum);
		for (int i = 0; i < subMapNum; i++) {
			subMapList.add(new ArrayList<>(keyValueList.size() / subMapNum + 1));
		}

		keyValueList.forEach(entry -> {
			subMapList.get(entry.f0.hashCode() & (subMapNum - 1)).add(entry);
		});
		return subMapList;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy