// All downloads are free. Search and download functionality uses the official Maven repository.
//
// org.apache.flink.runtime.state.gemini.engine.page.DataPageKSortedMapImpl (Maven / Gradle / Ivy)
//
// There is a newer version: 1.5.1
// Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.page;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.filter.StateFilter;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValueMap;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValueMapEntry;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryKey;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryValueForSplit;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryValueImpl;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.ByteBufferDataInputView;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinarySortedMap;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinarySplitHashMap;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBufferAddressMapping;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GComparator;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GSortedRoutingValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.SplitHashMapValueHelper;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.SplitSortedMapValueHelper;
import org.apache.flink.runtime.state.gemini.engine.rm.Allocator;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.runtime.state.gemini.engine.utils.SeqIDUtils;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.guava18.com.google.common.collect.Maps;

import javax.annotation.Nullable;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import static org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap.EMPTY_G_BINARY_HASHMAP;
import static org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinarySortedMap.EMPTY_G_BINARY_SORTEDMAP;

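/**
 * Data page in which every key maps to a sorted map whose map keys are ordered by a
 * {@link GComparator}.
 *
 * <p>The page answers first/last-entry and head/tail/sub-map queries and hosts the static
 * helpers that compact several versions of a sorted-map value. When map splitting is enabled a
 * large map may be stored as several sub maps that are addressed through a
 * {@link GSortedRoutingValue}.
 */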
public class DataPageKSortedMapImpl extends DataPageKMapImpl implements DataPageKSortedMap {

	private final GComparator gComparator;

	/**
	 * Creates a sorted-map data page backed by the given binary hash map.
	 *
	 * @param gBinaryHashMap binary hash map holding this page's entries; handed to the parent class.
	 * @param mkTypeSerializer serializer for map keys; handed to the parent class.
	 * @param mvTypeSerializer serializer for map values; handed to the parent class.
	 * @param gSortedMapValueTypeSerialiZer serializer for a whole map value; handed to the parent class.
	 * @param gComparator comparator kept by this page and passed along whenever a sorted map is built or queried.
	 */
	public DataPageKSortedMapImpl(
		GBinaryHashMap gBinaryHashMap,
		TypeSerializer mkTypeSerializer,
		TypeSerializer mvTypeSerializer,
		AbstractGMapValueTypeSerializer gSortedMapValueTypeSerialiZer,
		GComparator gComparator) {
		super(gBinaryHashMap, mkTypeSerializer, mvTypeSerializer, gSortedMapValueTypeSerialiZer);
		this.gComparator = gComparator;
	}

	/**
	 * Builds a {@link GBinarySortedMap} from the bytes of one map value.
	 *
	 * @param valueBB buffer holding the serialized map.
	 * @return a sorted map created from {@code valueBB} with this page's map-key serializer and comparator.
	 */
	@Override
	protected GBinarySortedMap getBinaryMap(GByteBuffer valueBB) {
		return new GBinarySortedMap<>(valueBB.getByteBuffer(), mkTypeSerializer, gComparator);
	}

	/**
	 * Returns the first entry of the sorted map stored under {@code key}.
	 *
	 * @param key key whose sorted map is inspected.
	 * @return the result of {@code getFirstOrLastEntry(key, true)}; {@code null} when the page has no value for {@code key}.
	 */
	@Override
	public GSValueMapEntry firstEntry(K key) {
		return getFirstOrLastEntry(key, true);
	}

	/**
	 * Returns the last entry of the sorted map stored under {@code key}.
	 *
	 * @param key key whose sorted map is inspected.
	 * @return the result of {@code getFirstOrLastEntry(key, false)}; {@code null} when the page has no value for {@code key}.
	 */
	@Override
	public GSValueMapEntry lastEntry(K key) {
		return getFirstOrLastEntry(key, false);
	}

	/**
	 * Looks up the first or the last entry of the sorted map stored under {@code key}.
	 *
	 * @param key key whose sorted map is inspected.
	 * @param first {@code true} to return the entry of the first map key, {@code false} for the last.
	 * @return {@code null} if this page has no value for {@code key}; an entry with a {@code null}
	 *         map key if the map is marked deleted or reports no first/last key; otherwise an
	 *         entry carrying the map key, its value, the map's value type and its sequence id.
	 * @throws GeminiRuntimeException wrapping any exception raised while reading the page.
	 */
	private GSValueMapEntry<MK, MV> getFirstOrLastEntry(K key, boolean first) {
		try {
			BinaryValue binaryValue = this.gBinaryHashMap.get(key);
			if (binaryValue == null) {
				return null;
			}
			GValueType mapType = binaryValue.getGValueType();
			// the map related to K is deleted, means all the mapKey is Deleted.
			if (mapType == GValueType.Delete) {
				return new GSValueMapEntry<>(null, mapType, binaryValue.getSeqID());
			}

			GBinarySortedMap<MK> sortedMap;
			if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
				// A routing value is only expected inside a split hash map.
				Preconditions.checkState(this.gBinaryHashMap instanceof GBinarySplitHashMap, "Internal bug.");
				// Only the first (or last) sub map can contain the requested entry.
				sortedMap = GSortedRoutingValue.getFirstOrLastSubGBinarySortedMap(binaryValue, mkTypeSerializer, gComparator, first);
			} else {
				sortedMap = getBinaryMap(getDuplicateBB(binaryValue));
			}

			MK mkey = first ? sortedMap.firstKey() : sortedMap.lastKey();

			//TODO, sortedMap directly support firstEntry future.
			if (mkey == null) {
				return new GSValueMapEntry<>(null, mapType, binaryValue.getSeqID());
			}
			GSValue<MV> gsValue = getForMapBinaryValue(sortedMap, mkey);

			return new GSValueMapEntry<>(mkey, gsValue, mapType, binaryValue.getSeqID());
		} catch (Exception e) {
			throw new GeminiRuntimeException("get exception: " + e.getMessage(), e);
		}
	}

	/**
	 * Returns the part of the sorted map stored under {@code key} that is bounded above by
	 * {@code endMapKey}; implemented as {@code subMap(key, null, endMapKey)}.
	 *
	 * @param key key whose sorted map is queried.
	 * @param endMapKey upper bound handed to {@code subMap}.
	 * @return whatever {@code subMap} returns for an open lower bound.
	 */
	@Override
	public GSValueMap head(K key, MK endMapKey) {
		return subMap(key, null, endMapKey);
	}

	/**
	 * Returns the part of the sorted map stored under {@code key} that is bounded below by
	 * {@code startMapKey}; implemented as {@code subMap(key, startMapKey, null)}.
	 *
	 * @param key key whose sorted map is queried.
	 * @param startMapKey lower bound handed to {@code subMap}.
	 * @return whatever {@code subMap} returns for an open upper bound.
	 */
	@Override
	public GSValueMap tail(K key, MK startMapKey) {
		return subMap(key, startMapKey, null);
	}

	/**
	 * Returns the entries of the sorted map stored under {@code key} whose map keys lie in the
	 * range given by {@code startMapKey} and {@code endMapKey}.
	 *
	 * <p>The range semantics (including how a {@code null} bound is treated) are those of
	 * {@code GBinarySortedMap.subMap}; {@code head} and {@code tail} pass {@code null} for the
	 * bound they leave open. For a split map every sub map is queried and the partial results
	 * are collected into one map ordered by this page's comparator.
	 *
	 * @param key key whose sorted map is queried.
	 * @param startMapKey lower bound, may be {@code null}.
	 * @param endMapKey upper bound, may be {@code null}.
	 * @return {@code null} if this page has no value for {@code key}; a map object without
	 *         content if the map is marked deleted or the range is empty; otherwise a map whose
	 *         values are converted from their binary form on access.
	 * @throws GeminiRuntimeException wrapping any exception raised while reading the page.
	 */
	@Override
	public GSValueMap subMap(K key, MK startMapKey, MK endMapKey) {
		try {
			BinaryValue binaryValue = this.gBinaryHashMap.get(key);
			if (binaryValue == null) {
				return null;
			}
			GValueType mapType = binaryValue.getGValueType();
			// the map related to K is deleted, means all the mapKey is Deleted.
			if (mapType == GValueType.Delete) {
				return new GSValueMap<>(null, GValueType.Delete, binaryValue.getSeqID());
			}

			SortedMap<MK, BinaryValue> sortedMap;
			if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
				//TODO needn't get all the sub map in the future
				List<GBinarySortedMap<MK>> subMapList = GSortedRoutingValue.getAllSubGBinarySortedMap(binaryValue, mkTypeSerializer, gComparator);
				sortedMap = new TreeMap<>(gComparator.getJDKCompactor());
				for (GBinarySortedMap<MK> subMap : subMapList) {
					SortedMap<MK, BinaryValue> range = subMap.subMap(startMapKey, endMapKey);
					// The non-split path below tolerates a null range, so tolerate it here as well
					// instead of letting putAll fail.
					if (range != null) {
						sortedMap.putAll(range);
					}
				}
			} else {
				GBinarySortedMap<MK> gBinarySortedMap = getBinaryMap(getDuplicateBB(binaryValue));
				sortedMap = gBinarySortedMap.subMap(startMapKey, endMapKey);
			}

			if (sortedMap == null || sortedMap.isEmpty()) {
				return new GSValueMap<>(null, mapType, binaryValue.getSeqID());
			}

			// transformEntries returns a view: each value is converted when it is read.
			SortedMap<MK, GSValue<MV>> result = Maps.transformEntries(sortedMap, (mk, mv) -> getForBinaryValue(mv));

			return new GSValueMap<>(result, mapType, binaryValue.getSeqID());
		} catch (Exception e) {
			throw new GeminiRuntimeException("get exception: " + e.getMessage(), e);
		}
	}

	/**
	 * Resolves the binary map in which {@code mapKey} has to be looked up.
	 *
	 * @param binaryValue value stored for a key; either a plain sorted map or a routing value.
	 * @param mapKey map key used to pick the sub map when {@code binaryValue} is a routing value.
	 * @return the selected sub map for a routing value, otherwise the sorted map built from a
	 *         duplicate of the value's buffer.
	 */
	@Override
	public Map getBinaryMapByBinaryValue(BinaryValue binaryValue, MK mapKey) {
		// Plain (unsplit) value: the whole map lives in the value itself.
		if (!GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
			return getBinaryMap(getDuplicateBB(binaryValue));
		}

		// Routing value: only legal inside a split hash map; pick the sub map responsible for mapKey.
		Preconditions.checkState(this.gBinaryHashMap instanceof GBinarySplitHashMap, "Internal bug.");
		return GSortedRoutingValue.getSubGBinarySortedMap(mapKey, binaryValue, this.mkTypeSerializer, this.gComparator);
	}

	@Override
	public Map> getMap(BinaryValue binaryValue) throws IOException {
		Map> value = new HashMap<>();
		if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
			List> subMapList = GSortedRoutingValue.getAllSubGBinarySortedMap(binaryValue, mkTypeSerializer, gComparator);
			for (GBinarySortedMap subMap : subMapList) {
				DataInputView byteBufferDataInputView = new ByteBufferDataInputView(subMap.getData(),
					0,
					subMap.bytesSize());

				value.putAll(valueTypeSerializer.deserialize(byteBufferDataInputView));
			}
		} else {
			DataInputView byteBufferDataInputView = new ByteBufferDataInputView(binaryValue.getBb(),
				binaryValue.getValueOffset(),
				binaryValue.getValueLen());

			value = valueTypeSerializer.deserialize(byteBufferDataInputView);
		}

		return value;
	}

	/**
	 * Identifies this page as a sorted-map page.
	 *
	 * @return always {@link DataPageType#KSortedMap}.
	 */
	@Override
	public DataPageType getDataPageType() {
		return DataPageType.KSortedMap;
	}

	/**
	 * Compacts several versions of one sorted-map value into a single value.
	 *
	 * <p>When map splitting is enabled and at least one non-deleted input is already a routing
	 * value or is longer than the configured split threshold, the split compaction is used;
	 * in every other case the inputs are merged into one unsplit sorted map.
	 *
	 * @param valueByOrder versions of the value, in the order expected by the compaction helpers.
	 * @param mkTypeSerializer serializer for map keys.
	 * @param gComparator comparator ordering the map keys.
	 * @param isMajor whether this is a major compaction.
	 * @param version version handed to the map builder.
	 * @param logicPageId logical page id handed to the map builder.
	 * @param allocator allocator handed to the map builder.
	 * @param stateFilter optional filter handed to the map builder.
	 * @param gRegionContext optional region context handed to the map builder.
	 * @param pageMapping address mapping used only by the split compaction.
	 * @param mapSplitConfig split switch, size threshold and sub map size.
	 * @return the compacted value.
	 */
	@VisibleForTesting
	public static <MK> BinaryValue doCompactionSortedMapValue(
		List<BinaryValue> valueByOrder,
		TypeSerializer<MK> mkTypeSerializer,
		GComparator gComparator,
		boolean isMajor,
		long version,
		int logicPageId,
		Allocator allocator,
		@Nullable StateFilter stateFilter,
		@Nullable GRegionContext gRegionContext,
		GBufferAddressMapping pageMapping,
		MapSplitConfig mapSplitConfig) {

		if (mapSplitConfig.isMapSplitEnabled()) {
			boolean needSplit = false;
			for (BinaryValue binaryValue : valueByOrder) {
				// Deleted versions carry no map and cannot trigger a split.
				if (binaryValue.getGValueType() == GValueType.Delete) {
					continue;
				}

				if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue) || binaryValue.getValueLen() > mapSplitConfig.getMapSplitSizeThreshold()) {
					needSplit = true;
					break;
				}
			}

			if (needSplit) {
				return doCompactionSortedMapValueSplit(valueByOrder,
					mkTypeSerializer,
					gComparator,
					isMajor,
					version,
					logicPageId,
					allocator,
					stateFilter,
					gRegionContext,
					pageMapping,
					mapSplitConfig.getSubMapSize());
			}
		}

		return doCompactionSortedMapValueNormal(valueByOrder,
			mkTypeSerializer,
			gComparator,
			isMajor,
			version,
			logicPageId,
			allocator,
			stateFilter,
			gRegionContext);
	}

	/**
	 * Compacts several versions of one sorted-map value while keeping, or producing, a split
	 * representation.
	 *
	 * <p>A single input in a non-major compaction is passed through: a routing value gets its
	 * sub map ids rewritten against {@code pageMapping}, any other value is returned unchanged.
	 * Otherwise the non-deleted inputs are collected and the input with the largest number of
	 * sub maps is chosen as the base that defines the key ranges for the merge.
	 *
	 * @param valueByOrder versions of the value, in the order expected by the merge.
	 * @param mkTypeSerializer serializer for map keys.
	 * @param gComparator comparator ordering the map keys.
	 * @param isMajor whether this is a major compaction.
	 * @param version version handed to the map builder.
	 * @param logicPageId logical page id handed to the map builder.
	 * @param allocator allocator handed to the map builder.
	 * @param stateFilter optional filter handed to the map builder.
	 * @param gRegionContext optional region context handed to the map builder.
	 * @param pageMapping address mapping that receives the resulting sub maps.
	 * @param mapSplitSubMapSize target size of one sub map; also the divisor used to estimate
	 *                           how many sub maps an unsplit input would need.
	 * @return the compacted value.
	 */
	@VisibleForTesting
	public static <MK> BinaryValue doCompactionSortedMapValueSplit(
		List<BinaryValue> valueByOrder,
		TypeSerializer<MK> mkTypeSerializer,
		GComparator gComparator,
		boolean isMajor,
		long version,
		int logicPageId,
		Allocator allocator,
		@Nullable StateFilter stateFilter,
		@Nullable GRegionContext gRegionContext,
		GBufferAddressMapping pageMapping,
		int mapSplitSubMapSize) {

		if (valueByOrder.size() == 1 && !isMajor) {
			BinaryValue binaryValue = valueByOrder.get(0);
			if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
				GByteBuffer gByteBuffer = SplitHashMapValueHelper.replaceBinaryValueIdList((BinaryValueForSplit) binaryValue, pageMapping);
				return new BinaryValueImpl(gByteBuffer.getByteBuffer(), binaryValue.getGValueType(), binaryValue.getSeqID(), 0, gByteBuffer.capacity());
			}
			return binaryValue;
		}
		List<BinaryValue> listByOrder = new ArrayList<>();
		long seqID = SeqIDUtils.INVALID_SEQID;
		GValueType firstValueType = null;
		int maxSplitValueIndex = -1;
		int maxSplitPartNum = 0;

		for (BinaryValue binaryValue : valueByOrder) {
			if (binaryValue.getGValueType() == GValueType.Delete) {
				firstValueType = GValueType.Delete;
				continue;
			}

			//pick up newest page's seqID.
			seqID = Math.max(seqID, binaryValue.getSeqID());
			listByOrder.add(binaryValue);
			if (firstValueType == null) {
				firstValueType = binaryValue.getGValueType();
			}

			int subMapCount;
			if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
				subMapCount = GSortedRoutingValue.getSubMapCount(binaryValue);
			} else {
				// Number of sub maps the value would need: length divided by sub map size, rounded up.
				subMapCount = binaryValue.getValueLen() / mapSplitSubMapSize +
					(binaryValue.getValueLen() % mapSplitSubMapSize == 0 ? 0 : 1);
			}
			// Remember the position (within listByOrder) of the input with the most sub maps.
			if (subMapCount > maxSplitPartNum) {
				maxSplitPartNum = subMapCount;
				maxSplitValueIndex = listByOrder.size() - 1;
			}
		}

		return compactionSplitBinaryValueList(listByOrder,
			mkTypeSerializer,
			gComparator,
			maxSplitValueIndex,
			isMajor,
			version,
			logicPageId,
			allocator,
			stateFilter,
			gRegionContext,
			firstValueType,
			seqID,
			pageMapping,
			mapSplitSubMapSize);
	}

	/**
	 * Compacts several versions of one sorted-map value into a single, unsplit sorted map.
	 *
	 * <p>A single input in a non-major compaction is returned unchanged. Otherwise the binary
	 * maps of all non-deleted inputs are merged in list order, so an entry from a later input
	 * replaces the entry with the same key from an earlier one, and a new map is built from the
	 * merged content.
	 *
	 * @param valueByOrder versions of the value; later elements win on key collisions.
	 * @param mkTypeSerializer serializer for map keys.
	 * @param gComparator comparator ordering the map keys.
	 * @param isMajor whether this is a major compaction.
	 * @param version version handed to the map builder.
	 * @param logicPageId logical page id handed to the map builder.
	 * @param allocator allocator handed to the map builder.
	 * @param stateFilter optional filter handed to the map builder.
	 * @param gRegionContext optional region context handed to the map builder.
	 * @return the compacted value; it has no buffer when the resulting map is the empty map.
	 * @throws GeminiRuntimeException wrapping any exception raised during the merge.
	 */
	private static <MK> BinaryValue doCompactionSortedMapValueNormal(
		List<BinaryValue> valueByOrder,
		TypeSerializer<MK> mkTypeSerializer,
		GComparator gComparator,
		boolean isMajor,
		long version,
		int logicPageId,
		Allocator allocator,
		@Nullable StateFilter stateFilter,
		@Nullable GRegionContext gRegionContext) {

		try {
			if (valueByOrder.size() == 1 && !isMajor) {
				return valueByOrder.get(0);
			}
			List<GBinarySortedMap<MK>> listByOrder = new ArrayList<>();
			long seqID = SeqIDUtils.INVALID_SEQID;
			GValueType firstValueType = null;

			for (BinaryValue binaryValue : valueByOrder) {
				if (binaryValue.getGValueType() == GValueType.Delete) {
					firstValueType = GValueType.Delete;
					continue;
				}

				GBinarySortedMap<MK> mapValue = new GBinarySortedMap<>(getDuplicateBB(binaryValue).getByteBuffer(),
					mkTypeSerializer,
					gComparator);
				//pick up newest page's seqID.
				seqID = Math.max(seqID, binaryValue.getSeqID());
				listByOrder.add(mapValue);
				if (firstValueType == null) {
					firstValueType = binaryValue.getGValueType();
				}
			}

			GBinarySortedMap gBinarySortedMap;
			if (listByOrder.isEmpty()) {
				gBinarySortedMap = EMPTY_G_BINARY_SORTEDMAP;
			} else {
				//just for not to create a new map.
				// The first input's binary map is reused as the merge target and is modified in place.
				Map newMap = listByOrder.get(0).getBinaryMap();
				long compactionCount = listByOrder.get(0).getCompactionCount();
				for (int index = 1; index < listByOrder.size(); index++) {
					newMap.putAll(listByOrder.get(index).getBinaryMap());
					compactionCount += listByOrder.get(index).getCompactionCount();
				}

				gBinarySortedMap = GBinarySortedMap.ofBinaryList(DataPageType.KV,
					isMajor,
					version,
					logicPageId,
					mkTypeSerializer,
					gComparator,
					allocator,
					newMap,
					compactionCount,
					stateFilter,
					gRegionContext);
			}

			ByteBuffer bb = gBinarySortedMap == EMPTY_G_BINARY_SORTEDMAP ? null : gBinarySortedMap.getData();
			GValueType gValueType = judgeFinalValueType(bb, firstValueType, isMajor);
			return new BinaryValueImpl(bb, gValueType, seqID, 0, gBinarySortedMap.bytesSize());
		} catch (Exception e) {
			throw new GeminiRuntimeException("Internal BUG " + e.getMessage(), e);
		}
	}

	private static  BinaryValue compactionSplitBinaryValueList(
		List listByOrder,
		TypeSerializer mkTypeSerializer,
		GComparator gComparator,
		int maxSplitValueIndex,
		boolean isMajor,
		long version,
		int logicPageId,
		Allocator allocator,
		@Nullable StateFilter stateFilter,
		@Nullable GRegionContext gRegionContext,
		GValueType firstValueType,
		long seqID,
		GBufferAddressMapping pageMapping,
		int mapSplitSubMapSize) {

		GByteBuffer finalByteBuffer;
		if (listByOrder.size() == 0) {
			finalByteBuffer = null;
		} else {
			Preconditions.checkState(maxSplitValueIndex >= 0 && maxSplitValueIndex < listByOrder.size(), "Internal Bug.");
			Tuple2, Integer>[] buckets = new Tuple2[listByOrder.size()];
			for (int i = 0; i < listByOrder.size(); i++) {
				buckets[i] = Tuple2.of(new TreeMap<>(gComparator.getJDKBinaryCompactor()), 0);
			}

			// get the keyIndex
			List baseKeyIndex = getBaseKeyIndexList(listByOrder, maxSplitValueIndex, mkTypeSerializer, gComparator, mapSplitSubMapSize);

			List keyIndexList = new ArrayList<>(baseKeyIndex.size());
			List subMapIdList = new ArrayList<>(baseKeyIndex.size());
			int maxSubMapSize = 0;

			for (int part = 0; part < baseKeyIndex.size(); part++) {
				int subListMaxSize = mergeSubList(
					listByOrder,
					mkTypeSerializer,
					gComparator,
					isMajor,
					version,
					logicPageId,
					allocator,
					stateFilter,
					gRegionContext,
					pageMapping,
					mapSplitSubMapSize,
					part,
					buckets,
					baseKeyIndex,
					maxSplitValueIndex,
					subMapIdList,
					keyIndexList,
					maxSubMapSize);
				maxSubMapSize = Math.max(maxSubMapSize, subListMaxSize);
			}

			if (subMapIdList.size() <= 0) {
				return new BinaryValueImpl(null, GValueType.Delete, seqID, 0, 0);
			}

			if (subMapIdList.size() == 1) {
				finalByteBuffer = pageMapping.pollGByteBuffer();
			} else {
				//TODO SplitHashMap and SplitSortedMap can use the different way to generate the routing buffer.
				finalByteBuffer = SplitHashMapValueHelper.genRoutingBufferForSplitMap(DataPageType.KSplitSortedRouting,
					subMapIdList,
					keyIndexList,
					maxSubMapSize,
					mkTypeSerializer,
					logicPageId,
					allocator);
			}
		}

		ByteBuffer bb = finalByteBuffer == null ? null : finalByteBuffer.getByteBuffer();
		GValueType gValueType = judgeFinalValueType(bb, firstValueType, isMajor);
		return new BinaryValueImpl(bb, gValueType, seqID, 0, bb == null ? 0 : bb.capacity());
	}

	/**
	 * Merges, for one part of the base key index, the matching key range of every input value
	 * and registers the resulting sub maps.
	 *
	 * <p>For each value in {@code listByOrder} the entries up to {@code baseKeyIndex.get(part)}
	 * are moved into a merge map; for the last part everything that is left is moved. Entries
	 * that were read but belong to a later part stay in that value's bucket
	 * ({@code buckets[i].f0}); {@code buckets[i].f1} tracks the next sub map to read. Values are
	 * processed in list order, so an entry from a later value replaces the entry with the same
	 * key from an earlier one.
	 *
	 * <p>If the value at {@code maxSplitValueIndex} is reached while nothing has been merged yet,
	 * its next sub map is not loaded; only its page address is remembered. When the merge map is
	 * still empty at the end, that address is registered in {@code pageMapping} as is; otherwise
	 * the sub map is loaded and combined with the merge map.
	 *
	 * <p>NOTE(review): the generic type arguments in this method appear to have been stripped
	 * when the source was extracted (for example {@code Tuple2, Integer>[]},
	 * {@code Iterator>} and {@code List>}); presumably the buckets are
	 * {@code Tuple2<SortedMap<BinaryKey, BinaryValue>, Integer>[]} — confirm against the
	 * original source before compiling.
	 *
	 * @param listByOrder non-deleted versions of the value, in merge order.
	 * @param mkTypeSerializer serializer for map keys.
	 * @param gComparator comparator ordering the map keys.
	 * @param isMajor whether this is a major compaction.
	 * @param version version handed to the map builder.
	 * @param logicPageId logical page id handed to the map builder.
	 * @param allocator allocator handed to the map builder.
	 * @param stateFilter optional filter handed to the map builder.
	 * @param gRegionContext optional region context handed to the map builder.
	 * @param pageMapping address mapping that receives the resulting sub maps.
	 * @param mapSplitSubMapSize target size used to divide the merged entries into sub maps.
	 * @param part index into {@code baseKeyIndex} of the part being merged.
	 * @param buckets per-value carry-over state, updated in place.
	 * @param baseKeyIndex boundary key of every part.
	 * @param maxSplitValueIndex index in {@code listByOrder} of the value that supplied {@code baseKeyIndex}.
	 * @param subMapIdList receives the id of every sub map registered by this call.
	 * @param keyIndexList receives one key per registered sub map.
	 * @param maxSubMapSize largest sub map size seen by the caller so far.
	 * @return {@code 0} if this part produced no sub map, otherwise the maximum of
	 *         {@code maxSubMapSize} and the sizes of the sub maps registered by this call.
	 * @throws GeminiRuntimeException if the key index of the base value does not match {@code baseKeyIndex}.
	 */
	private static  int mergeSubList(
		List listByOrder,
		TypeSerializer mkTypeSerializer,
		GComparator gComparator,
		boolean isMajor,
		long version,
		int logicPageId,
		Allocator allocator,
		@Nullable StateFilter stateFilter,
		@Nullable GRegionContext gRegionContext,
		GBufferAddressMapping pageMapping,
		int mapSplitSubMapSize,
		int part,
		Tuple2, Integer>[] buckets,
		List baseKeyIndex,
		int maxSplitValueIndex,
		List subMapIdList,
		List keyIndexList,
		int maxSubMapSize) {

		SortedMap mergeMap = new TreeMap<>(gComparator.getJDKBinaryCompactor());
		long compactionCount = 0;
		// Set only when the base value's sub map may be reused without being loaded.
		PageAddress tmpPageAddress = null;
		BinaryValue tmpBinaryValue = null;

		//merge the subMap in different page
		for (int listIndex = 0; listIndex < listByOrder.size(); listIndex++) {

			SortedMap bucket = buckets[listIndex].f0;
			int nextPartIndex = buckets[listIndex].f1;
			BinaryValue binaryValue = listByOrder.get(listIndex);
			// A GBinarySortedMap that is not split is loaded completely into its bucket the first
			// time it is seen; bumping nextPartIndex marks it as loaded.
			if (nextPartIndex == 0 && !GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
				GBinarySortedMap gBinarySortedMap = new GBinarySortedMap<>(
					binaryValue.getValueLen() != 0 ? getDuplicateBB(binaryValue).getByteBuffer() : null,
					mkTypeSerializer,
					gComparator);
				bucket.putAll(gBinarySortedMap.getSortedBinaryMap());
				compactionCount += gBinarySortedMap.getCompactionCount();
				buckets[listIndex] = Tuple2.of(buckets[listIndex].f0, ++nextPartIndex);
			}

			//situation 1: only use the data in bucket to merge
			if (!bucket.isEmpty() && gComparator.compare(bucket.lastKey(), baseKeyIndex.get(part)) >= 0) {
				//for the last part that need to merge, put the all remaining data in bucket to the merge map;
				if (part == baseKeyIndex.size() - 1) {
					mergeMap.putAll(bucket);
					bucket.clear();
					continue;
				}
				// Move entries up to and including the boundary key; the rest stays in the bucket.
				Iterator> iterator = bucket.entrySet().iterator();
				while (iterator.hasNext()) {
					SortedMap.Entry entry = iterator.next();
					if (gComparator.compare(entry.getKey(), baseKeyIndex.get(part)) > 0) {
						break;
					}
					mergeMap.put(entry.getKey(), entry.getValue());
					iterator.remove();
				}
				continue;
			}
			//situation 2: use the data in bucket and another sub split map to merge
			if (!bucket.isEmpty()) {
				mergeMap.putAll(bucket);
				bucket.clear();
			}

			//1 means no split
			int maxPartNum = 1;
			if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
				maxPartNum = GSortedRoutingValue.getSubMapCount(binaryValue);
			}
			//only split map can enter this loop
			while (nextPartIndex < maxPartNum) {

				// Base value reached with nothing merged so far: remember the sub map's address
				// instead of loading it, so it can be reused unchanged if nothing else contributes.
				if (listIndex == maxSplitValueIndex && tmpPageAddress == null && mergeMap.isEmpty()) {
					if (gComparator.compare(GSortedRoutingValue.getKeyIndexBySlot(binaryValue, part), baseKeyIndex.get(part)) != 0) {
						throw new GeminiRuntimeException("Internal bug");
					}
					tmpPageAddress = GSortedRoutingValue.getSubMapPageAddress(nextPartIndex++, binaryValue);
					tmpBinaryValue = binaryValue;
					break;
				}

				GBinarySortedMap nextSubSortedMap = GSortedRoutingValue.getSubGBinarySortedMapWithKey(null, nextPartIndex++,
					binaryValue,
					mkTypeSerializer,
					gComparator);

				compactionCount += nextSubSortedMap.getCompactionCount();
				//for the last part that need to merge, put the all remaining sub map to it;
				if (part == baseKeyIndex.size() - 1) {
					mergeMap.putAll(nextSubSortedMap.getSortedBinaryMap());
					continue;
				}
				//the whole sub map need to merge
				if (gComparator.compare(nextSubSortedMap.lastKey(), baseKeyIndex.get(part)) < 0) {
					mergeMap.putAll(nextSubSortedMap.getSortedBinaryMap());
				} else {
					// the partial sub map need to merge
					Iterator> iterator = nextSubSortedMap.getSortedBinaryMap().entrySet().iterator();
					SortedMap newBucket = new TreeMap<>(gComparator.getJDKBinaryCompactor());
					while (iterator.hasNext()) {
						SortedMap.Entry entry = iterator.next();
						if (gComparator.compare(entry.getKey(), baseKeyIndex.get(part)) <= 0) {
							mergeMap.put(entry.getKey(), entry.getValue());
						} else {
							// Belongs to a later part: keep it in this value's bucket.
							newBucket.put(entry.getKey(), entry.getValue());
						}
					}
					buckets[listIndex] = Tuple2.of(newBucket, nextPartIndex);
					break;
				}
			}
			// Persist how far this value has been read; f0 may have been replaced above.
			buckets[listIndex] = Tuple2.of(buckets[listIndex].f0, nextPartIndex);
		}

		if (tmpPageAddress != null) {
			if (mergeMap.isEmpty()) {
				// Nothing else fell into this part: register the remembered sub map as is.
				int subMapId = pageMapping.putGByteBufferAddress(tmpPageAddress);
				subMapIdList.add(subMapId);
				keyIndexList.add(GSortedRoutingValue.getKeyIndexBySlot(tmpBinaryValue, part, mkTypeSerializer));
				maxSubMapSize = Math.max(maxSubMapSize, tmpPageAddress.getDataLen());

				return maxSubMapSize;
			} else {
				// Other values contributed after the base value: load the remembered sub map
				// and combine it with the merged entries.
				mergeMap = mergeTmpSubMapIntoCompactionMap(mergeMap, gComparator, tmpPageAddress, tmpBinaryValue, mkTypeSerializer);
			}
		}

		if (mergeMap.isEmpty()) {
			return 0;
		}

		// Cut the merged entries into sub maps and register every non-empty one.
		List> subMapList = SplitSortedMapValueHelper.divideSortedMap(mergeMap, mapSplitSubMapSize);
		for (SortedMap subMap : subMapList) {
			GBinarySortedMap finalSubMap = GBinarySortedMap.ofBinaryList(DataPageType.KV,
				isMajor,
				version,
				logicPageId,
				mkTypeSerializer,
				gComparator,
				allocator,
				subMap,
				compactionCount,
				stateFilter,
				gRegionContext);
			if (finalSubMap == EMPTY_G_BINARY_SORTEDMAP) {
				continue;
			}

			int subMapId = pageMapping.putGByteBufferAddress(new DataPageSortedSubPageImpl(finalSubMap));
			subMapIdList.add(subMapId);
			keyIndexList.add(finalSubMap.lastKey());
			maxSubMapSize = Math.max(finalSubMap.bytesSize(), maxSubMapSize);
		}

		return maxSubMapSize;
	}

	/**
	 * Loads the sub map found at {@code tmpPageAddress} and combines it with {@code mergeMap}.
	 *
	 * <p>The loaded entries are inserted first and the entries of {@code mergeMap} afterwards, so
	 * on a key collision the entry from {@code mergeMap} is the one that remains.
	 *
	 * @param mergeMap entries merged so far; not modified.
	 * @param gComparator comparator ordering the combined map.
	 * @param tmpPageAddress address of the sub map to load.
	 * @param tmpBinaryValue value whose page mapping resolves {@code tmpPageAddress}.
	 * @param mkTypeSerializer serializer for map keys.
	 * @return a new sorted map holding the combined entries.
	 */
	private static  SortedMap mergeTmpSubMapIntoCompactionMap(
		SortedMap mergeMap,
		GComparator gComparator,
		PageAddress tmpPageAddress,
		BinaryValue tmpBinaryValue,
		TypeSerializer mkTypeSerializer) {

		SortedMap combined = new TreeMap<>(gComparator.getJDKBinaryCompactor());

		// Resolve the address to the sub map's bytes and wrap them as a sorted map.
		ByteBuffer subMapBytes = tmpBinaryValue.getPageMapping()
			.getGByteBuffer(tmpPageAddress, null)
			.getByteBuffer();
		GBinarySortedMap loadedSubMap = new GBinarySortedMap<>(subMapBytes, mkTypeSerializer, gComparator);

		combined.putAll(loadedSubMap.getSortedBinaryMap());
		combined.putAll(mergeMap);

		return combined;
	}

	/**
	 * Produces the boundary key of every part for a split compaction.
	 *
	 * <p>If the base value is a routing value its stored key index is used. Otherwise the value is
	 * read as one sorted map, divided into sub maps of about {@code mapSplitSubMapSize}, and the
	 * last key of every sub map becomes a boundary.
	 *
	 * @param listByOrder non-deleted versions of the value.
	 * @param maxSplitValueIndex index in {@code listByOrder} of the base value.
	 * @param mkTypeSerializer serializer for map keys.
	 * @param gComparator comparator ordering the map keys.
	 * @param mapSplitSubMapSize target size of one sub map.
	 * @return the boundary keys, one per part, in ascending part order.
	 */
	private static <MK> List getBaseKeyIndexList(
		List<BinaryValue> listByOrder,
		int maxSplitValueIndex,
		TypeSerializer<MK> mkTypeSerializer,
		GComparator gComparator,
		int mapSplitSubMapSize){

		BinaryValue baseBinaryValue = listByOrder.get(maxSplitValueIndex);
		if (GSortedRoutingValue.isGSortedRoutingValue(baseBinaryValue)) {
			return GSortedRoutingValue.getKeyIndexArray(baseBinaryValue);
		}

		GBinarySortedMap<MK> baseMap = new GBinarySortedMap<>(getDuplicateBB(baseBinaryValue).getByteBuffer(),
			mkTypeSerializer,
			gComparator);
		List<SortedMap<BinaryKey, BinaryValue>> subMapList = SplitSortedMapValueHelper.divideSortedMap(baseMap.getSortedBinaryMap(), mapSplitSubMapSize);
		List<BinaryKey> baseKeyIndex = new ArrayList<>(subMapList.size());
		for (SortedMap<BinaryKey, BinaryValue> subMap : subMapList) {
			baseKeyIndex.add(subMap.lastKey());
		}

		return baseKeyIndex;
	}

	/**
	 * Flattens several versions of one sorted-map value into a single binary map.
	 *
	 * <p>The list is walked from its last element to its first, and every version's binary
	 * entries are put into the same {@link HashMap}; an entry from an element nearer the front of
	 * the list therefore replaces the entry with the same key from an element nearer the back.
	 * Routing values contribute all of their sub maps.
	 *
	 * @param binaryValueReversedOrderList versions of the value; the element at index 0 has the
	 *                                     highest precedence.
	 * @param mkTypeSerializer serializer for map keys.
	 * @param gComparator comparator ordering the map keys.
	 * @return the combined binary map.
	 * @throws GeminiRuntimeException wrapping any exception raised while reading a version.
	 */
	public static <MK> Map doCompactValueToBinaryMap(
		List<BinaryValue> binaryValueReversedOrderList, TypeSerializer<MK> mkTypeSerializer, GComparator gComparator) {
		try {

			Map newMap = new HashMap<>();

			for (int i = binaryValueReversedOrderList.size() - 1; i >= 0; i--) {
				BinaryValue binaryValue = binaryValueReversedOrderList.get(i);

				if (GSortedRoutingValue.isGSortedRoutingValue(binaryValue)) {
					List<GBinarySortedMap<MK>> subMapList = GSortedRoutingValue.getAllSubGBinarySortedMap(binaryValue, mkTypeSerializer, gComparator);
					for (GBinarySortedMap<MK> subMap : subMapList) {
						newMap.putAll(subMap.getBinaryMap());
					}
				} else {
					// getDuplicateBB may yield null; the map is then built from a null buffer.
					GByteBuffer gByteBuffer = getDuplicateBB(binaryValue);
					GBinarySortedMap<MK> mapValue = new GBinarySortedMap<>(
						gByteBuffer == null ? null : gByteBuffer.getByteBuffer(),
						mkTypeSerializer,
						gComparator);
					newMap.putAll(mapValue.getBinaryMap());
				}
			}

			return newMap;
		} catch (Exception e) {
			throw new GeminiRuntimeException("Internal BUG " + e.getMessage(), e);
		}
	}

	/**
	 * Rebuilds a sorted-map data page from its serialized form.
	 *
	 * @param pageSerdeFlink supplies the key, map-key, map-value and map-value-type serializers
	 *                       as well as the map comparator.
	 * @param dataPage buffer holding the serialized page.
	 * @param crc checksum handed to the {@link GBinaryHashMap} constructor.
	 * @return a page backed by a {@link GBinaryHashMap} created from {@code dataPage}.
	 */
	@VisibleForTesting
	public static  DataPageKSortedMapImpl readKSortedMapPageFrom(
		PageSerdeFlink2Key pageSerdeFlink, GByteBuffer dataPage, int crc) {
		GBinaryHashMap gBinaryHashMap = new GBinaryHashMap<>(dataPage, pageSerdeFlink.getKeySerde(), crc);
		return new DataPageKSortedMapImpl<>(gBinaryHashMap,
			pageSerdeFlink.getKey2Serde(),
			pageSerdeFlink.getValueSerde(),
			pageSerdeFlink.getMapValueTypeSerializer(),
			pageSerdeFlink.getMapComparator());
	}

	/**
	 * Wraps the two halves of a split page into sorted-map data pages that share this page's
	 * serializers and comparator.
	 *
	 * @param gBinaryHashMap1 first half; the empty-map constant yields a {@code null} page.
	 * @param gBinaryHashMap2 second half; the empty-map constant yields a {@code null} page.
	 * @return the pair of pages, either of which may be {@code null}.
	 */
	@Override
	public Tuple2 getSplitDataByGBinaryMap(
		GBinaryHashMap gBinaryHashMap1, GBinaryHashMap gBinaryHashMap2) {

		DataPageKSortedMapImpl firstPage = toSplitPageOrNull(gBinaryHashMap1);
		DataPageKSortedMapImpl secondPage = toSplitPageOrNull(gBinaryHashMap2);
		return Tuple2.of(firstPage, secondPage);
	}

	/**
	 * Creates a page over {@code binaryHashMap}, or returns {@code null} when it is the shared
	 * empty-map constant.
	 */
	private DataPageKSortedMapImpl toSplitPageOrNull(GBinaryHashMap binaryHashMap) {
		if (binaryHashMap == EMPTY_G_BINARY_HASHMAP) {
			return null;
		}
		return new DataPageKSortedMapImpl<>(binaryHashMap,
			mkTypeSerializer,
			mvTypeSerializer,
			(AbstractGMapValueTypeSerializer) valueTypeSerializer,
			gComparator);
	}
}




// © 2015 - 2024 Weber Informatics LLC | Privacy Policy