All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.page.bmap.SplitSortedMapValueHelper Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.page.bmap;

import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.DataPageSortedSubPageImpl;
import org.apache.flink.runtime.state.gemini.engine.rm.Allocator;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.SortedMap;
import java.util.TreeMap;

import static org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinarySortedMap.EMPTY_G_BINARY_SORTEDMAP;

/**
 * The helper class for sorted map split.
 */
public class SplitSortedMapValueHelper {

	public static  GBinarySortedMap trySplit(
		DataPage.DataPageType dataPageType,
		List>> keyValueList,
		TypeSerializer keySerializer,
		TypeSerializer valueSerializer,
		GComparator gComparator,
		long version,
		int logicPageId,
		Allocator allocator,
		long compactionCount,
		GBufferAddressMapping mapping,
		int mapSplitSubMapSize,
		int mapSplitMinKeyNum) {

		int totalKeys = keyValueList.size();
		if (totalKeys == 0) {
			return EMPTY_G_BINARY_SORTEDMAP;
		}

		List>>> subMapList = splitGSValueMap(keyValueList,
			gComparator,
			keySerializer,
			valueSerializer,
			mapSplitSubMapSize,
			mapSplitMinKeyNum);
		if (subMapList.size() == 1) { //no need split
			return GBinarySortedMap.of(dataPageType,
				keyValueList,
				keySerializer,
				valueSerializer,
				gComparator,
				version,
				logicPageId,
				allocator,
				compactionCount);
		}

		List subMapIdList = new ArrayList<>(subMapList.size());
		List keyIndexList = new ArrayList<>(subMapList.size());
		int subMapMaxSize = 0;
		for (int i = 0; i < subMapList.size(); i++) {
			List>> subMap = subMapList.get(i);
			GBinarySortedMap subGBinarySortedMap = GBinarySortedMap.of(dataPageType,
				subMap,
				keySerializer,
				valueSerializer,
				gComparator,
				version,
				logicPageId,
				allocator,
				compactionCount);
			if (subGBinarySortedMap == EMPTY_G_BINARY_SORTEDMAP) {
				continue;
			}
			int subMapId = mapping.putGByteBufferAddress(new DataPageSortedSubPageImpl(subGBinarySortedMap));
			subMapIdList.add(subMapId);
			keyIndexList.add(subMap.get(subMap.size() - 1).f0);
			Math.max(subGBinarySortedMap.bytesSize(), subMapMaxSize);
		}

		int splitMapIndexLen = 0;
		GSortedHeaderImpl pageHelper = GSortedHeaderImpl.getPageHelper(splitMapIndexLen);
		GByteBuffer gByteBuffer = SplitHashMapValueHelper.genRoutingBufferForSplitMap(DataPage.DataPageType.KSplitSortedRouting,
			subMapIdList,
			keyIndexList,
			subMapMaxSize,
			keySerializer,
			logicPageId,
			allocator);

		return new GBinarySortedMap(pageHelper, gByteBuffer.getByteBuffer(), keySerializer, gComparator);
	}

	private static  List>>> splitGSValueMap(
		List>> keyValueList,
		GComparator gComparator,
		TypeSerializer keySerializer,
		TypeSerializer valueSerializer,
		int mapSplitSubMapSize,
		int mapSplitMinKeyNum) {

		if (keyValueList.size() <= mapSplitMinKeyNum) {
			return Collections.singletonList(keyValueList);
		}
		int subMapNum = SplitHashMapValueHelper.getSplitNumBySampling(keyValueList, keySerializer, valueSerializer, mapSplitMinKeyNum, mapSplitSubMapSize);
		if (subMapNum == 1) {
			return Collections.singletonList(keyValueList);
		}

		return divideKeyValueList(keyValueList, gComparator, subMapNum);
	}

	private static  List>>> divideKeyValueList(
		List>> keyValueList, GComparator gComparator, int subMapNum) {

		Collections.sort(keyValueList, (o1, o2) -> gComparator.compare(o1.f0, o2.f0));

		List>>> subMapList = new ArrayList<>(subMapNum);
		int subMapKeyNum = keyValueList.size() / subMapNum + (keyValueList.size() % subMapNum == 0 ? 0 : 1);
		int endIndex = 0;
		for (int i = 0; i < subMapNum; i++) {
			List>> subMap = new ArrayList<>();
			int count = 0;
			while ((endIndex < keyValueList.size()) && ((count < subMapKeyNum) ||
				(endIndex > 0 && gComparator.compare(keyValueList.get(endIndex).f0, keyValueList.get(endIndex - 1).f0) == 0))) {
				subMap.add(keyValueList.get(endIndex++));
				count++;
			}
			subMapList.add(subMap);
		}

		return subMapList;
	}

	public static List> divideSortedMap(SortedMap sortedMap, int mapSplitSubMapSize) {

		List> result = new ArrayList<>();
		result.add(new TreeMap<>(sortedMap.comparator()));
		int sizeSum = 0;
		BinaryKey lastKey = sortedMap.firstKey();
		for (Map.Entry entry : sortedMap.entrySet()) {
			if (sizeSum >= mapSplitSubMapSize && sortedMap.comparator().compare(entry.getKey(), lastKey) != 0) {
				sizeSum = 0;
				result.add(new TreeMap<>(sortedMap.comparator()));
			}

			result.get(result.size() - 1).put(entry.getKey(), entry.getValue());
			sizeSum += entry.getValue().getValueLen();
			lastKey = entry.getKey();
		}

		return result;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy