All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.page.bmap;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.IntSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.core.memory.DataInputView;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.filter.StateFilter;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.GValueType;
import org.apache.flink.runtime.state.gemini.engine.page.compress.CompressorCodec;
import org.apache.flink.runtime.state.gemini.engine.page.compress.GCompressAlgorithm;
import org.apache.flink.runtime.state.gemini.engine.rm.Allocator;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.util.MathUtils;
import org.apache.flink.util.Preconditions;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import java.util.zip.CRC32;

import static org.apache.flink.runtime.state.gemini.engine.page.bmap.GHashHeaderImpl.VALUE_TYPE_INDICATOR_BITS;
import static org.apache.flink.runtime.state.gemini.engine.page.bmap.GHashHeaderImpl.VALUE_TYPE_INDICATOR_MARK;

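/**
 * GBinaryHashMap: a hash map stored in one binary page. The page is laid out in this order:
 * <ol>
 *   <li>Head</li>
 *   <li>hash index (2 ~ 4 bytes count, 2 ~ 4 bytes indicator, depending on the type of GHashHeader)</li>
 *   <li>hash code</li>
 *   <li>TTL</li>
 *   <li>4-byte key indicators. TODO: if the key is fixed-length, they will be omitted.</li>
 *   <li>key set</li>
 *   <li>4-byte value indicators. TODO: if the value is fixed-length, they will be omitted.</li>
 *   <li>value set</li>
 * </ol>
 */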
public class GBinaryHashMap extends GAbstractHashMap {
	/** Logger used to report compression and page-decoding failures. */
	private static final Logger LOG = LoggerFactory.getLogger(GBinaryHashMap.class);
	/** Shared instance without a backing buffer; the factory methods return it when there are no keys. */
	public static final GBinaryHashMap EMPTY_G_BINARY_HASHMAP = new GBinaryHashMap(null, null, null, 0);
	/** Backing page buffer; {@code null} for an empty map (a zero-capacity buffer is normalized to null). */
	private final GByteBuffer data;
	/** Serializer used to deserialize keys read from the page. */
	private final TypeSerializer keyTypeSerializer;
	/** Helper that reads and writes the index, hash-code and sequence-id sections of the page. */
	private final GHashHeader gHashHeader;
	/** Checksum supplied or computed when this map was constructed. */
	private final int originCheckSum;

	/**
	 * Creates a map over {@code data} and derives the checksum from the buffer content.
	 * A {@code null} buffer gets a checksum of 0.
	 *
	 * @param data page buffer, may be null
	 * @param keyTypeSerializer serializer used to read keys
	 */
	public GBinaryHashMap(GByteBuffer data, TypeSerializer keyTypeSerializer) {
		this(data, keyTypeSerializer, data != null ? genOriginCheckSum(data.getByteBuffer()) : 0);
	}

	/**
	 * Creates a map over {@code data} with a caller-supplied checksum. The header helper is
	 * chosen from the index count stored in the page.
	 *
	 * @param data page buffer, may be null
	 * @param keyTypeSerializer serializer used to read keys
	 * @param originCheckSum checksum to record for this page
	 */
	public GBinaryHashMap(GByteBuffer data, TypeSerializer keyTypeSerializer, int originCheckSum) {
		this.data = checkCapacity(data);
		this.keyTypeSerializer = keyTypeSerializer;
		// indexCount() reads this.data, so it has to run after the assignment above.
		this.gHashHeader = GHashHeaderImpl.getPageHelper(indexCount());
		this.originCheckSum = originCheckSum;
	}

	/**
	 * Creates a map over {@code data} using the given header helper and derives the checksum
	 * from the buffer content. A {@code null} buffer gets a checksum of 0, matching the
	 * two-argument constructor.
	 *
	 * @param pageHelper header helper matching the page's index layout
	 * @param data page buffer, may be null
	 * @param keyTypeSerializer serializer used to read keys
	 */
	public GBinaryHashMap(
		GHashHeader pageHelper, GByteBuffer data, TypeSerializer keyTypeSerializer) {
		this(pageHelper, data, keyTypeSerializer, data == null ? 0 : genOriginCheckSum(data.getByteBuffer()));
	}

	/**
	 * Creates a map from fully specified parts; nothing is derived from the buffer.
	 *
	 * @param pageHelper header helper matching the page's index layout
	 * @param data page buffer, may be null; a zero-capacity buffer is stored as null
	 * @param keyTypeSerializer serializer used to read keys
	 * @param originCheckSum checksum to record for this page
	 */
	public GBinaryHashMap(
		GHashHeader pageHelper, GByteBuffer data, TypeSerializer keyTypeSerializer, int originCheckSum) {
		this.gHashHeader = pageHelper;
		this.keyTypeSerializer = keyTypeSerializer;
		this.originCheckSum = originCheckSum;
		this.data = checkCapacity(data);
	}

	/**
	 * Normalizes a zero-capacity buffer to {@code null}.
	 *
	 * @param data buffer to inspect, may be null
	 * @return {@code null} when {@code data} is non-null with capacity 0, otherwise {@code data}
	 */
	public static GByteBuffer checkCapacity(GByteBuffer data) {
		if (data == null || data.capacity() != 0) {
			return data;
		}
		return null;
	}

	/** @return the header helper this map uses to read its index section */
	public GHashHeader getPageHelper() {
		return this.gHashHeader;
	}

	/** @return {@code true} when the page holds no keys */
	@Override
	public boolean isEmpty() {
		return 0 == keyCount();
	}

	/**
	 * Checks for a key by performing a full lookup.
	 *
	 * @return {@code true} when {@link #get(Object)} finds an entry for {@code key}
	 */
	@Override
	public boolean containsKey(Object key) {
		//TODO improve it.
		BinaryValue found = get(key);
		return found != null;
	}

	/**
	 * Looks up the binary value stored for {@code key}.
	 *
	 * <p>The key's hash selects an index slot; the index value gives the first key slot and the
	 * number of keys in that bucket. Stored hash codes are compared first and the key is only
	 * deserialized when they match.
	 *
	 * @param key key to look up
	 * @return the stored value, or {@code null} when the key is null, the map has no data or
	 *     the key is absent
	 */
	@Override
	public BinaryValue get(Object key) {
		if (key == null || data == null) {
			return null;
		}
		final int hashCode = key.hashCode();
		final int indexLen = indexCount();
		final int keyCount = keyCount();
		final long indexValue = getIndexSlot(hashCode & (indexLen - 1));

		final int baseKeyOffset = GHashHeaderImpl.getHeaderKeyOffset(data.getByteBuffer());
		final int baseValueOffset = GHashHeaderImpl.getHeaderValueOffset(data.getByteBuffer());

		if (indexValue == gHashHeader.getInitialIndexValue()) {
			// the bucket was never written
			return null;
		}

		// holders let the key/value sections be decompressed at most once per lookup
		AtomicReference uncompressKeyData = new AtomicReference<>();
		AtomicReference uncompressValueData = new AtomicReference<>();

		final int count = gHashHeader.getCountByIndexValue(indexValue);
		final int startSlot = gHashHeader.getSlotByIndexValue(indexValue);
		for (int j = 0; j < count; j++) {
			final int slot = startSlot + j;
			if (getHashCode(indexLen, slot) != hashCode) {
				continue;
			}
			K k = getKeyBySlot(keyCount, baseKeyOffset, baseValueOffset, slot, uncompressKeyData);
			if (k == key || key.equals(k)) {
				return getBinaryValue(keyCount, indexLen, baseValueOffset, slot, uncompressValueData);
			}
		}
		return null;
	}

	/** @return the compaction count stored in the page header, or 0 when there is no data */
	@Override
	public long getCompactionCount() {
		if (data == null) {
			return 0;
		}
		return GHashHeaderImpl.getHeaderStatCompactionCount(data.getByteBuffer());
	}

	/** @return the capacity of the backing buffer, or 0 when there is no data */
	@Override
	public int bytesSize() {
		if (data == null) {
			return 0;
		}
		return data.capacity();
	}

	/**
	 * Reads the page type byte from the header.
	 *
	 * @throws IllegalArgumentException when the map has no backing data
	 */
	public byte getDataType() {
		Preconditions.checkArgument(data != null, "BUG");
		final ByteBuffer page = data.getByteBuffer();
		return GHashHeaderImpl.getHeadPageType(page);
	}

	/** @return the total key count stored in the page header, or 0 when there is no data */
	@Override
	public int keyCount() {
		if (data == null) {
			return 0;
		}
		return GHashHeaderImpl.getHeaderTotalKeyCount(data.getByteBuffer());
	}

	/**
	 * Map-interface view of the key count.
	 *
	 * @return the total key count stored in the page header, or 0 when there is no data
	 */
	@Override
	public int size() {
		//map interface.
		if (data == null) {
			return 0;
		}
		return GHashHeaderImpl.getHeaderTotalKeyCount(data.getByteBuffer());
	}

	/** @return the index slot count stored in the page header, or 0 when there is no data */
	@Override
	public int indexCount() {
		if (data == null) {
			return 0;
		}
		return GHashHeaderImpl.getHeaderIndexCount(data.getByteBuffer());
	}

	/** @return the logic page id stored in the page header, or -1 when there is no data */
	public int getLogicPageId() {
		if (data == null) {
			return -1;
		}
		return GHashHeaderImpl.getHeadPageID(data.getByteBuffer());
	}

	/** @return the version stored in the page header, or -1 when there is no data */
	public long getVersion() {
		if (data == null) {
			return -1;
		}
		return GHashHeaderImpl.getHeaderVersion(data.getByteBuffer());
	}

	/**
	 * Reads the raw index value of one index slot.
	 *
	 * @param indexSlot slot number within the hash index
	 * @return the index value, or -1 when there is no data
	 */
	public long getIndexSlot(int indexSlot) {
		if (data == null) {
			return -1;
		}
		return gHashHeader.getIndexBySlot(data.getByteBuffer(), indexSlot);
	}

	/**
	 * Reads the hash code stored for one key slot.
	 *
	 * @param indexLen number of index slots in the page
	 * @param keyCursor key slot number
	 * @return the stored hash code, or -1 when there is no data
	 */
	@Override
	public int getHashCode(int indexLen, int keyCursor) {
		if (data == null) {
			return -1;
		}
		return gHashHeader.getHashCode(data.getByteBuffer(), indexLen, keyCursor);
	}

	/**
	 * Deserializes the key stored in {@code keySlot}.
	 *
	 * <p>When the header records an uncompressed key-part size (anything other than -1), the key
	 * section is decompressed first. The decompressed buffer is cached in
	 * {@code uncompressKeyData} when a holder is supplied, and offsets are then relative to
	 * that buffer.
	 *
	 * @param keyCount total number of keys in the page
	 * @param baseKeyOffset offset of the key section in the page
	 * @param baseValueOffset offset of the value section, i.e. the end of the key section
	 * @param keySlot slot of the key to read
	 * @param uncompressKeyData optional holder for the decompressed key section, may be null
	 * @return the deserialized key
	 * @throws IllegalArgumentException when the map has no backing data
	 * @throws GeminiRuntimeException when reading or deserializing fails
	 */
	@VisibleForTesting
	public K getKeyBySlot(
		int keyCount,
		int baseKeyOffset,
		int baseValueOffset,
		int keySlot,
		AtomicReference uncompressKeyData) {
		Preconditions.checkArgument(data != null, "BUG");
		// declared outside the try block so the catch block can report them
		int startKeyOffset = -1;
		int endKeyOffset = -1;
		try {
			final int uncompressKeySize = getUncompressKeyPartSize();
			ByteBuffer keySection = data.getByteBuffer();
			if (uncompressKeySize != -1) {
				ByteBuffer uncompressKeyBB = null;
				if (uncompressKeyData != null) {
					uncompressKeyBB = uncompressKeyData.get();
				}
				if (uncompressKeyBB == null) {
					uncompressKeyBB = tryUnCompress(
						getCompressAlgorithm(),
						uncompressKeySize,
						data.getByteBuffer(),
						baseKeyOffset,
						baseValueOffset);
					if (uncompressKeyData != null) {
						uncompressKeyData.set(uncompressKeyBB);
					}
				}
				keySection = uncompressKeyBB;
				baseKeyOffset = 0;
			}

			endKeyOffset = GHashHeaderImpl.getEndOffsetBySlot(keySection, baseKeyOffset, keySlot);
			// the first key starts right after the table of per-key end offsets
			startKeyOffset = keySlot == 0
				? keyCount * Integer.BYTES
				: GHashHeaderImpl.getEndOffsetBySlot(keySection, baseKeyOffset, keySlot - 1);

			return keyTypeSerializer.deserialize(new ByteBufferDataInputView(
				keySection,
				baseKeyOffset + startKeyOffset,
				endKeyOffset - startKeyOffset));
		} catch (Exception e) {
			String debuginfo = "baseKeyOffset=" + baseKeyOffset
				+ " ,startOffset= " + (startKeyOffset)
				+ " ;endOffset=" + endKeyOffset
				+ " ,debugInfo=" + debugInfo();
			throw new GeminiRuntimeException("getKeyBySlot has Exception. " + debuginfo, e);
		}
	}

	/**
	 * Builds a diagnostic string describing the backing buffer and header fields.
	 *
	 * <p>This is called from catch blocks, so it must not throw: a missing buffer or a header
	 * that cannot be read is reported in the returned text instead of masking the original
	 * exception.
	 *
	 * @return diagnostic text for error messages
	 */
	private String debugInfo() {
		StringBuilder sb = new StringBuilder();
		if (data == null) {
			return sb.append(" data=null ;originChecksum=").append(this.originCheckSum).toString();
		}
		try {
			ByteBuffer bb = data.getByteBuffer();
			sb.append(" dataClass=").append(data.getClass().getSimpleName());
			sb.append(" ,byteBuffer=").append(bb.getClass().getSimpleName());
			sb.append(" ,position=").append(bb.position());
			sb.append(" ,limit=").append(bb.limit());
			sb.append(" ,capacity=").append(bb.capacity());
			sb.append(" ;indexCount=").append(indexCount());
			sb.append(" ;keyCount=").append(keyCount());
			sb.append(" ;baseKeyOffset=").append(GHashHeaderImpl.getHeaderKeyOffset(bb));
			sb.append(" ;baseValueOffset=").append(GHashHeaderImpl.getHeaderValueOffset(bb));
			sb.append(" ;dataLen=").append(data.capacity());
			sb.append(" ;originChecksum=").append(this.originCheckSum);
			sb.append(" ;currentChecksum=").append(genOriginCheckSum(bb));
		} catch (RuntimeException e) {
			// keep whatever was collected; the caller is already reporting another failure
			sb.append(" ;debugInfoError=").append(e);
		}
		return sb.toString();
	}

	public static  GBinaryHashMap of(
		DataPage.DataPageType dataPageType,
		List>> keyValueList,
		TypeSerializer keySerializer,
		TypeSerializer valueSerializer,
		long version,
		int logicPageId,
		Allocator allocator,
		long compactionCount,
		GCompressAlgorithm gCompressAlgorithm) {

		int totalKeys = keyValueList.size();
		if (totalKeys == 0) {
			return EMPTY_G_BINARY_HASHMAP;
		}

		int indexLen = MathUtils.roundUpToPowerOfTwo(totalKeys);
		GHashHeaderImpl pageHelper = GHashHeaderImpl.getPageHelper(indexLen);

		GByteBuffer gbyteBuffer = genByteBuffer(dataPageType,
			keyValueList,
			indexLen,
			totalKeys,
			pageHelper,
			keySerializer,
			valueSerializer,
			version,
			logicPageId,
			allocator,
			compactionCount,
			gCompressAlgorithm);

		return new GBinaryHashMap<>(pageHelper,
			gbyteBuffer,
			keySerializer,
			genOriginCheckSum(gbyteBuffer.getByteBuffer()));
	}

	/**
	 * Computes a CRC32 checksum over the bytes between the buffer's position and limit.
	 * The buffer's position is restored afterwards.
	 *
	 * @param bb buffer to checksum, may be null
	 * @return the CRC32 value truncated to an int, or 0 when {@code bb} is null
	 */
	private static int genOriginCheckSum(ByteBuffer bb) {
		if (bb == null) {
			return 0;
		}
		final int savedPosition = bb.position();
		final CRC32 checksum = new CRC32();
		// update(ByteBuffer) advances the position to the limit, so it is reset below
		checksum.update(bb);
		final int result = (int) checksum.getValue();
		bb.position(savedPosition);
		return result;
	}

	/**
	 * Builds a binary hash map page from entries that are already in binary form.
	 *
	 * <p>When {@code isMajor} is set, entries of type {@link GValueType#Delete} and entries
	 * rejected by {@code stateFilter} are dropped. If nothing remains,
	 * {@link #EMPTY_G_BINARY_HASHMAP} is returned. The in-page compression algorithm is taken
	 * from {@code gRegionContext}; without a context no compression is used.
	 *
	 * @param dataPageType page type written to the header
	 * @param isMajor whether deleted and filtered entries are removed
	 * @param version version written to the header
	 * @param logicPageId logic page id written to the header
	 * @param keyTypeSerializer serializer the resulting map uses to read keys
	 * @param allocator allocator for the page buffer
	 * @param newMap entries to store
	 * @param compactionCount compaction count written to the header
	 * @param stateFilter optional filter applied when {@code isMajor} is set
	 * @param gRegionContext optional region context
	 * @return the new map
	 */
	public static <K> GBinaryHashMap<K> ofBinaryList(
		DataPage.DataPageType dataPageType,
		boolean isMajor,
		long version,
		int logicPageId,
		TypeSerializer<K> keyTypeSerializer,
		Allocator allocator,
		Map<BinaryKey, BinaryValue> newMap,
		long compactionCount,
		@Nullable StateFilter stateFilter,
		@Nullable GRegionContext gRegionContext) {
		List<Tuple2<BinaryKey, BinaryValue>> dataSet = new ArrayList<>();

		for (Map.Entry<BinaryKey, BinaryValue> entry : newMap.entrySet()) {
			BinaryValue binaryValue = entry.getValue();
			if (isMajor && (binaryValue.getgValueType() == GValueType.Delete || (stateFilter != null && stateFilter.filter(
				gRegionContext,
				binaryValue.getSeqID())))) {
				continue;
			}
			dataSet.add(Tuple2.of(entry.getKey(), binaryValue));
		}

		int totalKeys = dataSet.size();
		if (totalKeys == 0) {
			return EMPTY_G_BINARY_HASHMAP;
		}
		int indexLen = MathUtils.roundUpToPowerOfTwo(totalKeys);
		GHashHeaderImpl pageHelper = GHashHeaderImpl.getPageHelper(indexLen);
		GByteBuffer gByteBuffer = genByteBufferByBinary(dataPageType,
			dataSet,
			indexLen,
			totalKeys,
			pageHelper,
			version,
			logicPageId,
			allocator,
			compactionCount,
			gRegionContext == null
				? GCompressAlgorithm.None
				: gRegionContext.getGContext().getInPageGCompressAlgorithm());
		return new GBinaryHashMap<>(pageHelper,
			gByteBuffer,
			keyTypeSerializer,
			genOriginCheckSum(gByteBuffer.getByteBuffer()));
	}

	/**
	 * Returns the page content as a byte array, optionally verifying it against the checksum
	 * recorded at construction.
	 *
	 * <p>The backing array is returned without copying only when it backs exactly this buffer
	 * (array offset 0 and array length equal to the buffer capacity). Otherwise, for a direct
	 * buffer or a view over a larger array, the content is copied so that neither the result
	 * nor the checksum includes bytes that do not belong to this page.
	 *
	 * @param checkSum whether to verify the CRC32 of the returned bytes
	 * @return the page bytes
	 * @throws IllegalArgumentException when the map has no backing data
	 * @throws GeminiRuntimeException when the checksum does not match
	 */
	public byte[] getDataByte(boolean checkSum) {
		Preconditions.checkArgument(data != null, "BUG");
		final ByteBuffer buffer = data.getByteBuffer();
		final byte[] result;
		if (buffer.hasArray() && buffer.arrayOffset() == 0 && buffer.array().length == buffer.capacity()) {
			result = buffer.array();
		} else {
			result = new byte[data.capacity()];
			ByteBufferUtils.copyFromBufferToArray(buffer, result, 0, 0, data.capacity());
		}
		if (checkSum) {
			verifyCheckSum(result);
		}
		return result;
	}

	/** Throws when the CRC32 of {@code bytes} differs from the checksum recorded at construction. */
	private void verifyCheckSum(byte[] bytes) {
		CRC32 crc32 = new CRC32();
		crc32.update(bytes);
		int crc = (int) crc32.getValue();
		if (crc != this.originCheckSum) {
			throw new GeminiRuntimeException("checkSum changed! originCheckSum=" + originCheckSum + " ,now=" + crc);
		}
	}

	/** @return the {@link ByteBuffer} held by the backing buffer; fails if there is no backing data */
	public ByteBuffer getData() {
		final GByteBuffer holder = this.data;
		return holder.getByteBuffer();
	}

	/** @return the backing buffer, or {@code null} for an empty map */
	public GByteBuffer getGByteBuffer() {
		return this.data;
	}

	/**
	 * Materializes every slot of the page as a {@link BinaryKey} to {@link BinaryValue} entry.
	 *
	 * @return a new map with one entry per key slot; empty when the page holds no keys
	 */
	@Override
	public Map getBinaryMap() {
		final int keyCount = keyCount();
		Map dataMap = new HashMap<>(keyCount);
		if (keyCount != 0) {
			final int indexLen = indexCount();
			final int baseKeyOffset = GHashHeaderImpl.getHeaderKeyOffset(data.getByteBuffer());
			final int baseValueOffset = GHashHeaderImpl.getHeaderValueOffset(data.getByteBuffer());
			// holders let each compressed section be decompressed only once for the whole scan
			AtomicReference uncompressKeyData = new AtomicReference<>();
			AtomicReference uncompressValueData = new AtomicReference<>();
			int slot = 0;
			while (slot < keyCount) {
				BinaryKey binaryKey = getBinaryKey(
					keyCount, indexLen, baseKeyOffset, baseValueOffset, slot, uncompressKeyData);
				BinaryValue binaryValue = getBinaryValue(
					keyCount, indexLen, baseValueOffset, slot, uncompressValueData);
				dataMap.put(binaryKey, binaryValue);
				slot++;
			}
		}
		return dataMap;
	}

	/**
	 * Builds a {@link BinaryKey} that points at the serialized key stored in slot {@code i}.
	 *
	 * <p>When the header records an uncompressed key-part size (anything other than -1), the key
	 * section is decompressed first. The decompressed buffer is cached in
	 * {@code uncompressKeyData} when a holder is supplied, and offsets are then relative to
	 * that buffer.
	 *
	 * @param keyCount total number of keys in the page
	 * @param indexLen number of index slots in the page
	 * @param baseKeyOffset offset of the key section in the page
	 * @param baseValueOffset offset of the value section, i.e. the end of the key section
	 * @param i slot of the key to read
	 * @param uncompressKeyData optional holder for the decompressed key section, may be null
	 * @return the binary key
	 * @throws GeminiRuntimeException when the key cannot be read
	 */
	protected BinaryKey getBinaryKey(
		int keyCount,
		int indexLen,
		int baseKeyOffset,
		int baseValueOffset,
		int i,
		AtomicReference<ByteBuffer> uncompressKeyData) {
		try {
			int uncompressKeySize = getUncompressKeyPartSize();
			ByteBuffer finalBB = data.getByteBuffer();
			// kept separate from the parameter so the error message reports the caller's offsets
			int keySectionOffset = baseKeyOffset;
			if (uncompressKeySize != -1) {

				ByteBuffer uncompressKeyBB = uncompressKeyData == null ? null : uncompressKeyData.get();
				if (uncompressKeyBB == null) {
					uncompressKeyBB = tryUnCompress(getCompressAlgorithm(),
						uncompressKeySize,
						data.getByteBuffer(),
						baseKeyOffset,
						baseValueOffset);
					if (uncompressKeyData != null) {
						uncompressKeyData.set(uncompressKeyBB);
					}
				}
				finalBB = uncompressKeyBB;
				keySectionOffset = 0;
			}

			int endKeyOffset = GHashHeaderImpl.getEndOffsetBySlot(finalBB, keySectionOffset, i);

			int startKeyOffset;
			if (i == 0) {
				// the first key starts right after the table of per-key end offsets
				startKeyOffset = keyCount * Integer.BYTES;
			} else {
				startKeyOffset = GHashHeaderImpl.getEndOffsetBySlot(finalBB, keySectionOffset, i - 1);
			}

			int hashCode = getHashCode(indexLen, i);
			return new BinaryKey(finalBB, keySectionOffset + startKeyOffset, endKeyOffset - startKeyOffset, hashCode);
		} catch (Exception e) {
			String debuginfo = "keyCount=" + keyCount + " ,indexLen=" + indexLen + " ,slot=" + i + " ,baseKeyOffset=" + baseKeyOffset + " ;baseValueOffset=" + baseValueOffset;
			debuginfo += " ,debugInfo=" + debugInfo();
			LOG.error(debuginfo, e);
			throw new GeminiRuntimeException("getBinaryKey has Exception. " + debuginfo, e);
		}
	}

	/**
	 * Builds a {@link BinaryValue} for the value stored in slot {@code slotNum}.
	 *
	 * <p>Each slot has a 4-byte indicator: the bits above {@code VALUE_TYPE_INDICATOR_BITS}
	 * hold the value type and the bits selected by {@code VALUE_TYPE_INDICATOR_MARK} hold the
	 * end offset of the value. A value of type {@link GValueType#Delete} carries no bytes.
	 * When the header records an uncompressed value-part size (anything other than -1), the
	 * value section is decompressed first and cached in {@code uncompressValueData} when a
	 * holder is supplied.
	 *
	 * @param keyCount total number of keys in the page
	 * @param indexLen number of index slots in the page
	 * @param baseValueOffset offset of the value section in the page
	 * @param slotNum slot of the value to read
	 * @param uncompressValueData optional holder for the decompressed value section, may be null
	 * @return the binary value
	 * @throws GeminiRuntimeException when the value cannot be read
	 */
	protected BinaryValue getBinaryValue(
		int keyCount, int indexLen, int baseValueOffset, int slotNum, AtomicReference<ByteBuffer> uncompressValueData) {
		try {
			BinaryValue binaryValue;
			long seqID = this.gHashHeader.getSeqIDBytSlot(data.getByteBuffer(), indexLen, keyCount, slotNum);

			int uncompressValuePartSize = getUncompressValuePartSize();
			ByteBuffer finalBB = data.getByteBuffer();
			// kept separate from the parameter so the error message reports the caller's offset
			int valueSectionOffset = baseValueOffset;
			if (uncompressValuePartSize != -1) {
				ByteBuffer uncompressValueBB = uncompressValueData == null ? null : uncompressValueData.get();
				if (uncompressValueBB == null) {
					uncompressValueBB = tryUnCompress(getCompressAlgorithm(),
						uncompressValuePartSize,
						data.getByteBuffer(),
						baseValueOffset,
						data.capacity());
					if (uncompressValueData != null) {
						uncompressValueData.set(uncompressValueBB);
					}
				}
				finalBB = uncompressValueBB;
				valueSectionOffset = 0;
			}

			int valueIndicate = GHashHeaderImpl.getEndOffsetBySlot(finalBB, valueSectionOffset, slotNum);
			GValueType gValueType = GValueType.valueOf((byte) (valueIndicate >>> VALUE_TYPE_INDICATOR_BITS));
			if (gValueType == GValueType.Delete) {
				binaryValue = new BinaryValue(null, GValueType.Delete, seqID, -1, -1);
			} else {
				int endValueOffset = valueIndicate & VALUE_TYPE_INDICATOR_MARK;
				int startValueOffset;
				if (slotNum == 0) {
					// the first value starts right after the table of per-slot indicators
					startValueOffset = keyCount * Integer.BYTES;
				} else {
					startValueOffset = (GHashHeaderImpl.getEndOffsetBySlot(finalBB,
						valueSectionOffset,
						slotNum - 1)) & VALUE_TYPE_INDICATOR_MARK;
				}
				binaryValue = new BinaryValue(finalBB,
					gValueType,
					seqID,
					valueSectionOffset + startValueOffset,
					endValueOffset - startValueOffset);
			}
			return binaryValue;
		} catch (Exception e) {
			String debuginfo = "keyCount=" + keyCount + " ,indexLen=" + indexLen + " ,slotNum=" + slotNum + " ,baseValueOffset=" + baseValueOffset;
			debuginfo += " ,debugInfo=" + debugInfo();
			LOG.error(debuginfo, e);
			throw new GeminiRuntimeException("getBinaryValue has Exception. " + debuginfo, e);
		}
	}

	/** @return the serializer this map uses to read keys */
	@Override
	public TypeSerializer getKeyTypeSerializer() {
		return keyTypeSerializer;
	}

	@VisibleForTesting
	public  Map> toPOJOMap(TypeSerializer valueTypeSerializer) {
		Map> result = new HashMap<>();
		return toPOJOMap(result, valueTypeSerializer);
	}

	@VisibleForTesting
	public  Map> toPOJOMap(Map> result, TypeSerializer valueTypeSerializer) {
		try {
			int keyCount = keyCount();
			if (keyCount == 0) {
				return result;
			}
			int indexLen = indexCount();
			int baseKeyOffset = GHashHeaderImpl.getHeaderKeyOffset(data.getByteBuffer());
			int baseValueOffset = GHashHeaderImpl.getHeaderValueOffset(data.getByteBuffer());
			AtomicReference uncompressData = new AtomicReference<>();
			AtomicReference uncompressValueData = new AtomicReference<>();

			for (int i = 0; i < keyCount; i++) {
				K k = getKeyBySlot(keyCount, baseKeyOffset, baseValueOffset, i, uncompressData);
				BinaryValue binaryValue = getBinaryValue(keyCount, indexLen, baseValueOffset, i, uncompressValueData);
				if (binaryValue.getgValueType() == GValueType.Delete) {
					result.put(k, new GSValue<>(null, binaryValue.getgValueType(), binaryValue.getSeqID()));
				} else {
					DataInputView byteBufferDataInputView = new ByteBufferDataInputView(binaryValue.getBb(),
						binaryValue.getValueOffset(),
						binaryValue.getValueLen());
					V value = valueTypeSerializer.deserialize(byteBufferDataInputView);
					result.put(k, new GSValue<>(value, binaryValue.getgValueType(), binaryValue.getSeqID()));
				}
			}
			return result;
		} catch (Exception e) {
			throw new GeminiRuntimeException(e);
		}
	}

	/**
	 * Deserializes all keys of the page into a new {@link HashSet}.
	 *
	 * @return a new set with the deserialized keys
	 */
	public Set toPOJOSet() {
		Set keys = new HashSet<>();
		return toPOJOSet(keys);
	}

	/**
	 * Deserializes all keys of the page and adds them to {@code result}.
	 *
	 * @param result set that receives the keys
	 * @return {@code result}
	 * @throws GeminiRuntimeException when reading or deserializing a key fails
	 */
	public Set toPOJOSet(Set result) {
		try {
			final int keyCount = keyCount();
			if (keyCount != 0) {
				final int baseKeyOffset = GHashHeaderImpl.getHeaderKeyOffset(data.getByteBuffer());
				final int baseValueOffset = GHashHeaderImpl.getHeaderValueOffset(data.getByteBuffer());
				// holder lets a compressed key section be decompressed only once for the whole scan
				AtomicReference uncompressData = new AtomicReference<>();

				int slot = 0;
				while (slot < keyCount) {
					K k = getKeyBySlot(keyCount, baseKeyOffset, baseValueOffset, slot, uncompressData);
					result.add(k);
					slot++;
				}
			}
			return result;
		} catch (Exception e) {
			throw new GeminiRuntimeException(e);
		}
	}

	protected static  GByteBuffer genByteBuffer(
		DataPage.DataPageType dataPageType,
		List>> keyValueList,
		int indexLen,
		int totalKeys,
		GHashHeader gHeader,
		TypeSerializer keySerializer,
		TypeSerializer valueSerializer,
		long version,
		int logicPageId,
		Allocator allocator,
		long compactionCount,
		GCompressAlgorithm gCompressAlgorithm) {
		try {
			//sort List
			Collections.sort(keyValueList, Comparator.comparingInt((entry) -> entry.f0.hashCode() & (indexLen - 1)));

			byte[] headerAndIndex = new byte[gHeader.getHeaderAndIndexLen(indexLen, totalKeys)];
			ByteBuffer headerAndIndexBB = ByteBuffer.wrap(headerAndIndex);
			//init index
			gHeader.initIndex(headerAndIndexBB, indexLen);
			GByteArrayOutputStreamWithPos outputStreamForKey = new GByteArrayOutputStreamWithPos(1024);
			DataOutputViewStreamWrapper outputViewForKey = new DataOutputViewStreamWrapper(outputStreamForKey);
			int lastKeyPosition = totalKeys * Integer.BYTES;

			GByteArrayOutputStreamWithPos outputStreamForValue = new GByteArrayOutputStreamWithPos(1024);
			DataOutputViewStreamWrapper outputViewForValue = new DataOutputViewStreamWrapper(outputStreamForValue);
			int lastValuePosition = totalKeys * Integer.BYTES;

			Iterator>> iterator = keyValueList.iterator();
			int keyCursor = 0;
			while (iterator.hasNext()) {
				Tuple2> entry = iterator.next();
				int hashCode = entry.f0.hashCode();
				int indexSlot = hashCode & (indexLen - 1);
				long oldIndexValue = gHeader.getIndexBySlot(headerAndIndexBB, indexSlot);
				long newIndexValue = gHeader.getNewIndexValue(oldIndexValue, keyCursor);

				gHeader.writeIndexBySlot(headerAndIndexBB, indexSlot, newIndexValue);
				gHeader.writeHashCode(headerAndIndexBB, indexLen, keyCursor, hashCode);

				//write key
				outputStreamForKey.setPosition(lastKeyPosition);

				keySerializer.serialize(entry.f0, outputViewForKey);
				lastKeyPosition = outputStreamForKey.getPosition();

				//write key indicator
				outputStreamForKey.setPosition(keyCursor * Integer.BYTES);
				IntSerializer.INSTANCE.serialize(lastKeyPosition, outputViewForKey);

				GSValue gsValue = entry.f1;
				//write value
				if (gsValue.getValueType() != GValueType.Delete) {
					outputStreamForValue.setPosition(lastValuePosition);
					valueSerializer.serialize(gsValue.getValue(), outputViewForValue);
					lastValuePosition = outputStreamForValue.getPosition();
				}

				//write value indicator
				outputStreamForValue.setPosition(keyCursor * Integer.BYTES);
				int valueIndicator = (gsValue.getValueType().getCode() << VALUE_TYPE_INDICATOR_BITS) | (lastValuePosition & VALUE_TYPE_INDICATOR_MARK);
				IntSerializer.INSTANCE.serialize(valueIndicator, outputViewForValue);

				gHeader.writeSeqIDBytSlot(headerAndIndexBB, indexLen, totalKeys, gsValue.getSeqID(), keyCursor);

				keyCursor++;
			}
			outputStreamForKey.setPosition(lastKeyPosition);
			outputStreamForValue.setPosition(lastValuePosition);

			ByteBuffer keyBytes = ByteBuffer.wrap(outputStreamForKey.getBuf(), 0, lastKeyPosition);
			ByteBuffer valueBytes = ByteBuffer.wrap(outputStreamForValue.getBuf(), 0, lastValuePosition);
			ByteBuffer keyCompressed = tryCompress(gCompressAlgorithm, keyBytes);
			ByteBuffer valueCompressed = tryCompress(gCompressAlgorithm, valueBytes);

			//-1 means no compress
			if (keyCompressed != null) {
				GHashHeaderImpl.writeHeaderKeyUncompressSize(headerAndIndexBB, lastKeyPosition);
				//set
				keyBytes = keyCompressed;
				lastKeyPosition = keyBytes.limit();
			} else {
				GHashHeaderImpl.writeHeaderKeyUncompressSize(headerAndIndexBB, -1);
			}

			if (valueCompressed != null) {
				GHashHeaderImpl.writeHeaderValueUncompressSize(headerAndIndexBB, lastValuePosition);
				valueBytes = valueCompressed;
				lastValuePosition = valueBytes.limit();
			} else {
				GHashHeaderImpl.writeHeaderValueUncompressSize(headerAndIndexBB, -1);
			}

			//write Header
			GHashHeaderImpl.writeHeadPageType(headerAndIndexBB, dataPageType.getCode());
			GHashHeaderImpl.writeHeadPageID(headerAndIndexBB, logicPageId);
			GHashHeaderImpl.writeHeaderTotalKeyCount(headerAndIndexBB, totalKeys);
			GHashHeaderImpl.writeHeaderIndexCount(headerAndIndexBB, indexLen);
			GHashHeaderImpl.writeHeaderKeyOffset(headerAndIndexBB, headerAndIndex.length);
			GHashHeaderImpl.writeHeaderValueOffset(headerAndIndexBB, headerAndIndex.length + lastKeyPosition);
			GHashHeaderImpl.writeHeaderVersion(headerAndIndexBB, version);
			GHashHeaderImpl.writeHeaderCompressCode(headerAndIndexBB, gCompressAlgorithm.getCode());
			GHashHeaderImpl.writeHeaderStatCompactionCount(headerAndIndexBB, compactionCount);

			int newBufferLen = headerAndIndex.length + lastKeyPosition + lastValuePosition;

			GByteBuffer gByteBuffer = allocator.allocate(newBufferLen);

			//TODO reduce copy num. GByteBuffer support multi ByteBuffer.
			ByteBufferUtils.copyFromArrayToBuffer(gByteBuffer.getByteBuffer(),
				0,
				headerAndIndex,
				0,
				headerAndIndex.length);
			ByteBufferUtils.copyFromBufferToBuffer(keyBytes,
				gByteBuffer.getByteBuffer(),
				0,
				headerAndIndex.length,
				lastKeyPosition);
			ByteBufferUtils.copyFromBufferToBuffer(valueBytes,
				gByteBuffer.getByteBuffer(),
				0,
				headerAndIndex.length + lastKeyPosition,
				lastValuePosition);

			return gByteBuffer;
		} catch (Exception e) {
			throw new GeminiRuntimeException("GBinaryHashMap get exception: " + e.getMessage(), e);
		}
	}

	protected static GByteBuffer genByteBufferByBinary(
		DataPage.DataPageType dataPageType,
		List> keyValueList,
		int indexLen,
		int totalKeys,
		GHashHeader gHeader,
		long version,
		int logicPageId,
		Allocator allocator,
		long compactionCount,
		GCompressAlgorithm gCompressAlgorithm) {

		try {
			//sort List
			Collections.sort(keyValueList,
				Comparator.comparingInt((entry) -> entry.f0.getKeyhashCode() & (indexLen - 1)));

			byte[] headerAndIndex = new byte[gHeader.getHeaderAndIndexLen(indexLen, totalKeys)];
			ByteBuffer headerAndIndexBB = ByteBuffer.wrap(headerAndIndex);
			//init index
			gHeader.initIndex(headerAndIndexBB, indexLen);

			//dd
			GByteArrayOutputStreamWithPos keyByteStream = new GByteArrayOutputStreamWithPos(1024);
			int lastKeyPosition = totalKeys * Integer.BYTES;

			//dd
			GByteArrayOutputStreamWithPos valueByteStream = new GByteArrayOutputStreamWithPos(1024);
			int lastValuePosition = totalKeys * Integer.BYTES;

			Iterator> iterator = keyValueList.iterator();
			int keyCursor = 0;
			while (iterator.hasNext()) {
				Tuple2 entry = iterator.next();
				int hashCode = entry.f0.getKeyhashCode();
				int indexSlot = hashCode & (indexLen - 1);
				long oldIndexValue = gHeader.getIndexBySlot(headerAndIndexBB, indexSlot);
				long newIndexValue = gHeader.getNewIndexValue(oldIndexValue, keyCursor);

				gHeader.writeIndexBySlot(headerAndIndexBB, indexSlot, newIndexValue);
				gHeader.writeHashCode(headerAndIndexBB, indexLen, keyCursor, hashCode);

				//write key
				keyByteStream.setPosition(lastKeyPosition);
				keyByteStream.write(entry.f0.getBb(), entry.f0.getKeyOffset(), entry.f0.getKeyLen());
				lastKeyPosition = keyByteStream.getPosition();

				//write key indicator
				keyByteStream.setPosition(keyCursor * Integer.BYTES);
				keyByteStream.writeInt(lastKeyPosition);

				//write value
				if (entry.f1.getgValueType() != GValueType.Delete) {
					valueByteStream.setPosition(lastValuePosition);
					//TODO mapvalue will be null in same case.
					valueByteStream.write(entry.f1.getBb(), entry.f1.getValueOffset(), entry.f1.getValueLen());
					lastValuePosition = valueByteStream.getPosition();
				}

				//write value indicator
				valueByteStream.setPosition(keyCursor * Integer.BYTES);
				int valueIndicator = (entry.f1.getgValueType().getCode() << VALUE_TYPE_INDICATOR_BITS) | (lastValuePosition & VALUE_TYPE_INDICATOR_MARK);
				valueByteStream.writeInt(valueIndicator);

				gHeader.writeSeqIDBytSlot(headerAndIndexBB, indexLen, totalKeys, entry.f1.getSeqID(), keyCursor);

				keyCursor++;
			}
			keyByteStream.setPosition(lastKeyPosition);
			valueByteStream.setPosition(lastValuePosition);

			ByteBuffer keyBytes = ByteBuffer.wrap(keyByteStream.getBuf(), 0, lastKeyPosition);
			ByteBuffer valueBytes = ByteBuffer.wrap(valueByteStream.getBuf(), 0, lastValuePosition);
			ByteBuffer keyCompressed = tryCompress(gCompressAlgorithm, keyBytes);
			ByteBuffer valueCompressed = tryCompress(gCompressAlgorithm, valueBytes);

			//-1 means no compress
			if (keyCompressed != null) {
				GHashHeaderImpl.writeHeaderKeyUncompressSize(headerAndIndexBB, lastKeyPosition);
				//set
				keyBytes = keyCompressed;
				lastKeyPosition = keyBytes.limit();
			} else {
				GHashHeaderImpl.writeHeaderKeyUncompressSize(headerAndIndexBB, -1);
			}

			if (valueCompressed != null) {
				GHashHeaderImpl.writeHeaderValueUncompressSize(headerAndIndexBB, lastValuePosition);
				valueBytes = valueCompressed;
				lastValuePosition = valueBytes.limit();
			} else {
				GHashHeaderImpl.writeHeaderValueUncompressSize(headerAndIndexBB, -1);
			}

			//write Header
			GHashHeaderImpl.writeHeadPageType(headerAndIndexBB, dataPageType.getCode());
			GHashHeaderImpl.writeHeadPageID(headerAndIndexBB, logicPageId);
			GHashHeaderImpl.writeHeaderTotalKeyCount(headerAndIndexBB, totalKeys);
			GHashHeaderImpl.writeHeaderIndexCount(headerAndIndexBB, indexLen);
			GHashHeaderImpl.writeHeaderKeyOffset(headerAndIndexBB, headerAndIndex.length);
			GHashHeaderImpl.writeHeaderValueOffset(headerAndIndexBB, headerAndIndex.length + lastKeyPosition);
			GHashHeaderImpl.writeHeaderVersion(headerAndIndexBB, version);
			GHashHeaderImpl.writeHeaderCompressCode(headerAndIndexBB, gCompressAlgorithm.getCode());
			GHashHeaderImpl.writeHeaderStatCompactionCount(headerAndIndexBB, compactionCount);

			int newBufferLen = headerAndIndex.length + lastKeyPosition + lastValuePosition;

			GByteBuffer byteBuffer = allocator.allocate(newBufferLen);

			//TODO reduce copy num.
			ByteBufferUtils.copyFromArrayToBuffer(byteBuffer.getByteBuffer(),
				0,
				headerAndIndex,
				0,
				headerAndIndex.length);
			ByteBufferUtils.copyFromBufferToBuffer(keyBytes,
				byteBuffer.getByteBuffer(),
				0,
				headerAndIndex.length,
				lastKeyPosition);
			ByteBufferUtils.copyFromBufferToBuffer(valueBytes,
				byteBuffer.getByteBuffer(),
				0,
				headerAndIndex.length + lastKeyPosition,
				lastValuePosition);

			return byteBuffer;
		} catch (Exception e) {
			throw new GeminiRuntimeException("GBinaryHashMap get exception: " + e.getMessage(), e);
		}
	}

	/**
	 * Compresses {@code bytes} on a best-effort basis.
	 *
	 * <p>Compression is attempted only when the algorithm provides a codec and the input limit
	 * exceeds 1024 bytes. Failures are logged, including the exception, and reported as "not
	 * compressed" so that the caller stores the section uncompressed.
	 *
	 * @param algorithm compression algorithm
	 * @param bytes section to compress
	 * @return the compressed bytes, flipped for reading, or {@code null} when the section was
	 *     not compressed
	 */
	private static ByteBuffer tryCompress(GCompressAlgorithm algorithm, ByteBuffer bytes) {
		try {
			CompressorCodec compressorCodec = algorithm.getCompressorCodec();
			if (compressorCodec != null && bytes.limit() > 1024) {
				ByteBuffer result = ByteBuffer.allocate(compressorCodec.getMaxCompressedSize(bytes.limit()));
				int compressedLen = compressorCodec.compress(bytes, result);
				if (result.position() == compressedLen) {
					return (ByteBuffer) result.flip();
				}
				LOG.error("compress fail.");
			}
		} catch (Exception e) {
			// pass the exception to the logger so the cause and stack trace are kept
			LOG.error("compress has exception. bytes info=" + bytes.position() + "/" + bytes.limit() + "/" + bytes.capacity(), e);
		}
		return null;
	}

	/**
	 * Decompresses the region {@code [baseKeyOffset, baseValueOffset)} of {@code bytes}.
	 * Despite the parameter names, the same method is used for the key section and the value
	 * section.
	 *
	 * @param algorithm compression algorithm recorded in the page header
	 * @param uncompressSize expected size of the decompressed data
	 * @param bytes page buffer; only a duplicate is repositioned, the buffer itself is untouched
	 * @param baseKeyOffset start offset of the compressed region
	 * @param baseValueOffset end offset (exclusive) of the compressed region
	 * @return a new buffer holding the decompressed data
	 * @throws GeminiRuntimeException when decompression fails or the size does not match
	 */
	private static ByteBuffer tryUnCompress(
		GCompressAlgorithm algorithm, int uncompressSize, ByteBuffer bytes, int baseKeyOffset, int baseValueOffset) {
		try {
			ByteBuffer window = bytes.duplicate();
			// the limit is set before the position so the position is always within bounds
			window.limit(baseValueOffset);
			window.position(baseKeyOffset);
			ByteBuffer compressed = window.slice();
			ByteBuffer uncompressed = ByteBuffer.allocate(uncompressSize);
			int actualSize = algorithm.getCompressorCodec().decompress(compressed, uncompressed);
			if (actualSize == uncompressSize) {
				return uncompressed;
			}
			throw new GeminiRuntimeException("uncompress size not match: expected:" + uncompressSize + " ;real=" + actualSize);
		} catch (Exception e) {
			String msg = "uncompress has Exception: " + uncompressSize
				+ " ;endOffset=" + baseValueOffset
				+ " ;startOffset=" + baseKeyOffset
				+ " ;bytes.info=" + bytes.position() + "/" + bytes.limit() + "/" + bytes.capacity();
			LOG.error(msg, e);
			throw new GeminiRuntimeException(msg, e);
		}
	}

	/** @return the compression algorithm identified by the compress code in the page header */
	public GCompressAlgorithm getCompressAlgorithm() {
		return GCompressAlgorithm.valueOf(GHashHeaderImpl.getHeaderCompressCode(data.getByteBuffer()));
	}

	/** @return the uncompressed key-section size stored in the header; -1 means not compressed */
	public int getUncompressKeyPartSize() {
		final ByteBuffer page = data.getByteBuffer();
		return GHashHeaderImpl.getHeaderKeyUncompressSize(page);
	}

	/** @return the uncompressed value-section size stored in the header; -1 means not compressed */
	public int getUncompressValuePartSize() {
		final ByteBuffer page = data.getByteBuffer();
		return GHashHeaderImpl.getHeaderValueUncompressSize(page);
	}

	/** Calls {@code contain()} on the backing buffer; does nothing for an empty map. */
	public void addReferenceCount() {
		if (data == null) {
			return;
		}
		data.contain();
	}

	/**
	 * Calls {@code release(releaseType)} on the backing buffer; does nothing for an empty map.
	 *
	 * @param releaseType release type passed through to the buffer
	 */
	public void delReferenceCount(ReleaseType releaseType) {
		if (data == null) {
			return;
		}
		data.release(releaseType);
	}

	/** @return the checksum recorded when this map was constructed */
	public int getOriginChecksum() {
		return this.originCheckSum;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy