/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.page;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.GRegionID;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiShutDownException;
import org.apache.flink.runtime.state.gemini.engine.filter.StateFilter;
import org.apache.flink.runtime.state.gemini.engine.handler.GeminiEventExecutorTask;
import org.apache.flink.runtime.state.gemini.engine.handler.PageCompactHandler;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryKey;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBufferAddressMapping;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManager;
import org.apache.flink.runtime.state.gemini.engine.vm.DataPageLRU;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.NO_PAGE;
import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.WAIT_SPLITTING_PAGE;
import static org.apache.flink.util.Preconditions.checkState;

/**
 * General implementation of a page store, where the page index acts as a {@link PageIndexHashImpl}.
 */
public abstract class AbstractHashPageStore<K, V> implements PageStore<K, V> {
	private static final Logger LOG = LoggerFactory.getLogger(AbstractHashPageStore.class);
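	// New pages smaller than this size (in bytes) are not persisted immediately: a fresh page is
	// likely to be compacted away soon, so writing it out would often be wasted I/O (see doWriteDataToPage).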
	private static final int MIN_NEW_PAGE_SIZE_TO_PERSIST = 2048;
	//TODO provide HashCodePageIndex interface.
	protected final PageIndexHashImpl<K, V> pageIndex;
	protected final GRegionContext gRegionContext;
	protected final EventExecutor eventExecutor;
	protected final CacheManager cacheManager;
	protected final GContext gContext;
	protected final DataPage.DataPageType dataPageType;

	private final int splitPageSizeThreshold;
	private final int maxChainLenThreshold;
	protected final GRegion gRegion;
	private final PageCompactHandler pageCompactHandler;
	private final int inMemoryCompactionThreshold;
	private final int maxRunningMajorCompaction;
	private final int maxRunningMinorCompaction;
	protected final PageSerdeFlink<K, V> pageSerdeFlink;

	private final EventExecutor lruIntoMainEventExecutor;
	private long lastLruIntoMainCacheTimeMs = -1;
	private final int lruIntoMainCacheSleepMs;
	private final boolean enableAddIntoMainWhenSplitting;
	private final boolean enableLoadPageFromLRUIntoMainCache;

	protected final long curRegionMemHighMark;

	public AbstractHashPageStore(
		DataPage.DataPageType dataPageType,
		GRegion gRegion,
		EventExecutor eventExecutor) {
		this(dataPageType, gRegion, null, eventExecutor);
	}

	public AbstractHashPageStore(
		DataPage.DataPageType dataPageType,
		GRegion gRegion,
		@Nullable PageIndex pageIndex,
		EventExecutor eventExecutor) {

		this.dataPageType = dataPageType;
		this.gRegion = gRegion;
		this.gRegionContext = gRegion.getGRegionContext();
		this.eventExecutor = eventExecutor;
		GConfiguration configuration = gRegionContext.getGContext().getGConfiguration();
		if (pageIndex != null) {
			//TODO #SR rewrite this to use the interface.
			this.pageIndex = (PageIndexHashImpl<K, V>) pageIndex;
		} else {
			this.pageIndex = new PageIndexHashImpl<>(configuration, this, gRegionContext.getPageStoreStats());
		}
		this.gContext = gRegionContext.getGContext();
		this.cacheManager = this.gContext.getSupervisor().getCacheManager();
		gRegionContext.getPageStoreStats().setPageSizeRate(configuration.getPageSizeRateBetweenPOJOAndHeap());
		this.splitPageSizeThreshold = configuration.getSplitPageSizeThreshold();
		this.maxChainLenThreshold = configuration.getMaxCompactionThreshold();
		this.inMemoryCompactionThreshold = configuration.getInMemoryCompactionThreshold();
		this.maxRunningMajorCompaction = configuration.getMaxRunningMajorCompaction();
		this.maxRunningMinorCompaction = configuration.getMaxRunningMinorCompaction();
		this.pageSerdeFlink = (PageSerdeFlink<K, V>) gRegionContext.getPageSerdeFlink();
		this.lruIntoMainCacheSleepMs = configuration.getLruIntoMainCacheSleepMs();
		this.enableAddIntoMainWhenSplitting = configuration.isEnableAddIntoMainWhenSplitting();
		this.enableLoadPageFromLRUIntoMainCache = configuration.isEnableLoadPageFromLRUIntoMain();
		this.curRegionMemHighMark = cacheManager.getMemHighMark() / configuration.getRegionThreadNum();
		this.lruIntoMainEventExecutor = gContext.getSupervisor().getLruIntoMainCacheExecutorGroup().next();
		this.pageCompactHandler = new PageCompactHandler() {
			@Override
			public void doAsyncMajorCompaction(
				PageIndexContext pageIndexContext,
				LogicalPageChain logicalPageChain,
				int curPageIndex,
				int curChainIndex,
				long version) {
				doMajorCompaction(pageIndexContext, logicalPageChain, curPageIndex, curChainIndex, version);
			}

			@Override
			public void doAsyncMinorCompaction(
				PageIndexContext pageIndexContext,
				LogicalPageChain logicalPageChain,
				int curChainIndex,
				long version,
				boolean force) {
				doMinorCompaction(pageIndexContext, logicalPageChain, curChainIndex, version, force);
			}

			@Override
			public void doSyncReplace(
				LogicalPageChain logicalPageChain,
				int curPageIndex,
				int oldCompactedPageSize,
				int oldCompactedSubPageNum,
				int oldCompactedSubPageSize,
				long oldRequestCount,
				int inclusiveCompactionStartChainIndex,
				int inclusiveCompactionEndChainIndex,
				DataPage compactedDataPage,
				List<PageAddress> invalidPageAddressList,
				int relatedIndex) {
				doSyncReplaceLogicalPage(logicalPageChain,
					curPageIndex,
					oldCompactedPageSize,
					oldCompactedSubPageNum,
					oldCompactedSubPageSize,
					oldRequestCount,
					inclusiveCompactionStartChainIndex,
					inclusiveCompactionEndChainIndex,
					compactedDataPage,
					invalidPageAddressList,
					false,
					relatedIndex);
			}

			@Override
			public void doAsyncMinorCompactionByRead(
				PageIndexContext pageIndexContext,
				LogicalPageChain logicalPageChain,
				int curPageIndex,
				int curChainIndex,
				Map<Integer, DataPage> fetchedDataPageMap) {
				doMinorCompactionByRead(pageIndexContext, logicalPageChain, curPageIndex, curChainIndex, fetchedDataPageMap);
			}
		};
	}

	@Override
	public EventExecutor getExecutor() {
		return this.eventExecutor;
	}

	@Override
	public boolean contains(K key) {
		// for a common KV store, null means the key is not contained.
		return get(key) != null;
	}

	@Override
	public PageIndex getPageIndex() {
		return pageIndex;
	}

	public DataPage.DataPageType getDataPageType() {
		return dataPageType;
	}

	@Override
	public void addPage(PageIndexContext pageIndexContext, List<Map<K, GSValue<V>>> dataSet, long version) {
		LogicalPageChain currentLogicalPageChain = pageIndexContext.getLogicalPageChain();
		if (currentLogicalPageChain == NO_PAGE) {
			String msg = "BUG! addOrMergePage received a NO_PAGE request.";
			LOG.error(msg);
			throw new GeminiRuntimeException(msg);
		}
		if (dataSet == null || dataSet.isEmpty()) {
			if (!pageIndexContext.isNeedSplit()) {
				compactPage(pageIndexContext, version);
			}
		} else {
			doWriteDataToPage(pageIndexContext, dataSet, version);
		}
	}

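	// Compaction policy, as implemented below: once the page chain length reaches
	// maxChainLenThreshold, an asynchronous major compaction over the whole chain is launched
	// (falling back to a forced minor compaction when too many major compactions are already
	// running); if the length only exceeds inMemoryCompactionThreshold, a minor compaction over
	// the in-memory suffix of the chain is attempted instead.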
	@Override
	public void compactPage(PageIndexContext pageIndexContext, final long version) {
		try {
			LogicalPageChain logicalPageChain = pageIndexContext.getLogicalPageChain();
			final int curPageIndex = pageIndexContext.getPageIndexID();
			if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
				// compaction has already finished.
				return;
			}
			// no more pages to compact.
			if (logicalPageChain.getCurrentPageChainIndex() <= 0) {
				return;
			}

			if (!logicalPageChain.getPageStatus().canCompaction()) {
				return;
			}

			final int curChainIndex = logicalPageChain.getCurrentPageChainIndex();
			final LogicalPageChain compactionLogicalPageChain = logicalPageChain;
			//2. if needed, launch an asynchronous major compaction handler
			if (logicalPageChain.getCurrentPageChainIndex() >= maxChainLenThreshold) {
				if (logicalPageChain.getPageStatus().canCompaction()) {

					gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(1);

					if (cacheManager.getCacheStats().getRunningMajorCompactedPages() > maxRunningMajorCompaction) {
						gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);

						// try a minor compaction in force mode
						tryLaunchMinorCompaction(
							pageIndexContext,
							version,
							logicalPageChain,
							curChainIndex,
							compactionLogicalPageChain,
							true);
						return;
					}

					if (!logicalPageChain.compareAndSetStatus(PageStatus.Normal, PageStatus.Compacting)) {
						gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);
						return;
					}

					EventExecutor eventExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
					Set<DataPage> dataPages = compactionLogicalPageChain.getAllDataPageReferenced();
					if (gContext.isDBNormal() && !eventExecutor.isShuttingDown()) {
						eventExecutor.execute(new GeminiEventExecutorTask() {
							@Override
							public void cancel() {
								dataPages.forEach(dataPage -> dataPage.release());
							}

							@Override
							public void run() {
								try {
									pageCompactHandler.doAsyncMajorCompaction(
										pageIndexContext,
										compactionLogicalPageChain,
										curPageIndex,
										curChainIndex,
										version);
								} catch (GeminiShutDownException ignore) {
									LOG.debug("GeminiDB has shut down!", ignore);
								} catch (Exception e) {
									LOG.error("async major compaction failed", e);
								} finally {
									dataPages.forEach(dataPage -> dataPage.release());
								}
							}
						});
					} else {
						dataPages.forEach(dataPage -> dataPage.release());
					}
				}
			} else if (logicalPageChain.getCurrentPageChainIndex() > inMemoryCompactionThreshold) {
			// try to do a minor compaction, only involving pages resident in memory.
				tryLaunchMinorCompaction(
					pageIndexContext,
					version,
					logicalPageChain,
					curChainIndex,
					compactionLogicalPageChain,
					false);
			}

		} catch (Exception e) {
			LOG.error("Bug " + e.getMessage(), e);
			throw new GeminiRuntimeException(e);
		}
	}

	private void tryLaunchMinorCompaction(
		PageIndexContext pageIndexContext,
		long version,
		LogicalPageChain logicalPageChain,
		int curChainIndex,
		LogicalPageChain compactionLogicalPageChain,
		boolean force) {
		if (logicalPageChain.getPageStatus().canCompaction()) {

			gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(1);

			if (!force) {
				if (cacheManager.getCacheStats().getRunningMinorCompactedPages() > maxRunningMinorCompaction) {
					gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
					return;
				}
			}

			int startCompactionIndex = curChainIndex;
			int memCandidatePage = 0;
			long lastSumCompactedThreshold = -1;

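			// Walk from the newest page backwards, accumulating compaction counts. An older page is
			// included only while the accumulated count of the newer pages is at least its own count;
			// this appears to approximate size-tiered compaction, avoiding merging a few small new
			// pages into a much larger, already-compacted old one.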
			while (startCompactionIndex >= 0) {
				PageAddress pageAddress = logicalPageChain.getPageAddress(startCompactionIndex);
				// only for stats; no reference needed.
				DataPage dataPage = pageAddress.getDataPageNoReference();
				if (dataPage != null) {
					if (!force) {
						long compactedCount = dataPage.getCompactionCount();
						// heuristic to improve minor compaction; see the comment above the loop.
						if (lastSumCompactedThreshold == -1) {
							lastSumCompactedThreshold = compactedCount;
						} else if (lastSumCompactedThreshold >= compactedCount) {
							lastSumCompactedThreshold += compactedCount;
						} else {
							break;
						}
					}
					memCandidatePage++;
					startCompactionIndex--;
				} else {
					break;
				}
			}

			if (memCandidatePage <= inMemoryCompactionThreshold) {
				gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
				return;
			}

			if (!logicalPageChain.compareAndSetStatus(PageStatus.Normal, PageStatus.Compacting)) {
				gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
				return;
			}
			EventExecutor eventExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
			eventExecutor.execute(() -> {
				try {
					pageCompactHandler.doAsyncMinorCompaction(
						pageIndexContext,
						compactionLogicalPageChain,
						curChainIndex,
						version,
						force);
				} catch (GeminiShutDownException ignore) {
					LOG.debug("GeminiDB has shut down!", ignore);
				} catch (Exception e) {
					LOG.error("async minor compaction failed", e);
				}
			});
		}
	}

	private LogicalPageChain doSyncReplaceLogicalPage(
		LogicalPageChain logicalPageChain,
		int curPageIndex,
		int oldCompactedPageSize,
		int oldCompactedSubPageNum,
		int oldCompactedSubPageSize,
		long oldRequestCount,
		int inclusiveCompactionStartChainIndex,
		int inclusiveCompactionEndChainIndex,
		DataPage compactedDataPage,
		List invalidPageAddressList,
		boolean isSplit,
		int relatedIndex) {

		if (isSplit) {
			if (pageIndex.getLogicPage(curPageIndex) != WAIT_SPLITTING_PAGE) {
				if (compactedDataPage != null) {
					// compactedDataPage is not used; release the reference to reclaim memory.
					compactedDataPage.release();
				}
				return null;
			}
		} else {
			if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
				// compaction has already finished.
				if (compactedDataPage != null) {
					// compactedDataPage is not used; release the reference to reclaim memory.
					compactedDataPage.release();
				}
				return null;
			}
		}

		int compactedPageSize = 0;
		int compactedMemSize = 0;
		int compactedSubPageNum = 0;
		int compactedSubPageSize = 0;
		PageAddress compactedPageAddress = null;
		LogicalPageChain compactedLogicalPageChain = pageIndex.createLogicalPageChain();

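		// Rebuild the chain: keep the pages below the compacted range, insert the single compacted
		// page in their place, then re-append the pages that were added above the range.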
		for (int i = 0; i < inclusiveCompactionStartChainIndex; i++) {
			compactedLogicalPageChain.insertPage(logicalPageChain.getPageAddress(i));
		}
		if (compactedDataPage != null) {
			// Major compaction may produce a null page; it is never null for minor compaction,
			// because minor compaction keeps even data deleted via TTL or removeAll.
			compactedPageAddress = compactedLogicalPageChain.createPage(compactedDataPage);
			// NOTICE: use the PageAddress length so the total size is correct for a composite PageAddress.
			compactedPageSize = compactedPageAddress.getDataLen();
			// a map split may keep the subPages on disk.
			compactedMemSize = compactedPageAddress.getMemorySize();
			compactedSubPageNum = compactedPageAddress.getSubPageNum();
			compactedSubPageSize = compactedPageAddress.getSubPageDataLen();
			compactedPageAddress.addRequestCountForNewPage(cacheManager.getCurrentTickTime(),
				(int) (oldRequestCount & 0X7FFFFFFF));
		}
		for (int i = inclusiveCompactionEndChainIndex + 1; i <= logicalPageChain.getCurrentPageChainIndex(); i++) {
			compactedLogicalPageChain.insertPage(logicalPageChain.getPageAddress(i));
		}

		compactedLogicalPageChain.addPageSize(logicalPageChain.getPageSize() - oldCompactedPageSize + compactedPageSize);

		pageIndex.updateLogicPage(curPageIndex, compactedLogicalPageChain);

		int oldMemPageSize = syncGetMemPageSizeFromInvalidPageAddressList(invalidPageAddressList);

		// first, check whether any pageAddress is being reused.

		List<PageAddress> findRealNeedDiscardPage = findNeededDiscardPage(invalidPageAddressList, compactedPageAddress);
		// first, mark the pages as invalid.
		gContext.getSupervisor().discardPage(gRegionContext, findRealNeedDiscardPage);
		removeInvalidPage(gRegion, invalidPageAddressList);

		if (compactedPageAddress != null) {
			//write to dfs or local disk.
			gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, compactedPageAddress, compactedMemSize);
			compactedMemSize = compactedPageAddress.getMemorySize();
			if (compactedMemSize > 0) {
				this.cacheManager.getEvictPolicy().addPage(gRegion, compactedPageAddress);
			}
		}
		this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, compactedMemSize - oldMemPageSize);
		gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, compactedMemSize - oldMemPageSize);

		gRegionContext.getPageStoreStats().addLogicPageSize(compactedLogicalPageChain.getPageSize() - logicalPageChain.getPageSize());
		gRegionContext.getPageStoreStats().addLogicPageChainLen(compactedLogicalPageChain.getCurrentPageChainIndex() - logicalPageChain.getCurrentPageChainIndex());
		gRegionContext.getPageStoreStats().addLogicSubPageCount(compactedSubPageNum - oldCompactedSubPageNum);
		gRegionContext.getPageStoreStats().addLogicSubPageSize(compactedSubPageSize - oldCompactedSubPageSize);
		gRegionContext.getPageStoreStats().addLogicPageChainCapacity(compactedLogicalPageChain.getPageChainCapacity() - logicalPageChain.getPageChainCapacity());

		return compactedLogicalPageChain;
	}

	private void removeInvalidPage(
		GRegion gRegion, List<PageAddress> invalidPageAddressList) {
		for (PageAddress pageAddress : invalidPageAddressList) {
			this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, pageAddress);
		}
	}

	/**
	 * Fix #21430210: because compaction is asynchronous while eviction may be evicting pages,
	 * the memSize must be read on the 'sync' RegionThread.
	 */
	private int syncGetMemPageSizeFromInvalidPageAddressList(List<PageAddress> invalidPageAddressList) {
		return invalidPageAddressList.stream().map(PageAddress::getMemorySize).reduce(0, Integer::sum);
	}

	private void doMinorCompaction(
		PageIndexContext pageIndexContext,
		LogicalPageChain logicalPageChain,
		int curChainIndex,
		long version,
		boolean force) {
		final int curPageIndex = pageIndexContext.getPageIndexID();
		if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
			// compaction has already finished.
			gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
			return;
		}
		// get the dataPage list to compact.
		List<DataPage> canCompactPageListReversedOrder = new ArrayList<>();
		List<PageAddress> invalidPageAddressList = new ArrayList<>();
		int oldPageSize = 0;
		int oldSubPageNum = 0;
		int oldSubPageSize = 0;
		long oldRequestCount = 0;
		int startCompactionIndex = curChainIndex;
		long lastSumCompactedThreshold = -1;
		while (startCompactionIndex >= 0) {
			PageAddress pageAddress = logicalPageChain.getPageAddress(startCompactionIndex);
			DataPage dataPage = pageAddress.getDataPage();

			if (dataPage != null && dataPage.refCnt() > 1) {
				if (!force) {
					long compactedCount = dataPage.getCompactionCount();
					// heuristic to improve minor compaction (same tiering rule as in tryLaunchMinorCompaction).
					if (lastSumCompactedThreshold == -1) {
						lastSumCompactedThreshold = compactedCount;
					} else if (lastSumCompactedThreshold >= compactedCount) {
						lastSumCompactedThreshold += compactedCount;
					} else {
						dataPage.release();
						break;
					}
				}
				// NOTICE: use the PageAddress length so the total size is correct for a composite PageAddress.
				oldPageSize += pageAddress.getDataLen();
				oldSubPageNum += pageAddress.getSubPageNum();
				oldSubPageSize += pageAddress.getSubPageDataLen();
				canCompactPageListReversedOrder.add(dataPage);
				invalidPageAddressList.add(pageAddress);
				oldRequestCount += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
				startCompactionIndex--;
			} else {
				if (dataPage != null) {
					dataPage.release();
				}
				break;
			}
		}

		if (!gContext.isDBNormal()) {
			canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
			throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
		}

		if (canCompactPageListReversedOrder.size() <= inMemoryCompactionThreshold) {
			logicalPageChain.compareAndSetStatus(PageStatus.Compacting, PageStatus.Normal);
			gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
			canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
			return;
		}

		final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;

		gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());

		final DataPage compactedDataPage = doCompactPage(
			pageIndexContext,
			inclusiveCompactionStartChainIndex == 0,
			canCompactPageListReversedOrder,
			gContext.getCurVersion(),
			pageIndexContext.getPageIndexID());

		canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());

		final long finalOldRequestCount = oldRequestCount;
		final int finalOldPageSize = oldPageSize;
		final int finalOldSubPageNum = oldSubPageNum;
		final int finalOldSubPageSize = oldSubPageSize;
		if (!gContext.isDBNormal() || this.getExecutor().isShuttingDown()) {
			// compactedDataPage may be null (doCompactPage can return null); guard before releasing.
			if (compactedDataPage != null) {
				compactedDataPage.release();
			}
		} else {
			gRegionContext.getPageStoreStats().addRunningCompactingPageSize(compactedDataPage == null
				? 0
				: compactedDataPage.getSize());
			this.getExecutor().execute(new GeminiEventExecutorTask() {
				@Override
				public void cancel() {
					if (compactedDataPage != null) {
						compactedDataPage.release();
					}
				}

				@Override
				public void run() {
					try {
						pageCompactHandler.doSyncReplace(logicalPageChain,
							curPageIndex,
							finalOldPageSize,
							finalOldSubPageNum,
							finalOldSubPageSize,
							finalOldRequestCount,
							inclusiveCompactionStartChainIndex,
							curChainIndex,
							compactedDataPage,
							invalidPageAddressList,
							curPageIndex);
					} catch (GeminiShutDownException e) {
						if (compactedDataPage.refCnt() == 1) {
							compactedDataPage.release();
						}
						LOG.warn("GeminiDB has shut down!");
					} catch (Exception e) {
						LOG.error("Internal Bug", e);
						if (compactedDataPage.refCnt() == 1) {
							compactedDataPage.release();
						}
						gContext.setDBInternalError(e);
					} finally {
						gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
						gRegionContext.getPageStoreStats().addRunningCompactingPageSize(-(compactedDataPage == null
							? 0
							: compactedDataPage.getSize()));
					}
				}
			});
		}
	}

	private void doMajorCompaction(
		PageIndexContext pageIndexContext,
		LogicalPageChain logicalPageChain,
		int curPageIndex,
		int curChainIndex,
		long version) {
		if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
			// compaction has already finished.
			gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);
			return;
		}
		// get the dataPage list to compact.
		List<DataPage> dataPageListReversedOrder = new ArrayList<>();
		List<PageAddress> invalidPageAddressList = new ArrayList<>();

		long oldRequestCount = 0;
		int oldCompactedPageSize = 0;
		int oldCompactedSubPageNum = 0;
		int oldCompactedSubPageSize = 0;
		int cix = curChainIndex;
		// major compaction does not include pages appended after it was scheduled.
		while (cix >= 0 && gContext.isDBNormal()) {
			PageAddress pageAddress = logicalPageChain.getPageAddress(cix);
			DataPage dataPage = pageAddress.getDataPage();
			if (dataPage == null) {
				this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
				// FetchPolicy should be thread-safe, because we want compaction to go through the LRU as well.
				// NOTICE: if a compositePageAddress's mainDataPage is not in memory, all of its subDataPages
				// are guaranteed not to be in memory either, so we should prefetch them.
				GByteBuffer gByteBuffer = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
					logicalPageChain,
					pageIndexContext.getPageIndexID(),
					cix,
					this.gRegionContext,
					false,
					false);
				dataPage = boxDataPage(pageAddress, gByteBuffer, pageIndexContext.getPageIndexID(), pageIndexContext.getLogicalPageChain().hashCode());
			}
			// NOTICE: use the PageAddress length so the total size is correct for a composite PageAddress.
			oldCompactedPageSize += pageAddress.getDataLen();
			oldCompactedSubPageNum += pageAddress.getSubPageNum();
			oldCompactedSubPageSize += pageAddress.getSubPageDataLen();
			dataPageListReversedOrder.add(dataPage);
			invalidPageAddressList.add(pageAddress);
			oldRequestCount += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
			cix--;
		}
		if (!gContext.isDBNormal()) {
			dataPageListReversedOrder.forEach(dataPage -> dataPage.release());
			throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
		}

		if (dataPageListReversedOrder.isEmpty()) {
			throw new GeminiRuntimeException("BUG");
		}

		gRegionContext.getPageStoreStats().addMajorCompactedPages(dataPageListReversedOrder.size());

		final DataPage compactedDataPage = doCompactPage(
			pageIndexContext,
			true,
			dataPageListReversedOrder,
			gContext.getCurVersion(),
			curPageIndex);

		dataPageListReversedOrder.forEach(dataPage -> dataPage.release());

		final long finalOldRequestCount = oldRequestCount;
		final int finalOldCompactedPageSize = oldCompactedPageSize;
		final int finalOldCompactedSubPageNum = oldCompactedSubPageNum;
		final int finalOldCompactedSubPageSize = oldCompactedSubPageSize;

		if (!gContext.isDBNormal() || this.getExecutor().isShuttingDown()) {
			// compactedDataPage may be null after a major compaction; guard before releasing.
			if (compactedDataPage != null) {
				compactedDataPage.release();
			}
		} else {
			gRegionContext.getPageStoreStats().addRunningCompactingPageSize(compactedDataPage == null
				? 0
				: compactedDataPage.getSize());
			this.getExecutor().execute(new GeminiEventExecutorTask() {
				@Override
				public void cancel() {
					if (compactedDataPage != null) {
						compactedDataPage.release();
					}
				}

				@Override
				public void run() {
					try {
						pageCompactHandler.doSyncReplace(logicalPageChain,
							curPageIndex,
							finalOldCompactedPageSize,
							finalOldCompactedSubPageNum,
							finalOldCompactedSubPageSize,
							finalOldRequestCount,
							0,
							curChainIndex,
							compactedDataPage,
							invalidPageAddressList,
							curPageIndex);
					} catch (GeminiShutDownException e) {
						if (compactedDataPage.refCnt() == 1) {
							compactedDataPage.release();
						}
						LOG.warn("GeminiDB has shut down!");
					} catch (Exception e) {
						LOG.error("Internal Bug.", e);
						if (compactedDataPage.refCnt() == 1) {
							compactedDataPage.release();
						}
						gContext.setDBInternalError(e);
					} finally {
						gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);
						gRegionContext.getPageStoreStats().addRunningCompactingPageSize(-(compactedDataPage == null
							? 0
							: compactedDataPage.getSize()));
					}
				}
			});
		}
	}

	private void doMinorCompactionByRead(
		PageIndexContext pageIndexContext,
		LogicalPageChain logicalPageChain,
		int curPageIndex,
		int curChainIndex,
		Map<Integer, DataPage> fetchedDataPageMap) {
		if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
			// compaction has already finished.
			gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
			return;
		}
		// get the dataPage list to compact.
		List<DataPage> canCompactPageListReversedOrder = new ArrayList<>();
		List<PageAddress> invalidPageAddressList = new ArrayList<>();
		int oldCompactedPageSize = 0;
		int oldCompactedSubPageNum = 0;
		int oldCompactedSubPageSize = 0;
		long oldRequestCount = 0;
		int startCompactionIndex = curChainIndex;
		while (startCompactionIndex >= 0) {
			PageAddress pageAddress = logicalPageChain.getPageAddress(startCompactionIndex);
			DataPage dataPage = pageAddress.getDataPage();

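			// A page with refCnt <= 1 is presumably held only by its PageAddress and may be evicted
			// concurrently, so fall back to the copy fetched by the read path and retain it instead.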
			if (dataPage == null || dataPage.refCnt() <= 1) {
				dataPage = fetchedDataPageMap.get(startCompactionIndex);
				if (dataPage == null || dataPage.refCnt() <= 0) {
					break;
				}
				dataPage.retain();
			}

			// NOTICE: use the PageAddress length so the total size is correct for a composite PageAddress.
			oldCompactedPageSize += pageAddress.getDataLen();
			oldCompactedSubPageNum += pageAddress.getSubPageNum();
			oldCompactedSubPageSize += pageAddress.getSubPageDataLen();
			canCompactPageListReversedOrder.add(dataPage);
			invalidPageAddressList.add(pageAddress);
			oldRequestCount += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
			startCompactionIndex--;
		}

		if (!gContext.isDBNormal()) {
			canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
			throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
		}

		if (canCompactPageListReversedOrder.size() < 2) {
			logicalPageChain.compareAndSetStatus(PageStatus.Compacting, PageStatus.Normal);
			gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
			canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
			return;
		}

		final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;

		gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());

		DataPage compactedDataPage = doCompactPage(
			pageIndexContext,
			inclusiveCompactionStartChainIndex == 0,
			canCompactPageListReversedOrder,
			gContext.getCurVersion(),
			curPageIndex);

		canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());

		final long finalOldRequestCount = oldRequestCount;
		final int finalOldCompactedPageSize = oldCompactedPageSize;
		final int finalOldCompactedSubPageNum = oldCompactedSubPageNum;
		final int finalOldCompactedSubPageSize = oldCompactedSubPageSize;
		Set<DataPage> dataPages = logicalPageChain.getAllDataPageReferenced();
		if (gContext.isDBNormal() && !this.getExecutor().isShuttingDown()) {
			gRegionContext.getPageStoreStats().addRunningCompactingPageSize(compactedDataPage == null
				? 0
				: compactedDataPage.getSize());
			this.getExecutor().execute(new GeminiEventExecutorTask() {
				@Override
				public void cancel() {
					dataPages.forEach(dataPage -> dataPage.release());
					if (compactedDataPage != null) {
						compactedDataPage.release();
					}
				}

				@Override
				public void run() {
					try {
						pageCompactHandler.doSyncReplace(logicalPageChain,
							curPageIndex,
							finalOldCompactedPageSize,
							finalOldCompactedSubPageNum,
							finalOldCompactedSubPageSize,
							finalOldRequestCount,
							inclusiveCompactionStartChainIndex,
							curChainIndex,
							compactedDataPage,
							invalidPageAddressList,
							curPageIndex);
					} catch (GeminiShutDownException e) {
						LOG.warn("GeminiDB has shut down!");
					} catch (Exception e) {
						LOG.error("Internal Bug", e);
						gContext.setDBInternalError(e);
					} finally {
						gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
						gRegionContext.getPageStoreStats().addRunningCompactingPageSize(-(compactedDataPage == null
							? 0
							: compactedDataPage.getSize()));
						dataPages.forEach(dataPage -> dataPage.release());
					}
				}
			});
		} else {
			dataPages.forEach(dataPage -> dataPage.release());
		}
	}

	@Override
	public void splitPage(PageIndexContext pageIndexContext) {
		LogicalPageChain currentLogicPage = pageIndexContext.getLogicalPageChain();

		PageIndexContextHashImpl uPageIndexContext = (PageIndexContextHashImpl) pageIndexContext;

		int curBucketNum = uPageIndexContext.getCurBucketNum();
		int curIndex = uPageIndexContext.getCurIndex();

		//fix fast split bug: GRegionKMapTest#testSimplePutGetRemove
		//for example, suppose curBucketNum is 4 and the page index is 1.
		//buckets then expand from 4 to 8, but page 1 has not been split yet.
		//then buckets expand from 8 to 16.
		//page 1 should first split into pages 1 and 5 with 8 buckets,
		//and then 1 splits into 1 and 9, and 5 splits into 5 and 13, with 16 buckets; finally we get pages 1/5/9/13 in 16 buckets.
		//but if we saw page 1 with 16 buckets (some key just falls into page 1) and split it directly into 1 and 9, that would be wrong.
		//in short, a page can only be split step by step; we can't use uPageIndexContext to decide the split step.
		curBucketNum = pageIndex.getBucketNumASPageFinishSplit(curBucketNum, curIndex);

		int destIndex = curBucketNum + curIndex;

		if (pageIndex.getLogicPage(destIndex) != WAIT_SPLITTING_PAGE || pageIndex.getLogicPage(curIndex) != currentLogicPage) {
			// splitting has already finished.
			return;
		}

		// get the dataPage list to compact.
		List<DataPage> dataPageListReversedOrder = new ArrayList<>();
		List<PageAddress> invalidPageAddressList = new ArrayList<>();

		long oldRequestNum = 0;
		int oldCompactedPageSize = 0;
		int oldCompactedSubPageNum = 0;
		int oldCompactedSubPageSize = 0;
		int oldMemPageSize = 0;
		int cix = currentLogicPage.getCurrentPageChainIndex();
		while (cix >= 0 && gContext.isDBNormal()) {
			PageAddress pageAddress = currentLogicPage.getPageAddress(cix);
			DataPage dataPage = pageAddress.getDataPage();
			if (dataPage == null) {
				this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
				GByteBuffer gByteBuffer = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
					currentLogicPage,
					pageIndexContext.getPageIndexID(),
					cix,
					this.gRegionContext,
					false,
					false);
				dataPage = boxDataPage(pageAddress, gByteBuffer, pageIndexContext.getPageIndexID(), pageIndexContext.getLogicalPageChain().hashCode());
			}

			oldMemPageSize += pageAddress.getMemorySize();
			// NOTICE: use the PageAddress length so the total size is correct for a composite PageAddress.
			oldCompactedPageSize += pageAddress.getDataLen();
			oldCompactedSubPageNum += pageAddress.getSubPageNum();
			oldCompactedSubPageSize += pageAddress.getSubPageDataLen();
			dataPageListReversedOrder.add(dataPage);
			invalidPageAddressList.add(pageAddress);
			oldRequestNum += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
			cix--;
		}
		if (!gContext.isDBNormal()) {
			dataPageListReversedOrder.forEach(dataPage -> dataPage.release());
			throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
		}

		if (dataPageListReversedOrder.isEmpty()) {
			return;
		}
		DataPage mergeDataPage = doCompactPage(
			pageIndexContext,
			true,
			dataPageListReversedOrder,
			gContext.getCurVersion(),
			pageIndexContext.getPageIndexID());

		// dec reference; we could do this during doCompactPage.
		dataPageListReversedOrder.forEach(dataPage -> dataPage.release());

		Tuple2<DataPage, DataPage> splitDataPages = mergeDataPage == null
			? new Tuple2<>(null, null)
			: mergeDataPage.split(
				pageIndexContext,
				curBucketNum,
				curIndex,
				gContext.getSupervisor().getAllocator(),
				gContext.getInPageGCompressAlgorithm(),
				this.gRegionContext);

		if (mergeDataPage != null) {
			//this will reclaim mergeDataPage's memory.
			mergeDataPage.release();
		}

		if (splitDataPages.f1 == null && splitDataPages.f0 != null) {
			// just like doing a compaction
			doSyncReplaceLogicalPage(currentLogicPage,
				curIndex,
				oldCompactedPageSize,
				oldCompactedSubPageNum,
				oldCompactedSubPageSize,
				oldRequestNum,
				0,
				currentLogicPage.getCurrentPageChainIndex(),
				splitDataPages.f0,
				invalidPageAddressList,
				false,
				destIndex);
			pageIndex.updateLogicPage(destIndex, NO_PAGE);
			return;
		} else if (splitDataPages.f0 == null && splitDataPages.f1 != null) {
			// just like doing a compaction plus moving the page
			doSyncReplaceLogicalPage(currentLogicPage,
				destIndex,
				oldCompactedPageSize,
				oldCompactedSubPageNum,
				oldCompactedSubPageSize,
				oldRequestNum,
				0,
				currentLogicPage.getCurrentPageChainIndex(),
				splitDataPages.f1,
				invalidPageAddressList,
				true,
				curIndex);
			pageIndex.updateLogicPage(curIndex, NO_PAGE);
			return;
		} else if (splitDataPages.f0 == null && splitDataPages.f1 == null) {
			pageIndex.updateLogicPage(destIndex, NO_PAGE);
			pageIndex.updateLogicPage(curIndex, NO_PAGE);

			// first, mark the pages as invalid.
			gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);
			removeInvalidPage(gRegion, invalidPageAddressList);
			// even if nothing is evicted this time, still try to prepare a flush.
			this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, 0);

			// NO_PAGE is set here, so all related statistics just need to be reduced directly.
			gRegionContext.getPageStoreStats().addLogicPageCount(-1);
			gRegionContext.getPageStoreStats().addLogicPageChainLen(0 - currentLogicPage.getCurrentPageChainIndex() - 1);
			gRegionContext.getPageStoreStats().addLogicPageChainCapacity(0 - currentLogicPage.getPageChainCapacity());

			gRegionContext.getPageStoreStats().addLogicPageSize(0 - currentLogicPage.getPageSize());
			gRegionContext.getPageStoreStats().addLogicSubPageCount(0 - currentLogicPage.getSubPageNum());
			gRegionContext.getPageStoreStats().addLogicSubPageSize(0 - currentLogicPage.getSubPageSize());
			gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, 0 - oldMemPageSize);

			return;
		}

		LogicalPageChain pageSplit1 = pageIndex.createLogicalPageChain();
		LogicalPageChain pageSplit2 = pageIndex.createLogicalPageChain();

		PageAddress pageAddressSplit1 = pageSplit1.createPage(splitDataPages.f0);
		PageAddress pageAddressSplit2 = pageSplit2.createPage(splitDataPages.f1);

		pageAddressSplit1.addRequestCountForNewPage(cacheManager.getCurrentTickTime(),
			(int) ((oldRequestNum / 2) & 0X7FFFFFFF));
		pageAddressSplit2.addRequestCountForNewPage(cacheManager.getCurrentTickTime(),
			(int) ((oldRequestNum - oldRequestNum / 2) & 0X7FFFFFFF));

		pageSplit1.addPageSize(pageAddressSplit1.getDataLen());
		pageSplit2.addPageSize(pageAddressSplit2.getDataLen());

		// first, install the expanded page.
		pageIndex.updateLogicPage(destIndex, pageSplit2);
		pageIndex.updateLogicPage(curIndex, pageSplit1);

		List<PageAddress> findRealNeedDiscardPage = findNeededDiscardPage(invalidPageAddressList,
			pageAddressSplit1,
			pageAddressSplit2);
		// first, mark the pages as invalid.
		gContext.getSupervisor().discardPage(gRegionContext, findRealNeedDiscardPage);

		//TODO some prepared-flush pages that are reused will be wasted, but it's a rare case.
		removeInvalidPage(gRegion, invalidPageAddressList);

		//write to dfs or local disk.
		gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, pageAddressSplit1,
			pageAddressSplit1.getMemorySize());
		//write to dfs or local disk.
		gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, pageAddressSplit2,
			pageAddressSplit2.getMemorySize());

		this.cacheManager.getEvictPolicy().addPage(gRegion, pageAddressSplit1);
		this.cacheManager.getEvictPolicy().addPage(gRegion, pageAddressSplit2);
		this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion,
			pageAddressSplit1.getMemorySize() + pageAddressSplit2.getMemorySize() - oldMemPageSize);

		gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion,
			pageAddressSplit1.getMemorySize() + pageAddressSplit2.getMemorySize() - oldMemPageSize);

		gRegionContext.getPageStoreStats().addLogicPageCount(1);
		gRegionContext.getPageStoreStats().addLogicPageChainLen(2 - currentLogicPage.getCurrentPageChainIndex() - 1);
		gRegionContext.getPageStoreStats().addLogicPageChainCapacity(pageSplit1.getPageChainCapacity() + pageSplit2.getPageChainCapacity() - currentLogicPage.getPageChainCapacity());

		gRegionContext.getPageStoreStats().addLogicPageSize(pageSplit2.getPageSize() + pageSplit1.getPageSize() - currentLogicPage.getPageSize());
		gRegionContext.getPageStoreStats().addLogicSubPageCount(pageSplit2.getSubPageNum() + pageSplit1.getSubPageNum() - currentLogicPage.getSubPageNum());
		gRegionContext.getPageStoreStats().addLogicSubPageSize(pageSplit2.getSubPageSize() + pageSplit1.getSubPageSize() - currentLogicPage.getSubPageSize());
	}

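	/**
	 * Filters the invalid page list down to the addresses that really must be discarded:
	 * subPages of an old composite page that are reused by one of the new pages stay alive,
	 * while everything else (including composite main pages) is discarded.
	 */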
	private List<PageAddress> findNeededDiscardPage(
		List<PageAddress> invalidPageAddressList, PageAddress... newPageAddress) {
		if (newPageAddress == null || newPageAddress.length == 0) {
			return invalidPageAddressList;
		} else if (newPageAddress.length == 1 && newPageAddress[0] instanceof PageAddressSingleImpl) {
			return invalidPageAddressList;
		}

		Map<PageAddressSingleImpl, PageAddress> newSubPageMap = new HashMap<>();
		for (PageAddress pageAddress : newPageAddress) {
			if (pageAddress instanceof PageAddressCompositeImpl) {
				// only subPages are included, because only subPages can be reused.
				PageAddress[] subPages = ((PageAddressCompositeImpl) pageAddress).getSubPageAddress();
				for (PageAddress pageAddressSub : subPages) {
					newSubPageMap.put((PageAddressSingleImpl) pageAddressSub, pageAddress);
				}
			}
		}
		if (newSubPageMap.size() == 0) {
			return invalidPageAddressList;
		}
		List<PageAddress> realNeedInvalidPageList = new ArrayList<>();
		for (PageAddress invalidPageAddress : invalidPageAddressList) {
			if (invalidPageAddress instanceof PageAddressCompositeImpl) {
				PageAddressCompositeImpl invalidPageComposite = (PageAddressCompositeImpl) invalidPageAddress;
				//always add main page.
				realNeedInvalidPageList.add(invalidPageComposite.getMainPageAddress());
				for (PageAddress singlePage : invalidPageComposite.getSubPageAddress()) {
					if (!newSubPageMap.containsKey(singlePage)) {
						realNeedInvalidPageList.add(singlePage);
					}
				}
			} else {
				realNeedInvalidPageList.add(invalidPageAddress);
			}
		}
		return realNeedInvalidPageList;
	}

	@Override
	public void mergePage(PageIndexContext pageIndexContextFirst, PageIndexContext pageIndexContextSecond) {
		PageIndexContextHashImpl uPageIndexContextFirst = (PageIndexContextHashImpl) pageIndexContextFirst;
		int curIndexFirst = uPageIndexContextFirst.getCurIndex();
		if (uPageIndexContextFirst.getLogicalPageChain() != pageIndex.getLogicPage(curIndexFirst)) {
			// merge has already finished.
			return;
		}
		//TODO in the future, when the index shrinks.
	}

	private void doWriteDataToPage(
		PageIndexContext pageIndexContext, List<Map<K, GSValue<V>>> dataSet, long version) {
		// write data to a delta page; the mechanism guarantees the dataSet has a single version.
		LogicalPageChain currentLogicPageID = pageIndexContext.getLogicalPageChain();
		long newRequestCount = getRequestCount(dataSet);

		//add new delta page.
		DataPage newDataPage = createDataPage(version, dataSet, pageIndexContext.getPageIndexID());

		if (newDataPage == null) {
			LOG.warn("doWriteDataToPage received an empty value");

		} else {
			PageAddress pageAddress = helpAddDataPage(currentLogicPageID, newRequestCount, newDataPage);

			// NOTICE: use the PageAddress length so the total size is correct for a composite PageAddress.
			int dataSize = pageAddress.getDataLen();
			int memSize = dataSize;
			// a new page is likely to be compacted soon and become invalid, so we don't persist small new pages.
			if (dataSize > MIN_NEW_PAGE_SIZE_TO_PERSIST) {
				gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, pageAddress, dataSize);
				memSize = pageAddress.getMemorySize();
			}
			currentLogicPageID.addPageSize(dataSize);
			this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, memSize);
			gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, memSize);

			gRegionContext.getPageStoreStats().addLogicPageSize(dataSize);
			gRegionContext.getPageStoreStats().addLogicSubPageCount(pageAddress.getSubPageNum());
			gRegionContext.getPageStoreStats().addLogicSubPageSize(pageAddress.getSubPageDataLen());
			gRegionContext.getPageStoreStats().addPageRequestCount(newRequestCount);
			gRegionContext.getPageStoreStats().addPage();
		}

		//try to compact page
		if (!pageIndexContext.isNeedSplit()) {
			compactPage(pageIndexContext, version);
		}
	}

	private PageAddress helpAddDataPage(LogicalPageChain currentLogicPageID, long newRequestCount, DataPage dataPage) {
		int oldChainCapacity = currentLogicPageID.getPageChainCapacity();
		PageAddress result = currentLogicPageID.createPage(dataPage);
		int changeCapacity = currentLogicPageID.getPageChainCapacity() - oldChainCapacity;
		result.addRequestCountForNewPage(cacheManager.getCurrentTickTime(), (int) (newRequestCount & 0X7FFFFFFF));
		gRegionContext.getPageStoreStats().addLogicPageChainLen(1);
		gRegionContext.getPageStoreStats().addLogicPageChainCapacity(changeCapacity);
		return result;
	}

	@Override
	public void checkResource() {
		if (cacheManager.forbidIndexExpand()) {
			LOG.debug("cacheManager forbids index expansion.");
			return;
		}
		if (gRegionContext.getPageStoreStats().getLogicPageCount() == 0) {
			LOG.debug("no page here");
			return;
		}
		int indexCap = gRegionContext.getPageStoreStats().getIndexCapacity();

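		// Expand only when the index is at least 75% occupied, i.e. free slots are at most 25% of
		// the capacity. For example, with indexCap = 1024 the expansion check passes once the
		// logic page count reaches 768.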
		if ((indexCap - gRegionContext.getPageStoreStats().getLogicPageCount()) * 4 > indexCap) {
			LOG.debug("page count {} is below 75% of index capacity {}; not expanding the index",
				gRegionContext.getPageStoreStats().getLogicPageCount(),
				gRegionContext.getPageStoreStats().getIndexCapacity());
			return;
		}

		// for skewed pages
		long validPageSize = gRegionContext.getPageStoreStats().getLogicPageSize() - gRegionContext.getHugePageTotalSize() - gRegionContext.getPageStoreStats().getLogicSubPageSize();
		int validPageNum = gRegionContext.getPageStoreStats().getLogicPageCount() - gRegionContext.getHugePageMapCount();
		int averagePageSize;
		if (validPageSize <= 0 || validPageNum <= 0 || gRegionContext.getHugePageMapCount() * 2 >= gRegionContext.getPageStoreStats().getLogicPageCount()) {
			// too many huge pages
			averagePageSize = (int) ((gRegionContext.getPageStoreStats().getLogicPageSize() - gRegionContext.getPageStoreStats().getLogicSubPageSize()) / gRegionContext.getPageStoreStats().getLogicPageCount());
		} else {
			averagePageSize = (int) (validPageSize / validPageNum);
		}
		if (averagePageSize >= splitPageSizeThreshold) {
			pageIndex.expand();
			LOG.info("averagePageSize {}, splitPageSizeThreshold {}, logicPageSize {}, hugePageTotalSize {}, logicSubPageSize {}, logicPageCount {}, hugePageMapCount {}, to expand index up to {}",
				averagePageSize,
				splitPageSizeThreshold,
				gRegionContext.getPageStoreStats().getLogicPageSize(),
				gRegionContext.getHugePageTotalSize(),
				gRegionContext.getPageStoreStats().getLogicSubPageSize(),
				gRegionContext.getPageStoreStats().getLogicPageCount(),
				gRegionContext.getHugePageMapCount(),
				gRegionContext.getPageStoreStats().getIndexCapacity());
		}
	}

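	/**
	 * Called from the read path with the pages it had to fetch from disk. If the page chain is
	 * long enough, the fetched pages are reused to launch a minor "compaction by read"; otherwise
	 * they become candidates for promotion from the LRU cache into the main cache.
	 */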
	protected void tryLaunchCompactionByRead(
		PageIndexContext pageIndexContext,
		LogicalPageChain logicalPageChain,
		Map<Integer, DataPage> fetchedDataPageMap) {
		boolean releaseFetchMap = true;
		try {
			if (logicalPageChain.getCurrentPageChainIndex() > inMemoryCompactionThreshold) {
				if (logicalPageChain.getPageStatus().canCompaction()) {
					gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(1);
					if (cacheManager.getCacheStats().getRunningMinorCompactionByRead() > maxRunningMinorCompaction) {
						gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
					} else {
						if (!logicalPageChain.compareAndSetStatus(PageStatus.Normal, PageStatus.Compacting)) {
							gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
							return;
						}
						final int curChainIndex = logicalPageChain.getCurrentPageChainIndex();
						final int curPageIndex = pageIndexContext.getPageIndexID();
						EventExecutor eventExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
						releaseFetchMap = false;
						eventExecutor.execute(new GeminiEventExecutorTask() {
							@Override
							public void cancel() {
								fetchedDataPageMap.values().forEach(dataPage -> dataPage.release());
							}

							@Override
							public void run() {
								try {
									pageCompactHandler.doAsyncMinorCompactionByRead(pageIndexContext,
										logicalPageChain,
										curPageIndex,
										curChainIndex,
										fetchedDataPageMap);
								} catch (GeminiShutDownException e) {
									LOG.debug("GeminiDB has shut down!", e);
								} catch (Exception e) {
									LOG.error("async minor compaction by read failed", e);
								} finally {
									fetchedDataPageMap.values().forEach(dataPage -> dataPage.release());
								}
							}
						});
					}
				}
			} else if (fetchedDataPageMap.size() > 0) {
				if (!enableLoadPageFromLRUIntoMainCache) {
					return;
				}

				if (lruIntoMainCacheSleepMs != -1 && System.currentTimeMillis() - lastLruIntoMainCacheTimeMs < lruIntoMainCacheSleepMs) {
					return;
				}

				lastLruIntoMainCacheTimeMs = System.currentTimeMillis();
				if (cacheTooFull(0)) {
					LOG.warn("Cannot add page into the main cache because the cache is full.");
					return;
				}
				lruIntoMainEventExecutor.execute(() -> fetchPageFromLRUCacheToPageStore());
			}
		} finally {
			if (releaseFetchMap) {
				fetchedDataPageMap.values().forEach(dataPage -> dataPage.release());
			}
		}

	}

	protected DataPage doCompactPageForStructureValue(
		PageIndexContext pageIndexContext,
		boolean isMajor,
		List<DataPage> canCompactPageListReversedOrder,
		long version,
		int logicPageId) {
		List<GBinaryHashMap<K>> compactionListReversedOrder = new ArrayList<>();
		for (DataPage dataPage : canCompactPageListReversedOrder) {
			compactionListReversedOrder.add(dataPage.getGBinaryHashMap());
		}

		int index = compactionListReversedOrder.size() - 1;
		// each value list is kept in chronological (oldest-to-newest) order.
		Map<BinaryKey, List<BinaryValue>> newMap = new HashMap<>(compactionListReversedOrder.get(index).keyCount());
		long compactionCount = 0;
		StateFilter stateFilter = gRegionContext.getGContext().getStateFilter();
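		// Replay the pages from oldest (index = size - 1) to newest (index = 0), so that newer
		// entries for a key override or append to the older ones.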
		while (index >= 0) {
			GBinaryHashMap<K> gBinaryHashMap = compactionListReversedOrder.get(index);
			for (Map.Entry<BinaryKey, BinaryValue> entry : gBinaryHashMap.getBinaryMap().entrySet()) {
				// NOTE: we must filter here for the list page store, because lists will not be filtered in doCompactValue
				if (isMajor && stateFilter != null && stateFilter.filter(gRegionContext, entry.getValue().getSeqID())) {
					continue;
				}
				if (entry.getValue().getGValueType() == GValueType.Delete) {
					if (isMajor) {
						newMap.remove(entry.getKey());
					} else {
						newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
					}
				} else if (entry.getValue().getGValueType() == GValueType.PutMap || entry.getValue().getGValueType() == GValueType.PutList) {
					newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
				} else {
					if (newMap.containsKey(entry.getKey())) {
						newMap.get(entry.getKey()).add(entry.getValue());
					} else {
						newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
					}
				}
			}

			compactionCount += compactionListReversedOrder.get(index).getCompactionCount();
			index--;
		}

		Map<BinaryKey, BinaryValue> finalCompactedMap = new HashMap<>(newMap.size());

		//TODO a pageStore that is not a KMap needn't construct the GBufferAddressMapping object
		GBufferAddressMapping pageMapping = new GBufferAddressMapping(this.gRegionContext, pageIndexContext.getPageIndexID(), pageIndexContext.getLogicalPageChain().hashCode());
		// compact the values
		for (Map.Entry<BinaryKey, List<BinaryValue>> entry : newMap.entrySet()) {
			if (entry.getValue().size() == 0) {
				GeminiRuntimeException e = new GeminiRuntimeException("Internal Bug!");
				//Internal Bug should stop job.
				gContext.setDBInternalError(e);
				throw e;
			}
			BinaryValue compactedBinaryValue;
			if (entry.getValue().size() == 1 && !isMajor && !isAllowSubPage()) {
				// for a major compaction, even a single binary value must be compacted to remove deleted records.
				compactedBinaryValue = entry.getValue().get(0);
			} else {
				compactedBinaryValue = doCompactValue(entry.getValue(), isMajor, version, logicPageId, pageMapping);
			}
			finalCompactedMap.put(entry.getKey(), compactedBinaryValue);
		}

		//TODO null should be handled by PageStore
		return doBuildDataPageFromGBinaryMap(isMajor,
			version,
			logicPageId,
			this.pageSerdeFlink.getKeySerde(),
			finalCompactedMap,
			compactionCount,
			pageMapping);
	}

	protected boolean isAllowSubPage() {
		return false;
	}

	protected DataPage getDataPageAutoLoadIfNeed(
		K key,
		LogicalPageChain logicalPageChain,
		int logicalPageChainIndex,
		int curIndex,
		Map<Integer, DataPage> fetchedDataPageMap) {
		PageAddress pageAddress = logicalPageChain.getPageAddress(curIndex);
		DataPage dataPage = pageAddress.getDataPage();
		if (dataPage == null) {
			cacheManager.getCacheStats().addPageCacheMissCount();
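			// The bloom filter can prove that a key is absent from a page, which avoids a disk
			// fetch for keys that are definitely not there.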
			if (!gContext.getSupervisor().getBloomFilterManager().mightContain(pageAddress, key.hashCode())) {
				cacheManager.getCacheStats().addBloomFilterHitCount();
				return null;
			}
			GByteBuffer gByteBuffer = gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
				logicalPageChain,
				logicalPageChainIndex,
				curIndex,
				gRegionContext,
				gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
				true);

			dataPage = boxDataPage(pageAddress, gByteBuffer, logicalPageChainIndex, logicalPageChain.hashCode());

			//TODO to reuse the dataPage, it needs to be sent to the handler.
			fetchedDataPageMap.put(curIndex, dataPage);
			dataPage.retain();
		} else {
			cacheManager.getCacheStats().addPageCacheHitCount();
		}
		return dataPage;
	}

	@Override
	public void allKeysIncludeDeleted(Set<K> allKeysIncludeDelete) {
		// as we know, removeAll happens after getAll in a mini-batch (KeyedBundleOperator), so
		// there is no need to update the read cache or trigger a compaction here
		LogicalPageChain[] chains = pageIndex.getPageIndex();
		for (int logicalPageChainIndex = 0; logicalPageChainIndex < chains.length; ++logicalPageChainIndex) {
			LogicalPageChain logicalPageChain = chains[logicalPageChainIndex];
			if (isNullPage(logicalPageChain)) {
				continue;
			}
			int numPages = logicalPageChain.getCurrentPageChainIndex();
			for (int i = numPages; i >= 0; i--) {
				PageAddress pageAddress = logicalPageChain.getPageAddress(i);
				DataPage dataPage = pageAddress.getDataPage();
				try {
					if (dataPage == null) {
						this.cacheManager.getCacheStats().addPageCacheMissCount();
						GByteBuffer gByteBuffer = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
							logicalPageChain,
							logicalPageChainIndex,
							i,
							this.gRegionContext,
							this.gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
							false);
						dataPage = boxDataPage(pageAddress, gByteBuffer, logicalPageChainIndex, logicalPageChain.hashCode());
					} else {
						this.cacheManager.getCacheStats().addPageCacheHitCount();
					}
					allKeysIncludeDelete.addAll(dataPage.getPOJOSet());
				} finally {
					if (dataPage != null) {
						dataPage.release();
					}
				}
			}
		}
	}

	protected void checkDataPageTypeToBox(GByteBuffer byteBuffer) {
		Preconditions.checkNotNull(byteBuffer.getByteBuffer(), "Not supported to box null byte buffer.");
		DataPage.DataPageType toBoxDataPageType = DataPage.DataPageType.valueOf(byteBuffer.getByteBuffer().get(0));
		Preconditions.checkArgument(toBoxDataPageType == dataPageType, "Internal Bug!");
	}

	/**
	 * Different value types (map/list/value) calculate the request count differently.
	 *
	 * @param dataSet Objects organized into a List.
	 * @return total request count for this list.
	 */
	abstract long getRequestCount(List<Map<K, GSValue<V>>> dataSet);

	/**
	 * Create a data page with the given version, dataset and logic page id.
	 *
	 * @param version     DataPage's version.
	 * @param dataSet     Objects organized into a List that will be written to this DataPage.
	 * @param logicPageId DataPage's index id.
	 * @return a concrete DataPage, such as DataPageKVImpl, DataPageKMapImpl or DataPageKListImpl.
	 */
	abstract DataPage createDataPage(long version, List<Map<K, GSValue<V>>> dataSet, int logicPageId);

	/**
	 * Boxes the fetched byte buffer into a data page with information provided by the page address.
	 *
	 * @param pageAddress The page address to box.
	 * @param byteBuffer  The fetched byte buffer.
	 * @param logicPageChainIndex The index of the {@link LogicalPageChain} that contains the given {@link PageAddress}.
	 * @param logicPageChainHashCode The hash code of the {@link LogicalPageChain} that contains the given {@link PageAddress}.
	 * @return The encapsulated data page with the given byte buffer.
	 */
	abstract DataPage boxDataPage(PageAddress pageAddress, GByteBuffer byteBuffer, int logicPageChainIndex, int logicPageChainHashCode);

	/**
	 * Compacts the given data pages into a single new page.
	 *
	 * @param pageIndexContext                page index context of the chain being compacted.
	 * @param isMajor                         whether the compaction is major or minor.
	 * @param canCompactPageListReversedOrder DataPages to be compacted; this list is in reverse order.
	 * @param version                         current version for the new page.
	 * @param logicPageId                     DataPage's index id.
	 * @return a new compacted DataPage.
	 */
	@VisibleForTesting
	public abstract DataPage doCompactPage(
		PageIndexContext pageIndexContext, boolean isMajor, List<DataPage> canCompactPageListReversedOrder, long version, int logicPageId);

	/**
	 * Invoked by doCompactPageForStructureValue when the value is structural, such as Map/List/Set.
	 *
	 * @param binaryValueList value list to be compacted.
	 * @param isMajor         whether the compaction is major or minor.
	 * @param version         current version.
	 * @param logicPageId     DataPage's index id.
	 * @param pageMapping     the buffer address mapping of the page.
	 * @return the compacted value.
	 */
	abstract BinaryValue doCompactValue(
		List<BinaryValue> binaryValueList,
		boolean isMajor,
		long version,
		int logicPageId,
		GBufferAddressMapping pageMapping);

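	/**
	 * Builds a concrete data page from the final compacted binary map.
	 *
	 * @param isMajor           whether the compaction is major or minor.
	 * @param version           current version for the new page.
	 * @param logicPageId       DataPage's index id.
	 * @param keySerde          serializer for the key.
	 * @param finalCompactedMap the compacted key/value map the new page is built from.
	 * @param compactionCount   the compaction count recorded for the new page.
	 * @param pageMapping       the buffer address mapping of the page.
	 */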
	protected abstract DataPage doBuildDataPageFromGBinaryMap(
		boolean isMajor,
		long version,
		int logicPageId,
		TypeSerializer<K> keySerde,
		Map<BinaryKey, BinaryValue> finalCompactedMap,
		long compactionCount,
		GBufferAddressMapping pageMapping);

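	/**
	 * Returns true if the given logical page chain is absent or does not contain any page yet.
	 */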
	protected boolean isNullPage(LogicalPageChain logicalPageChain) {
		return logicalPageChain == null || logicalPageChain.getCurrentPageChainIndex() == -1;
	}

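	/**
	 * Tries to promote the hottest data page of this region from the fetch-policy LRU into the
	 * page store's main cache. The promotion itself is submitted to the region's event executor,
	 * where it is re-validated, because a split or compaction may happen in between.
	 */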
	public void fetchPageFromLRUCacheToPageStore() {
		try {
			// the returned data page will always be fetched
			Tuple2 hottestPage = getHottestDataPageFromLRU();
			if (hottestPage != null) {
				cacheManager.getCacheStats().addLRUPagePreIntoMainCache();
				final PageContext hottestPageContext = hottestPage.f1.getPageContext();
				if (hottestPageContext == null) {
					hottestPage = null;
					return;
				}

				int chainIndex = hottestPageContext.getLogicPageIndex();
				final LogicalPageChain pageChain = pageIndex.getLogicPage(chainIndex);

				if (!canSubmitHottestPageToRegionExecutor(chainIndex, hottestPage, pageChain)) {
					hottestPage = null;
					return;
				}

				final PageAddress hottestPageAddress = hottestPage.f0;

				GByteBuffer buffer = hottestPage.f1.getFutureDataPage().get();
				buffer.retain();
				hottestPage = null;

				hottestPageContext.setCacheStatus(PageContext.CacheStatus.CACHING_TO_MAIN);
				// try to fill data from lru cache to page store
				eventExecutor.submit(() -> {
					try {
						// We need to get the pageChain again, because a split or compaction may have
						// happened between submitting this task to the executor and executing it.
						LogicalPageChain currentPageChain = pageIndex.getLogicPage(chainIndex);
						if (currentPageChain == null) {
							hottestPageContext.setCacheStatus(PageContext.CacheStatus.IN_LRU);
							return;
						}

						if (!canAddHottestPageToPageStore(chainIndex, hottestPageContext, currentPageChain)) {
							hottestPageContext.setCacheStatus(PageContext.CacheStatus.IN_LRU);
							return;
						}

						tryLoadPageIntoPageAddress(hottestPageAddress, buffer, chainIndex, currentPageChain);
						// remove the hottest page from the LRU, whether it was invalid or has been added into the main cache.
						gRegionContext.getGContext().getSupervisor().getFetchPolicy().getDataPageLRU().remove(
							hottestPageAddress);
					} catch (Exception e) {
						gContext.getNoCriticalEvent().pushEvent(e, System.currentTimeMillis());
					} finally {
						buffer.release();
					}
				});
			}
		} catch (Exception e) {
			gContext.getNoCriticalEvent().pushEvent(e, System.currentTimeMillis());
		}
	}

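	/**
	 * Returns true if adding {@code tryAddNewPageSize} bytes would push the region's page memory
	 * over the high watermark while there are no pages ready to be evicted.
	 */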
	protected boolean cacheTooFull(int tryAddNewPageSize) {
		// cache memory exceeds the high watermark and there are no ready pages to evict.
		return gRegionContext.getPageStoreStats().getPageUsedMemory() + tryAddNewPageSize > curRegionMemHighMark &&
			gRegionContext.getGContext().getSupervisor().getCacheManager().getEvictPolicy()
				.getEvictHandlerSepImpl(gRegion).getReadyToEvictDataPageMap().isEmpty();
	}

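	/**
	 * Scans the given logical page chain for {@code hottestPageAddress} and, if the page is found
	 * and has no in-memory data page yet, boxes the buffer into a data page and attaches it to the
	 * page address, updating cache statistics and memory accounting.
	 *
	 * @return true if the hottest page address was found in the chain.
	 */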
	public boolean tryLoadPageIntoPageAddress(
		PageAddress hottestPageAddress,
		GByteBuffer buffer,
		int pageChainIndex,
		LogicalPageChain logicalPageChain) {
		for (int i = 0; i <= logicalPageChain.getCurrentPageChainIndex(); ++i) {
			Iterator<PageAddress> pageIter = logicalPageChain.getPageAddress(i).pageIteratorOrdered();
			int idx = -1;
			while (pageIter.hasNext()) {
				PageAddress pageAddress = pageIter.next();
				checkState(pageAddress instanceof PageAddressSingleImpl);
				if (hottestPageAddress.equals(pageAddress)) {
					if (!pageAddress.hasDataPage()) {
						// Here we use the outer page address for a composite page address, because we need to construct the mapping.
						DataPage newDataPage = createDataPageFromGByteBuffer(idx, idx == -1 ? logicalPageChain.getPageAddress(i) : pageAddress, buffer, pageChainIndex, logicalPageChain.hashCode());
						// Here we add a reference to the underlying GByteBuffer to align with GByteBuffer's behavior:
						// in the constructors of all GByteBuffer subclasses, the buffer itself is retained.
						newDataPage.retain();
						pageAddress.setDataPage(newDataPage);

						cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, newDataPage.getSize());
						gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, newDataPage.getSize());

						cacheManager.getCacheStats().addLRUPageIntoMainCache();
					}
					return true;
				}
				idx++;
			}
		}
		return false;
	}

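	/**
	 * Creates a data page from the given byte buffer. A negative {@code subPageIndex} denotes the
	 * composite (outer) page, which is boxed via {@link #boxDataPage}; a non-negative index denotes
	 * a sub page, which is currently only supported for map types.
	 */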
	@SuppressWarnings("unchecked")
	private DataPage createDataPageFromGByteBuffer(int subPageIndex, PageAddress pageAddress, GByteBuffer byteBuffer, int logicPageChainIndex, int logicPageChainHashCode) {
		if (subPageIndex < 0) {
			return boxDataPage(pageAddress, byteBuffer, logicPageChainIndex, logicPageChainHashCode);
		} else {
			// TODO: Currently, we create the DataPage here directly for all sub-pages, because
			// sub-pages (DataPageType.KV) and composite pages (DataPageType.KHashMap or DataPageType.KSortedMap) have different page types.
			checkState(getDataPageType().isKMapType(), "currently split only supports map type.");
			PageSerdeFlink2Key pageSerdeFlink2Key = (PageSerdeFlink2Key) pageSerdeFlink;
			return new DataPageHashSubPageImpl(new GBinaryHashMap(byteBuffer, pageSerdeFlink2Key.getKey2Serde()));
		}
	}

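	/**
	 * Pre-checks on the caller thread whether the hottest page is still worth submitting to the
	 * region executor: the page context and chain must still be valid, the region must match, the
	 * chain must not be splitting or compacting, the cache must have room, and the page must still
	 * be part of the chain without an in-memory data page.
	 */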
	protected boolean canSubmitHottestPageToRegionExecutor(
		int chainIndex,
		Tuple2 hottestPage,
		LogicalPageChain pageChain) {

		// invalid page context
		if (hottestPage.f1.getPageContext() == null) {
			return false;
		}

		if (pageChain == null) {
			return false;
		}

		if (invalidRegion(gRegionContext.getRegionId(), hottestPage.f1.getPageContext().getGRegionID())) {
			return false;
		}

		if (isPageChainChanged(chainIndex, hottestPage.f1.getPageContext(), pageChain)) {
			return false;
		}

		if (!enableAddIntoMainWhenSplitting && pageChainInSplitting(chainIndex)) {
			return false;
		}

		if (pageChainInCompacting(pageChain)) {
			return false;
		}

		if (cacheTooFull(hottestPage.f1.getFutureDataPage().getSize())) {
			LOG.warn("Cannot add page into the main cache because the cache is full.");
			return false;
		}

		if (hottestPage.f0.hasDataPage()) {
			return false;
		}

		if (!pageInTheChain(hottestPage.f0, pageChain)) {
			// delete the hottest page from LRU
			gRegionContext.getGContext().getSupervisor().getFetchPolicy().getDataPageLRU().remove(hottestPage.f0);
			return false;
		}

		return true;
	}

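	/**
	 * Returns true if the given page chain is currently being compacted.
	 */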
	protected boolean pageChainInCompacting(LogicalPageChain pageChain) {
		// TODO: #Cache how to tell whether a LogicPageChain is in merging...
		return pageChain.getPageStatus().equals(PageStatus.Compacting);
	}

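	/**
	 * Returns true if the page chain at {@code chainIndex} is currently being split. During a
	 * split, a chain in the lower half of the index has a buddy chain at
	 * {@code chainIndex + indexCapacity / 2}; while that buddy chain exists but is still in
	 * {@code PageStatus.Init}, the split of this chain has not finished yet.
	 */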
	protected boolean pageChainInSplitting(int chainIndex) {
		int halfCapacity = pageIndex.getIndexCapacity() >> 1;
		if (chainIndex >= halfCapacity) {
			return false;
		}
		LogicalPageChain buddyPageChain = pageIndex.getLogicPage(chainIndex + halfCapacity);
		if (buddyPageChain == null) {
			return false;
		}

		return buddyPageChain.getPageStatus().equals(PageStatus.Init);
	}

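	/**
	 * Returns true if the page belongs to a region other than this one.
	 */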
	protected boolean invalidRegion(GRegionID expectedRegionID, GRegionID actualRegionID) {
		// we only load data belonging to our region.
		return !expectedRegionID.equals(actualRegionID);
	}

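	/**
	 * Re-validation executed on the region executor: the hottest page can only be added to the
	 * page store if its page chain has not changed since the task was submitted.
	 */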
	private boolean canAddHottestPageToPageStore(
		int chainIndex,
		PageContext hottestPageContext,
		LogicalPageChain logicalPageChain) {

		return !isPageChainChanged(chainIndex, hottestPageContext, logicalPageChain);
	}

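	/**
	 * Returns true if the page chain at {@code chainIndex} no longer matches the page context:
	 * the index is out of range, or the chain's current hash code differs from the one recorded
	 * in the context.
	 */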
	protected boolean isPageChainChanged(
		int chainIndex,
		PageContext hottestPageContext,
		LogicalPageChain logicalPageChain) {
		if (chainIndex >= pageIndex.getIndexCapacity()) {
			LOG.error("Received wrong chainIndex {}, current pageIndex capacity {}, hottest page region {}, current region {}.",
				chainIndex,
				pageIndex.getIndexCapacity(),
				hottestPageContext.getGRegionID(),
				gRegionContext.getRegionId());
			return true;
		}

		// the hash codes are not equal
		return logicalPageChain.hashCode() != hottestPageContext.getLogicPageChainHashCode();
	}

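	/**
	 * Returns true if the expected page address is still referenced by any page of the given chain.
	 */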
	private boolean pageInTheChain(PageAddress expectedPageAddress, LogicalPageChain pageChain) {
		boolean found = false;
		// getCurrentPageChainIndex() is the index of the last page, so iterate inclusively.
		for (int i = 0; !found && i <= pageChain.getCurrentPageChainIndex(); ++i) {
			Iterator<PageAddress> iter = pageChain.getPageAddress(i).pageIterator();
			while (iter.hasNext()) {
				PageAddress pageAddress = iter.next();
				checkState(pageAddress instanceof PageAddressSingleImpl);
				if (expectedPageAddress.equals(pageAddress)) {
					found = true;
					break;
				}
			}
		}
		return found;
	}

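	/**
	 * Gets the hottest page of this region from the fetch policy's {@link DataPageLRU},
	 * together with its page address.
	 */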
	protected Tuple2 getHottestDataPageFromLRU() {
		return gContext.getSupervisor().getFetchPolicy().getDataPageLRU().getHottestPage(
			gRegionContext.getRegionId(),
			pageIndex);
	}

	@VisibleForTesting
	public CacheManager getCacheManager() {
		return cacheManager;
	}
}



