All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.page.AbstractHashPageStore Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.page;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiShutDownException;
import org.apache.flink.runtime.state.gemini.engine.filter.StateFilter;
import org.apache.flink.runtime.state.gemini.engine.handler.PageCompactHandler;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryKey;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManager;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManager;
import org.apache.flink.runtime.state.gemini.engine.vm.WaterMark;

import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.NO_PAGE;
import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.WAIT_SPLITTING;

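/**
 * Base implementation of a hash-indexed {@link PageStore}: it appends delta pages to logic page
 * chains, schedules minor and major compaction of those chains, and splits pages when the
 * hash index has been expanded.
 */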
public abstract class AbstractHashPageStore implements PageStore {
	private static final Logger LOG = LoggerFactory.getLogger(AbstractHashPageStore.class);
	//TODO provide HashCodePageIndex interface.
	/** Hash index that maps a page index id to its current {@link LogicChainedPage}. */
	protected final PageIndexHashImpl pageIndex;
	/** Context of the owning region; source of the page store statistics updated by this class. */
	protected final GRegionContext gRegionContext;
	/** Executor returned by {@link #getExecutor()}; compaction results are applied on it. */
	protected final EventExecutor eventExecutor;
	/** Cache manager obtained from the supervisor; provides cache stats, evict policy and water marks. */
	protected final CacheManager cacheManager;
	/** DB-wide context obtained from the region context. */
	protected final GContext gContext;
	/** Snapshot manager obtained from the supervisor. */
	protected final SnapshotManager geminiSnapshotManager;
	// Average-logic-page-size thresholds used by checkResource(); which one applies depends on
	// the index capacity water mark reported by the cache manager.
	private final int spilledPageSizeThresholdLow;
	private final int spilledPageSizeThresholdMiddle;
	private final int spilledPageSizeThresholdHigh;
	/** Chain index at or above which compactPage() tries a major compaction. */
	private final int maxCompactionChainThreshold;
	/** Region this page store belongs to. */
	protected final GRegion gRegion;
	/** Callback object that forwards compaction requests to the methods of this class. */
	private final PageCompactHandler pageCompactHandler;
	/**
	 * Chain index above which compactPage() tries a minor compaction; also the number of
	 * candidate pages a minor compaction must exceed to be worthwhile.
	 */
	private final int inMemoryCompactionThreshold;
	/** Limit compared against the cache-wide count of running major compactions. */
	private final int maxRunningMajorCompaction;
	/** Limit compared against the cache-wide count of running minor compactions (non-forced only). */
	private final int maxRunningMinorCompaction;
	/** Page serde taken from the region context. */
	protected final PageSerdeFlink pageSerdeFlink;

	/**
	 * Creates a page store that builds its own page index.
	 *
	 * @param gRegion the region this page store belongs to.
	 * @param eventExecutor the executor exposed through {@link #getExecutor()}.
	 */
	public AbstractHashPageStore(GRegion gRegion, EventExecutor eventExecutor) {
		// A null page index tells the main constructor to create a fresh PageIndexHashImpl.
		this(gRegion, null, eventExecutor);
	}

	/**
	 * Creates a page store for the given region.
	 *
	 * <p>All thresholds are read once from the {@link GConfiguration} reachable through the region
	 * context. The page size rate between POJO and heap is also pushed into the page store stats.
	 *
	 * @param gRegion the region this page store belongs to.
	 * @param pageIndex an existing page index to reuse (must be a {@link PageIndexHashImpl}), or
	 *                  {@code null} to create a new one.
	 * @param eventExecutor the executor exposed through {@link #getExecutor()}.
	 */
	public AbstractHashPageStore(
		GRegion gRegion, @Nullable PageIndex pageIndex, EventExecutor eventExecutor) {

		this.gRegion = gRegion;
		this.gRegionContext = gRegion.getGRegionContext();
		this.eventExecutor = eventExecutor;
		GConfiguration configuration = gRegionContext.getGContext().getGConfiguration();
		if (pageIndex != null) {
			//TODO #SR rewrite this to use interface.
			this.pageIndex = (PageIndexHashImpl) pageIndex;
		} else {
			// NOTE: 'this' is handed to the index before the remaining fields are assigned.
			this.pageIndex = new PageIndexHashImpl<>(configuration, this, gRegionContext.getPageStoreStats());
		}
		this.gContext = gRegionContext.getGContext();
		this.cacheManager = this.gContext.getSupervisor().getCacheManager();
		gRegionContext.getPageStoreStats().setPageSizeRate(configuration.getPageSizeRateBetweenPOJOAndHeap());
		this.spilledPageSizeThresholdLow = configuration.getSpilledPageSizeThresholdUnderLowMark();
		this.spilledPageSizeThresholdMiddle = configuration.getSpilledPageSizeThresholdUnderMiddleMark();
		this.spilledPageSizeThresholdHigh = configuration.getSpilledPageSizeThresholdUnderHighMark();
		this.geminiSnapshotManager = this.gContext.getSupervisor().getSnapshotManager();
		this.maxCompactionChainThreshold = configuration.getMaxCompactionChainThreshold();
		this.inMemoryCompactionThreshold = configuration.getInMemoryCompactionThreshold();
		this.maxRunningMajorCompaction = configuration.getMaxRunningMajorCompaction();
		this.maxRunningMinorCompaction = configuration.getMaxRunningMinorCompaction();
		this.pageSerdeFlink = (PageSerdeFlink) gRegionContext.getPageSerdeFlink();
		// The handler only forwards each request to the matching method of this page store.
		this.pageCompactHandler = new PageCompactHandler() {
			@Override
			public void doAsyncMajorCompaction(
				LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version) {
				doMajorCompaction(logicChainedPage, curPageIndex, curChainIndex, version);
			}

			@Override
			public void doAsyncMinorCompaction(
				LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version, boolean force) {
				doMinorCompaction(logicChainedPage, curPageIndex, curChainIndex, version, force);
			}

			@Override
			public void doSyncReplace(
				LogicChainedPage logicChainedPage,
				int curPageIndex,
				int oldCompatedPageSize,
				int oldMemPageSize,
				long oldRequstCount,
				int inclusiveCompactionStartChainIndex,
				int inclusiveCompactionEndChainIndex,
				DataPage compactedDataPage,
				List invalidPageAddressList,
				int relatedIndex) {
				// Replacement requested by a compaction is never a split, hence isSplit = false.
				doSyncReplaceLogicPage(logicChainedPage,
					curPageIndex,
					oldCompatedPageSize,
					oldMemPageSize,
					oldRequstCount,
					inclusiveCompactionStartChainIndex,
					inclusiveCompactionEndChainIndex,
					compactedDataPage,
					invalidPageAddressList,
					false,
					relatedIndex);
			}

			@Override
			public void doAsyncMinorCompactionByRead(
				LogicChainedPage logicPageID,
				int curPageIndex,
				int curChainIndex,
				Map fetchedDataPageMap) {
				doMinorCompactionByRead(logicPageID, curPageIndex, curChainIndex, fetchedDataPageMap);
			}
		};
	}

	/**
	 * Returns the executor this page store was constructed with.
	 */
	@Override
	public EventExecutor getExecutor() {
		return eventExecutor;
	}

	/**
	 * Tells whether the key is present; a key counts as present when {@code get(key)} is non-null.
	 */
	@Override
	public boolean contains(K key) {
		// For a plain key/value store a null lookup result means "not contained".
		return null != get(key);
	}

	/**
	 * Returns the hash page index used by this page store.
	 */
	@Override
	public PageIndex getPageIndex() {
		return this.pageIndex;
	}

	/**
	 * Writes a data set into the logic page referenced by the context, or only tries to compact
	 * that page when there is nothing to write.
	 *
	 * @param pageIndexContext the target page; its page id must not be {@code NO_PAGE}.
	 * @param dataSet the data to write; {@code null} or empty triggers a compaction attempt only.
	 * @param version the version passed on to the write and compaction steps.
	 * @throws GeminiRuntimeException if the context refers to {@code NO_PAGE}.
	 */
	@Override
	public void addPage(PageIndexContext pageIndexContext, List>> dataSet, long version) {
		if (pageIndexContext.getPageID() == NO_PAGE) {
			String msg = "BUG! addOrMergePage receive NO_PAGE request.";
			LOG.error(msg);
			throw new GeminiRuntimeException(msg);
		}

		boolean hasData = dataSet != null && !dataSet.isEmpty();
		if (hasData) {
			doWriteDataToPage(pageIndexContext, dataSet, version);
			return;
		}
		compactPage(pageIndexContext, version);
	}

	/**
	 * Decides whether the logic page referenced by the context needs compaction and, if so,
	 * schedules it on a compaction executor.
	 *
	 * <p>A major compaction is attempted when the chain index reaches
	 * {@code maxCompactionChainThreshold}; when too many major compactions are already running, a
	 * forced minor compaction is attempted instead. Below that threshold, a regular minor
	 * compaction is attempted once the chain index exceeds {@code inMemoryCompactionThreshold}.
	 *
	 * @param pageIndexContext identifies the logic page and its index id.
	 * @param version version handed to the compaction handler.
	 * @throws GeminiRuntimeException wrapping any exception raised while scheduling.
	 */
	@Override
	public void compactPage(PageIndexContext pageIndexContext, final long version) {
		try {
			final LogicChainedPage logicChainedPage = pageIndexContext.getPageID();
			final int curPageIndex = pageIndexContext.getPageIndexID();

			// The index already points to another logic page: a compaction has replaced this one.
			if (pageIndex.getLogicPage(curPageIndex) != logicChainedPage) {
				return;
			}
			// A chain with a single page has nothing to compact; a page in a non-compactable
			// status must be left alone.
			if (logicChainedPage.getCurrentPageChainIndex() <= 0
				|| !logicChainedPage.getPageStatus().canCompaction()) {
				return;
			}

			final int curChainIndex = logicChainedPage.getCurrentPageChainIndex();

			if (logicChainedPage.getCurrentPageChainIndex() < maxCompactionChainThreshold) {
				// Chain is still short: only a minor compaction over memory-resident pages applies.
				if (logicChainedPage.getCurrentPageChainIndex() > inMemoryCompactionThreshold) {
					tryLaunchMinorCompaction(version,
						logicChainedPage,
						curPageIndex,
						curChainIndex,
						logicChainedPage,
						false);
				}
				return;
			}

			// Chain is long enough for an asynchronous major compaction.
			if (!logicChainedPage.getPageStatus().canCompaction()) {
				return;
			}

			gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(1);

			if (cacheManager.getCacheStats().getRuningMajorCompactedPages() > maxRunningMajorCompaction) {
				// Too many major compactions in flight: give the slot back and fall back to a
				// forced minor compaction.
				gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
				tryLaunchMinorCompaction(version,
					logicChainedPage,
					curPageIndex,
					curChainIndex,
					logicChainedPage,
					true);
				return;
			}

			if (!logicChainedPage.setPageStatus(PageStatus.Normal, PageStatus.Compacting)) {
				gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
				return;
			}

			EventExecutor compactionExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
			compactionExecutor.submit(() -> {
				try {
					pageCompactHandler.doAsyncMajorCompaction(logicChainedPage,
						curPageIndex,
						curChainIndex,
						version);
				} catch (GeminiShutDownException e) {
					LOG.warn("GeminiDB has shutdown!");
				}
			});
		} catch (Exception e) {
			LOG.error("Bug " + e.getMessage(), e);
			throw new GeminiRuntimeException(e);
		}
	}

	/**
	 * Tries to schedule an asynchronous minor compaction over the trailing memory-resident pages
	 * of a logic page chain.
	 *
	 * <p>The running-minor-compaction counter is incremented up front and decremented again on
	 * every path that ends without submitting the compaction task.
	 *
	 * @param version version handed to the compaction handler.
	 * @param logicChainedPage the chain that is inspected and whose status is switched to
	 *                         {@code Compacting}.
	 * @param curPageIndex index id of the logic page.
	 * @param curChainIndex chain index to start scanning from (scanning goes down towards 0).
	 * @param compactionLogicChainedPage the chain handed to the compaction handler.
	 * @param force when {@code true}, the running-compaction limit and the compaction-count
	 *              heuristic are skipped.
	 */
	private void tryLaunchMinorCompaction(
		long version,
		LogicChainedPage logicChainedPage,
		int curPageIndex,
		int curChainIndex,
		LogicChainedPage compactionLogicChainedPage,
		boolean force) {
		if (!logicChainedPage.getPageStatus().canCompaction()) {
			return;
		}

		gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(1);

		if (!force
			&& cacheManager.getCacheStats().getRuningMinorCompactedPages() > maxRunningMinorCompaction) {
			gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
			return;
		}

		// Count how many trailing pages are candidates, walking from the newest page backwards.
		int startCompactionIndex = curChainIndex;
		int memCandidatePage = 0;
		long lastSumCompactedThreshold = -1;

		while (startCompactionIndex >= 0) {
			PageAddress pageAddress = logicChainedPage.getPageAddress(startCompactionIndex);
			//only stat, no need reference.
			DataPage dataPage = pageAddress.getDataPageNoReference();
			if (dataPage == null) {
				// Page is not available in memory: the candidate run ends here.
				break;
			}
			if (!force) {
				long compactedCount = dataPage.getCompactionCount();
				// Heuristic: keep extending the run only while the accumulated compaction count
				// is at least the compaction count of the next (older) page.
				if (lastSumCompactedThreshold == -1) {
					lastSumCompactedThreshold = compactedCount;
				} else if (lastSumCompactedThreshold >= compactedCount) {
					lastSumCompactedThreshold += compactedCount;
				} else {
					break;
				}
			}
			memCandidatePage++;
			startCompactionIndex--;
		}

		if (memCandidatePage <= inMemoryCompactionThreshold) {
			gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
			return;
		}

		if (!logicChainedPage.setPageStatus(PageStatus.Normal, PageStatus.Compacting)) {
			// Release the MINOR slot taken above (the original code decremented the major counter
			// here, which leaked a minor slot and under-counted running major compactions).
			gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
			return;
		}

		EventExecutor compactionExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
		compactionExecutor.submit(() -> {
			try {
				pageCompactHandler.doAsyncMinorCompaction(compactionLogicChainedPage,
					curPageIndex,
					curChainIndex,
					version,
					force);
			} catch (GeminiShutDownException e) {
				LOG.warn("GeminiDB has shutdown!");
			}
		});
	}

	/**
	 * Replaces a logic page in the index by a new chain in which the pages in
	 * {@code [inclusiveCompactionStartChainIndex, inclusiveCompactionEndChainIndex]} are
	 * substituted by the single compacted page (or by nothing when that page is {@code null}).
	 *
	 * <p>If the index no longer holds the expected page, nothing is replaced, the compacted page
	 * is released and {@code null} is returned.
	 *
	 * @param logicChainedPage the chain that was compacted.
	 * @param curPageIndex index slot to update.
	 * @param oldCompatedPageSize total size of the pages that were compacted.
	 * @param oldMemPageSize size of those pages that was counted as used memory.
	 * @param oldRequstCount request count carried over to the compacted page.
	 * @param inclusiveCompactionStartChainIndex first chain index that was compacted.
	 * @param inclusiveCompactionEndChainIndex last chain index that was compacted.
	 * @param compactedDataPage result of the compaction, may be {@code null}.
	 * @param invalidPageAddressList addresses of the replaced pages, to be discarded.
	 * @param isSplit whether the slot is expected to hold {@code WAIT_SPLITTING}.
	 * @param relatedIndex second index passed to the evict policy when removing invalid pages.
	 * @return the new chain, or {@code null} when the replacement was skipped.
	 */
	private LogicChainedPage doSyncReplaceLogicPage(
		LogicChainedPage logicChainedPage,
		int curPageIndex,
		int oldCompatedPageSize,
		int oldMemPageSize,
		long oldRequstCount,
		int inclusiveCompactionStartChainIndex,
		int inclusiveCompactionEndChainIndex,
		DataPage compactedDataPage,
		List invalidPageAddressList,
		boolean isSplit,
		int relatedIndex) {

		// A split target slot must still be waiting; a compaction slot must still hold the
		// chain that was compacted. Otherwise somebody else has already done the work.
		final boolean outdated = isSplit
			? pageIndex.getLogicPage(curPageIndex) != WAIT_SPLITTING
			: logicChainedPage != pageIndex.getLogicPage(curPageIndex);
		if (outdated) {
			if (compactedDataPage != null) {
				//compactedDataPage is not used, delReference to reclaim mem.
				compactedDataPage.delReferenceCount(ReleaseType.Normal);
			}
			return null;
		}

		int newPageSize = 0;
		PageAddress newPageAddress = null;
		LogicChainedPage replacement = pageIndex.newLogicChainedPage();

		// Pages older than the compacted range are carried over unchanged.
		int chainPos = 0;
		while (chainPos < inclusiveCompactionStartChainIndex) {
			replacement.insertPage(logicChainedPage.getPageAddress(chainPos));
			chainPos++;
		}

		// A major compaction may produce no page at all; a minor compaction always produces one,
		// because it keeps data even if it is expired by TTL or removed by removeAll.
		if (compactedDataPage != null) {
			newPageAddress = replacement.createPage(oldRequstCount, compactedDataPage);
			newPageSize = compactedDataPage.getSize();
		}

		// Pages newer than the compacted range are carried over unchanged as well.
		chainPos = inclusiveCompactionEndChainIndex + 1;
		while (chainPos <= logicChainedPage.getCurrentPageChainIndex()) {
			replacement.insertPage(logicChainedPage.getPageAddress(chainPos));
			chainPos++;
		}

		replacement.addPageSize(logicChainedPage.getPageSize() - oldCompatedPageSize + newPageSize);

		pageIndex.updateLogicPage(curPageIndex, replacement);

		this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, curPageIndex, relatedIndex, invalidPageAddressList);

		if (compactedDataPage != null) {
			this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, newPageAddress);
		}

		// Publish the difference between the new and the old chain to the statistics.
		gRegionContext.getPageStoreStats().addLogicPageSize(
			replacement.getPageSize() - logicChainedPage.getPageSize());
		gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, newPageSize - oldMemPageSize);
		gRegionContext.getPageStoreStats().addLogicPageChainLen(
			replacement.getCurrentPageChainIndex() - logicChainedPage.getCurrentPageChainIndex());
		gRegionContext.getPageStoreStats().addLogicPageChainCapacity(
			replacement.getPageChainCapacity() - logicChainedPage.getPageChainCapacity());

		gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);

		return replacement;
	}

	/**
	 * Performs a minor compaction: the trailing run of memory-resident pages of the chain is
	 * merged into one page, and the replacement of the logic page is then submitted to this page
	 * store's executor.
	 *
	 * <p>The running-minor-compaction counter is decremented when the compaction is skipped and
	 * after the replacement has been applied.
	 *
	 * @param logicChainedPage the chain to compact.
	 * @param curPageIndex index id of the logic page.
	 * @param curChainIndex newest chain index to include; scanning goes down towards 0.
	 * @param version not read by this method; the compacted page is built with
	 *                {@code gContext.getCurVersion()}.
	 * @param force when {@code true}, the compaction-count heuristic is skipped.
	 * @throws GeminiShutDownException if the DB is not in normal status after collecting pages.
	 */
	public void doMinorCompaction(
		LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version, boolean force) {
		if (logicChainedPage != pageIndex.getLogicPage(curPageIndex)) {
			//compacting has done.
			gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
			return;
		}
		// get dataPage list to do compaction.
		List canCompactPageListReversedOrder = new ArrayList<>();
		List invalidPageAddressList = new ArrayList<>();
		int oldPageSize = 0;
		long oldRequstCount = 0;
		int startCompactionIndex = curChainIndex;
		long lastSumCompactedThreshold = -1;
		while (startCompactionIndex >= 0) {
			PageAddress pageAddress = logicChainedPage.getPageAddress(startCompactionIndex);
			// NOTE(review): getDataPage() appears to take a reference on the returned page;
			// every collected page is released again further below.
			DataPage dataPage = pageAddress.getDataPage();

			if (dataPage != null) {
				if (!force) {
					long compactedCount = dataPage.getCompactionCount();
					//improvement for minor compaction
					if (lastSumCompactedThreshold == -1) {
						lastSumCompactedThreshold = compactedCount;
					} else if (lastSumCompactedThreshold >= compactedCount) {
						lastSumCompactedThreshold += compactedCount;
					} else {
						// This page is not part of the run, so give its reference back.
						dataPage.delReferenceCount(ReleaseType.Normal);
						break;
					}
				}
				oldPageSize += dataPage.getSize();
				canCompactPageListReversedOrder.add(dataPage);
				invalidPageAddressList.add(pageAddress);
				oldRequstCount += pageAddress.getRequestCount();
				startCompactionIndex--;
			} else {
				// No data page available from the address: the run ends here.
				break;
			}
		}

		if (!gContext.isDBNormal()) {
			throw new GeminiShutDownException("DB is in abnormal status.");
		}

		if (canCompactPageListReversedOrder.size() <= inMemoryCompactionThreshold) {
			// Not enough pages to be worth compacting: restore the status and release everything.
			logicChainedPage.setPageStatus(PageStatus.Compacting, PageStatus.Normal);
			gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
			canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
			return;
		}

		// The loop stopped one position below the oldest page that was collected.
		final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;

		gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());

		// The first argument is true only when the run reaches the oldest page of the chain.
		DataPage compactedDataPage = doCompactPage(inclusiveCompactionStartChainIndex == 0,
			canCompactPageListReversedOrder,
			gContext.getCurVersion(),
			curPageIndex);

		canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));

		final long finalOldRequstCount = oldRequstCount;
		final int finalOldPageSize = oldPageSize;
		this.getExecutor().submit(() -> {
			try {
				// All compacted pages were in memory, so the same size is passed both as the
				// compacted size and as the in-memory size.
				pageCompactHandler.doSyncReplace(logicChainedPage,
					curPageIndex,
					finalOldPageSize,
					finalOldPageSize,
					finalOldRequstCount,
					inclusiveCompactionStartChainIndex,
					curChainIndex,
					compactedDataPage,
					invalidPageAddressList,
					curPageIndex);
				gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
			} catch (GeminiShutDownException e) {
				LOG.warn("GeminiDB has shutdown!");
			}
		});
	}

	/**
	 * Performs a major compaction: every page of the chain from {@code curChainIndex} down to 0 is
	 * merged into one page, fetching pages that are not in memory, and the replacement of the
	 * logic page is then submitted to this page store's executor.
	 *
	 * <p>The running-major-compaction counter is decremented when the compaction is skipped and
	 * after the replacement has been applied.
	 *
	 * @param logicChainedPage the chain to compact.
	 * @param curPageIndex index id of the logic page.
	 * @param curChainIndex newest chain index to include.
	 * @param version not read by this method; the compacted page is built with
	 *                {@code gContext.getCurVersion()}.
	 * @throws GeminiShutDownException if the DB is not in normal status after collecting pages.
	 * @throws GeminiRuntimeException if no page was collected.
	 */
	public void doMajorCompaction(
		LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version) {
		if (logicChainedPage != pageIndex.getLogicPage(curPageIndex)) {
			//compacting has done.
			gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
			return;
		}
		// get dataPage list to do compaction.
		List dataPageListReversedOrder = new ArrayList<>();
		List invalidPageAddressList = new ArrayList<>();

		long oldRequstCount = 0;
		int oldCompactedPageSize = 0;
		int oldMemPageSize = 0;
		int cix = curChainIndex;
		// Collect pages from curChainIndex down to 0. Pages with a higher chain index are not
		// collected here; the replace step carries them over into the new chain.
		while (cix >= 0 && gContext.isDBNormal()) {
			PageAddress pageAddress = logicChainedPage.getPageAddress(cix);
			DataPage dataPage = pageAddress.getDataPage();
			if (dataPage == null) {
				// The page is not in memory, so it has to be fetched; its size is therefore not
				// counted as used memory.
				this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
				//FetchPolicy should be thread safe. because we want compaction also use LRU.
				dataPage = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
					logicChainedPage,
					cix,
					this.gRegionContext,
					false,
					false);
			} else {
				oldMemPageSize += dataPage.getSize();
			}
			oldCompactedPageSize += dataPage.getSize();
			dataPageListReversedOrder.add(dataPage);
			invalidPageAddressList.add(pageAddress);
			oldRequstCount += pageAddress.getRequestCount();
			cix--;
		}
		if (!gContext.isDBNormal()) {
			throw new GeminiShutDownException("DB is in abnormal status.");
		}

		if (dataPageListReversedOrder.isEmpty()) {
			throw new GeminiRuntimeException("BUG");
		}

		gRegionContext.getPageStoreStats().addMajorCompactedPages(dataPageListReversedOrder.size());

		DataPage compactedDataPage = doCompactPage(true,
			dataPageListReversedOrder,
			gContext.getCurVersion(),
			curPageIndex);

		// The source pages are no longer needed once the compacted page has been built.
		dataPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));

		final long finalOldRequstCount = oldRequstCount;
		final int finalOldCompactedPageSize = oldCompactedPageSize;
		final int finalOldMemPageSize = oldMemPageSize;
		this.getExecutor().submit(() -> {
			try {
				// The compacted range always starts at chain index 0 for a major compaction.
				pageCompactHandler.doSyncReplace(logicChainedPage,
					curPageIndex,
					finalOldCompactedPageSize,
					finalOldMemPageSize,
					finalOldRequstCount,
					0,
					curChainIndex,
					compactedDataPage,
					invalidPageAddressList,
					curPageIndex);
				gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
			} catch (GeminiShutDownException e) {
				LOG.warn("GeminiDB has shutdown!");
			}
		});
	}

	/**
	 * Performs a minor compaction that was triggered by a read: the trailing run of pages that are
	 * either in memory or present in {@code fetchedDataPageMap} is merged into one page, and the
	 * replacement of the logic page is then submitted to this page store's executor.
	 *
	 * <p>The running-minor-compaction-by-read counter is decremented when the compaction is
	 * skipped and after the replacement has been applied.
	 *
	 * @param logicChainedPage the chain to compact.
	 * @param curPageIndex index id of the logic page.
	 * @param curChainIndex newest chain index to include; scanning goes down towards 0.
	 * @param fetchedDataPageMap data pages looked up by chain index for pages whose address
	 *                           yields no data page.
	 * @throws GeminiShutDownException if the DB is not in normal status after collecting pages.
	 */
	public void doMinorCompactionByRead(
		LogicChainedPage logicChainedPage,
		int curPageIndex,
		int curChainIndex,
		Map fetchedDataPageMap) {
		if (logicChainedPage != pageIndex.getLogicPage(curPageIndex)) {
			//compacting has done.
			gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
			return;
		}
		// get dataPage list to do compaction.
		List canCompactPageListReversedOrder = new ArrayList<>();
		List invalidPageAddressList = new ArrayList<>();
		int oldCompactedPageSize = 0;
		int oldMemPageSize = 0;
		long oldRequstCount = 0;
		int startCompactionIndex = curChainIndex;
		while (startCompactionIndex >= 0) {
			PageAddress pageAddress = logicChainedPage.getPageAddress(startCompactionIndex);
			DataPage dataPage = pageAddress.getDataPage();

			if (dataPage == null) {
				// Fall back to the page fetched by the read; without one the run ends here.
				dataPage = fetchedDataPageMap.get(startCompactionIndex);
				if (dataPage == null) {
					break;
				}
				// Take our own reference so that the uniform release below stays balanced.
				dataPage.addReferenceCount();
			} else {
				oldMemPageSize += dataPage.getSize();
			}

			oldCompactedPageSize += dataPage.getSize();
			canCompactPageListReversedOrder.add(dataPage);
			invalidPageAddressList.add(pageAddress);
			oldRequstCount += pageAddress.getRequestCount();
			startCompactionIndex--;
		}

		if (!gContext.isDBNormal()) {
			throw new GeminiShutDownException("DB is in abnormal status.");
		}

		if (canCompactPageListReversedOrder.size() < 2) {
			// Fewer than two pages cannot be merged: restore the status and release everything.
			logicChainedPage.setPageStatus(PageStatus.Compacting, PageStatus.Normal);
			gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
			canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
			return;
		}

		// The loop stopped one position below the oldest page that was collected.
		final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;

		gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());

		// The first argument is true only when the run reaches the oldest page of the chain.
		DataPage compactedDataPage = doCompactPage(inclusiveCompactionStartChainIndex == 0,
			canCompactPageListReversedOrder,
			gContext.getCurVersion(),
			curPageIndex);

		canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));

		final long finalOldRequstCount = oldRequstCount;
		final int finalOldCompactedPageSize = oldCompactedPageSize;
		final int finalOldMemPageSize = oldMemPageSize;
		this.getExecutor().submit(() -> {
			try {
				pageCompactHandler.doSyncReplace(logicChainedPage,
					curPageIndex,
					finalOldCompactedPageSize,
					finalOldMemPageSize,
					finalOldRequstCount,
					inclusiveCompactionStartChainIndex,
					curChainIndex,
					compactedDataPage,
					invalidPageAddressList,
					curPageIndex);
				gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
			} catch (GeminiShutDownException e) {
				LOG.warn("GeminiDB has shutdown!");
			}
		});
	}

	/**
	 * Splits the logic page referenced by the context into the page at its current index and the
	 * page at {@code curBucketNum + curIndex}, after the index has been expanded.
	 *
	 * <p>All pages of the chain are first merged into one page, which is then split in two. The
	 * first half (f0) stays at the current index and the second half (f1) goes to the destination
	 * index. If one or both halves are empty, the corresponding index slot is set to
	 * {@code NO_PAGE}.
	 *
	 * @param pageIndexContext identifies the page to split; must be a
	 *                         {@link PageIndexContextHashImpl}.
	 * @throws GeminiShutDownException if the DB is not in normal status after collecting pages.
	 */
	@Override
	public void splitPage(PageIndexContext pageIndexContext) {
		LogicChainedPage currentLogicPage = pageIndexContext.getPageID();

		PageIndexContextHashImpl uPageIndexContext = (PageIndexContextHashImpl) pageIndexContext;

		int curBucketNum = uPageIndexContext.getCurBucketNum();
		int curIndex = uPageIndexContext.getCurIndex();

		// Fix for a fast-split bug (see GRegionKMapTest#testSimplePutGetRemove).
		// Example: the index has 4 buckets and we look at page 1. The index expands from 4 to 8
		// buckets before page 1 has been split, and then expands again from 8 to 16.
		// Page 1 must first be split into 1 and 5 (8 buckets), and only then 1 into 1 and 9 and
		// 5 into 5 and 13 (16 buckets), so that page 1 of 4 buckets finally becomes pages
		// 1/5/9/13 of 16 buckets.
		// If the context was created when there were already 8 buckets (some key just fell into
		// page 1), using its bucket number would split 1 into 1 and 9 directly, which is wrong.
		// In short: a page can only be split step by step, so the bucket number of the context
		// cannot be used to decide the split step.
		curBucketNum = pageIndex.getBucketNumASPageFinishSplit(curBucketNum, curIndex);

		int destIndex = curBucketNum + curIndex;

		if (pageIndex.getLogicPage(destIndex) != WAIT_SPLITTING || pageIndex.getLogicPage(curIndex) != currentLogicPage) {
			//Splitting has done.
			return;
		}

		// get dataPage list to do compaction.
		List dataPageListReversedOrder = new ArrayList<>();
		List invalidPageAddressList = new ArrayList<>();

		long oldRequestNum = 0;
		int oldCompactedPageSize = 0;
		int oldMemPageSize = 0;
		int cix = currentLogicPage.getCurrentPageChainIndex();
		// Collect every page of the chain, newest first, fetching the ones that are not in memory.
		while (cix >= 0 && gContext.isDBNormal()) {
			PageAddress pageAddress = currentLogicPage.getPageAddress(cix);
			DataPage dataPage = pageAddress.getDataPage();
			if (dataPage == null) {
				this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
				dataPage = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
					currentLogicPage,
					cix,
					this.gRegionContext,
					false,
					false);
			} else {
				oldMemPageSize += dataPage.getSize();
			}

			oldCompactedPageSize += dataPage.getSize();
			dataPageListReversedOrder.add(dataPage);
			invalidPageAddressList.add(pageAddress);
			oldRequestNum += pageAddress.getRequestCount();
			cix--;
		}
		if (!gContext.isDBNormal()) {
			throw new GeminiShutDownException("DB is in abnormal status.");
		}

		if (dataPageListReversedOrder.isEmpty()) {
			return;
		}
		DataPage mergeDataPage = doCompactPage(true,
			dataPageListReversedOrder,
			gContext.getCurVersion(),
			pageIndexContext.getPageIndexID());

		// Release the source pages. (This could also be done inside doCompactPage.)
		dataPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));

		// A null merged page (nothing left after compaction) is treated as two empty halves.
		Tuple2 splitDataPages = mergeDataPage == null
			? new Tuple2<>(null, null)
			: mergeDataPage.split(curBucketNum,
				curIndex,
				gContext.getSupervisor().getAllocator(),
				gContext.getInPageGCompressAlgorithm());

		if (mergeDataPage != null) {
			//this will reclaim mergeDataPage's memory.
			mergeDataPage.delReferenceCount(ReleaseType.Normal);
		}

		if (splitDataPages.f1 == null && splitDataPages.f0 != null) {
			// Everything stays at curIndex: this is just a compaction in place, and the
			// destination slot becomes empty.
			doSyncReplaceLogicPage(currentLogicPage,
				curIndex,
				oldCompactedPageSize,
				oldMemPageSize,
				oldRequestNum,
				0,
				currentLogicPage.getCurrentPageChainIndex(),
				splitDataPages.f0,
				invalidPageAddressList,
				false,
				destIndex);
			pageIndex.updateLogicPage(destIndex, NO_PAGE);
			return;
		} else if (splitDataPages.f0 == null && splitDataPages.f1 != null) {
			// Everything moves to destIndex: a compaction plus a move, and the current slot
			// becomes empty.
			doSyncReplaceLogicPage(currentLogicPage,
				destIndex,
				oldCompactedPageSize,
				oldMemPageSize,
				oldRequestNum,
				0,
				currentLogicPage.getCurrentPageChainIndex(),
				splitDataPages.f1,
				invalidPageAddressList,
				true,
				curIndex);
			pageIndex.updateLogicPage(curIndex, NO_PAGE);
			return;
		} else if (splitDataPages.f0 == null && splitDataPages.f1 == null) {
			// No data is left at all: both slots become empty.
			pageIndex.updateLogicPage(destIndex, NO_PAGE);
			pageIndex.updateLogicPage(curIndex, NO_PAGE);

			this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, curIndex, destIndex, invalidPageAddressList);

			// Both slots are NO_PAGE now, so the statistics of the old page are simply subtracted.
			gRegionContext.getPageStoreStats().addLogicPageCount(-1);
			gRegionContext.getPageStoreStats().addLogicPageChainLen(0 - currentLogicPage.getCurrentPageChainIndex() - 1);
			gRegionContext.getPageStoreStats().addLogicPageChainCapacity(0 - currentLogicPage.getPageChainCapacity());

			gRegionContext.getPageStoreStats().addLogicPageSize(0 - currentLogicPage.getPageSize());
			gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, 0 - oldMemPageSize);

			gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);

			return;
		}

		// Regular case: both halves contain data, so each gets its own single-page chain.
		LogicChainedPage pageSpit1 = pageIndex.newLogicChainedPage();
		LogicChainedPage pageSpit2 = pageIndex.newLogicChainedPage();

		// The old request count is divided between the two new pages.
		PageAddress pageAddressSplit1 = pageSpit1.createPage(oldRequestNum / 2, splitDataPages.f0);
		PageAddress pageAddressSplit2 = pageSpit2.createPage(oldRequestNum - oldRequestNum / 2, splitDataPages.f1);

		pageSpit1.addPageSize(pageAddressSplit1.getDataLen());
		pageSpit2.addPageSize(pageAddressSplit2.getDataLen());

		// Set the page of the expanded (destination) slot first.
		pageIndex.updateLogicPage(destIndex, pageSpit2);
		pageIndex.updateLogicPage(curIndex, pageSpit1);

		this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, curIndex, destIndex, invalidPageAddressList);
		this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, pageAddressSplit1);
		this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, pageAddressSplit2);

		// One logic page became two; the two new chains hold one page each (2 in total) while the
		// old chain held getCurrentPageChainIndex() + 1 pages.
		gRegionContext.getPageStoreStats().addLogicPageCount(1);
		gRegionContext.getPageStoreStats().addLogicPageChainLen(2 - currentLogicPage.getCurrentPageChainIndex() - 1);
		gRegionContext.getPageStoreStats().addLogicPageChainCapacity(pageSpit1.getPageChainCapacity() + pageSpit2.getPageChainCapacity() - currentLogicPage.getPageChainCapacity());

		gRegionContext.getPageStoreStats().addLogicPageSize(pageSpit2.getPageSize() + pageSpit1.getPageSize() - currentLogicPage.getPageSize());
		gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, pageSpit2.getPageSize() + pageSpit1.getPageSize() - oldMemPageSize);

		gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);
	}

	/**
	 * Merges two logic pages. Merging is not implemented yet; the method currently only checks
	 * whether the first page is still the one registered in the index.
	 *
	 * @param pageIndexContextFirst context of the first page; must be a
	 *                              {@link PageIndexContextHashImpl}.
	 * @param pageIndexContextSecond context of the second page (not used yet).
	 */
	@Override
	public void mergePage(PageIndexContext pageIndexContextFirst, PageIndexContext pageIndexContextSecond) {
		PageIndexContextHashImpl uPageIndexContextFirst = (PageIndexContextHashImpl) pageIndexContextFirst;
		int curIndexFirst = uPageIndexContextFirst.getCurIndex();
		// Compare the logic page carried by the context, not the context object itself, with the
		// page currently registered in the index (the original compared the context reference,
		// which can never be the registered logic page).
		if (pageIndexContextFirst.getPageID() != pageIndex.getLogicPage(curIndexFirst)) {
			//merge has done.
			return;
		}
		//TODO IN THE FUTURE when shrink index
	}

	/**
	 * Writes the data set as a new delta page at the end of the logic page chain, updates the
	 * statistics, and finally tries to compact the page.
	 *
	 * @param pageIndexContext identifies the logic page to write to.
	 * @param dataSet the data to write; all entries are expected to share the same version.
	 * @param version the version of the data set.
	 */
	private void doWriteDataToPage(
		PageIndexContext pageIndexContext, List>> dataSet, long version) {
		final LogicChainedPage targetLogicPage = pageIndexContext.getPageID();
		final long requestCount = getRequestCount(dataSet);

		// Build the delta page that holds the new data.
		final DataPage deltaPage = doCreateDataPage(version, dataSet, pageIndexContext.getPageIndexID());

		if (deltaPage != null) {
			PageAddress deltaAddress = helpAddDataPage(targetLogicPage, requestCount, deltaPage);

			// A brand-new page normally needs no eviction; it is only registered as evictable
			// when memory is already above the high water mark, which should be rare.
			if (cacheManager.getMemWaterMark(0) == WaterMark.High) {
				this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, deltaAddress);
			}

			final int deltaSize = deltaPage.getSize();
			targetLogicPage.addPageSize(deltaSize);
			gRegionContext.getPageStoreStats().addLogicPageSize(deltaSize);
			gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, deltaSize);
			gRegionContext.getPageStoreStats().addPageRequestCount(requestCount);
			gRegionContext.getPageStoreStats().addPage();
		} else {
			LOG.warn("doWriteDataToPage write empty value");
		}

		// The chain may have grown, so check whether it should be compacted.
		compactPage(pageIndexContext, version);
	}

	/**
	 * Appends a data page to the chain and records the resulting change of chain length and
	 * chain capacity in the page store statistics.
	 *
	 * @param currentLogicPageID the chain to append to.
	 * @param newRequestCount request count stored with the new page.
	 * @param dataPage the page to append.
	 * @return the address created for the appended page.
	 */
	private PageAddress helpAddDataPage(LogicChainedPage currentLogicPageID, long newRequestCount, DataPage dataPage) {
		final int capacityBefore = currentLogicPageID.getPageChainCapacity();
		final PageAddress createdAddress = currentLogicPageID.createPage(newRequestCount, dataPage);

		gRegionContext.getPageStoreStats().addLogicPageChainLen(1);
		// Creating the page may have grown the chain's capacity; publish only the difference.
		gRegionContext.getPageStoreStats().addLogicPageChainCapacity(
			currentLogicPageID.getPageChainCapacity() - capacityBefore);
		return createdAddress;
	}

	/**
	 * Checks whether the page index should grow and expands it once the average logic page has become
	 * large enough.
	 *
	 * <p>Nothing happens when the cache manager forbids expansion, when there are no logic pages, or when
	 * twice the logic page count still fits within the current index capacity. Otherwise a size threshold
	 * is selected from the index-capacity water mark and {@code pageIndex.expand()} is invoked when the
	 * average logic page size reaches that threshold.
	 */
	@Override
	public void checkResource() {
		if (cacheManager.forbidIndexExpand()) {
			LOG.debug("cacheManager forbid index to expand.");
			return;
		}

		// Read the page count once so the zero check, the occupancy check and the division below all
		// work on the same value even if the statistics are updated in between.
		final long logicPageCount = gRegionContext.getPageStoreStats().getLogicPageCount();
		if (logicPageCount == 0) {
			LOG.debug("no page here");
			return;
		}

		// Compared in long arithmetic so that doubling the count cannot overflow.
		final long indexCapacity = gRegionContext.getPageStoreStats().getIndexCapacity();
		if (logicPageCount * 2 <= indexCapacity) {
			LOG.debug("page count {} * 2 less than index capacity {}, not to expand index",
				logicPageCount,
				indexCapacity);
			return;
		}

		// NOTE(review): High -> High, Low -> Middle, anything else -> Low is kept exactly as it was;
		// confirm that mapping Low to the "Middle" threshold is intended.
		final int spilledPageSizeThreshold;
		final WaterMark waterMark = cacheManager.getIndexCapacityWaterMark();
		if (waterMark == WaterMark.High) {
			spilledPageSizeThreshold = this.spilledPageSizeThresholdHigh;
		} else if (waterMark == WaterMark.Low) {
			spilledPageSizeThreshold = this.spilledPageSizeThresholdMiddle;
		} else {
			spilledPageSizeThreshold = this.spilledPageSizeThresholdLow;
		}

		// Divide first, narrow afterwards: a cast binds tighter than '/', so casting the total size to
		// int before dividing would truncate totals larger than Integer.MAX_VALUE.
		final long totalLogicPageSize = gRegionContext.getPageStoreStats().getLogicPageSize();
		final int averagePageSize = (int) Math.min(Integer.MAX_VALUE, totalLogicPageSize / logicPageCount);
		if (averagePageSize >= spilledPageSizeThreshold) {
			pageIndex.expand();
			// the capacity is re-read on purpose so the log shows the value after the expansion.
			LOG.info("averagePageSize {}, spilledPageSizeThreshold {}, to expand index up to {}",
				averagePageSize,
				spilledPageSizeThreshold,
				gRegionContext.getPageStoreStats().getIndexCapacity());
		}
	}

	/**
	 * Tries to start an asynchronous minor compaction for a logic page, reusing the data pages that were
	 * fetched while reading it.
	 *
	 * <p>A compaction task is submitted only when the chain index exceeds
	 * {@code inMemoryCompactionThreshold}, more than one page was fetched, the page status allows
	 * compaction, the running-compaction count does not exceed {@code maxRunningMinorCompaction}, and the
	 * status could be switched from {@code Normal} to {@code Compacting}.
	 *
	 * <p>Every page in {@code fetchedDataPageMap} has its reference count decreased exactly once on the
	 * paths visible here: by this method when no task is submitted, otherwise by the submitted task in
	 * its own {@code finally}.
	 *
	 * @param pageIndexContext   index context of the page that was read; supplies the page index id.
	 * @param logicPageID        logic page that may be compacted.
	 * @param fetchedDataPageMap data pages fetched during the read; see
	 *                           {@code getDataPageAutoLoadIfNeed}, which stores them by chain index.
	 */
	protected void tryLaunchCompactionByRead(
		PageIndexContext pageIndexContext, LogicChainedPage logicPageID, Map fetchedDataPageMap) {
		// stays true until ownership of the fetched pages is handed to the async task.
		boolean releaseFetchMap = true;
		try {
			if (logicPageID.getCurrentPageChainIndex() > inMemoryCompactionThreshold && fetchedDataPageMap.size() > 1) {
				if (logicPageID.getPageStatus().canCompaction()) {
					// reserve a slot first, then check the limit; the reservation is undone if the limit is exceeded.
					// NOTE(review): the counter is incremented on the region's page-store stats but read from the
					// cache manager's stats; presumably the two are linked, confirm.
					gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(1);
					if (cacheManager.getCacheStats().getRuningMinorCompactionByRead() > maxRunningMinorCompaction) {
						gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
					} else {
						// another party changed the status first: give the slot back and leave
						// (the outer finally still releases the fetched pages).
						if (!logicPageID.setPageStatus(PageStatus.Normal, PageStatus.Compacting)) {
							gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
							return;
						}
						// capture the current positions before handing off to the executor.
						final int curChainIndex = logicPageID.getCurrentPageChainIndex();
						final int curPageIndex = pageIndexContext.getPageIndexID();
						EventExecutor eventExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
						// from here on the submitted task is responsible for releasing the fetched pages.
						// NOTE(review): the flag is cleared before submit(); if submit() were to throw, the pages
						// would not be released here and the Compacting status / counter would not be rolled
						// back in this method. Confirm whether submit() can reject.
						releaseFetchMap = false;
						eventExecutor.submit(() -> {
							try {
								// NOTE(review): the matching decrement of the running-compaction counter is not
								// visible in this method; presumably the handler performs it.
								pageCompactHandler.doAsyncMinorCompactionByRead(logicPageID,
									curPageIndex,
									curChainIndex,
									fetchedDataPageMap);
							} catch (GeminiShutDownException e) {
								LOG.warn("GeminiDB has shutdown!");
							} finally {
								fetchedDataPageMap.values().forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
							}
						});
					}
				}
			}
		} finally {

			// no task took ownership, so drop the references taken when the pages were fetched.
			if (releaseFetchMap) {
				fetchedDataPageMap.values().forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
			}
		}

	}

	/**
	 * Compacts several data pages whose values are structured (the visible value types are
	 * {@code PutMap} and {@code PutList}) into one new data page.
	 *
	 * <p>The binary maps of the pages are walked from the last list element to the first, and for every
	 * key the values that still matter are collected:
	 * <ul>
	 *   <li>on a major compaction, entries rejected by the state filter are skipped;</li>
	 *   <li>a {@code Delete} drops the key on a major compaction, or replaces everything collected so far
	 *       with the delete marker on a minor compaction;</li>
	 *   <li>a {@code PutMap}/{@code PutList} replaces everything collected so far;</li>
	 *   <li>any other value type is appended to what was collected.</li>
	 * </ul>
	 * Each key's collected values are then reduced to one value (through {@code doCompactValue} unless a
	 * minor compaction left exactly one value) and handed to {@code doBuildDataPageFromGBinaryMap}.
	 *
	 * <p>NOTE(review): generic type arguments on several declarations below appear to have been stripped
	 * from this copy of the file; restore them from the original source.
	 *
	 * <p>NOTE(review): an empty {@code canCompactPageListReversedOrder} makes {@code index} equal to -1
	 * and the first {@code get(index)} would fail; presumably callers never pass an empty list, confirm.
	 *
	 * @param isMajor                         whether this is a major compaction.
	 * @param canCompactPageListReversedOrder pages to compact, in reversed order.
	 * @param version                         version passed on to the value compaction and the new page.
	 * @param logicPageId                     id passed on to the value compaction and the new page.
	 * @return the page produced by {@code doBuildDataPageFromGBinaryMap}.
	 * @throws GeminiRuntimeException if a key ends up with an empty value list.
	 */
	protected DataPage doCompactPageForStructureValue(
		boolean isMajor, List canCompactPageListReversedOrder, long version, int logicPageId) {
		// pull the binary map out of every page, keeping the (reversed) order of the input.
		List> compactionListReversedOrder = new ArrayList<>();
		for (DataPage dataPage : canCompactPageListReversedOrder) {
			compactionListReversedOrder.add(dataPage.getGBinaryHashMap());
		}

		// start at the end of the reversed list and walk towards index 0.
		int index = compactionListReversedOrder.size() - 1;
		//Value list is right order.
		Map> newMap = new HashMap<>(compactionListReversedOrder.get(index).keyCount());
		long compactionCount = 0;
		StateFilter stateFilter = gRegionContext.getGContext().getStateFilter();
		while (index >= 0) {
			for (Map.Entry entry : compactionListReversedOrder.get(index).getBinaryMap().entrySet()) {
				// NOTE: we must filter here for list page store because list will not be filtered in doCompactValue
				if (isMajor && stateFilter != null && stateFilter.filter(gRegionContext, entry.getValue().getSeqID())) {
					continue;
				}
				if (entry.getValue().getgValueType() == GValueType.Delete) {
					if (isMajor) {
						// major compaction: the key disappears entirely.
						newMap.remove(entry.getKey());
					} else {
						// minor compaction: keep the delete marker, discarding what was collected before it.
						newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
					}
				} else if (entry.getValue().getgValueType() == GValueType.PutMap || entry.getValue().getgValueType() == GValueType.PutList) {
					// a full put replaces everything collected so far for this key.
					newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
				} else {
					// any other value type is appended to the values collected so far.
					if (newMap.containsKey(entry.getKey())) {
						newMap.get(entry.getKey()).add(entry.getValue());
					} else {
						newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
					}
				}
			}

			// the new page carries the sum of the compaction counts of all merged pages.
			compactionCount += compactionListReversedOrder.get(index).getCompactionCount();
			index--;
		}

		Map finalCompactedMap = new HashMap<>(newMap.size());

		//compaction value

		for (Map.Entry> entry : newMap.entrySet()) {
			// every code path above stores a non-empty list, so an empty one indicates a logic error.
			if (entry.getValue().size() == 0) {
				throw new GeminiRuntimeException("Internal BUG!");
			}
			BinaryValue compactedBinaryValue;
			if (entry.getValue().size() == 1 && !isMajor) {
				//if it's major compaction, even only one binary value, we need to do compact to remove deleted record.
				compactedBinaryValue = entry.getValue().get(0);
			} else {
				compactedBinaryValue = doCompactValue(entry.getValue(), isMajor, version, logicPageId);
			}
			finalCompactedMap.put(entry.getKey(), compactedBinaryValue);
		}

		//TODO null should be handled by PageStore
		return doBuildDataPageFromGBinaryMap(isMajor,
			version,
			logicPageId,
			this.pageSerdeFlink.getKeySerde(),
			finalCompactedMap,
			compactionCount);
	}

	/**
	 * Returns the data page at position {@code curIndex} of the given logic page, fetching it through the
	 * fetch policy when the page address currently holds no data page.
	 *
	 * <p>A cache hit or miss is counted in the cache statistics. A fetched page is additionally stored in
	 * {@code fetchedDataPageMap} under {@code curIndex} and has its reference count increased; pages that
	 * were already present are returned without touching either.
	 *
	 * @param logicPageID        logic page whose chain is read.
	 * @param curIndex           position within the chain.
	 * @param fetchedDataPageMap receives the page when it had to be fetched.
	 * @return the data page at the requested position.
	 */
	protected DataPage getDataPageAutoLoadIfNeed(
		LogicChainedPage logicPageID, int curIndex, Map fetchedDataPageMap) {
		final PageAddress address = logicPageID.getPageAddress(curIndex);
		final DataPage residentPage = address.getDataPage();
		if (residentPage != null) {
			this.cacheManager.getCacheStats().addPageCacheHitCount();
			return residentPage;
		}

		this.cacheManager.getCacheStats().addPageCacheMissCount();
		final DataPage fetchedPage = this.gContext.getSupervisor().getFetchPolicy().fetch(address,
			logicPageID,
			curIndex,
			this.gRegionContext,
			this.gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
			true);
		//todo to resuse dataPage need send to handler.
		fetchedDataPageMap.put(curIndex, fetchedPage);
		fetchedPage.addReferenceCount();
		return fetchedPage;
	}

	/**
	 * Adds the keys of every data page in every non-empty logic page chain to
	 * {@code allKeysIncludeDelete}, fetching pages that are not currently held by their page address.
	 *
	 * <p>Chains are walked from their current chain index down to 0. Each page that was obtained has its
	 * reference count decreased once its keys were added (or the attempt failed).
	 *
	 * <p>NOTE(review): the reference count is decreased for pages that were already present as well as
	 * for fetched ones; presumably {@code getDataPage()} hands out a counted reference, confirm.
	 *
	 * @param allKeysIncludeDelete set that receives the keys.
	 */
	@Override
	public void allKeysIncludeDeleted(Set allKeysIncludeDelete) {
		// as we know, removeAll will happen after getAll in mini batch(KeyedBundleOperator), so
		// there is no need to update read cache and trigger compaction
		for (LogicChainedPage chain : pageIndex.getPageIndex()) {
			if (isNullPage(chain)) {
				continue;
			}
			int position = chain.getCurrentPageChainIndex();
			while (position >= 0) {
				final PageAddress address = chain.getPageAddress(position);
				DataPage page = address.getDataPage();
				try {
					if (page != null) {
						this.cacheManager.getCacheStats().addPageCacheHitCount();
					} else {
						this.cacheManager.getCacheStats().addPageCacheMissCount();
						page = this.gContext.getSupervisor().getFetchPolicy().fetch(address,
							chain,
							position,
							this.gRegionContext,
							this.gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
							false);
					}
					allKeysIncludeDelete.addAll(page.getPOJOSet());
				} finally {
					if (page != null) {
						page.delReferenceCount(ReleaseType.Normal);
					}
				}
				position--;
			}
		}
	}

	/**
	 * Computes the total request count represented by a batch of records.
	 *
	 * <p>Each kind of value (map, list or plain value) is counted differently, so every concrete store
	 * supplies its own calculation.
	 *
	 * <p>NOTE(review): the generic type arguments of {@code dataSet} look truncated in this copy of the
	 * file.
	 *
	 * @param dataSet records organized as a list.
	 * @return total request count for this list.
	 */
	abstract long getRequestCount(List>> dataSet);

	/**
	 * Builds a new data page from a batch of records.
	 *
	 * <p>{@code doWriteDataToPage} in this class treats a {@code null} result as "nothing to write" and
	 * only logs a warning in that case.
	 *
	 * <p>NOTE(review): the generic type arguments of {@code dataSet} look truncated in this copy of the
	 * file.
	 *
	 * @param version     version of the new DataPage.
	 * @param dataSet     records, organized as a list, that will be written to this DataPage.
	 * @param logicPageId index id of the DataPage.
	 * @return a concrete DataPage, such as DataPageKVImpl, DataPageKMapImpl or DataPageKListImpl.
	 */
	abstract DataPage doCreateDataPage(long version, List>> dataSet, int logicPageId);

	/**
	 * Compacts a list of data pages into one new data page.
	 *
	 * @param isMajor                         whether the compaction is major or minor.
	 * @param canCompactPageListReversedOrder DataPages to be compacted; this list is in reversed order.
	 * @param version                         current version for the new page.
	 * @param logicPageId                     index id of the DataPage.
	 * @return a new compacted DataPage.
	 */
	@VisibleForTesting
	public abstract DataPage doCompactPage(
		boolean isMajor, List canCompactPageListReversedOrder, long version, int logicPageId);

	/**
	 * Compacts the values collected for a single key into one value.
	 *
	 * <p>Invoked by {@code doCompactPageForStructureValue} when the value is structural, such as
	 * Map/List/Set.
	 *
	 * @param binaryValueList value list to be compacted.
	 * @param isMajor         whether the compaction is major or minor.
	 * @param version         current version.
	 * @param logicPageId     index id of the DataPage.
	 * @return the compacted value.
	 */
	abstract BinaryValue doCompactValue(
		List binaryValueList, boolean isMajor, long version, int logicPageId);

	/**
	 * Builds the data page that results from a compaction out of the already compacted key/value map.
	 *
	 * <p>Called at the end of {@code doCompactPageForStructureValue} with the store's key serializer, the
	 * map of compacted values and the summed compaction count of the merged pages.
	 *
	 * @param isMajor           whether the compaction that produced the map was major or minor.
	 * @param version           version for the new page.
	 * @param logicPageId       index id of the DataPage.
	 * @param keySerde          serializer for the keys.
	 * @param finalCompactedMap compacted value per key.
	 * @param compactionCount   accumulated compaction count of the pages that were merged.
	 * @return the new DataPage; whether {@code null} is a possible result is not visible here (the
	 *         caller carries a TODO about null handling).
	 */
	protected abstract DataPage doBuildDataPageFromGBinaryMap(
		boolean isMajor,
		long version,
		int logicPageId,
		TypeSerializer keySerde,
		Map finalCompactedMap,
		long compactionCount);

	/**
	 * Tells whether a logic page holds no data page at all.
	 *
	 * @param logicPageID logic page to inspect; may be {@code null}.
	 * @return {@code true} when the reference is {@code null} or its current chain index is -1.
	 */
	protected boolean isNullPage(LogicChainedPage logicPageID) {
		if (logicPageID == null) {
			return true;
		}
		return logicPageID.getCurrentPageChainIndex() == -1;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy