Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.page;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.GRegionID;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiShutDownException;
import org.apache.flink.runtime.state.gemini.engine.filter.StateFilter;
import org.apache.flink.runtime.state.gemini.engine.handler.GeminiEventExecutorTask;
import org.apache.flink.runtime.state.gemini.engine.handler.PageCompactHandler;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryKey;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBufferAddressMapping;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManager;
import org.apache.flink.runtime.state.gemini.engine.vm.DataPageLRU;
import org.apache.flink.util.Preconditions;
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.NO_PAGE;
import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.WAIT_SPLITTING_PAGE;
import static org.apache.flink.util.Preconditions.checkState;
/**
* General implementation of a page store whose page index is a {@link PageIndexHashImpl}.
*/
public abstract class AbstractHashPageStore implements PageStore {
private static final Logger LOG = LoggerFactory.getLogger(AbstractHashPageStore.class);
// NOTE(review): not referenced in the visible part of this file; presumably the minimum size of a new page
// that is worth persisting -- confirm against the rest of the class.
private static final int MIN_NEW_PAGE_SIZE_TO_PERSIST = 2048;
//TODO provide HashCodePageIndex interface.
// Page index of this store; either the instance passed to the constructor or one created there.
protected final PageIndexHashImpl pageIndex;
// Context of the owning region, taken from gRegion in the constructor.
protected final GRegionContext gRegionContext;
// Executor returned by getExecutor(); compaction results are applied to the page index on it.
protected final EventExecutor eventExecutor;
// Cache manager obtained from the supervisor of gContext.
protected final CacheManager cacheManager;
// Engine-wide context, taken from gRegionContext.
protected final GContext gContext;
// Type of data page handled by this store.
protected final DataPage.DataPageType dataPageType;
// Read from GConfiguration#getSplitPageSizeThreshold().
private final int splitPageSizeThreshold;
// Read from GConfiguration#getMaxCompactionThreshold(); a chain whose current index reaches this value
// is a candidate for major compaction.
private final int maxChainLenThreshold;
// Region this store belongs to.
protected final GRegion gRegion;
// Adapter that forwards compaction callbacks to the private compaction methods of this class.
private final PageCompactHandler pageCompactHandler;
// Read from GConfiguration#getInMemoryCompactionThreshold(); minor compaction is only attempted for
// chains/candidate counts above this value.
private final int inMemoryCompactionThreshold;
// Upper bound on concurrently running major compactions before falling back to a forced minor compaction.
private final int maxRunningMajorCompaction;
// Upper bound on concurrently running minor compactions (ignored for forced minor compactions).
private final int maxRunningMinorCompaction;
// Page serde taken from gRegionContext and cast to PageSerdeFlink.
protected final PageSerdeFlink pageSerdeFlink;
// Executor picked from the supervisor's "LRU into main cache" executor group.
private final EventExecutor lruIntoMainEventExecutor;
// Starts at -1; NOTE(review): presumably the timestamp of the last LRU-into-main-cache run -- its use is
// outside the visible part of this file.
private long lastLruIntoMainCacheTimeMs = -1;
// Read from GConfiguration#getLruIntoMainCacheSleepMs().
private final int lruIntoMainCacheSleepMs;
// Read from GConfiguration#isEnableAddIntoMainWhenSplitting().
private final boolean enableAddIntoMainWhenSplitting;
// Read from GConfiguration#isEnableLoadPageFromLRUIntoMain().
private final boolean enableLoadPageFromLRUIntoMainCache;
// Cache manager's memory high mark divided by the configured number of region threads.
protected final long curRegionMemHighMark;
/**
 * Creates a page store that builds its own {@link PageIndexHashImpl}.
 *
 * <p>Delegates to the four-argument constructor with a {@code null} page index.
 *
 * @param dataPageType type of data page handled by this store.
 * @param gRegion region this store belongs to.
 * @param eventExecutor executor returned by {@link #getExecutor()}.
 */
public AbstractHashPageStore(
DataPage.DataPageType dataPageType,
GRegion gRegion,
EventExecutor eventExecutor) {
this(dataPageType, gRegion, null, eventExecutor);
}
/**
 * Creates a page store for the given region and wires it to the region context, the cache manager and the
 * engine configuration.
 *
 * @param dataPageType type of data page handled by this store.
 * @param gRegion region this store belongs to; its {@link GRegionContext} supplies the engine context.
 * @param pageIndex page index to reuse, or {@code null} to create a new {@link PageIndexHashImpl}. A non-null
 *                  value is cast to {@link PageIndexHashImpl}.
 * @param eventExecutor executor returned by {@link #getExecutor()}; compaction results are applied on it.
 */
public AbstractHashPageStore(
DataPage.DataPageType dataPageType,
GRegion gRegion,
@Nullable PageIndex pageIndex,
EventExecutor eventExecutor) {
this.dataPageType = dataPageType;
this.gRegion = gRegion;
this.gRegionContext = gRegion.getGRegionContext();
this.eventExecutor = eventExecutor;
GConfiguration configuration = gRegionContext.getGContext().getGConfiguration();
if (pageIndex != null) {
//TODO #SR rewrite this to use interface.
this.pageIndex = (PageIndexHashImpl) pageIndex;
} else {
// NOTE: 'this' is handed to the new index before the remaining fields below are assigned.
this.pageIndex = new PageIndexHashImpl<>(configuration, this, gRegionContext.getPageStoreStats());
}
// gContext must be assigned before cacheManager, which is looked up through it.
this.gContext = gRegionContext.getGContext();
this.cacheManager = this.gContext.getSupervisor().getCacheManager();
gRegionContext.getPageStoreStats().setPageSizeRate(configuration.getPageSizeRateBetweenPOJOAndHeap());
this.splitPageSizeThreshold = configuration.getSplitPageSizeThreshold();
this.maxChainLenThreshold = configuration.getMaxCompactionThreshold();
this.inMemoryCompactionThreshold = configuration.getInMemoryCompactionThreshold();
this.maxRunningMajorCompaction = configuration.getMaxRunningMajorCompaction();
this.maxRunningMinorCompaction = configuration.getMaxRunningMinorCompaction();
this.pageSerdeFlink = (PageSerdeFlink) gRegionContext.getPageSerdeFlink();
this.lruIntoMainCacheSleepMs = configuration.getLruIntoMainCacheSleepMs();
this.enableAddIntoMainWhenSplitting = configuration.isEnableAddIntoMainWhenSplitting();
this.enableLoadPageFromLRUIntoMainCache = configuration.isEnableLoadPageFromLRUIntoMain();
// Per-region share of the cache manager's memory high mark.
this.curRegionMemHighMark = cacheManager.getMemHighMark() / configuration.getRegionThreadNum();
this.lruIntoMainEventExecutor = gContext.getSupervisor().getLruIntoMainCacheExecutorGroup().next();
// Adapter: every callback simply forwards to the matching private method of this store.
this.pageCompactHandler = new PageCompactHandler() {
@Override
public void doAsyncMajorCompaction(
PageIndexContext pageIndexContext,
LogicalPageChain logicalPageChain,
int curPageIndex,
int curChainIndex,
long version) {
doMajorCompaction(pageIndexContext, logicalPageChain, curPageIndex, curChainIndex, version);
}
@Override
public void doAsyncMinorCompaction(
PageIndexContext pageIndexContext,
LogicalPageChain logicalPageChain,
int curChainIndex,
long version,
boolean force) {
doMinorCompaction(pageIndexContext, logicalPageChain, curChainIndex, version, force);
}
@Override
public void doSyncReplace(
LogicalPageChain logicalPageChain,
int curPageIndex,
int oldCompactedPageSize,
int oldCompactedSubPageNum,
int oldCompactedSubPageSize,
long oldRequestCount,
int inclusiveCompactionStartChainIndex,
int inclusiveCompactionEndChainIndex,
DataPage compactedDataPage,
List invalidPageAddressList,
int relatedIndex) {
// Compaction replacement is never a split, hence isSplit == false.
doSyncReplaceLogicalPage(logicalPageChain,
curPageIndex,
oldCompactedPageSize,
oldCompactedSubPageNum,
oldCompactedSubPageSize,
oldRequestCount,
inclusiveCompactionStartChainIndex,
inclusiveCompactionEndChainIndex,
compactedDataPage,
invalidPageAddressList,
false,
relatedIndex);
}
@Override
public void doAsyncMinorCompactionByRead(
PageIndexContext pageIndexContext,
LogicalPageChain logicalPageChain,
int curPageIndex,
int curChainIndex,
Map fetchedDataPageMap) {
doMinorCompactionByRead(pageIndexContext, logicalPageChain, curPageIndex, curChainIndex, fetchedDataPageMap);
}
};
}
/**
 * Returns the executor this store was constructed with.
 *
 * @return the event executor of this page store.
 */
@Override
public EventExecutor getExecutor() {
    return eventExecutor;
}
/**
 * Tells whether the store holds a value for the given key.
 *
 * @param key key to look up.
 * @return {@code true} when {@code get(key)} yields a non-null value.
 */
@Override
public boolean contains(K key) {
    // For a plain key/value store a null lookup result means "absent".
    final boolean present = get(key) != null;
    return present;
}
/**
 * Returns the page index backing this store.
 *
 * @return the page index of this store.
 */
@Override
public PageIndex getPageIndex() {
    return this.pageIndex;
}
/**
 * Returns the type of data page handled by this store.
 *
 * @return the data page type given at construction time.
 */
public DataPage.DataPageType getDataPageType() {
    return this.dataPageType;
}
@Override
public void addPage(PageIndexContext pageIndexContext, List>> dataSet, long version) {
LogicalPageChain currentLogicalPageChain = pageIndexContext.getLogicalPageChain();
if (currentLogicalPageChain == NO_PAGE) {
String msg = "BUG! addOrMergePage receive NO_PAGE request.";
LOG.error(msg);
throw new GeminiRuntimeException(msg);
}
if (dataSet == null || dataSet.isEmpty()) {
if (!pageIndexContext.isNeedSplit()) {
compactPage(pageIndexContext, version);
}
} else {
doWriteDataToPage(pageIndexContext, dataSet, version);
}
}
/**
 * Decides whether the logical page chain of the given page index needs compaction and, if so, schedules it on
 * an executor taken from the supervisor's compaction executor group.
 *
 * <p>Nothing happens when the chain is no longer the one registered in the page index, when its current chain
 * index is not positive, or when its status does not allow compaction. A chain whose current index has reached
 * {@code maxChainLenThreshold} gets a major compaction, unless the number of running major compactions exceeds
 * {@code maxRunningMajorCompaction}, in which case a forced minor compaction is attempted instead. A chain
 * whose current index is only above {@code inMemoryCompactionThreshold} gets a non-forced minor compaction
 * attempt.
 *
 * @param pageIndexContext context identifying the page index slot and its logical page chain.
 * @param version version forwarded to the compaction handler.
 * @throws GeminiRuntimeException wrapping any exception raised while deciding or scheduling.
 */
@Override
public void compactPage(PageIndexContext pageIndexContext, final long version) {
try {
LogicalPageChain logicalPageChain = pageIndexContext.getLogicalPageChain();
final int curPageIndex = pageIndexContext.getPageIndexID();
if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
//the chain has already been replaced, so compaction was done by someone else.
return;
}
//no more page to do compaction.
if (logicalPageChain.getCurrentPageChainIndex() <= 0) {
return;
}
if (!logicalPageChain.getPageStatus().canCompaction()) {
return;
}
// Snapshot of the chain index; the scheduled task compacts up to this index.
final int curChainIndex = logicalPageChain.getCurrentPageChainIndex();
final LogicalPageChain compactionLogicalPageChain = logicalPageChain;
//if needed, launch an asynchronous major compaction.
if (logicalPageChain.getCurrentPageChainIndex() >= maxChainLenThreshold) {
if (logicalPageChain.getPageStatus().canCompaction()) {
// Optimistically count this compaction as running; every bail-out below undoes the increment.
gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(1);
if (cacheManager.getCacheStats().getRunningMajorCompactedPages() > maxRunningMajorCompaction) {
gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);
// try to minor compaction in force mode
tryLaunchMinorCompaction(
pageIndexContext,
version,
logicalPageChain,
curChainIndex,
compactionLogicalPageChain,
true);
return;
}
// Only one compaction may own the chain: Normal -> Compacting must succeed.
if (!logicalPageChain.compareAndSetStatus(PageStatus.Normal, PageStatus.Compacting)) {
gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);
return;
}
EventExecutor eventExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
// The pages obtained here are released on every path below (cancel, after the task ran, or when not scheduled).
Set dataPages = compactionLogicalPageChain.getAllDataPageReferenced();
if (gContext.isDBNormal() && !eventExecutor.isShuttingDown()) {
eventExecutor.execute(new GeminiEventExecutorTask() {
@Override
public void cancel() {
dataPages.forEach(dataPage -> dataPage.release());
}
@Override
public void run() {
try {
pageCompactHandler.doAsyncMajorCompaction(
pageIndexContext,
compactionLogicalPageChain,
curPageIndex,
curChainIndex,
version);
} catch (GeminiShutDownException ignore) {
LOG.debug("GeminiDB has shutdown!", ignore);
} catch (Exception e) {
LOG.error("async major compaction failed", e);
} finally {
dataPages.forEach(dataPage -> dataPage.release());
}
}
});
} else {
// NOTE(review): on this path the running-major counter and the Compacting status are left as they are.
dataPages.forEach(dataPage -> dataPage.release());
}
}
} else if (logicalPageChain.getCurrentPageChainIndex() > inMemoryCompactionThreshold) {
// try to do minor compaction, only involving pages which are resident in memory.
tryLaunchMinorCompaction(
pageIndexContext,
version,
logicalPageChain,
curChainIndex,
compactionLogicalPageChain,
false);
}
} catch (Exception e) {
LOG.error("Bug " + e.getMessage(), e);
throw new GeminiRuntimeException(e);
}
}
/**
 * Schedules an asynchronous minor compaction for the chain when enough of its newest pages are in memory.
 *
 * <p>The running-minor-compaction counter is incremented up front and decremented again on every path that
 * ends without scheduling a task.
 *
 * @param pageIndexContext context forwarded to the compaction handler.
 * @param version version forwarded to the compaction handler.
 * @param logicalPageChain chain that is inspected and whose status is switched to Compacting.
 * @param curChainIndex newest chain index to include; candidates are counted from here downwards.
 * @param compactionLogicalPageChain chain handed to the compaction handler.
 * @param force when {@code true}, the running-compaction limit and the compaction-count heuristic are skipped.
 */
private void tryLaunchMinorCompaction(
    PageIndexContext pageIndexContext,
    long version,
    LogicalPageChain logicalPageChain,
    int curChainIndex,
    LogicalPageChain compactionLogicalPageChain,
    boolean force) {
    if (!logicalPageChain.getPageStatus().canCompaction()) {
        return;
    }

    gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(1);
    if (!force
        && cacheManager.getCacheStats().getRunningMinorCompactedPages() > maxRunningMinorCompaction) {
        gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
        return;
    }

    // Count the in-memory pages at the tail of the chain that qualify for this compaction.
    int residentCandidates = 0;
    long accumulatedCompactionCount = -1;
    for (int chainIndex = curChainIndex; chainIndex >= 0; chainIndex--) {
        // Only used for counting, so no reference is taken on the page.
        DataPage candidate = logicalPageChain.getPageAddress(chainIndex).getDataPageNoReference();
        if (candidate == null) {
            break;
        }
        if (!force) {
            // Heuristic: stop at the first page whose compaction count exceeds the running sum.
            long compactionCount = candidate.getCompactionCount();
            if (accumulatedCompactionCount != -1 && accumulatedCompactionCount < compactionCount) {
                break;
            }
            accumulatedCompactionCount = accumulatedCompactionCount == -1
                ? compactionCount
                : accumulatedCompactionCount + compactionCount;
        }
        residentCandidates++;
    }

    if (residentCandidates <= inMemoryCompactionThreshold) {
        gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
        return;
    }
    if (!logicalPageChain.compareAndSetStatus(PageStatus.Normal, PageStatus.Compacting)) {
        gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
        return;
    }

    EventExecutor compactionExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
    compactionExecutor.execute(() -> {
        try {
            pageCompactHandler.doAsyncMinorCompaction(
                pageIndexContext,
                compactionLogicalPageChain,
                curChainIndex,
                version,
                force);
        } catch (GeminiShutDownException ignore) {
            LOG.debug("GeminiDB has shutdown!", ignore);
        } catch (Exception e) {
            LOG.error("async minor compaction failed", e);
        }
    });
}
/**
 * Installs a new logical page chain in which the pages at chain indexes
 * {@code [inclusiveCompactionStartChainIndex, inclusiveCompactionEndChainIndex]} of {@code logicalPageChain}
 * are replaced by {@code compactedDataPage}, then discards the replaced pages and updates cache and statistics.
 *
 * <p>If the page index slot no longer holds the expected chain (or, for a split, is no longer
 * WAIT_SPLITTING_PAGE), nothing is replaced, {@code compactedDataPage} is released and {@code null} is returned.
 *
 * @param logicalPageChain chain the compaction was computed from.
 * @param curPageIndex page index slot to update.
 * @param oldCompactedPageSize total data length of the replaced pages.
 * @param oldCompactedSubPageNum total sub page count of the replaced pages.
 * @param oldCompactedSubPageSize total sub page data length of the replaced pages.
 * @param oldRequestCount accumulated request count of the replaced pages, carried over to the new page.
 * @param inclusiveCompactionStartChainIndex first replaced chain index.
 * @param inclusiveCompactionEndChainIndex last replaced chain index.
 * @param compactedDataPage result of the compaction; may be {@code null}.
 * @param invalidPageAddressList addresses of the replaced pages.
 * @param isSplit whether the call is part of a page split.
 * @param relatedIndex not used in this method.
 * @return the newly installed chain, or {@code null} when the replacement was skipped.
 */
private LogicalPageChain doSyncReplaceLogicalPage(
LogicalPageChain logicalPageChain,
int curPageIndex,
int oldCompactedPageSize,
int oldCompactedSubPageNum,
int oldCompactedSubPageSize,
long oldRequestCount,
int inclusiveCompactionStartChainIndex,
int inclusiveCompactionEndChainIndex,
DataPage compactedDataPage,
List invalidPageAddressList,
boolean isSplit,
int relatedIndex) {
if (isSplit) {
if (pageIndex.getLogicPage(curPageIndex) != WAIT_SPLITTING_PAGE) {
if (compactedDataPage != null) {
//compactedDataPage is not used, release it to reclaim memory.
compactedDataPage.release();
}
return null;
}
} else {
if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
//the chain was already replaced, so this compaction result is stale.
if (compactedDataPage != null) {
//compactedDataPage is not used, release it to reclaim memory.
compactedDataPage.release();
}
return null;
}
}
int compactedPageSize = 0;
int compactedMemSize = 0;
int compactedSubPageNum = 0;
int compactedSubPageSize = 0;
PageAddress compactedPageAddress = null;
LogicalPageChain compactedLogicalPageChain = pageIndex.createLogicalPageChain();
// Pages before the compacted range are carried over unchanged.
for (int i = 0; i < inclusiveCompactionStartChainIndex; i++) {
compactedLogicalPageChain.insertPage(logicalPageChain.getPageAddress(i));
}
if (compactedDataPage != null) {
//Major compaction may produce a null page. It is never null during minor compaction, because even with TTL
// or removeAll, minor compaction keeps these data.
compactedPageAddress = compactedLogicalPageChain.createPage(compactedDataPage);
//NOTICE: need to use PageAddress len, so can get total size if it's a compositePageAddress.
compactedPageSize = compactedPageAddress.getDataLen();
//map split may keep the subPage on disk.
compactedMemSize = compactedPageAddress.getMemorySize();
compactedSubPageNum = compactedPageAddress.getSubPageNum();
compactedSubPageSize = compactedPageAddress.getSubPageDataLen();
// The request count is masked to a non-negative int before being handed to the new page.
compactedPageAddress.addRequestCountForNewPage(cacheManager.getCurrentTickTime(),
(int) (oldRequestCount & 0X7FFFFFFF));
}
// Pages appended after the compacted range (e.g. while the compaction was running) are carried over too.
for (int i = inclusiveCompactionEndChainIndex + 1; i <= logicalPageChain.getCurrentPageChainIndex(); i++) {
compactedLogicalPageChain.insertPage(logicalPageChain.getPageAddress(i));
}
compactedLogicalPageChain.addPageSize(logicalPageChain.getPageSize() - oldCompactedPageSize + compactedPageSize);
pageIndex.updateLogicPage(curPageIndex, compactedLogicalPageChain);
// Read the in-memory size of the replaced pages before they are discarded below.
int oldMemPageSize = syncGetMemPageSizeFromInvalidPageAddressList(invalidPageAddressList);
//first find if there are reusing pageAddress.
List findRealNeedDiscardPage = findNeededDiscardPage(invalidPageAddressList, compactedPageAddress);
//first set page invalid.
gContext.getSupervisor().discardPage(gRegionContext, findRealNeedDiscardPage);
removeInvalidPage(gRegion, invalidPageAddressList);
if (compactedPageAddress != null) {
//write to dfs or local disk.
gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, compactedPageAddress, compactedMemSize);
// Re-read the memory size after persisting; only pages that still occupy memory go to the evict policy.
compactedMemSize = compactedPageAddress.getMemorySize();
if (compactedMemSize > 0) {
this.cacheManager.getEvictPolicy().addPage(gRegion, compactedPageAddress);
}
}
// All statistics below are updated with the delta between the new and the old chain.
this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, compactedMemSize - oldMemPageSize);
gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, compactedMemSize - oldMemPageSize);
gRegionContext.getPageStoreStats().addLogicPageSize(compactedLogicalPageChain.getPageSize() - logicalPageChain.getPageSize());
gRegionContext.getPageStoreStats().addLogicPageChainLen(compactedLogicalPageChain.getCurrentPageChainIndex() - logicalPageChain.getCurrentPageChainIndex());
gRegionContext.getPageStoreStats().addLogicSubPageCount(compactedSubPageNum - oldCompactedSubPageNum);
gRegionContext.getPageStoreStats().addLogicSubPageSize(compactedSubPageSize - oldCompactedSubPageSize);
gRegionContext.getPageStoreStats().addLogicPageChainCapacity(compactedLogicalPageChain.getPageChainCapacity() - logicalPageChain.getPageChainCapacity());
return compactedLogicalPageChain;
}
/**
 * Tells the evict policy that each of the given page addresses is no longer valid.
 *
 * @param gRegion region the pages belong to.
 * @param invalidPageAddressList addresses of the pages to remove from the evict policy.
 */
private void removeInvalidPage(
    GRegion gRegion, List invalidPageAddressList) {
    Iterator<PageAddress> addresses = invalidPageAddressList.iterator();
    while (addresses.hasNext()) {
        PageAddress invalidAddress = addresses.next();
        this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, invalidAddress);
    }
}
/**
 * Sums the in-memory size of all given page addresses (fix #21430210).
 *
 * <p>Compaction runs asynchronously while eviction may change the memory size of a page, so the sizes have to
 * be read on the 'sync' region thread.
 *
 * @param invalidPageAddressList addresses whose memory sizes are added up.
 * @return the sum of {@code getMemorySize()} over the list; 0 for an empty list.
 */
private int syncGetMemPageSizeFromInvalidPageAddressList(List invalidPageAddressList) {
    int totalMemorySize = 0;
    Iterator<PageAddress> addresses = invalidPageAddressList.iterator();
    while (addresses.hasNext()) {
        totalMemorySize += addresses.next().getMemorySize();
    }
    return totalMemorySize;
}
/**
 * Compacts the newest in-memory pages of a chain into one page and schedules the replacement of those pages on
 * this store's executor.
 *
 * <p>Pages are collected from {@code curChainIndex} downwards for as long as they are available in memory. If
 * too few pages qualify, the chain status is reset to Normal and nothing is compacted.
 *
 * @param pageIndexContext context identifying the page index slot.
 * @param logicalPageChain chain to compact; must still be the one registered in the page index.
 * @param curChainIndex newest chain index to include.
 * @param version not used in this method.
 * @param force when {@code true}, the compaction-count heuristic is skipped.
 * @throws GeminiShutDownException when the DB is not in normal status after collecting the pages.
 */
private void doMinorCompaction(
    PageIndexContext pageIndexContext,
    LogicalPageChain logicalPageChain,
    int curChainIndex,
    long version,
    boolean force) {
    final int curPageIndex = pageIndexContext.getPageIndexID();
    if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
        //the chain was already replaced, nothing left to compact.
        gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
        return;
    }
    // get dataPage list to do compaction.
    List canCompactPageListReversedOrder = new ArrayList<>();
    List invalidPageAddressList = new ArrayList<>();
    int oldPageSize = 0;
    int oldSubPageNum = 0;
    int oldSubPageSize = 0;
    long oldRequestCount = 0;
    int startCompactionIndex = curChainIndex;
    long lastSumCompactedThreshold = -1;
    while (startCompactionIndex >= 0) {
        PageAddress pageAddress = logicalPageChain.getPageAddress(startCompactionIndex);
        // Every non-null page obtained here is released again on all paths below.
        DataPage dataPage = pageAddress.getDataPage();
        if (dataPage != null && dataPage.refCnt() > 1) {
            if (!force) {
                long compactedCount = dataPage.getCompactionCount();
                //heuristic: stop at the first page whose compaction count exceeds the running sum.
                if (lastSumCompactedThreshold == -1) {
                    lastSumCompactedThreshold = compactedCount;
                } else if (lastSumCompactedThreshold >= compactedCount) {
                    lastSumCompactedThreshold += compactedCount;
                } else {
                    dataPage.release();
                    break;
                }
            }
            //NOTICE: need to use PageAddress len, so can get total size if it's a compositePageAddress.
            oldPageSize += pageAddress.getDataLen();
            oldSubPageNum += pageAddress.getSubPageNum();
            oldSubPageSize += pageAddress.getSubPageDataLen();
            canCompactPageListReversedOrder.add(dataPage);
            invalidPageAddressList.add(pageAddress);
            oldRequestCount += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
            startCompactionIndex--;
        } else {
            if (dataPage != null) {
                dataPage.release();
            }
            break;
        }
    }
    if (!gContext.isDBNormal()) {
        canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
        throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
    }
    if (canCompactPageListReversedOrder.size() <= inMemoryCompactionThreshold) {
        // Not worth compacting: hand the chain back and undo the running counter.
        logicalPageChain.compareAndSetStatus(PageStatus.Compacting, PageStatus.Normal);
        gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
        canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
        return;
    }
    final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;
    gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());
    final DataPage compactedDataPage = doCompactPage(
        pageIndexContext,
        inclusiveCompactionStartChainIndex == 0,
        canCompactPageListReversedOrder,
        gContext.getCurVersion(),
        pageIndexContext.getPageIndexID());
    canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
    final long finalOldRequstCount = oldRequestCount;
    final int finalOldPageSize = oldPageSize;
    final int finalOldSubPageNum = oldSubPageNum;
    final int finalOldSubPageSize = oldSubPageSize;
    if (!gContext.isDBNormal() || this.getExecutor().isShuttingDown()) {
        // The result will never be installed; guard against a null result like the stats code below does.
        if (compactedDataPage != null) {
            compactedDataPage.release();
        }
    } else {
        gRegionContext.getPageStoreStats().addRunningCompactingPageSize(compactedDataPage == null
            ? 0
            : compactedDataPage.getSize());
        this.getExecutor().execute(new GeminiEventExecutorTask() {
            @Override
            public void cancel() {
                if (compactedDataPage != null) {
                    compactedDataPage.release();
                }
            }
            @Override
            public void run() {
                try {
                    pageCompactHandler.doSyncReplace(logicalPageChain,
                        curPageIndex,
                        finalOldPageSize,
                        finalOldSubPageNum,
                        finalOldSubPageSize,
                        finalOldRequstCount,
                        inclusiveCompactionStartChainIndex,
                        curChainIndex,
                        compactedDataPage,
                        invalidPageAddressList,
                        curPageIndex);
                } catch (GeminiShutDownException e) {
                    if (compactedDataPage != null && compactedDataPage.refCnt() == 1) {
                        compactedDataPage.release();
                    }
                    LOG.warn("GeminiDB has shutdown!");
                } catch (Exception e) {
                    // Logged at ERROR, like the other compaction paths: the DB is put into internal-error state.
                    LOG.error("Internal Bug", e);
                    if (compactedDataPage != null && compactedDataPage.refCnt() == 1) {
                        compactedDataPage.release();
                    }
                    gContext.setDBInternalError(e);
                } finally {
                    gRegionContext.getPageStoreStats().addRunningMinorCompactedPages(-1);
                    gRegionContext.getPageStoreStats().addRunningCompactingPageSize(-(compactedDataPage == null
                        ? 0
                        : compactedDataPage.getSize()));
                }
            }
        });
    }
}
/**
 * Compacts all pages of a chain from {@code curChainIndex} down to index 0 into one page and schedules the
 * replacement on this store's executor. Pages that are not in memory are fetched through the fetch policy.
 *
 * <p>The compacted result may be {@code null}; every release/refCnt access is therefore null-guarded.
 *
 * @param pageIndexContext context identifying the page index slot.
 * @param logicalPageChain chain to compact; must still be the one registered in the page index.
 * @param curPageIndex page index slot of the chain.
 * @param curChainIndex newest chain index to include.
 * @param version not used in this method.
 * @throws GeminiShutDownException when the DB is not in normal status after collecting the pages.
 * @throws GeminiRuntimeException when no page could be collected.
 */
private void doMajorCompaction(
    PageIndexContext pageIndexContext,
    LogicalPageChain logicalPageChain,
    int curPageIndex,
    int curChainIndex,
    long version) {
    if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
        //the chain was already replaced, nothing left to compact.
        gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);
        return;
    }
    // get dataPage list to do compaction.
    List dataPageListReversedOrder = new ArrayList<>();
    List invalidPageAddressList = new ArrayList<>();
    long oldRequestCount = 0;
    int oldCompactedPageSize = 0;
    int oldCompactedSubPageNum = 0;
    int oldCompactedSubPageSize = 0;
    int cix = curChainIndex;
    //collect every page from curChainIndex down to 0; pages appended to the chain after curChainIndex are
    //not compacted and are carried over when the result is installed.
    while (cix >= 0 && gContext.isDBNormal()) {
        PageAddress pageAddress = logicalPageChain.getPageAddress(cix);
        DataPage dataPage = pageAddress.getDataPage();
        if (dataPage == null) {
            this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
            //FetchPolicy should be thread safe. because we want compaction also use LRU.
            //NOTICE: compositePageAddress's mainDataPage is not in memory, we can be sure that all of subDataPage are not in memory.
            // and we should prefetch them.
            GByteBuffer gByteBuffer = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
                logicalPageChain,
                pageIndexContext.getPageIndexID(),
                cix,
                this.gRegionContext,
                false,
                false);
            dataPage = boxDataPage(pageAddress, gByteBuffer, pageIndexContext.getPageIndexID(), pageIndexContext.getLogicalPageChain().hashCode());
        }
        //NOTICE: need to use PageAddress len, so can get total size if it's a compositePageAddress.
        oldCompactedPageSize += pageAddress.getDataLen();
        oldCompactedSubPageNum += pageAddress.getSubPageNum();
        oldCompactedSubPageSize += pageAddress.getSubPageDataLen();
        dataPageListReversedOrder.add(dataPage);
        invalidPageAddressList.add(pageAddress);
        oldRequestCount += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
        cix--;
    }
    if (!gContext.isDBNormal()) {
        dataPageListReversedOrder.forEach(dataPage -> dataPage.release());
        throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
    }
    if (dataPageListReversedOrder.isEmpty()) {
        throw new GeminiRuntimeException("BUG");
    }
    gRegionContext.getPageStoreStats().addMajorCompactedPages(dataPageListReversedOrder.size());
    final DataPage compactedDataPage = doCompactPage(
        pageIndexContext,
        true,
        dataPageListReversedOrder,
        gContext.getCurVersion(),
        curPageIndex);
    dataPageListReversedOrder.forEach(dataPage -> dataPage.release());
    final long finalOldRequestCount = oldRequestCount;
    final int finalOldCompactedPageSize = oldCompactedPageSize;
    final int finalOldCompactedSubPageNum = oldCompactedSubPageNum;
    final int finalOldCompactedSubPageSize = oldCompactedSubPageSize;
    if (!gContext.isDBNormal() || this.getExecutor().isShuttingDown()) {
        // The result will never be installed. Major compaction may yield no page at all, so guard the release.
        if (compactedDataPage != null) {
            compactedDataPage.release();
        }
    } else {
        gRegionContext.getPageStoreStats().addRunningCompactingPageSize(compactedDataPage == null
            ? 0
            : compactedDataPage.getSize());
        this.getExecutor().execute(new GeminiEventExecutorTask() {
            @Override
            public void cancel() {
                if (compactedDataPage != null) {
                    compactedDataPage.release();
                }
            }
            @Override
            public void run() {
                try {
                    // The whole range [0, curChainIndex] is replaced by the compacted page.
                    pageCompactHandler.doSyncReplace(logicalPageChain,
                        curPageIndex,
                        finalOldCompactedPageSize,
                        finalOldCompactedSubPageNum,
                        finalOldCompactedSubPageSize,
                        finalOldRequestCount,
                        0,
                        curChainIndex,
                        compactedDataPage,
                        invalidPageAddressList,
                        curPageIndex);
                } catch (GeminiShutDownException e) {
                    if (compactedDataPage != null && compactedDataPage.refCnt() == 1) {
                        compactedDataPage.release();
                    }
                    LOG.warn("GeminiDB has shutdown!");
                } catch (Exception e) {
                    LOG.error("Internal Bug.", e);
                    if (compactedDataPage != null && compactedDataPage.refCnt() == 1) {
                        compactedDataPage.release();
                    }
                    gContext.setDBInternalError(e);
                } finally {
                    gRegionContext.getPageStoreStats().addRunningMajorCompactedPages(-1);
                    gRegionContext.getPageStoreStats().addRunningCompactingPageSize(-(compactedDataPage == null
                        ? 0
                        : compactedDataPage.getSize()));
                }
            }
        });
    }
}
/**
 * Minor compaction triggered by a read: compacts the newest pages of a chain, using pages that are in memory
 * or, failing that, pages supplied in {@code fetchedDataPageMap}, and schedules the replacement on this store's
 * executor.
 *
 * <p>If fewer than two pages qualify, the chain status is reset to Normal and nothing is compacted.
 *
 * @param pageIndexContext context identifying the page index slot.
 * @param logicalPageChain chain to compact; must still be the one registered in the page index.
 * @param curPageIndex page index slot of the chain.
 * @param curChainIndex newest chain index to include.
 * @param fetchedDataPageMap already fetched pages, looked up by chain index.
 * @throws GeminiShutDownException when the DB is not in normal status after collecting the pages.
 */
private void doMinorCompactionByRead(
    PageIndexContext pageIndexContext,
    LogicalPageChain logicalPageChain,
    int curPageIndex,
    int curChainIndex,
    Map fetchedDataPageMap) {
    if (logicalPageChain != pageIndex.getLogicPage(curPageIndex)) {
        //the chain was already replaced, nothing left to compact.
        gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
        return;
    }
    // get dataPage list to do compaction.
    List canCompactPageListReversedOrder = new ArrayList<>();
    List invalidPageAddressList = new ArrayList<>();
    int oldCompactedPageSize = 0;
    int oldCompactedSubPageNum = 0;
    int oldCompactedSubPageSize = 0;
    long oldRequestCount = 0;
    int startCompactionIndex = curChainIndex;
    while (startCompactionIndex >= 0) {
        PageAddress pageAddress = logicalPageChain.getPageAddress(startCompactionIndex);
        DataPage dataPage = pageAddress.getDataPage();
        if (dataPage == null || dataPage.refCnt() <= 1) {
            // Not usable from memory: fall back to the page fetched by the read, if there is a live one.
            dataPage = fetchedDataPageMap.get(startCompactionIndex);
            if (dataPage == null || dataPage.refCnt() <= 0) {
                break;
            }
            // Take our own reference; it is dropped together with the other collected pages below.
            dataPage.retain();
        }
        //NOTICE: need to use PageAddress len, so can get total size if it's a compositePageAddress.
        oldCompactedPageSize += pageAddress.getDataLen();
        oldCompactedSubPageNum += pageAddress.getSubPageNum();
        oldCompactedSubPageSize += pageAddress.getSubPageDataLen();
        canCompactPageListReversedOrder.add(dataPage);
        invalidPageAddressList.add(pageAddress);
        oldRequestCount += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
        startCompactionIndex--;
    }
    if (!gContext.isDBNormal()) {
        canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
        throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
    }
    if (canCompactPageListReversedOrder.size() < 2) {
        // Nothing to merge: hand the chain back and undo the running counter.
        logicalPageChain.compareAndSetStatus(PageStatus.Compacting, PageStatus.Normal);
        gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
        canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
        return;
    }
    final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;
    gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());
    DataPage compactedDataPage = doCompactPage(
        pageIndexContext,
        inclusiveCompactionStartChainIndex == 0,
        canCompactPageListReversedOrder,
        gContext.getCurVersion(),
        curPageIndex);
    canCompactPageListReversedOrder.forEach(dataPage -> dataPage.release());
    final long finalOldRequstCount = oldRequestCount;
    final int finalOldCompactedPageSize = oldCompactedPageSize;
    final int finalOldCompactedSubPageNum = oldCompactedSubPageNum;
    final int finalOldCompactedSubPageSize = oldCompactedSubPageSize;
    // These pages are released on every path below (cancel, after the task ran, or when not scheduled).
    Set dataPages = logicalPageChain.getAllDataPageReferenced();
    if (gContext.isDBNormal() && !this.getExecutor().isShuttingDown()) {
        gRegionContext.getPageStoreStats().addRunningCompactingPageSize(compactedDataPage == null
            ? 0
            : compactedDataPage.getSize());
        this.getExecutor().execute(new GeminiEventExecutorTask() {
            @Override
            public void cancel() {
                dataPages.forEach(dataPage -> dataPage.release());
                if (compactedDataPage != null) {
                    compactedDataPage.release();
                }
            }
            @Override
            public void run() {
                try {
                    pageCompactHandler.doSyncReplace(logicalPageChain,
                        curPageIndex,
                        finalOldCompactedPageSize,
                        finalOldCompactedSubPageNum,
                        finalOldCompactedSubPageSize,
                        finalOldRequstCount,
                        inclusiveCompactionStartChainIndex,
                        curChainIndex,
                        compactedDataPage,
                        invalidPageAddressList,
                        curPageIndex);
                } catch (GeminiShutDownException e) {
                    LOG.warn("GeminiDB has shutdown!");
                } catch (Exception e) {
                    LOG.error("Internal Bug", e);
                    gContext.setDBInternalError(e);
                } finally {
                    gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
                    gRegionContext.getPageStoreStats().addRunningCompactingPageSize(-(compactedDataPage == null
                        ? 0
                        : compactedDataPage.getSize()));
                    dataPages.forEach(dataPage -> dataPage.release());
                }
            }
        });
    } else {
        dataPages.forEach(dataPage -> dataPage.release());
        // The compacted page will never be installed; release it so its memory is not leaked.
        if (compactedDataPage != null) {
            compactedDataPage.release();
        }
    }
}
@Override
public void splitPage(PageIndexContext pageIndexContext) {
LogicalPageChain currentLogicPage = pageIndexContext.getLogicalPageChain();
PageIndexContextHashImpl uPageIndexContext = (PageIndexContextHashImpl) pageIndexContext;
int curBucketNum = uPageIndexContext.getCurBucketNum();
int curIndex = uPageIndexContext.getCurIndex();
//fix fast split bug: GRegionKMapTest#testSimplePutGetRemove
//for example, when curBucket is 4, and page index is 1.
//then buckets expand from 4 to 8. and page 1 have not been splited.
//then buckets expand from 8 to 16.
//page 1 should first split 1 to 1 and 5 with 8 buckets.
//And then split 1 to 1 and 9 with 16, and split 5 to 5 and 13 with 16 buckets. it means 1 in 4 buckets,fianlly get 1/5/9/13 pages in 16 buckets
//but if we get page 1 in the bucket 8 (some key just fall into the page 1), and will split 1 to 1 and 9 directly, so it's wrong.
//in a short, split page only can be split step by step. we can't use uPageIndexContext to decide the split step.
curBucketNum = pageIndex.getBucketNumASPageFinishSplit(curBucketNum, curIndex);
int destIndex = curBucketNum + curIndex;
if (pageIndex.getLogicPage(destIndex) != WAIT_SPLITTING_PAGE || pageIndex.getLogicPage(curIndex) != currentLogicPage) {
//Splitting has done.
return;
}
// get dataPage list to do compaction.
List dataPageListReversedOrder = new ArrayList<>();
List invalidPageAddressList = new ArrayList<>();
long oldRequestNum = 0;
int oldCompactedPageSize = 0;
int oldCompactedSubPageNum = 0;
int oldCompactedSubPageSize = 0;
int oldMemPageSize = 0;
int cix = currentLogicPage.getCurrentPageChainIndex();
while (cix >= 0 && gContext.isDBNormal()) {
PageAddress pageAddress = currentLogicPage.getPageAddress(cix);
DataPage dataPage = pageAddress.getDataPage();
if (dataPage == null) {
this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
GByteBuffer gByteBuffer = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
currentLogicPage,
pageIndexContext.getPageIndexID(),
cix,
this.gRegionContext,
false,
false);
dataPage = boxDataPage(pageAddress, gByteBuffer, pageIndexContext.getPageIndexID(), pageIndexContext.getLogicalPageChain().hashCode());
}
oldMemPageSize += pageAddress.getMemorySize();
//NOTICE: need to use PageAddress len, so can get total size if it's a compositePageAddress.
oldCompactedPageSize += pageAddress.getDataLen();
oldCompactedSubPageNum += pageAddress.getSubPageNum();
oldCompactedSubPageSize += pageAddress.getSubPageDataLen();
dataPageListReversedOrder.add(dataPage);
invalidPageAddressList.add(pageAddress);
oldRequestNum += dataPage.getRequestCount(cacheManager.getCurrentTickTime());
cix--;
}
if (!gContext.isDBNormal()) {
dataPageListReversedOrder.forEach(dataPage -> dataPage.release());
throw new GeminiShutDownException("DB is in abnormal status " + gContext.getDBStatus().name());
}
if (dataPageListReversedOrder.isEmpty()) {
return;
}
DataPage mergeDataPage = doCompactPage(
pageIndexContext,
true,
dataPageListReversedOrder,
gContext.getCurVersion(),
pageIndexContext.getPageIndexID());
//dec reference, We can do it during the doCompactPage.
dataPageListReversedOrder.forEach(dataPage -> dataPage.release());
Tuple2 splitDataPages = mergeDataPage == null
? new Tuple2<>(null, null)
: mergeDataPage.split(
pageIndexContext,
curBucketNum,
curIndex,
gContext.getSupervisor().getAllocator(),
gContext.getInPageGCompressAlgorithm(),
this.gRegionContext);
if (mergeDataPage != null) {
//this will reclaim mergeDataPage's memory.
mergeDataPage.release();
}
if (splitDataPages.f1 == null && splitDataPages.f0 != null) {
//just as doing a compaction
doSyncReplaceLogicalPage(currentLogicPage,
curIndex,
oldCompactedPageSize,
oldCompactedSubPageNum,
oldCompactedSubPageSize,
oldRequestNum,
0,
currentLogicPage.getCurrentPageChainIndex(),
splitDataPages.f0,
invalidPageAddressList,
false,
destIndex);
pageIndex.updateLogicPage(destIndex, NO_PAGE);
return;
} else if (splitDataPages.f0 == null && splitDataPages.f1 != null) {
//just as doing a compaction and move page
doSyncReplaceLogicalPage(currentLogicPage,
destIndex,
oldCompactedPageSize,
oldCompactedSubPageNum,
oldCompactedSubPageSize,
oldRequestNum,
0,
currentLogicPage.getCurrentPageChainIndex(),
splitDataPages.f1,
invalidPageAddressList,
true,
curIndex);
pageIndex.updateLogicPage(curIndex, NO_PAGE);
return;
} else if (splitDataPages.f0 == null && splitDataPages.f1 == null) {
pageIndex.updateLogicPage(destIndex, NO_PAGE);
pageIndex.updateLogicPage(curIndex, NO_PAGE);
//first set page invalid.
gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);
removeInvalidPage(gRegion, invalidPageAddressList);
//even no evicting this time, still try prepare flush.
this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, 0);
//here it set NO_PAGE, so all of related statistics only need to directly reduce.
gRegionContext.getPageStoreStats().addLogicPageCount(-1);
gRegionContext.getPageStoreStats().addLogicPageChainLen(0 - currentLogicPage.getCurrentPageChainIndex() - 1);
gRegionContext.getPageStoreStats().addLogicPageChainCapacity(0 - currentLogicPage.getPageChainCapacity());
gRegionContext.getPageStoreStats().addLogicPageSize(0 - currentLogicPage.getPageSize());
gRegionContext.getPageStoreStats().addLogicSubPageCount(0 - currentLogicPage.getSubPageNum());
gRegionContext.getPageStoreStats().addLogicSubPageSize(0 - currentLogicPage.getSubPageSize());
gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, 0 - oldMemPageSize);
return;
}
LogicalPageChain pageSpit1 = pageIndex.createLogicalPageChain();
LogicalPageChain pageSpit2 = pageIndex.createLogicalPageChain();
PageAddress pageAddressSplit1 = pageSpit1.createPage(splitDataPages.f0);
PageAddress pageAddressSplit2 = pageSpit2.createPage(splitDataPages.f1);
pageAddressSplit1.addRequestCountForNewPage(cacheManager.getCurrentTickTime(),
(int) ((oldRequestNum / 2) & 0X7FFFFFFF));
pageAddressSplit2.addRequestCountForNewPage(cacheManager.getCurrentTickTime(),
(int) ((oldRequestNum - oldRequestNum / 2) & 0X7FFFFFFF));
pageSpit1.addPageSize(pageAddressSplit1.getDataLen());
pageSpit2.addPageSize(pageAddressSplit2.getDataLen());
//at first set expanded page.
pageIndex.updateLogicPage(destIndex, pageSpit2);
pageIndex.updateLogicPage(curIndex, pageSpit1);
List findRealNeedDiscardPage = findNeededDiscardPage(invalidPageAddressList,
pageAddressSplit1,
pageAddressSplit2);
//first set page invalid.
gContext.getSupervisor().discardPage(gRegionContext, findRealNeedDiscardPage);
//TODO there will waste some prepared flush page which is reused. but it's rare case.
removeInvalidPage(gRegion, invalidPageAddressList);
//write to dfs or local disk.
gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, pageAddressSplit1,
pageAddressSplit1.getMemorySize());
//write to dfs or local disk.
gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, pageAddressSplit2,
pageAddressSplit2.getMemorySize());
this.cacheManager.getEvictPolicy().addPage(gRegion, pageAddressSplit1);
this.cacheManager.getEvictPolicy().addPage(gRegion, pageAddressSplit2);
this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion,
pageAddressSplit1.getMemorySize() + pageAddressSplit2.getMemorySize() - oldMemPageSize);
gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion,
pageAddressSplit1.getMemorySize() + pageAddressSplit2.getMemorySize() - oldMemPageSize);
gRegionContext.getPageStoreStats().addLogicPageCount(1);
gRegionContext.getPageStoreStats().addLogicPageChainLen(2 - currentLogicPage.getCurrentPageChainIndex() - 1);
gRegionContext.getPageStoreStats().addLogicPageChainCapacity(pageSpit1.getPageChainCapacity() + pageSpit2.getPageChainCapacity() - currentLogicPage.getPageChainCapacity());
gRegionContext.getPageStoreStats().addLogicPageSize(pageSpit2.getPageSize() + pageSpit1.getPageSize() - currentLogicPage.getPageSize());
gRegionContext.getPageStoreStats().addLogicSubPageCount(pageSpit2.getSubPageNum() + pageSpit1.getSubPageNum() - currentLogicPage.getSubPageNum());
gRegionContext.getPageStoreStats().addLogicSubPageSize(pageSpit2.getSubPageSize() + pageSpit1.getSubPageSize() - currentLogicPage.getSubPageSize());
}
/**
 * Works out which of the replaced page addresses really have to be discarded once the new page
 * addresses are in place.
 *
 * <p>Sub pages that are referenced by one of the new composite addresses are left out of the result.
 * The main page of a replaced composite address is always part of the result.
 *
 * @param invalidPageAddressList page addresses that were replaced.
 * @param newPageAddress the replacing page addresses; may be {@code null} or empty.
 * @return {@code invalidPageAddressList} itself when the new addresses carry no sub page, otherwise a
 *         new list holding the addresses that still need to be discarded.
 */
private List findNeededDiscardPage(
    List invalidPageAddressList, PageAddress... newPageAddress) {
    boolean noNewPage = newPageAddress == null || newPageAddress.length == 0;
    if (noNewPage || (newPageAddress.length == 1 && newPageAddress[0] instanceof PageAddressSingleImpl)) {
        return invalidPageAddressList;
    }

    //only include subPage, because only subPage can be reused.
    Map keptSubPages = new HashMap<>();
    for (PageAddress fresh : newPageAddress) {
        if (!(fresh instanceof PageAddressCompositeImpl)) {
            continue;
        }
        for (PageAddress sub : ((PageAddressCompositeImpl) fresh).getSubPageAddress()) {
            keptSubPages.put((PageAddressSingleImpl) sub, fresh);
        }
    }
    if (keptSubPages.isEmpty()) {
        return invalidPageAddressList;
    }

    List toDiscard = new ArrayList<>();
    for (PageAddress stale : invalidPageAddressList) {
        if (!(stale instanceof PageAddressCompositeImpl)) {
            toDiscard.add(stale);
            continue;
        }
        PageAddressCompositeImpl staleComposite = (PageAddressCompositeImpl) stale;
        //always add main page.
        toDiscard.add(staleComposite.getMainPageAddress());
        for (PageAddress sub : staleComposite.getSubPageAddress()) {
            if (!keptSubPages.containsKey(sub)) {
                toDiscard.add(sub);
            }
        }
    }
    return toDiscard;
}
/**
 * Merges two logical pages. The merge itself is not implemented yet (see the TODO); at the moment the
 * method only checks whether the first page is still the one registered in the index.
 *
 * @param pageIndexContextFirst context of the first page; must be a {@link PageIndexContextHashImpl}.
 * @param pageIndexContextSecond context of the second page; currently unused.
 */
@Override
public void mergePage(PageIndexContext pageIndexContextFirst, PageIndexContext pageIndexContextSecond) {
    PageIndexContextHashImpl uPageIndexContextFirst = (PageIndexContextHashImpl) pageIndexContextFirst;
    int curIndexFirst = uPageIndexContextFirst.getCurIndex();
    // Compare the chain held by the context with the chain registered in the index. Comparing the
    // context object itself against a LogicalPageChain could never be equal.
    if (pageIndexContextFirst.getLogicalPageChain() != pageIndex.getLogicPage(curIndexFirst)) {
        //merge has done.
        return;
    }
    //TODO IN THE FUTURE when shrink index
}
/**
 * Writes the given data set as a new delta page at the end of the context's logical page chain, updates
 * the page statistics and then tries to compact the chain unless the page needs a split.
 *
 * @param pageIndexContext context of the logical page that receives the data.
 * @param dataSet data to write; handed to {@code createDataPage} and {@code getRequestCount}.
 * @param version version used for the new data page and for the compaction attempt.
 */
private void doWriteDataToPage(
PageIndexContext pageIndexContext, List>> dataSet, long version) {
    //write data to delta page. mechanism can guarantee dataset have same version.
    final LogicalPageChain targetChain = pageIndexContext.getLogicalPageChain();
    final long requestCount = getRequestCount(dataSet);
    //add new delta page.
    final DataPage deltaPage = createDataPage(version, dataSet, pageIndexContext.getPageIndexID());
    if (deltaPage != null) {
        final PageAddress deltaAddress = helpAddDataPage(targetChain, requestCount, deltaPage);
        //NOTICE: need to use PageAddress len, so can get total size if it's a compositePageAddress.
        final int dataSize = deltaAddress.getDataLen();
        //because new page is easy to be compacted, and then this page will be invalid. so we don't want to persist this page.
        final boolean worthPersisting = dataSize > MIN_NEW_PAGE_SIZE_TO_PERSIST;
        if (worthPersisting) {
            gContext.getSupervisor().getPersistencyStrategy().persistPage(gRegion, deltaAddress, dataSize);
        }
        // the memory size is only read after persisting; otherwise the data length is used.
        final int memSize = worthPersisting ? deltaAddress.getMemorySize() : dataSize;
        targetChain.addPageSize(dataSize);
        this.cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, memSize);
        gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, memSize);
        gRegionContext.getPageStoreStats().addLogicPageSize(dataSize);
        gRegionContext.getPageStoreStats().addLogicSubPageCount(deltaAddress.getSubPageNum());
        gRegionContext.getPageStoreStats().addLogicSubPageSize(deltaAddress.getSubPageDataLen());
        gRegionContext.getPageStoreStats().addPageRequestCount(requestCount);
        gRegionContext.getPageStoreStats().addPage();
    } else {
        LOG.warn("doWriteDataToPage write empty value");
    }
    //try to compact page
    if (pageIndexContext.isNeedSplit()) {
        return;
    }
    compactPage(pageIndexContext, version);
}
/**
 * Adds a data page to the given chain and records the resulting chain length and capacity change in the
 * page store statistics.
 *
 * @param currentLogicPageID chain that receives the new page.
 * @param newRequestCount request count to credit to the new page; only its low 31 bits are used.
 * @param dataPage data page to add.
 * @return the page address created by the chain for {@code dataPage}.
 */
private PageAddress helpAddDataPage(LogicalPageChain currentLogicPageID, long newRequestCount, DataPage dataPage) {
    final int capacityBefore = currentLogicPageID.getPageChainCapacity();
    final PageAddress created = currentLogicPageID.createPage(dataPage);
    final int capacityDelta = currentLogicPageID.getPageChainCapacity() - capacityBefore;
    // mask to 31 bits so the narrowed value is never negative.
    final int boundedRequestCount = (int) (newRequestCount & 0X7FFFFFFF);
    created.addRequestCountForNewPage(cacheManager.getCurrentTickTime(), boundedRequestCount);
    gRegionContext.getPageStoreStats().addLogicPageChainLen(1);
    gRegionContext.getPageStoreStats().addLogicPageChainCapacity(capacityDelta);
    return created;
}
/**
 * Expands the page index when the average logical page size has reached {@code splitPageSizeThreshold}.
 *
 * <p>Nothing happens when the cache manager forbids expansion, when there is no logical page, or when
 * more than a quarter of the index capacity is still unused. Huge pages and sub page sizes are left out
 * of the average unless huge pages make up at least half of all pages or the remaining size/count is
 * not positive.
 */
@Override
public void checkResource() {
    if (cacheManager.forbidIndexExpand()) {
        LOG.debug("cacheManager forbid index to expand.");
        return;
    }
    if (gRegionContext.getPageStoreStats().getLogicPageCount() == 0) {
        LOG.debug("no page here");
        return;
    }
    final int indexCap = gRegionContext.getPageStoreStats().getIndexCapacity();
    final int unusedSlots = indexCap - gRegionContext.getPageStoreStats().getLogicPageCount();
    if (unusedSlots * 4 > indexCap) {
        LOG.debug("page count {}, so at least 25% index capacity {} not to expand index",
            gRegionContext.getPageStoreStats().getLogicPageCount(),
            gRegionContext.getPageStoreStats().getIndexCapacity());
        return;
    }
    //for skew Page
    final long validPageSize = gRegionContext.getPageStoreStats().getLogicPageSize()
        - gRegionContext.getHugePageTotalSize()
        - gRegionContext.getPageStoreStats().getLogicSubPageSize();
    final int validPageNum = gRegionContext.getPageStoreStats().getLogicPageCount()
        - gRegionContext.getHugePageMapCount();
    //too much Huge Page
    final boolean averageOverAllPages = validPageSize <= 0
        || validPageNum <= 0
        || gRegionContext.getHugePageMapCount() * 2 >= gRegionContext.getPageStoreStats().getLogicPageCount();
    final int averagePageSize = averageOverAllPages
        ? (int) ((gRegionContext.getPageStoreStats().getLogicPageSize() - gRegionContext.getPageStoreStats().getLogicSubPageSize()) / gRegionContext.getPageStoreStats().getLogicPageCount())
        : (int) (validPageSize / validPageNum);
    if (averagePageSize >= splitPageSizeThreshold) {
        pageIndex.expand();
        // the statistics are read after the expansion, so the logged capacity is the new one.
        LOG.info("averagePageSize {}, splitPageSizeThreshold {}, logicPageSize {}, hugePageTotalSize {}, logicSubPageSize {}, logicPageCount {}, hugePageMapCount {}, to expand index up to {}",
            averagePageSize,
            splitPageSizeThreshold,
            gRegionContext.getPageStoreStats().getLogicPageSize(),
            gRegionContext.getHugePageTotalSize(),
            gRegionContext.getPageStoreStats().getLogicSubPageSize(),
            gRegionContext.getPageStoreStats().getLogicPageCount(),
            gRegionContext.getHugePageMapCount(),
            gRegionContext.getPageStoreStats().getIndexCapacity());
    }
}
/**
 * Decides, after a read, whether background work should be started for the given page chain.
 *
 * <p>When the chain's current index is above {@code inMemoryCompactionThreshold} and the chain status
 * allows compaction, an asynchronous minor compaction is handed to a compaction executor. Otherwise,
 * when at least one page was fetched, a task that moves a page from the LRU cache into the page store
 * may be scheduled (subject to the enable flag, the sleep interval and the cache-full check).
 *
 * <p>The pages in {@code fetchedDataPageMap} are released by the {@code finally} block of this method,
 * unless the compaction task was handed to the executor; in that case the task's {@code run()} or
 * {@code cancel()} releases them.
 *
 * @param pageIndexContext page index context passed on to the compaction handler.
 * @param logicalPageChain chain that may be compacted.
 * @param fetchedDataPageMap data pages fetched for the read; released here or by the submitted task.
 */
protected void tryLaunchCompactionByRead(
PageIndexContext pageIndexContext,
LogicalPageChain logicalPageChain,
Map fetchedDataPageMap) {
// stays true until ownership of the fetched pages is passed to the compaction task.
boolean releaseFetchMap = true;
try {
if (logicalPageChain.getCurrentPageChainIndex() > inMemoryCompactionThreshold) {
if (logicalPageChain.getPageStatus().canCompaction()) {
// count this compaction first; the increment is rolled back below when the limit is exceeded or the status change fails.
// NOTE(review): the increment goes to the region's page store stats while the limit is checked against the cache stats - confirm the two are linked.
gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(1);
if (cacheManager.getCacheStats().getRunningMinorCompactionByRead() > maxRunningMinorCompaction) {
gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
} else {
// only one party may move the chain from Normal to Compacting.
if (!logicalPageChain.compareAndSetStatus(PageStatus.Normal, PageStatus.Compacting)) {
gRegionContext.getPageStoreStats().addRunningMinorCompactionByRead(-1);
return;
}
// capture the chain position and page index as they are at submit time.
final int curChainIndex = logicalPageChain.getCurrentPageChainIndex();
final int curPageIndex = pageIndexContext.getPageIndexID();
EventExecutor eventExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
// from here on the task releases the fetched pages, not the finally block below.
// NOTE(review): if execute() throws, nothing in this method releases the pages - confirm this cannot happen.
releaseFetchMap = false;
eventExecutor.execute(new GeminiEventExecutorTask() {
@Override
public void cancel() {
fetchedDataPageMap.values().forEach(dataPage -> dataPage.release());
}
@Override
public void run() {
try {
// NOTE(review): the running-compaction counter is not decremented here; presumably the handler does it - confirm.
pageCompactHandler.doAsyncMinorCompactionByRead(pageIndexContext,
logicalPageChain,
curPageIndex,
curChainIndex,
fetchedDataPageMap);
} catch (GeminiShutDownException e) {
LOG.debug("GeminiDB has shutdown!", e);
} catch (Exception e) {
LOG.error("async minor compaction by read failed", e);
} finally {
fetchedDataPageMap.values().forEach(dataPage -> dataPage.release());
}
}
});
}
}
} else if (fetchedDataPageMap.size() > 0) {
// no compaction needed, but pages had to be fetched: consider loading a page from the LRU cache into the page store.
if (!enableLoadPageFromLRUIntoMainCache) {
return;
}
// a sleep value of -1 disables the rate limit.
if (lruIntoMainCacheSleepMs != -1 && System.currentTimeMillis() - lastLruIntoMainCacheTimeMs < lruIntoMainCacheSleepMs) {
return;
}
lastLruIntoMainCacheTimeMs = System.currentTimeMillis();
if (cacheTooFull(0)) {
LOG.warn("Can not add page into main cache because of cache is full.");
return;
}
lruIntoMainEventExecutor.execute(() -> fetchPageFromLRUCacheToPageStore());
}
} finally {
if (releaseFetchMap) {
fetchedDataPageMap.values().forEach(dataPage -> dataPage.release());
}
}
}
/**
 * Compacts several data pages that hold structured values into one data page.
 *
 * <p>The binary maps of the input pages are walked from the last list element to the first. For each key
 * the values are collected into a list: a {@code Delete} value removes the key (major compaction) or
 * replaces the list with the delete value (minor compaction), a {@code PutMap}/{@code PutList} value
 * replaces the list, and any other value type is appended. Each list is then reduced to one binary value
 * through {@code doCompactValue}, or taken as is when it has a single element, the compaction is minor
 * and sub pages are not allowed. The result is built by {@code doBuildDataPageFromGBinaryMap}.
 *
 * @param pageIndexContext context of the logical page being compacted.
 * @param isMajor compaction is major or minor.
 * @param canCompactPageListReversedOrder data pages to compact, in reversed order.
 * @param version version for the new page.
 * @param logicPageId index id of the data page.
 * @return whatever {@code doBuildDataPageFromGBinaryMap} returns for the compacted map.
 */
protected DataPage doCompactPageForStructureValue(
PageIndexContext pageIndexContext,
boolean isMajor,
List canCompactPageListReversedOrder,
long version,
int logicPageId) {
List> compactionListReversedOrder = new ArrayList<>();
for (DataPage dataPage : canCompactPageListReversedOrder) {
compactionListReversedOrder.add(dataPage.getGBinaryHashMap());
}
// start from the last element of the reversed list.
// NOTE(review): presumably the last element is the oldest page, so values are visited oldest first - confirm.
int index = compactionListReversedOrder.size() - 1;
//Value list is right order.
// NOTE(review): get(index) throws for an empty input list (index is -1); callers must pass at least one page.
Map> newMap = new HashMap<>(compactionListReversedOrder.get(index).keyCount());
long compactionCount = 0;
StateFilter stateFilter = gRegionContext.getGContext().getStateFilter();
while (index >= 0) {
GBinaryHashMap gBinaryHashMap = compactionListReversedOrder.get(index);
for (Map.Entry entry : gBinaryHashMap.getBinaryMap().entrySet()) {
// NOTE: we must filter here for list page store because list will not be filtered in doCompactValue
if (isMajor && stateFilter != null && stateFilter.filter(gRegionContext, entry.getValue().getSeqID())) {
continue;
}
if (entry.getValue().getGValueType() == GValueType.Delete) {
// a delete drops everything collected so far for the key; a minor compaction keeps the delete value itself.
if (isMajor) {
newMap.remove(entry.getKey());
} else {
newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
}
} else if (entry.getValue().getGValueType() == GValueType.PutMap || entry.getValue().getGValueType() == GValueType.PutList) {
// a full put replaces whatever was collected before for the key.
newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
} else {
// any other value type is appended to the values collected for the key.
if (newMap.containsKey(entry.getKey())) {
newMap.get(entry.getKey()).add(entry.getValue());
} else {
newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
}
}
}
// the compaction counts of all input pages are summed up for the new page.
compactionCount += compactionListReversedOrder.get(index).getCompactionCount();
index--;
}
Map finalCompactedMap = new HashMap<>(newMap.size());
//TODO the pageStore that is not KMap needn't construct the Object "GBufferAddressMapping"
GBufferAddressMapping pageMapping = new GBufferAddressMapping(this.gRegionContext, pageIndexContext.getPageIndexID(), pageIndexContext.getLogicalPageChain().hashCode());
//compaction value
for (Map.Entry> entry : newMap.entrySet()) {
// every branch above stores a non-empty list, so an empty list is treated as an internal error.
if (entry.getValue().size() == 0) {
GeminiRuntimeException e = new GeminiRuntimeException("Internal Bug!");
//Internal Bug should stop job.
gContext.setDBInternalError(e);
throw e;
}
BinaryValue compactedBinaryValue;
if (entry.getValue().size() == 1 && !isMajor && !isAllowSubPage()) {
//if it's major compaction, even only one binary value, we need to do compact to remove deleted record.
compactedBinaryValue = entry.getValue().get(0);
} else {
compactedBinaryValue = doCompactValue(entry.getValue(), isMajor, version, logicPageId, pageMapping);
}
finalCompactedMap.put(entry.getKey(), compactedBinaryValue);
}
//TODO null should be handled by PageStore
return doBuildDataPageFromGBinaryMap(isMajor,
version,
logicPageId,
this.pageSerdeFlink.getKeySerde(),
finalCompactedMap,
compactionCount,
pageMapping);
}
/**
 * Tells whether this page store allows sub pages. This implementation always returns {@code false};
 * the method is {@code protected} and not final, so a subclass may override it.
 *
 * @return {@code false}.
 */
protected boolean isAllowSubPage() {
return false;
}
/**
 * Returns the data page stored at position {@code curIndex} of the given chain, fetching it when the
 * page address currently holds no data page.
 *
 * <p>Before fetching, the bloom filter is asked whether the page might contain the key; when it answers
 * no, nothing is fetched. A fetched page is also put into {@code fetchedDataPageMap} under
 * {@code curIndex} and retained once more for that map.
 *
 * @param key key being looked up; its hash code is passed to the bloom filter.
 * @param logicalPageChain chain that holds the page.
 * @param logicalPageChainIndex index of the chain, passed to the fetch policy and to {@code boxDataPage}.
 * @param curIndex position of the page inside the chain.
 * @param fetchedDataPageMap receives the page when it had to be fetched.
 * @return the data page, or {@code null} when the bloom filter rules the key out.
 */
protected DataPage getDataPageAutoLoadIfNeed(
    K key,
    LogicalPageChain logicalPageChain,
    int logicalPageChainIndex,
    int curIndex,
    Map fetchedDataPageMap) {
    final PageAddress address = logicalPageChain.getPageAddress(curIndex);
    final DataPage cached = address.getDataPage();
    if (cached != null) {
        cacheManager.getCacheStats().addPageCacheHitCount();
        return cached;
    }

    cacheManager.getCacheStats().addPageCacheMissCount();
    final boolean mayHoldKey =
        gContext.getSupervisor().getBloomFilterManager().mightContain(address, key.hashCode());
    if (!mayHoldKey) {
        cacheManager.getCacheStats().addBloomFilterHitCount();
        return null;
    }

    final GByteBuffer fetched = gContext.getSupervisor().getFetchPolicy().fetch(address,
        logicalPageChain,
        logicalPageChainIndex,
        curIndex,
        gRegionContext,
        gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
        true);
    final DataPage loaded = boxDataPage(address, fetched, logicalPageChainIndex, logicalPageChain.hashCode());
    //todo to reuse dataPage need send to handler.
    fetchedDataPageMap.put(curIndex, loaded);
    loaded.retain();
    return loaded;
}
/**
 * Adds the keys of every data page of every non-empty logical page chain to the given set. Pages whose
 * address holds no data page are fetched first; each visited page is released afterwards.
 *
 * @param allKeysIncludeDelete set that receives the keys.
 */
@Override
public void allKeysIncludeDeleted(Set allKeysIncludeDelete) {
    // as we know, removeAll will happen after getAll in mini batch(KeyedBundleOperator), so
    // there is no need to update read cache and trigger compaction
    final LogicalPageChain[] allChains = pageIndex.getPageIndex();
    int chainPos = -1;
    for (LogicalPageChain chain : allChains) {
        chainPos++;
        if (isNullPage(chain)) {
            continue;
        }
        // walk the chain from its current (last) position down to position 0.
        for (int pagePos = chain.getCurrentPageChainIndex(); pagePos >= 0; pagePos--) {
            final PageAddress address = chain.getPageAddress(pagePos);
            DataPage page = address.getDataPage();
            try {
                if (page != null) {
                    this.cacheManager.getCacheStats().addPageCacheHitCount();
                } else {
                    this.cacheManager.getCacheStats().addPageCacheMissCount();
                    GByteBuffer fetched = this.gContext.getSupervisor().getFetchPolicy().fetch(address,
                        chain,
                        chainPos,
                        pagePos,
                        this.gRegionContext,
                        this.gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
                        false);
                    page = boxDataPage(address, fetched, chainPos, chain.hashCode());
                }
                allKeysIncludeDelete.addAll(page.getPOJOSet());
            } finally {
                if (page != null) {
                    page.release();
                }
            }
        }
    }
}
/**
 * Verifies that the given buffer can be boxed by this page store: its underlying byte buffer must not be
 * {@code null} and the page type decoded from its first byte must equal this store's {@code dataPageType}.
 *
 * @param byteBuffer buffer that is about to be boxed into a data page.
 */
protected void checkDataPageTypeToBox(GByteBuffer byteBuffer) {
    Preconditions.checkNotNull(byteBuffer.getByteBuffer(), "Not supported to box null byte buffer.");
    final DataPage.DataPageType typeInBuffer =
        DataPage.DataPageType.valueOf(byteBuffer.getByteBuffer().get(0));
    final boolean matchesStoreType = typeInBuffer == dataPageType;
    Preconditions.checkArgument(matchesStoreType, "Internal Bug!");
}
/**
 * Computes the request count represented by a data set. Different value kinds such as map/list/value
 * calculate it differently.
 *
 * @param dataSet Objects organized to List.
 * @return total request count for this list.
 */
abstract long getRequestCount(List>> dataSet);
/**
 * Create data page with given version, dataset, logic page id.
 *
 * @param version DataPage's version.
 * @param dataSet Objects organized to List will be written to this DataPage.
 * @param logicPageId DataPage's index id.
 * @return certain DataPage, such as DataPageKVImpl, DataPageKMapImpl or DataPageKListImpl; the write path
 *         in this class treats a {@code null} result as "nothing to write" and only logs a warning.
 */
abstract DataPage createDataPage(long version, List>> dataSet, int logicPageId);
/**
 * Box the fetched byte buffer into a data page with information provided by page address.
 *
 * @param pageAddress The page address to box.
 * @param byteBuffer The fetched byte buffer.
 * @param logicPageChainIndex The index of the {@link LogicalPageChain} who contains the given {@link PageAddress}.
 * @param logicPageChainHashCode The hashcode of the {@link LogicalPageChain} who contains the given {@link PageAddress}.
 * @return The encapsulated data page with given byte buffer.
 */
abstract DataPage boxDataPage(PageAddress pageAddress, GByteBuffer byteBuffer, int logicPageChainIndex, int logicPageChainHashCode);
/**
 * Compacts the given data pages into one new data page.
 *
 * @param pageIndexContext context of the logical page whose data pages are compacted.
 * @param isMajor compaction is major or minor.
 * @param canCompactPageListReversedOrder DataPages will be compacted, and this list is reversed order.
 * @param version current version for new page.
 * @param logicPageId DataPage's index id.
 * @return a new compacted DataPage; the split path in this class handles a {@code null} result.
 */
@VisibleForTesting
public abstract DataPage doCompactPage(
PageIndexContext pageIndexContext, boolean isMajor, List canCompactPageListReversedOrder, long version, int logicPageId);
/**
 * invoked by doCompactPageForStructureValue, when value is structural such as Map/List/Set.
 *
 * @param binaryValueList value list to be compacted.
 * @param isMajor compaction is major or minor.
 * @param version current version.
 * @param logicPageId DataPage's index id.
 * @param pageMapping address mapping that doCompactPageForStructureValue creates once per compaction and
 *        passes to every call.
 * @return a compacted version.
 */
abstract BinaryValue doCompactValue(
List binaryValueList,
boolean isMajor,
long version,
int logicPageId,
GBufferAddressMapping pageMapping);
/**
 * Builds the data page for a finished compaction; called at the end of doCompactPageForStructureValue.
 *
 * @param isMajor compaction is major or minor.
 * @param version version for the new page.
 * @param logicPageId DataPage's index id.
 * @param keySerde key serializer; the caller passes the key serde of {@code pageSerdeFlink}.
 * @param finalCompactedMap one compacted binary value per key.
 * @param compactionCount sum of the compaction counts of the compacted input pages.
 * @param pageMapping address mapping that was also passed to doCompactValue for this compaction.
 * @return the data page built from the map. NOTE(review): the caller's TODO suggests the result may be
 *         {@code null} - confirm against the implementations.
 */
protected abstract DataPage doBuildDataPageFromGBinaryMap(
boolean isMajor,
long version,
int logicPageId,
TypeSerializer keySerde,
Map finalCompactedMap,
long compactionCount,
GBufferAddressMapping pageMapping);
/**
 * Tells whether the given chain holds no page at all.
 *
 * @param logicPageID chain to check; may be {@code null}.
 * @return {@code true} when the chain is {@code null} or its current page chain index is {@code -1}.
 */
protected boolean isNullPage(LogicalPageChain logicPageID) {
    if (logicPageID == null) {
        return true;
    }
    return logicPageID.getCurrentPageChainIndex() == -1;
}
/**
 * Takes the hottest page of this region from the LRU cache and, when it still belongs to an unchanged
 * page chain, submits a task to {@code eventExecutor} that attaches its buffer to the matching page
 * address in the page store.
 *
 * <p>The page is validated twice: here, before the task is submitted, and again inside the task, because
 * the chain may change in between. Exceptions are pushed to the no-critical event queue of the context
 * and are not propagated to the caller.
 */
public void fetchPageFromLRUCacheToPageStore() {
try {
// the returned data page will always be fetched
Tuple2 hottestPage = getHottestDataPageFromLRU();
if (hottestPage != null) {
cacheManager.getCacheStats().addLRUPagePreIntoMainCache();
final PageContext hottestPageContext = hottestPage.f1.getPageContext();
if (hottestPageContext == null) {
// clears the local reference to the tuple; presumably so it does not stay reachable - confirm.
hottestPage = null;
return;
}
int chainIndex = hottestPageContext.getLogicPageIndex();
final LogicalPageChain pageChain = pageIndex.getLogicPage(chainIndex);
if (!canSubmitHottestPageToRegionExecutor(chainIndex, hottestPage, pageChain)) {
hottestPage = null;
return;
}
final PageAddress hottestPageAddress = hottestPage.f0;
GByteBuffer buffer = hottestPage.f1.getFutureDataPage().get();
// keep the buffer referenced until the submitted task has finished; the task's finally block releases it.
// NOTE(review): if setCacheStatus() or submit() throws, this extra reference is not released here - confirm.
buffer.retain();
hottestPage = null;
hottestPageContext.setCacheStatus(PageContext.CacheStatus.CACHING_TO_MAIN);
// try to fill data from lru cache to page store
eventExecutor.submit(() -> {
try {
// we need to get the new pageChain again, because there may have some split/compaction
// between submit to the executor and execute the current task.
LogicalPageChain currentPageChain = pageIndex.getLogicPage(chainIndex);
if (currentPageChain == null) {
// the page stays in the LRU cache, so its status is set back.
hottestPageContext.setCacheStatus(PageContext.CacheStatus.IN_LRU);
return;
}
if (!canAddHottestPageToPageStore(chainIndex, hottestPageContext, currentPageChain)) {
hottestPageContext.setCacheStatus(PageContext.CacheStatus.IN_LRU);
return;
}
tryLoadPageIntoPageAddress(hottestPageAddress, buffer, chainIndex, currentPageChain);
// remove the hottest page from LRU, whether it is invalid or added into main cache.
gRegionContext.getGContext().getSupervisor().getFetchPolicy().getDataPageLRU().remove(
hottestPageAddress);
} catch (Exception e) {
// NOTE(review): on this path the cache status is left at CACHING_TO_MAIN - confirm that is intended.
gContext.getNoCriticalEvent().pushEvent(e, System.currentTimeMillis());
} finally {
buffer.release();
}
});
}
} catch (Exception e) {
gContext.getNoCriticalEvent().pushEvent(e, System.currentTimeMillis());
}
}
/**
 * Tells whether the region's cache cannot take another page of the given size.
 *
 * @param tryAddNewPageSize size of the page that is about to be added.
 * @return {@code true} when the used page memory plus {@code tryAddNewPageSize} exceeds
 *         {@code curRegionMemHighMark} and the region has no data page ready to evict.
 */
protected boolean cacheTooFull(int tryAddNewPageSize) {
    // cache memory exceeds high watermark and no ready pages to evict.
    final boolean overHighMark =
        gRegionContext.getPageStoreStats().getPageUsedMemory() + tryAddNewPageSize > curRegionMemHighMark;
    if (!overHighMark) {
        return false;
    }
    return gRegionContext.getGContext().getSupervisor().getCacheManager().getEvictPolicy()
        .getEvictHandlerSepImpl(gRegion)
        .getReadyToEvictDataPageMap()
        .isEmpty();
}
/**
 * Looks for {@code hottestPageAddress} among all page addresses of the given chain and, when the matching
 * address holds no data page yet, creates one from {@code buffer} and attaches it.
 *
 * @param hottestPageAddress page address to look for.
 * @param buffer content of the page.
 * @param pageChainIndex index of the chain, passed on when the data page is created.
 * @param logicalPageChain chain to search.
 * @return {@code true} when the address was found in the chain (whether or not a data page had to be
 *         attached), {@code false} otherwise.
 */
public boolean tryLoadPageIntoPageAddress(
    PageAddress hottestPageAddress,
    GByteBuffer buffer,
    int pageChainIndex,
    LogicalPageChain logicalPageChain) {
    for (int chainPos = 0; chainPos <= logicalPageChain.getCurrentPageChainIndex(); ++chainPos) {
        Iterator<PageAddress> candidates = logicalPageChain.getPageAddress(chainPos).pageIteratorOrdered();
        // subIdx is -1 for the first address the iterator yields and counts up from there; a negative
        // value makes createDataPageFromGByteBuffer box a regular page instead of a sub page.
        for (int subIdx = -1; candidates.hasNext(); subIdx++) {
            final PageAddress candidate = candidates.next();
            checkState(candidate instanceof PageAddressSingleImpl);
            if (!hottestPageAddress.equals(candidate)) {
                continue;
            }
            if (candidate.hasDataPage()) {
                return true;
            }
            // here we use out page address for Composite page address, because we need to construct the mapping.
            final PageAddress boxTarget = subIdx == -1 ? logicalPageChain.getPageAddress(chainPos) : candidate;
            final DataPage loaded = createDataPageFromGByteBuffer(
                subIdx, boxTarget, buffer, pageChainIndex, logicalPageChain.hashCode());
            // here we add reference for the underlying GByteBuffer to align the behavior of GByteBuffer,
            // in the constructor of all GByteBuffer subclasses, we'll retain the it self.
            loaded.retain();
            candidate.setDataPage(loaded);
            cacheManager.getEvictPolicy().tryPrepareFlush(gRegion, loaded.getSize());
            gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, loaded.getSize());
            cacheManager.getCacheStats().addLRUPageIntoMainCache();
            return true;
        }
    }
    return false;
}
/**
 * Creates a data page from a byte buffer. A negative {@code subPageIndex} boxes the buffer through
 * {@code boxDataPage}; otherwise the buffer is wrapped as a hash sub page, which is only supported for
 * map page types.
 *
 * @param subPageIndex negative for a regular page, otherwise the position of the sub page.
 * @param pageAddress page address handed to {@code boxDataPage}.
 * @param byteBuffer content of the page.
 * @param logicPageChainIndex index of the chain that contains the page.
 * @param logicPageChainHashCode hash code of the chain that contains the page.
 * @return the created data page.
 */
@SuppressWarnings("unchecked")
private DataPage createDataPageFromGByteBuffer(int subPageIndex, PageAddress pageAddress, GByteBuffer byteBuffer, int logicPageChainIndex, int logicPageChainHashCode) {
    if (subPageIndex < 0) {
        return boxDataPage(pageAddress, byteBuffer, logicPageChainIndex, logicPageChainHashCode);
    }
    //TODO: Currently, we create DataPage here directly for all subpages,
    // because sub pages(is DataPageType.KV) and composite page(DataPageType.KHashMap or DataPageType.KSortedMap) will have different PageType
    checkState(getDataPageType().isKMapType(), "currently split type only support map type.");
    PageSerdeFlink2Key twoKeySerde = (PageSerdeFlink2Key) pageSerdeFlink;
    return new DataPageHashSubPageImpl(new GBinaryHashMap(byteBuffer, twoKeySerde.getKey2Serde()));
}
/**
 * Checks whether the hottest LRU page may be handed to the region executor for loading into the page
 * store. The checks run in a fixed order and the first failing one decides.
 *
 * @param chainIndex index of the chain the page is expected to belong to.
 * @param hottestPage the page address ({@code f0}) and its LRU entry ({@code f1}).
 * @param pageChain chain currently registered at {@code chainIndex}; may be {@code null}.
 * @return {@code true} when every check passes. When the page is no longer part of the chain it is also
 *         removed from the LRU cache before {@code false} is returned.
 */
protected boolean canSubmitHottestPageToRegionExecutor(
    int chainIndex,
    Tuple2 hottestPage,
    LogicalPageChain pageChain) {
    // invalid page context, missing chain, foreign region or a chain that has changed since caching.
    if (hottestPage.f1.getPageContext() == null
        || pageChain == null
        || invalidRegion(gRegionContext.getRegionId(), hottestPage.f1.getPageContext().getGRegionID())
        || isPageChainChanged(chainIndex, hottestPage.f1.getPageContext(), pageChain)) {
        return false;
    }
    // the chain is busy with a split (unless loading during a split is enabled) or a compaction.
    if ((!enableAddIntoMainWhenSplitting && pageChainInSplitting(chainIndex))
        || pageChainInCompacting(pageChain)) {
        return false;
    }
    if (cacheTooFull(hottestPage.f1.getFutureDataPage().getSize())) {
        LOG.warn("Can not add page into main cache because of cache is full.");
        return false;
    }
    if (hottestPage.f0.hasDataPage()) {
        return false;
    }
    if (pageInTheChain(hottestPage.f0, pageChain)) {
        return true;
    }
    // delete the hottest page from LRU
    gRegionContext.getGContext().getSupervisor().getFetchPolicy().getDataPageLRU().remove(hottestPage.f0);
    return false;
}
/**
 * Tells whether the given chain is currently being compacted.
 *
 * @param pageChain chain to check.
 * @return {@code true} when the chain's page status equals {@code PageStatus.Compacting}.
 */
protected boolean pageChainInCompacting(LogicalPageChain pageChain) {
    // TODO: #Cache How to tell whether a LogicPageChain is in Mergeing...
    final boolean compacting = pageChain.getPageStatus().equals(PageStatus.Compacting);
    return compacting;
}
/**
 * Tells whether the chain at {@code chainIndex} is waiting for a split, judged by its buddy chain in the
 * upper half of the index.
 *
 * @param chainIndex index of the chain to check.
 * @return {@code true} when {@code chainIndex} lies in the lower half of the index capacity and the chain
 *         at {@code chainIndex + capacity / 2} exists with status {@code PageStatus.Init}.
 */
protected boolean pageChainInSplitting(int chainIndex) {
    final int halfCapacity = pageIndex.getIndexCapacity() >> 1;
    if (chainIndex < halfCapacity) {
        final LogicalPageChain buddy = pageIndex.getLogicPage(chainIndex + halfCapacity);
        return buddy != null && buddy.getPageStatus().equals(PageStatus.Init);
    }
    return false;
}
/**
 * Tells whether a page belongs to a different region than expected.
 *
 * @param expectedRegionID region id of this page store.
 * @param actualRegionID region id recorded for the page.
 * @return {@code true} when the two ids are not equal.
 */
protected boolean invalidRegion(GRegionID expectedRegionID, GRegionID actualRegionID) {
    // we just load the data in our region.
    final boolean sameRegion = expectedRegionID.equals(actualRegionID);
    return !sameRegion;
}
/**
 * Tells whether the hottest page may still be added to the page store, i.e. its chain has not changed.
 *
 * @param chainIndex index of the chain.
 * @param hottestPageContext page context of the hottest page.
 * @param logicalPageChain chain currently registered at {@code chainIndex}.
 * @return {@code true} when {@code isPageChainChanged} reports no change.
 */
private boolean canAddHottestPageToPageStore(
    int chainIndex,
    PageContext hottestPageContext,
    LogicalPageChain logicalPageChain) {
    if (isPageChainChanged(chainIndex, hottestPageContext, logicalPageChain)) {
        return false;
    }
    return true;
}
/**
 * Tells whether the chain a cached page was recorded for is no longer the given chain.
 *
 * @param chainIndex chain index recorded for the page.
 * @param hottestPageContext page context carrying the recorded chain hash code and region id.
 * @param logicalPageChain chain currently registered at {@code chainIndex}.
 * @return {@code true} when {@code chainIndex} is outside the index capacity (an error is logged) or when
 *         the chain's hash code differs from the recorded one.
 */
protected boolean isPageChainChanged(
    int chainIndex,
    PageContext hottestPageContext,
    LogicalPageChain logicalPageChain) {
    if (chainIndex < pageIndex.getIndexCapacity()) {
        // hashcode did not equals
        return logicalPageChain.hashCode() != hottestPageContext.getLogicPageChainHashCode();
    }
    LOG.error("Received wrong chainIndex {}, current pageIndex capacity {}, hottest page region {}, current region {}.",
        chainIndex,
        pageIndex.getIndexCapacity(),
        hottestPageContext.getGRegionID(),
        gRegionContext.getRegionId());
    return true;
}
/**
 * Tells whether the given page address is part of the given chain.
 *
 * <p>The current page chain index is the position of the last page in the chain (the rest of this class
 * iterates it inclusively), so the search covers positions {@code 0} up to and including that index.
 * Stopping one position early would report the last page, and every page of a one-page chain, as
 * missing.
 *
 * @param expectedPageAddress page address to look for.
 * @param pageChain chain to search.
 * @return {@code true} when one of the addresses yielded by the chain's page iterators equals
 *         {@code expectedPageAddress}.
 */
private boolean pageInTheChain(PageAddress expectedPageAddress, LogicalPageChain pageChain) {
    for (int i = 0; i <= pageChain.getCurrentPageChainIndex(); ++i) {
        Iterator<PageAddress> iter = pageChain.getPageAddress(i).pageIterator();
        while (iter.hasNext()) {
            PageAddress pageAddress = iter.next();
            checkState(pageAddress instanceof PageAddressSingleImpl);
            if (expectedPageAddress.equals(pageAddress)) {
                return true;
            }
        }
    }
    return false;
}
/**
 * Asks the data page LRU of the fetch policy for the hottest page of this region.
 *
 * @return whatever the LRU returns for this region id and page index; the caller in this class handles
 *         a {@code null} result.
 */
protected Tuple2 getHottestDataPageFromLRU() {
    final GRegionID regionId = gRegionContext.getRegionId();
    return gContext.getSupervisor()
        .getFetchPolicy()
        .getDataPageLRU()
        .getHottestPage(regionId, pageIndex);
}
/**
 * Exposes the cache manager used by this page store; intended for tests.
 *
 * @return the cache manager.
 */
@VisibleForTesting
public CacheManager getCacheManager() {
    return this.cacheManager;
}
}