Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
You can buy this project and download or modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.page;
import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiShutDownException;
import org.apache.flink.runtime.state.gemini.engine.filter.StateFilter;
import org.apache.flink.runtime.state.gemini.engine.handler.PageCompactHandler;
import org.apache.flink.runtime.state.gemini.engine.memstore.GSValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryKey;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.BinaryValue;
import org.apache.flink.runtime.state.gemini.engine.page.bmap.GBinaryHashMap;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManager;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManager;
import org.apache.flink.runtime.state.gemini.engine.vm.WaterMark;
import org.apache.flink.shaded.guava18.com.google.common.collect.Lists;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import javax.annotation.Nullable;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.NO_PAGE;
import static org.apache.flink.runtime.state.gemini.engine.page.PageIndexHashImpl.WAIT_SPLITTING;
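/**
 * Base class for page stores whose pages are located through a {@link PageIndexHashImpl}.
 *
 * <p>It appends delta pages to logical page chains, schedules minor and major compaction of those
 * chains on the compaction executor group, splits pages after the index has expanded, and asks the
 * index to expand when the average logical page size grows beyond a configured threshold.
 */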
public abstract class AbstractHashPageStore implements PageStore {
// Logger shared by all instances of this class.
private static final Logger LOG = LoggerFactory.getLogger(AbstractHashPageStore.class);
//TODO provide HashCodePageIndex interface.
// Index from bucket position to logical page chain; either supplied to the constructor or created there.
protected final PageIndexHashImpl pageIndex;
// Context of the owning region; source of the page-store statistics updated throughout this class.
protected final GRegionContext gRegionContext;
// Executor returned by getExecutor(); finished compactions are installed by tasks submitted to it.
protected final EventExecutor eventExecutor;
// Cache manager obtained from the supervisor; provides the evict policy, cache statistics and water marks.
protected final CacheManager cacheManager;
// Database-wide context (configuration, supervisor, current version, DB status).
protected final GContext gContext;
// Snapshot manager obtained from the supervisor. Not used in the portion of the class shown here.
protected final SnapshotManager geminiSnapshotManager;
// Average-page-size thresholds consulted by checkResource() to decide whether the index should expand;
// which one applies depends on the index-capacity water mark.
private final int spilledPageSizeThresholdLow;
private final int spilledPageSizeThresholdMiddle;
private final int spilledPageSizeThresholdHigh;
// Chain index at or above which compactPage() attempts a major compaction.
private final int maxCompactionChainThreshold;
// Region this page store belongs to.
protected final GRegion gRegion;
// Adapter through which compaction tasks call back into the do*Compaction / doSyncReplaceLogicPage methods.
private final PageCompactHandler pageCompactHandler;
// Chain index above which a minor compaction is attempted; also the number of memory-resident pages
// a minor compaction must exceed to be worthwhile.
private final int inMemoryCompactionThreshold;
// Upper bounds on the number of concurrently running major / minor compactions.
private final int maxRunningMajorCompaction;
private final int maxRunningMinorCompaction;
// Page (de)serializer taken from the region context. Not used in the portion of the class shown here.
protected final PageSerdeFlink pageSerdeFlink;
/**
 * Creates a page store that builds its own page index.
 *
 * <p>Equivalent to calling the three-argument constructor with a {@code null} page index.
 *
 * @param gRegion region this page store belongs to
 * @param eventExecutor executor later returned by {@link #getExecutor()}
 */
public AbstractHashPageStore(GRegion gRegion, EventExecutor eventExecutor) {
    this(gRegion, null, eventExecutor);
}
/**
 * Creates a page store for {@code gRegion}, reading all thresholds from the region's configuration
 * and wiring the compaction handler to this instance.
 *
 * @param gRegion region this page store belongs to
 * @param pageIndex index to reuse, or {@code null} to create a new {@link PageIndexHashImpl};
 *     a non-null value is cast to {@link PageIndexHashImpl}
 * @param eventExecutor executor later returned by {@link #getExecutor()}
 */
public AbstractHashPageStore(
GRegion gRegion, @Nullable PageIndex pageIndex, EventExecutor eventExecutor) {
this.gRegion = gRegion;
this.gRegionContext = gRegion.getGRegionContext();
this.eventExecutor = eventExecutor;
GConfiguration configuration = gRegionContext.getGContext().getGConfiguration();
if (pageIndex != null) {
//TODO #SR rewrite this to use interface.
this.pageIndex = (PageIndexHashImpl) pageIndex;
} else {
// NOTE: 'this' is handed to the index before the remaining fields below are assigned.
this.pageIndex = new PageIndexHashImpl<>(configuration, this, gRegionContext.getPageStoreStats());
}
this.gContext = gRegionContext.getGContext();
this.cacheManager = this.gContext.getSupervisor().getCacheManager();
gRegionContext.getPageStoreStats().setPageSizeRate(configuration.getPageSizeRateBetweenPOJOAndHeap());
// Thresholds used by checkResource() when deciding whether to expand the index.
this.spilledPageSizeThresholdLow = configuration.getSpilledPageSizeThresholdUnderLowMark();
this.spilledPageSizeThresholdMiddle = configuration.getSpilledPageSizeThresholdUnderMiddleMark();
this.spilledPageSizeThresholdHigh = configuration.getSpilledPageSizeThresholdUnderHighMark();
this.geminiSnapshotManager = this.gContext.getSupervisor().getSnapshotManager();
// Compaction tuning knobs.
this.maxCompactionChainThreshold = configuration.getMaxCompactionChainThreshold();
this.inMemoryCompactionThreshold = configuration.getInMemoryCompactionThreshold();
this.maxRunningMajorCompaction = configuration.getMaxRunningMajorCompaction();
this.maxRunningMinorCompaction = configuration.getMaxRunningMinorCompaction();
this.pageSerdeFlink = (PageSerdeFlink) gRegionContext.getPageSerdeFlink();
// Thin adapter: every handler callback forwards to the corresponding method of this store.
this.pageCompactHandler = new PageCompactHandler() {
@Override
public void doAsyncMajorCompaction(
LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version) {
doMajorCompaction(logicChainedPage, curPageIndex, curChainIndex, version);
}
@Override
public void doAsyncMinorCompaction(
LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version, boolean force) {
doMinorCompaction(logicChainedPage, curPageIndex, curChainIndex, version, force);
}
@Override
public void doSyncReplace(
LogicChainedPage logicChainedPage,
int curPageIndex,
int oldCompatedPageSize,
int oldMemPageSize,
long oldRequstCount,
int inclusiveCompactionStartChainIndex,
int inclusiveCompactionEndChainIndex,
DataPage compactedDataPage,
List invalidPageAddressList,
int relatedIndex) {
// Replacement triggered by a compaction is never a split, hence isSplit == false.
doSyncReplaceLogicPage(logicChainedPage,
curPageIndex,
oldCompatedPageSize,
oldMemPageSize,
oldRequstCount,
inclusiveCompactionStartChainIndex,
inclusiveCompactionEndChainIndex,
compactedDataPage,
invalidPageAddressList,
false,
relatedIndex);
}
@Override
public void doAsyncMinorCompactionByRead(
LogicChainedPage logicPageID,
int curPageIndex,
int curChainIndex,
Map fetchedDataPageMap) {
doMinorCompactionByRead(logicPageID, curPageIndex, curChainIndex, fetchedDataPageMap);
}
};
}
/**
 * Returns the executor this page store was constructed with.
 *
 * @return the executor passed to the constructor
 */
@Override
public EventExecutor getExecutor() {
    return eventExecutor;
}
/**
 * Tells whether {@code key} is present in this store.
 *
 * @param key key to look up
 * @return {@code true} if {@code get(key)} yields a non-null value
 */
@Override
public boolean contains(K key) {
    // For a plain key/value store a null lookup result means the key is absent.
    final boolean absent = get(key) == null;
    return !absent;
}
/**
 * Returns the page index backing this store.
 *
 * @return the hash page index held by this store
 */
@Override
public PageIndex getPageIndex() {
    return this.pageIndex;
}
/**
 * Writes {@code dataSet} to the logical page addressed by {@code pageIndexContext}, or only attempts a
 * compaction of that page when there is nothing to write.
 *
 * @param pageIndexContext context identifying the target logical page
 * @param dataSet records to write; {@code null} or empty means "compact only"
 * @param version version passed on to the write / compaction
 * @throws GeminiRuntimeException if the context resolves to {@code NO_PAGE}
 */
@Override
public void addPage(PageIndexContext pageIndexContext, List>> dataSet, long version) {
LogicChainedPage currentLogicPageID = pageIndexContext.getPageID();
// Callers are expected to have resolved a real page; NO_PAGE here is treated as a programming error.
if (currentLogicPageID == NO_PAGE) {
String msg = "BUG! addOrMergePage receive NO_PAGE request.";
LOG.error(msg);
throw new GeminiRuntimeException(msg);
}
if (dataSet == null || dataSet.isEmpty()) {
// Nothing to append: just give compaction a chance to run.
compactPage(pageIndexContext, version);
} else {
// Appends a delta page and then attempts compaction itself.
doWriteDataToPage(pageIndexContext, dataSet, version);
}
}
/**
 * Decides whether the logical page addressed by {@code pageIndexContext} should be compacted and, if
 * so, launches the compaction asynchronously.
 *
 * <p>Nothing happens when the page has already been replaced in the index, when its chain index is
 * not positive, or when its status does not allow compaction. Otherwise a major compaction is
 * attempted once the chain index reaches {@code maxCompactionChainThreshold} (falling back to a
 * forced minor compaction when too many major compactions are running), and a non-forced minor
 * compaction is attempted once the chain index exceeds {@code inMemoryCompactionThreshold}.
 *
 * @param pageIndexContext context identifying the logical page
 * @param version version handed to the compaction task
 * @throws GeminiRuntimeException wrapping any exception raised while scheduling
 */
@Override
public void compactPage(PageIndexContext pageIndexContext, final long version) {
    try {
        final LogicChainedPage chain = pageIndexContext.getPageID();
        final int curPageIndex = pageIndexContext.getPageIndexID();

        // The index already points to another chain: a previous compaction has finished.
        if (chain != pageIndex.getLogicPage(curPageIndex)) {
            return;
        }
        // A chain without any delta page offers nothing to compact.
        if (chain.getCurrentPageChainIndex() <= 0) {
            return;
        }
        if (!chain.getPageStatus().canCompaction()) {
            return;
        }

        final int curChainIndex = chain.getCurrentPageChainIndex();

        if (chain.getCurrentPageChainIndex() < maxCompactionChainThreshold) {
            if (chain.getCurrentPageChainIndex() > inMemoryCompactionThreshold) {
                // Minor compaction only involves pages that are resident in memory.
                tryLaunchMinorCompaction(version, chain, curPageIndex, curChainIndex, chain, false);
            }
            return;
        }

        // From here on: the chain is long enough for a major compaction.
        if (!chain.getPageStatus().canCompaction()) {
            return;
        }
        gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(1);
        if (cacheManager.getCacheStats().getRuningMajorCompactedPages() > maxRunningMajorCompaction) {
            gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
            // Too many major compactions in flight: fall back to a forced minor compaction.
            tryLaunchMinorCompaction(version, chain, curPageIndex, curChainIndex, chain, true);
            return;
        }
        if (!chain.setPageStatus(PageStatus.Normal, PageStatus.Compacting)) {
            gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
            return;
        }

        final EventExecutor compactionExecutor =
            gContext.getSupervisor().getCompactionExecutorGroup().next();
        compactionExecutor.submit(() -> {
            try {
                pageCompactHandler.doAsyncMajorCompaction(chain, curPageIndex, curChainIndex, version);
            } catch (GeminiShutDownException e) {
                LOG.warn("GeminiDB has shutdown!");
            }
        });
    } catch (Exception e) {
        LOG.error("Bug " + e.getMessage(), e);
        throw new GeminiRuntimeException(e);
    }
}
/**
 * Launches an asynchronous minor compaction for {@code logicChainedPage} if enough memory-resident
 * pages sit at the tail of its chain.
 *
 * <p>The running-minor-compaction counter is incremented up front and decremented again on every
 * path that ends without submitting a task, so that it only counts compactions actually in flight.
 *
 * @param version version handed to the compaction task
 * @param logicChainedPage chain that is inspected and whose status is switched to Compacting
 * @param curPageIndex index position of the chain
 * @param curChainIndex chain index at which the backwards scan for candidate pages starts
 * @param compactionLogicChainedPage chain handed to the compaction task
 * @param force when {@code true}, the limit on running minor compactions and the compaction-count
 *     heuristic are both skipped
 */
private void tryLaunchMinorCompaction(
        long version,
        LogicChainedPage logicChainedPage,
        int curPageIndex,
        int curChainIndex,
        LogicChainedPage compactionLogicChainedPage,
        boolean force) {
    if (!logicChainedPage.getPageStatus().canCompaction()) {
        return;
    }
    gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(1);
    if (!force
            && cacheManager.getCacheStats().getRuningMinorCompactedPages() > maxRunningMinorCompaction) {
        gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
        return;
    }

    // Walk the chain backwards and count the consecutive pages that are still in memory.
    int startCompactionIndex = curChainIndex;
    int memCandidatePage = 0;
    long lastSumCompactedThreshold = -1;
    while (startCompactionIndex >= 0) {
        PageAddress pageAddress = logicChainedPage.getPageAddress(startCompactionIndex);
        // Only used for counting here, so no reference is taken on the page.
        DataPage dataPage = pageAddress.getDataPageNoReference();
        if (dataPage == null) {
            break;
        }
        if (!force) {
            // Heuristic: stop at the first page whose compaction count exceeds the running sum
            // of the pages accepted so far.
            long compactedCount = dataPage.getCompactionCount();
            if (lastSumCompactedThreshold == -1) {
                lastSumCompactedThreshold = compactedCount;
            } else if (lastSumCompactedThreshold >= compactedCount) {
                lastSumCompactedThreshold += compactedCount;
            } else {
                break;
            }
        }
        memCandidatePage++;
        startCompactionIndex--;
    }

    if (memCandidatePage <= inMemoryCompactionThreshold) {
        gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
        return;
    }
    if (!logicChainedPage.setPageStatus(PageStatus.Normal, PageStatus.Compacting)) {
        // Undo the MINOR counter incremented above (the original decremented the major counter here,
        // leaking the minor counter and under-counting major compactions).
        gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
        return;
    }

    EventExecutor compactionExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
    compactionExecutor.submit(() -> {
        try {
            pageCompactHandler.doAsyncMinorCompaction(compactionLogicChainedPage,
                curPageIndex,
                curChainIndex,
                version,
                force);
        } catch (GeminiShutDownException e) {
            LOG.warn("GeminiDB has shutdown!");
        }
    });
}
/**
 * Installs a new logical page chain in which the pages
 * {@code [inclusiveCompactionStartChainIndex, inclusiveCompactionEndChainIndex]} of
 * {@code logicChainedPage} are replaced by {@code compactedDataPage}, then updates the evict policy
 * and the page-store statistics and discards the replaced pages.
 *
 * <p>The replacement is abandoned (and the reference on {@code compactedDataPage} released) when the
 * index slot no longer holds the expected entry: {@code WAIT_SPLITTING} for a split,
 * {@code logicChainedPage} itself otherwise.
 *
 * @param logicChainedPage chain that was compacted
 * @param curPageIndex index slot that receives the new chain
 * @param oldCompatedPageSize total size of the replaced pages
 * @param oldMemPageSize size of the replaced pages that was accounted as used memory
 * @param oldRequstCount request count carried over to the compacted page
 * @param inclusiveCompactionStartChainIndex first replaced chain index
 * @param inclusiveCompactionEndChainIndex last replaced chain index
 * @param compactedDataPage result of the compaction, may be {@code null}
 * @param invalidPageAddressList addresses of the replaced pages
 * @param isSplit whether the replacement is part of a page split
 * @param relatedIndex second index position passed to the evict policy
 * @return the newly installed chain, or {@code null} if the replacement was abandoned
 */
private LogicChainedPage doSyncReplaceLogicPage(
        LogicChainedPage logicChainedPage,
        int curPageIndex,
        int oldCompatedPageSize,
        int oldMemPageSize,
        long oldRequstCount,
        int inclusiveCompactionStartChainIndex,
        int inclusiveCompactionEndChainIndex,
        DataPage compactedDataPage,
        List invalidPageAddressList,
        boolean isSplit,
        int relatedIndex) {
    final boolean stale = isSplit
        ? pageIndex.getLogicPage(curPageIndex) != WAIT_SPLITTING
        : logicChainedPage != pageIndex.getLogicPage(curPageIndex);
    if (stale) {
        // The compacted page will never be used; drop the reference so its memory can be reclaimed.
        if (compactedDataPage != null) {
            compactedDataPage.delReferenceCount(ReleaseType.Normal);
        }
        return null;
    }

    final LogicChainedPage replacement = pageIndex.newLogicChainedPage();

    // Pages in front of the compacted range are carried over unchanged.
    for (int i = 0; i < inclusiveCompactionStartChainIndex; i++) {
        replacement.insertPage(logicChainedPage.getPageAddress(i));
    }

    // A major compaction may produce no page at all; a minor compaction always keeps its data,
    // even entries affected by TTL or removeAll.
    int newPageSize = 0;
    PageAddress newPageAddress = null;
    if (compactedDataPage != null) {
        newPageAddress = replacement.createPage(oldRequstCount, compactedDataPage);
        newPageSize = compactedDataPage.getSize();
    }

    // Pages appended after the compacted range are carried over as well.
    for (int i = inclusiveCompactionEndChainIndex + 1; i <= logicChainedPage.getCurrentPageChainIndex(); i++) {
        replacement.insertPage(logicChainedPage.getPageAddress(i));
    }

    replacement.addPageSize(logicChainedPage.getPageSize() - oldCompatedPageSize + newPageSize);
    pageIndex.updateLogicPage(curPageIndex, replacement);

    this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, curPageIndex, relatedIndex, invalidPageAddressList);
    if (compactedDataPage != null) {
        this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, newPageAddress);
    }

    // Statistics are adjusted by the difference between the new and the old chain.
    gRegionContext.getPageStoreStats().addLogicPageSize(
        replacement.getPageSize() - logicChainedPage.getPageSize());
    gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, newPageSize - oldMemPageSize);
    gRegionContext.getPageStoreStats().addLogicPageChainLen(
        replacement.getCurrentPageChainIndex() - logicChainedPage.getCurrentPageChainIndex());
    gRegionContext.getPageStoreStats().addLogicPageChainCapacity(
        replacement.getPageChainCapacity() - logicChainedPage.getPageChainCapacity());

    gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);
    return replacement;
}
/**
 * Performs a minor compaction: merges the memory-resident pages at the tail of
 * {@code logicChainedPage} (scanning backwards from {@code curChainIndex}) into one page and submits
 * the replacement of the chain to {@link #getExecutor()}.
 *
 * @param logicChainedPage chain to compact
 * @param curPageIndex index position of the chain
 * @param curChainIndex chain index at which the backwards scan starts; also the last replaced index
 * @param version not read by this method
 * @param force when {@code true}, the compaction-count heuristic that may end the scan early is skipped
 * @throws GeminiShutDownException if the DB is no longer in normal status after the scan
 */
public void doMinorCompaction(
LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version, boolean force) {
if (logicChainedPage != pageIndex.getLogicPage(curPageIndex)) {
// The chain has already been replaced in the index; only release the running-compaction slot.
gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
return;
}
// get dataPage list to do compaction.
List canCompactPageListReversedOrder = new ArrayList<>();
List invalidPageAddressList = new ArrayList<>();
int oldPageSize = 0;
long oldRequstCount = 0;
int startCompactionIndex = curChainIndex;
long lastSumCompactedThreshold = -1;
while (startCompactionIndex >= 0) {
PageAddress pageAddress = logicChainedPage.getPageAddress(startCompactionIndex);
// Presumably takes a reference on the page; every page obtained here is released below.
DataPage dataPage = pageAddress.getDataPage();
if (dataPage != null) {
if (!force) {
long compactedCount = dataPage.getCompactionCount();
// Heuristic: stop at the first page whose compaction count exceeds the running sum so far.
if (lastSumCompactedThreshold == -1) {
lastSumCompactedThreshold = compactedCount;
} else if (lastSumCompactedThreshold >= compactedCount) {
lastSumCompactedThreshold += compactedCount;
} else {
// This page is not part of the compaction, so give its reference back immediately.
dataPage.delReferenceCount(ReleaseType.Normal);
break;
}
}
oldPageSize += dataPage.getSize();
canCompactPageListReversedOrder.add(dataPage);
invalidPageAddressList.add(pageAddress);
oldRequstCount += pageAddress.getRequestCount();
startCompactionIndex--;
} else {
// First page that is not in memory ends the candidate range.
break;
}
}
if (!gContext.isDBNormal()) {
throw new GeminiShutDownException("DB is in abnormal status.");
}
if (canCompactPageListReversedOrder.size() <= inMemoryCompactionThreshold) {
// Too few pages to be worthwhile: restore the status, free the slot and release the references.
logicChainedPage.setPageStatus(PageStatus.Compacting, PageStatus.Normal);
gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
return;
}
// The scan stopped one position below the first page that takes part in the compaction.
final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;
gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());
// The first argument is true only when the compaction reaches down to chain index 0.
DataPage compactedDataPage = doCompactPage(inclusiveCompactionStartChainIndex == 0,
canCompactPageListReversedOrder,
gContext.getCurVersion(),
curPageIndex);
canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
final long finalOldRequstCount = oldRequstCount;
final int finalOldPageSize = oldPageSize;
// Installing the new chain is done by a task on this store's own executor.
this.getExecutor().submit(() -> {
try {
// All compacted pages were in memory, so the same size is passed as compacted size and memory size.
pageCompactHandler.doSyncReplace(logicChainedPage,
curPageIndex,
finalOldPageSize,
finalOldPageSize,
finalOldRequstCount,
inclusiveCompactionStartChainIndex,
curChainIndex,
compactedDataPage,
invalidPageAddressList,
curPageIndex);
gRegionContext.getPageStoreStats().addRuningMinorCompactedPages(-1);
} catch (GeminiShutDownException e) {
LOG.warn("GeminiDB has shutdown!");
}
});
}
/**
 * Performs a major compaction: merges every page of {@code logicChainedPage} from
 * {@code curChainIndex} down to index 0 into one page, fetching pages that are not in memory, and
 * submits the replacement of the chain to {@link #getExecutor()}.
 *
 * @param logicChainedPage chain to compact
 * @param curPageIndex index position of the chain
 * @param curChainIndex highest chain index included in the compaction
 * @param version not read by this method
 * @throws GeminiShutDownException if the DB leaves normal status while pages are collected
 * @throws GeminiRuntimeException if no page was collected
 */
public void doMajorCompaction(
LogicChainedPage logicChainedPage, int curPageIndex, int curChainIndex, long version) {
if (logicChainedPage != pageIndex.getLogicPage(curPageIndex)) {
// The chain has already been replaced in the index; only release the running-compaction slot.
gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
return;
}
// get dataPage list to do compaction.
List dataPageListReversedOrder = new ArrayList<>();
List invalidPageAddressList = new ArrayList<>();
long oldRequstCount = 0;
int oldCompactedPageSize = 0;
int oldMemPageSize = 0;
int cix = curChainIndex;
// Only indices curChainIndex..0 are compacted; pages appended to the chain afterwards are not included.
while (cix >= 0 && gContext.isDBNormal()) {
PageAddress pageAddress = logicChainedPage.getPageAddress(cix);
DataPage dataPage = pageAddress.getDataPage();
if (dataPage == null) {
// Page is not in memory: count the forced fetch and load it through the fetch policy.
this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
//FetchPolicy should be thread safe. because we want compaction also use LRU.
dataPage = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
logicChainedPage,
cix,
this.gRegionContext,
false,
false);
} else {
// Only pages that were already in memory count towards the memory being replaced.
oldMemPageSize += dataPage.getSize();
}
oldCompactedPageSize += dataPage.getSize();
dataPageListReversedOrder.add(dataPage);
invalidPageAddressList.add(pageAddress);
oldRequstCount += pageAddress.getRequestCount();
cix--;
}
if (!gContext.isDBNormal()) {
throw new GeminiShutDownException("DB is in abnormal status.");
}
if (dataPageListReversedOrder.isEmpty()) {
throw new GeminiRuntimeException("BUG");
}
gRegionContext.getPageStoreStats().addMajorCompactedPages(dataPageListReversedOrder.size());
DataPage compactedDataPage = doCompactPage(true,
dataPageListReversedOrder,
gContext.getCurVersion(),
curPageIndex);
// The source pages are no longer needed once the merged page exists.
dataPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
final long finalOldRequstCount = oldRequstCount;
final int finalOldCompactedPageSize = oldCompactedPageSize;
final int finalOldMemPageSize = oldMemPageSize;
// Installing the new chain is done by a task on this store's own executor.
this.getExecutor().submit(() -> {
try {
pageCompactHandler.doSyncReplace(logicChainedPage,
curPageIndex,
finalOldCompactedPageSize,
finalOldMemPageSize,
finalOldRequstCount,
0,
curChainIndex,
compactedDataPage,
invalidPageAddressList,
curPageIndex);
gRegionContext.getPageStoreStats().addRuningMajorCompactedPages(-1);
} catch (GeminiShutDownException e) {
LOG.warn("GeminiDB has shutdown!");
}
});
}
/**
 * Performs a minor compaction that reuses pages a reader has already fetched: scanning backwards
 * from {@code curChainIndex}, each page is taken from memory if present, otherwise from
 * {@code fetchedDataPageMap}; the scan stops at the first page available from neither.
 *
 * @param logicChainedPage chain to compact
 * @param curPageIndex index position of the chain
 * @param curChainIndex chain index at which the backwards scan starts; also the last replaced index
 * @param fetchedDataPageMap pages fetched by the reader, looked up by chain index
 * @throws GeminiShutDownException if the DB is no longer in normal status after the scan
 */
public void doMinorCompactionByRead(
LogicChainedPage logicChainedPage,
int curPageIndex,
int curChainIndex,
Map fetchedDataPageMap) {
if (logicChainedPage != pageIndex.getLogicPage(curPageIndex)) {
// The chain has already been replaced in the index; only release the running-compaction slot.
gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
return;
}
// get dataPage list to do compaction.
List canCompactPageListReversedOrder = new ArrayList<>();
List invalidPageAddressList = new ArrayList<>();
int oldCompactedPageSize = 0;
int oldMemPageSize = 0;
long oldRequstCount = 0;
int startCompactionIndex = curChainIndex;
while (startCompactionIndex >= 0) {
PageAddress pageAddress = logicChainedPage.getPageAddress(startCompactionIndex);
DataPage dataPage = pageAddress.getDataPage();
if (dataPage == null) {
// Not in memory: fall back to the page the reader fetched for this chain index, if any.
dataPage = fetchedDataPageMap.get(startCompactionIndex);
if (dataPage == null) {
break;
}
// Take an extra reference so the uniform release below does not consume the map's own reference.
dataPage.addReferenceCount();
} else {
// Only pages that were already in memory count towards the memory being replaced.
oldMemPageSize += dataPage.getSize();
}
oldCompactedPageSize += dataPage.getSize();
canCompactPageListReversedOrder.add(dataPage);
invalidPageAddressList.add(pageAddress);
oldRequstCount += pageAddress.getRequestCount();
startCompactionIndex--;
}
if (!gContext.isDBNormal()) {
throw new GeminiShutDownException("DB is in abnormal status.");
}
if (canCompactPageListReversedOrder.size() < 2) {
// Fewer than two pages: nothing to merge, so restore the status and release everything.
logicChainedPage.setPageStatus(PageStatus.Compacting, PageStatus.Normal);
gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
return;
}
// The scan stopped one position below the first page that takes part in the compaction.
final int inclusiveCompactionStartChainIndex = startCompactionIndex + 1;
gRegionContext.getPageStoreStats().addMinorCompactedPages(canCompactPageListReversedOrder.size());
// The first argument is true only when the compaction reaches down to chain index 0.
DataPage compactedDataPage = doCompactPage(inclusiveCompactionStartChainIndex == 0,
canCompactPageListReversedOrder,
gContext.getCurVersion(),
curPageIndex);
canCompactPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
final long finalOldRequstCount = oldRequstCount;
final int finalOldCompactedPageSize = oldCompactedPageSize;
final int finalOldMemPageSize = oldMemPageSize;
// Installing the new chain is done by a task on this store's own executor.
this.getExecutor().submit(() -> {
try {
pageCompactHandler.doSyncReplace(logicChainedPage,
curPageIndex,
finalOldCompactedPageSize,
finalOldMemPageSize,
finalOldRequstCount,
inclusiveCompactionStartChainIndex,
curChainIndex,
compactedDataPage,
invalidPageAddressList,
curPageIndex);
gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
} catch (GeminiShutDownException e) {
LOG.warn("GeminiDB has shutdown!");
}
});
}
/**
 * Splits the logical page addressed by {@code pageIndexContext} into the page at its current index
 * and the page at {@code bucketNum + currentIndex}, after merging the whole chain into one page.
 *
 * <p>Depending on which halves of the split contain data, the result is installed at one slot, at
 * the other slot, at both, or at neither (both slots become {@code NO_PAGE}).
 *
 * @param pageIndexContext context identifying the page to split; must be a
 *     {@link PageIndexContextHashImpl}
 * @throws GeminiShutDownException if the DB leaves normal status while pages are collected
 */
@Override
public void splitPage(PageIndexContext pageIndexContext) {
LogicChainedPage currentLogicPage = pageIndexContext.getPageID();
PageIndexContextHashImpl uPageIndexContext = (PageIndexContextHashImpl) pageIndexContext;
int curBucketNum = uPageIndexContext.getCurBucketNum();
int curIndex = uPageIndexContext.getCurIndex();
// Fix for a fast-split bug (see GRegionKMapTest#testSimplePutGetRemove).
// Example: with 4 buckets and page index 1, the buckets expand from 4 to 8 before page 1 has been
// split, and then expand again from 8 to 16.
// Page 1 must first be split into 1 and 5 (8 buckets), and only then 1 into 1 and 9 and 5 into 5 and 13
// (16 buckets), so that page 1 of the 4-bucket index ends up as pages 1/5/9/13 of the 16-bucket index.
// If page 1 were looked up with 8 buckets and split directly into 1 and 9, the result would be wrong.
// In short, a page can only be split one step at a time, so the bucket number from uPageIndexContext
// cannot be used to decide the split step; the index is asked for it instead.
curBucketNum = pageIndex.getBucketNumASPageFinishSplit(curBucketNum, curIndex);
int destIndex = curBucketNum + curIndex;
if (pageIndex.getLogicPage(destIndex) != WAIT_SPLITTING || pageIndex.getLogicPage(curIndex) != currentLogicPage) {
// The destination is no longer waiting for a split, or the source was replaced: splitting is done.
return;
}
// get dataPage list to do compaction.
List dataPageListReversedOrder = new ArrayList<>();
List invalidPageAddressList = new ArrayList<>();
long oldRequestNum = 0;
int oldCompactedPageSize = 0;
int oldMemPageSize = 0;
int cix = currentLogicPage.getCurrentPageChainIndex();
// Collect every page of the chain, newest first, fetching those that are not in memory.
while (cix >= 0 && gContext.isDBNormal()) {
PageAddress pageAddress = currentLogicPage.getPageAddress(cix);
DataPage dataPage = pageAddress.getDataPage();
if (dataPage == null) {
this.cacheManager.getCacheStats().addPageForceFetchByCompactionCount();
dataPage = this.gContext.getSupervisor().getFetchPolicy().fetch(pageAddress,
currentLogicPage,
cix,
this.gRegionContext,
false,
false);
} else {
// Only pages that were already in memory count towards the memory being replaced.
oldMemPageSize += dataPage.getSize();
}
oldCompactedPageSize += dataPage.getSize();
dataPageListReversedOrder.add(dataPage);
invalidPageAddressList.add(pageAddress);
oldRequestNum += pageAddress.getRequestCount();
cix--;
}
if (!gContext.isDBNormal()) {
throw new GeminiShutDownException("DB is in abnormal status.");
}
if (dataPageListReversedOrder.isEmpty()) {
return;
}
// Merge the whole chain into a single page first; the result may be null.
DataPage mergeDataPage = doCompactPage(true,
dataPageListReversedOrder,
gContext.getCurVersion(),
pageIndexContext.getPageIndexID());
// Release the source pages; this could also be done inside doCompactPage.
dataPageListReversedOrder.forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
// f0 is the part installed at curIndex, f1 the part installed at destIndex; either may be null.
Tuple2 splitDataPages = mergeDataPage == null
? new Tuple2<>(null, null)
: mergeDataPage.split(curBucketNum,
curIndex,
gContext.getSupervisor().getAllocator(),
gContext.getInPageGCompressAlgorithm());
if (mergeDataPage != null) {
//this will reclaim mergeDataPage's memory.
mergeDataPage.delReferenceCount(ReleaseType.Normal);
}
if (splitDataPages.f1 == null && splitDataPages.f0 != null) {
// Everything stays at curIndex: same as a compaction in place; the destination holds no page.
doSyncReplaceLogicPage(currentLogicPage,
curIndex,
oldCompactedPageSize,
oldMemPageSize,
oldRequestNum,
0,
currentLogicPage.getCurrentPageChainIndex(),
splitDataPages.f0,
invalidPageAddressList,
false,
destIndex);
pageIndex.updateLogicPage(destIndex, NO_PAGE);
return;
} else if (splitDataPages.f0 == null && splitDataPages.f1 != null) {
// Everything moves to destIndex: a compaction whose result is installed at the destination slot.
doSyncReplaceLogicPage(currentLogicPage,
destIndex,
oldCompactedPageSize,
oldMemPageSize,
oldRequestNum,
0,
currentLogicPage.getCurrentPageChainIndex(),
splitDataPages.f1,
invalidPageAddressList,
true,
curIndex);
pageIndex.updateLogicPage(curIndex, NO_PAGE);
return;
} else if (splitDataPages.f0 == null && splitDataPages.f1 == null) {
// No data survived: both slots become NO_PAGE.
pageIndex.updateLogicPage(destIndex, NO_PAGE);
pageIndex.updateLogicPage(curIndex, NO_PAGE);
this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, curIndex, destIndex, invalidPageAddressList);
// Both slots are NO_PAGE now, so every statistic is simply reduced by the old chain's contribution.
gRegionContext.getPageStoreStats().addLogicPageCount(-1);
gRegionContext.getPageStoreStats().addLogicPageChainLen(0 - currentLogicPage.getCurrentPageChainIndex() - 1);
gRegionContext.getPageStoreStats().addLogicPageChainCapacity(0 - currentLogicPage.getPageChainCapacity());
gRegionContext.getPageStoreStats().addLogicPageSize(0 - currentLogicPage.getPageSize());
gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, 0 - oldMemPageSize);
gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);
return;
}
// Both halves contain data: build one single-page chain per half.
LogicChainedPage pageSpit1 = pageIndex.newLogicChainedPage();
LogicChainedPage pageSpit2 = pageIndex.newLogicChainedPage();
// The old request count is divided between the two new pages.
PageAddress pageAddressSplit1 = pageSpit1.createPage(oldRequestNum / 2, splitDataPages.f0);
PageAddress pageAddressSplit2 = pageSpit2.createPage(oldRequestNum - oldRequestNum / 2, splitDataPages.f1);
pageSpit1.addPageSize(pageAddressSplit1.getDataLen());
pageSpit2.addPageSize(pageAddressSplit2.getDataLen());
// Install the destination (expanded) slot first, then the source slot.
pageIndex.updateLogicPage(destIndex, pageSpit2);
pageIndex.updateLogicPage(curIndex, pageSpit1);
this.cacheManager.getEvictPolicy().removeInvalidPage(gRegion, curIndex, destIndex, invalidPageAddressList);
this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, pageAddressSplit1);
this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, pageAddressSplit2);
// One chain became two: adjust the statistics by the difference between new and old chains.
gRegionContext.getPageStoreStats().addLogicPageCount(1);
gRegionContext.getPageStoreStats().addLogicPageChainLen(2 - currentLogicPage.getCurrentPageChainIndex() - 1);
gRegionContext.getPageStoreStats().addLogicPageChainCapacity(pageSpit1.getPageChainCapacity() + pageSpit2.getPageChainCapacity() - currentLogicPage.getPageChainCapacity());
gRegionContext.getPageStoreStats().addLogicPageSize(pageSpit2.getPageSize() + pageSpit1.getPageSize() - currentLogicPage.getPageSize());
gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, pageSpit2.getPageSize() + pageSpit1.getPageSize() - oldMemPageSize);
gContext.getSupervisor().discardPage(gRegionContext, invalidPageAddressList);
}
/**
 * Placeholder for merging two logical pages when the index shrinks; merging itself is not
 * implemented yet.
 *
 * <p>The only logic present is the staleness check: if the index no longer holds the logical page
 * carried by {@code pageIndexContextFirst}, the merge is considered done.
 *
 * @param pageIndexContextFirst context of the first page; must be a {@link PageIndexContextHashImpl}
 * @param pageIndexContextSecond context of the second page (currently unused)
 */
@Override
public void mergePage(PageIndexContext pageIndexContextFirst, PageIndexContext pageIndexContextSecond) {
    PageIndexContextHashImpl uPageIndexContextFirst = (PageIndexContextHashImpl) pageIndexContextFirst;
    int curIndexFirst = uPageIndexContextFirst.getCurIndex();
    // Compare the logical page held by the context, not the context object itself, with the index
    // entry; comparing the context could never match and made this check always succeed.
    if (pageIndexContextFirst.getPageID() != pageIndex.getLogicPage(curIndexFirst)) {
        //merge has done.
        return;
    }
    //TODO IN THE FUTURE when shrink index
}
/**
 * Appends {@code dataSet} as a new delta page to the logical page addressed by
 * {@code pageIndexContext}, updates the page-store statistics and then attempts a compaction.
 *
 * @param pageIndexContext context identifying the target logical page
 * @param dataSet records to write
 * @param version version used to create the data page and passed to the compaction attempt
 */
private void doWriteDataToPage(
PageIndexContext pageIndexContext, List>> dataSet, long version) {
// Write the data to a delta page; the mechanism guarantees the data set has a single version.
LogicChainedPage currentLogicPageID = pageIndexContext.getPageID();
long newRequestCount = getRequestCount(dataSet);
//add new delta page.
DataPage newDataPage = doCreateDataPage(version, dataSet, pageIndexContext.getPageIndexID());
if (newDataPage == null) {
// No page was created for this data set; nothing to add, but compaction is still attempted below.
LOG.warn("doWriteDataToPage write empty value");
} else {
PageAddress pageAddress = helpAddDataPage(currentLogicPageID, newRequestCount, newDataPage);
// A new page is normally not registered for eviction; it is only when memory is at the high water
// mark, which should be rare.
if (cacheManager.getMemWaterMark(0) == WaterMark.High) {
this.cacheManager.getEvictPolicy().addEvictablePage(gRegion, pageAddress);
}
int dataSize = newDataPage.getSize();
currentLogicPageID.addPageSize(dataSize);
gRegionContext.getPageStoreStats().addLogicPageSize(dataSize);
gRegionContext.getPageStoreStats().addPageUsedMemory(gRegion, dataSize);
gRegionContext.getPageStoreStats().addPageRequestCount(newRequestCount);
gRegionContext.getPageStoreStats().addPage();
}
//try to compact page
compactPage(pageIndexContext, version);
}
/**
 * Appends {@code dataPage} to {@code currentLogicPageID} and records the resulting change of chain
 * length and chain capacity in the page-store statistics.
 *
 * @param currentLogicPageID chain that receives the page
 * @param newRequestCount request count stored with the new page
 * @param dataPage page to append
 * @return address of the newly created page
 */
private PageAddress helpAddDataPage(LogicChainedPage currentLogicPageID, long newRequestCount, DataPage dataPage) {
    final int capacityBefore = currentLogicPageID.getPageChainCapacity();
    final PageAddress created = currentLogicPageID.createPage(newRequestCount, dataPage);
    final int capacityAfter = currentLogicPageID.getPageChainCapacity();

    gRegionContext.getPageStoreStats().addLogicPageChainLen(1);
    // Creating the page may have grown the chain's capacity; account for the growth only.
    gRegionContext.getPageStoreStats().addLogicPageChainCapacity(capacityAfter - capacityBefore);
    return created;
}
/**
 * Expands the page index when the average logical page size has reached the threshold that applies
 * to the current index-capacity water mark.
 *
 * <p>No expansion happens when the cache manager forbids it, when there are no logical pages, or
 * when the index capacity is already at least twice the number of logical pages.
 */
@Override
public void checkResource() {
    if (cacheManager.forbidIndexExpand()) {
        LOG.debug("cacheManager forbid index to expand.");
        return;
    }
    if (gRegionContext.getPageStoreStats().getLogicPageCount() == 0) {
        LOG.debug("no page here");
        return;
    }
    if (gRegionContext.getPageStoreStats().getLogicPageCount() * 2 <= gRegionContext.getPageStoreStats().getIndexCapacity()) {
        LOG.debug("page count {} * 2 less than index capacity {}, not to expand index",
            gRegionContext.getPageStoreStats().getLogicPageCount(),
            gRegionContext.getPageStoreStats().getIndexCapacity());
        return;
    }

    // Pick the threshold matching the current index-capacity water mark.
    int spilledPageSizeThreshold;
    WaterMark waterMark = cacheManager.getIndexCapacityWaterMark();
    if (waterMark == WaterMark.High) {
        spilledPageSizeThreshold = this.spilledPageSizeThresholdHigh;
    } else if (waterMark == WaterMark.Low) {
        spilledPageSizeThreshold = this.spilledPageSizeThresholdMiddle;
    } else {
        spilledPageSizeThreshold = this.spilledPageSizeThresholdLow;
    }

    // Divide first and narrow afterwards: casting the total size to int before the division would
    // truncate totals above Integer.MAX_VALUE and yield a meaningless average.
    int averagePageSize = (int) (gRegionContext.getPageStoreStats().getLogicPageSize()
        / gRegionContext.getPageStoreStats().getLogicPageCount());
    if (averagePageSize >= spilledPageSizeThreshold) {
        pageIndex.expand();
        LOG.info("averagePageSize {}, spilledPageSizeThreshold {}, to expand index up to {}",
            averagePageSize,
            spilledPageSizeThreshold,
            gRegionContext.getPageStoreStats().getIndexCapacity());
    }
}
/**
 * Tries to schedule an asynchronous minor compaction for a page chain after a read.
 *
 * <p>A compaction is only submitted when the chain index exceeds
 * {@code inMemoryCompactionThreshold}, more than one page is present in
 * {@code fetchedDataPageMap}, the page status allows compaction, the number of running
 * read-triggered minor compactions does not exceed {@code maxRunningMinorCompaction}, and the
 * page status can be switched from {@code Normal} to {@code Compacting}.
 *
 * <p>Every page in {@code fetchedDataPageMap} has {@code delReferenceCount} called exactly once:
 * by the submitted task when the compaction was handed over, otherwise by this method. If the
 * hand-over itself fails (for example the executor rejects the task), the page status and the
 * running-compaction counter are rolled back before the failure propagates.
 *
 * @param pageIndexContext supplies the page index id passed to the compaction handler.
 * @param logicPageID page chain that may be compacted.
 * @param fetchedDataPageMap pages whose references are released once they are no longer needed.
 */
protected void tryLaunchCompactionByRead(
    PageIndexContext pageIndexContext, LogicChainedPage logicPageID, Map fetchedDataPageMap) {
    // True while this method is still responsible for releasing the fetched pages.
    boolean releaseFetchMap = true;
    try {
        final boolean chainWorthCompacting =
            logicPageID.getCurrentPageChainIndex() > inMemoryCompactionThreshold && fetchedDataPageMap.size() > 1;
        if (!chainWorthCompacting || !logicPageID.getPageStatus().canCompaction()) {
            return;
        }
        // Optimistically count this compaction; every path that does not submit must undo it.
        gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(1);
        if (cacheManager.getCacheStats().getRuningMinorCompactionByRead() > maxRunningMinorCompaction) {
            gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
            return;
        }
        if (!logicPageID.setPageStatus(PageStatus.Normal, PageStatus.Compacting)) {
            gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
            return;
        }
        boolean submitted = false;
        try {
            final int curChainIndex = logicPageID.getCurrentPageChainIndex();
            final int curPageIndex = pageIndexContext.getPageIndexID();
            EventExecutor eventExecutor = gContext.getSupervisor().getCompactionExecutorGroup().next();
            eventExecutor.submit(() -> {
                try {
                    pageCompactHandler.doAsyncMinorCompactionByRead(logicPageID,
                        curPageIndex,
                        curChainIndex,
                        fetchedDataPageMap);
                } catch (GeminiShutDownException e) {
                    LOG.warn("GeminiDB has shutdown!");
                } finally {
                    fetchedDataPageMap.values().forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
                }
            });
            // Ownership of the fetched pages moves to the task only after it was accepted.
            submitted = true;
            releaseFetchMap = false;
        } finally {
            if (!submitted) {
                // The task will never run: undo the status switch and the counter increment so
                // the chain is not left in Compacting and the running count does not drift.
                logicPageID.setPageStatus(PageStatus.Compacting, PageStatus.Normal);
                gRegionContext.getPageStoreStats().addRuningMinorCompactionByRead(-1);
            }
        }
    } finally {
        if (releaseFetchMap) {
            fetchedDataPageMap.values().forEach(dataPage -> dataPage.delReferenceCount(ReleaseType.Normal));
        }
    }
}
/**
 * Compacts several DataPages holding structured values into one new DataPage.
 *
 * <p>Phase one walks the pages from the last list element to the first and collects, per key,
 * the values that still have to be merged. Phase two turns each collected value list into a
 * single BinaryValue and builds the resulting page via doBuildDataPageFromGBinaryMap.
 *
 * @param isMajor compaction is major or minor.
 * @param canCompactPageListReversedOrder DataPages to compact; the list is in reversed order.
 * @param version version passed on to doCompactValue and to the new page.
 * @param logicPageId DataPage's index id.
 * @return the DataPage returned by doBuildDataPageFromGBinaryMap.
 */
protected DataPage doCompactPageForStructureValue(
boolean isMajor, List canCompactPageListReversedOrder, long version, int logicPageId) {
List> compactionListReversedOrder = new ArrayList<>();
for (DataPage dataPage : canCompactPageListReversedOrder) {
compactionListReversedOrder.add(dataPage.getGBinaryHashMap());
}
// Start at the last element and walk towards index 0.
// NOTE(review): an empty input list makes index -1 here, so get(index) below would fail - confirm callers never pass an empty list.
int index = compactionListReversedOrder.size() - 1;
//Value list is right order.
Map> newMap = new HashMap<>(compactionListReversedOrder.get(index).keyCount());
long compactionCount = 0;
StateFilter stateFilter = gRegionContext.getGContext().getStateFilter();
while (index >= 0) {
for (Map.Entry entry : compactionListReversedOrder.get(index).getBinaryMap().entrySet()) {
// NOTE: we must filter here for list page store because list will not be filtered in doCompactValue
if (isMajor && stateFilter != null && stateFilter.filter(gRegionContext, entry.getValue().getSeqID())) {
continue;
}
if (entry.getValue().getgValueType() == GValueType.Delete) {
// A delete discards everything collected so far for the key: a major compaction drops the key,
// a minor compaction keeps only the delete value itself.
if (isMajor) {
newMap.remove(entry.getKey());
} else {
newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
}
} else if (entry.getValue().getgValueType() == GValueType.PutMap || entry.getValue().getgValueType() == GValueType.PutList) {
// PutMap/PutList replaces whatever was collected so far for the key.
newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
} else {
// Any other value type is appended to the values already collected for the key.
if (newMap.containsKey(entry.getKey())) {
newMap.get(entry.getKey()).add(entry.getValue());
} else {
newMap.put(entry.getKey(), Lists.newArrayList(entry.getValue()));
}
}
}
// The new page carries the sum of the compaction counts of all source pages.
compactionCount += compactionListReversedOrder.get(index).getCompactionCount();
index--;
}
Map finalCompactedMap = new HashMap<>(newMap.size());
//compaction value
for (Map.Entry> entry : newMap.entrySet()) {
// Every list stored in newMap is created with one element, so an empty list is treated as an internal error.
if (entry.getValue().size() == 0) {
throw new GeminiRuntimeException("Internal BUG!");
}
BinaryValue compactedBinaryValue;
if (entry.getValue().size() == 1 && !isMajor) {
//if it's major compaction, even only one binary value, we need to do compact to remove deleted record.
compactedBinaryValue = entry.getValue().get(0);
} else {
compactedBinaryValue = doCompactValue(entry.getValue(), isMajor, version, logicPageId);
}
finalCompactedMap.put(entry.getKey(), compactedBinaryValue);
}
//TODO null should be handled by PageStore
return doBuildDataPageFromGBinaryMap(isMajor,
version,
logicPageId,
this.pageSerdeFlink.getKeySerde(),
finalCompactedMap,
compactionCount);
}
/**
 * Returns the DataPage at {@code curIndex} of the chain, fetching it through the fetch policy
 * when the page address does not currently provide one.
 *
 * <p>A fetched page is also stored in {@code fetchedDataPageMap} under {@code curIndex} and has
 * its reference count increased once more for that map entry. Cache hit/miss statistics are
 * updated accordingly.
 *
 * @param logicPageID chain the page belongs to.
 * @param curIndex position of the page inside the chain.
 * @param fetchedDataPageMap receives every page that had to be fetched.
 * @return the page available from the address, or the freshly fetched page.
 */
protected DataPage getDataPageAutoLoadIfNeed(
    LogicChainedPage logicPageID, int curIndex, Map fetchedDataPageMap) {
    final PageAddress address = logicPageID.getPageAddress(curIndex);
    final DataPage cachedPage = address.getDataPage();
    if (cachedPage != null) {
        this.cacheManager.getCacheStats().addPageCacheHitCount();
        return cachedPage;
    }

    this.cacheManager.getCacheStats().addPageCacheMissCount();
    final DataPage fetchedPage = this.gContext.getSupervisor().getFetchPolicy().fetch(
        address,
        logicPageID,
        curIndex,
        this.gRegionContext,
        this.gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
        true);
    //todo to resuse dataPage need send to handler.
    fetchedDataPageMap.put(curIndex, fetchedPage);
    fetchedPage.addReferenceCount();
    return fetchedPage;
}
/**
 * Adds the POJO key set of every DataPage of every non-null page chain to
 * {@code allKeysIncludeDelete}, fetching pages that are not available from their address.
 *
 * @param allKeysIncludeDelete set that receives the keys.
 */
@Override
public void allKeysIncludeDeleted(Set allKeysIncludeDelete) {
    // as we know, removeAll will happen after getAll in mini batch(KeyedBundleOperator), so
    // there is no need to update read cache and trigger compaction
    for (LogicChainedPage chain : pageIndex.getPageIndex()) {
        if (isNullPage(chain)) {
            continue;
        }
        // Walk the chain from its current index down to index 0.
        int position = chain.getCurrentPageChainIndex();
        while (position >= 0) {
            PageAddress address = chain.getPageAddress(position);
            DataPage page = address.getDataPage();
            try {
                if (page != null) {
                    this.cacheManager.getCacheStats().addPageCacheHitCount();
                } else {
                    this.cacheManager.getCacheStats().addPageCacheMissCount();
                    page = this.gContext.getSupervisor().getFetchPolicy().fetch(
                        address,
                        chain,
                        position,
                        this.gRegionContext,
                        this.gRegionContext.getGContext().getGConfiguration().getEnablePrefetch(),
                        false);
                }
                allKeysIncludeDelete.addAll(page.getPOJOSet());
            } finally {
                // Release the page whether it came from the address or from the fetch.
                if (page != null) {
                    page.delReferenceCount(ReleaseType.Normal);
                }
            }
            position--;
        }
    }
}
/**
 * Calculates the total request count of the given data set. Each value kind (map, list or
 * plain value) calculates it differently, so every concrete page store provides its own
 * implementation.
 *
 * @param dataSet Objects organized as a List.
 * @return total request count for this list.
 */
abstract long getRequestCount(List>> dataSet);
/**
 * Creates a DataPage holding the given data set.
 *
 * @param version DataPage's version.
 * @param dataSet Objects organized as a List that will be written to this DataPage.
 * @param logicPageId DataPage's index id.
 * @return a concrete DataPage, such as DataPageKVImpl, DataPageKMapImpl or DataPageKListImpl.
 */
abstract DataPage doCreateDataPage(long version, List>> dataSet, int logicPageId);
/**
 * Compacts the given DataPages into one new DataPage.
 *
 * @param isMajor compaction is major or minor.
 * @param canCompactPageListReversedOrder DataPages to be compacted; this list is in reversed order.
 * @param version current version for the new page.
 * @param logicPageId DataPage's index id.
 * @return a new compacted DataPage.
 */
@VisibleForTesting
public abstract DataPage doCompactPage(
boolean isMajor, List canCompactPageListReversedOrder, long version, int logicPageId);
/**
 * Compacts the values collected for one key into a single value. Invoked by
 * doCompactPageForStructureValue when the value is structural, such as Map/List/Set.
 *
 * @param binaryValueList value list to be compacted.
 * @param isMajor compaction is major or minor.
 * @param version current version.
 * @param logicPageId DataPage's index id.
 * @return the compacted value.
 */
abstract BinaryValue doCompactValue(
List binaryValueList, boolean isMajor, long version, int logicPageId);
/**
 * Builds the DataPage that results from a compaction. doCompactPageForStructureValue calls this
 * as its last step and returns the result.
 *
 * @param isMajor compaction is major or minor.
 * @param version version for the new page.
 * @param logicPageId DataPage's index id.
 * @param keySerde key serializer; doCompactPageForStructureValue passes pageSerdeFlink.getKeySerde().
 * @param finalCompactedMap compacted value per key.
 * @param compactionCount sum of the compaction counts of the compacted source pages.
 * @return the new DataPage.
 */
protected abstract DataPage doBuildDataPageFromGBinaryMap(
boolean isMajor,
long version,
int logicPageId,
TypeSerializer keySerde,
Map finalCompactedMap,
long compactionCount);
/**
 * Tells whether the given page chain holds no page.
 *
 * @param logicPageID chain to check, may be {@code null}.
 * @return {@code true} when the chain is {@code null} or its current chain index is -1.
 */
protected boolean isNullPage(LogicChainedPage logicPageID) {
    if (logicPageID == null) {
        return true;
    }
    return logicPageID.getCurrentPageChainIndex() == -1;
}
}