// Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance. The project price is only $1.
// You can buy this project and download/modify it as often as you want.
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.page;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.snapshot.RegionSnapshot;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotMetaFile;
import org.apache.flink.util.MathUtils;
import org.apache.flink.util.Preconditions;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.Arrays;
import java.util.Collection;
import java.util.Iterator;
import java.util.Map;
import java.util.Objects;
import java.util.Spliterators;
import java.util.stream.StreamSupport;
import static org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotMetaFile.writerFunc;
import static org.apache.flink.util.Preconditions.checkArgument;
/**
* The default hash version of {@link PageIndex} implementation.
*/
public class PageIndexHashImpl implements PageIndex {
/** Logger of this class. */
private static final Logger LOG = LoggerFactory.getLogger(PageIndexHashImpl.class);
/**
 * Sentinel chain (created with {@code PageStatus.Init}) that {@code expand()} writes into every newly
 * added slot. It is always compared by identity; a lookup that hits it falls back to the slot the
 * key maps to under a smaller bucket count.
 */
public static final LogicalPageChain WAIT_SPLITTING_PAGE = new LogicalPageChainImpl(PageStatus.Init);
/** Marker for a slot that holds no logical page chain; it is plain {@code null}. */
public static final LogicalPageChain NO_PAGE = null;
/** Bucket count the index started with; the fallback lookup never goes below this value. */
private final int baseBucketNum;
/** Current bucket count used to mask hashes into slots; doubled by {@code expand()}. */
private volatile int curBucketNum;
/** Page store handed in at construction time; not referenced by the methods visible in this class. */
private final PageStore pageStore;
/** Statistics sink that is told about index capacity, logical page count and chain capacity. */
private final PageStoreStats pageStoreStats;
/** Slot array; replaced as a whole (never resized in place) when the index expands. */
private volatile LogicalPageChain[] pageIndex;
/** Default chain length passed to every newly created logical page chain. */
private final int logicPageChainLenDefault;
/**
 * Creates an empty index whose initial bucket count and default chain length come from the
 * given configuration.
 *
 * @param config configuration providing the default bucket count and the default chain length.
 * @param pageStore page store this index belongs to.
 * @param pageStoreStats statistics sink; it is informed about the initial index capacity.
 * @throws IllegalArgumentException if the configured bucket count is not a positive power of 2.
 */
public PageIndexHashImpl(
    GConfiguration config,
    PageStore pageStore,
    PageStoreStats pageStoreStats) {
    int configBucketNum = config.getPageIndexBucketLenDefault();
    // The bit trick alone also accepts 0 and Integer.MIN_VALUE, so positivity is checked explicitly;
    // slots are addressed with "hash & (bucketNum - 1)", which needs a positive power of 2.
    checkArgument(
        configBucketNum > 0 && (configBucketNum & (configBucketNum - 1)) == 0,
        "curBucketNum should be a positive power of 2, but is %s.",
        configBucketNum);
    baseBucketNum = configBucketNum;
    curBucketNum = configBucketNum;
    pageIndex = new LogicalPageChain[configBucketNum];
    this.pageStore = pageStore;
    this.pageStoreStats = pageStoreStats;
    this.logicPageChainLenDefault = config.getLogicTableDefaultChainLen();
    this.pageStoreStats.setIndexCapacity(curBucketNum);
}
/**
 * Creates an index around an already populated slot array, e.g. one rebuilt by {@code Builder}.
 * The given values are stored as they are, and the statistics sink is told the current capacity.
 *
 * @param pageIndex slot array to use; it is not copied.
 * @param pageStore page store this index belongs to.
 * @param pageStoreStats statistics sink.
 * @param baseBucketNum bucket count the index originally started with.
 * @param curBucketNum current bucket count.
 * @param logicPageChainLenDefault default length of newly created logical page chains.
 */
public PageIndexHashImpl(
    LogicalPageChain[] pageIndex,
    PageStore pageStore,
    PageStoreStats pageStoreStats,
    int baseBucketNum,
    int curBucketNum,
    int logicPageChainLenDefault) {
    // collaborators
    this.pageStore = pageStore;
    this.pageStoreStats = pageStoreStats;
    // index state
    this.pageIndex = pageIndex;
    this.baseBucketNum = baseBucketNum;
    this.curBucketNum = curBucketNum;
    this.logicPageChainLenDefault = logicPageChainLenDefault;
    // publish the capacity only after all state has been set
    pageStoreStats.setIndexCapacity(curBucketNum);
}
/**
 * Copy constructor used by {@code copy(Map)}. Scalar settings and collaborators are shared with
 * the source index; every occupied slot gets a copy of its chain.
 *
 * @param pageIndexHash index to copy from.
 * @param copiedDataPage map handed through to {@code LogicalPageChain.copy}.
 */
private PageIndexHashImpl(PageIndexHashImpl pageIndexHash, Map copiedDataPage) {
    this.baseBucketNum = pageIndexHash.baseBucketNum;
    this.curBucketNum = pageIndexHash.curBucketNum;
    this.pageStore = pageIndexHash.pageStore;
    this.pageStoreStats = pageIndexHash.pageStoreStats;
    this.logicPageChainLenDefault = pageIndexHash.logicPageChainLenDefault;
    this.pageIndex = new LogicalPageChain[pageIndexHash.pageIndex.length];
    for (int i = 0; i < pageIndex.length; i++) {
        LogicalPageChain pageChain = pageIndexHash.pageIndex[i];
        if (pageChain == WAIT_SPLITTING_PAGE) {
            // The sentinel is recognised by identity everywhere else in this class (lookups,
            // snapshot, restore), so it is carried over as-is instead of relying on what
            // copy() returns for it.
            this.pageIndex[i] = WAIT_SPLITTING_PAGE;
        } else if (pageChain != null) {
            this.pageIndex[i] = pageChain.copy(copiedDataPage);
        }
    }
}
/**
 * Looks up the index context for a key. The key's hash code is mixed with
 * {@code MathUtils.bitMix} and masked with the current bucket count to pick a slot.
 *
 * <p>With {@code createIfMissing} the slot is resolved once and a new chain is created when none
 * exists. Without it, the lookup is repeated whenever the bucket count changed while the slot was
 * being resolved.
 *
 * @param key key to locate; must not be {@code null} because its hash code is taken.
 * @param createIfMissing whether a missing logical page chain should be created.
 * @return the context describing the slot (and chain, if any) the key currently maps to.
 */
@Override
public PageIndexContext getPageIndexContext(K key, boolean createIfMissing) {
int hash = MathUtils.bitMix(key.hashCode());
// Snapshot the volatile bucket count once so the slot and the bucket count used below agree.
int checkBucketNum = this.curBucketNum;
int curIndex = hash & (checkBucketNum - 1);
if (createIfMissing) {
// No retry/collision detection on this path.
// NOTE(review): presumably only the single writer creates pages, so it cannot race with expand() - confirm.
return internalGetPageIndexContext(hash, curIndex, checkBucketNum, true);
}
// The loop is not endless in practice: it only repeats while the bucket count keeps changing.
while (true) {
// 1. Grab the reference to the page first; every check below relies on already holding it.
PageIndexContextHashImpl result = (PageIndexContextHashImpl) internalGetPageIndexContext(hash,
curIndex,
checkBucketNum,
false);
// Thread-safe collision detection.
// 2. If the bucket count changed while we resolved the slot (the index was expanded), redo with the new count.
if (checkBucketNum != this.curBucketNum) {
checkBucketNum = this.curBucketNum;
curIndex = hash & (checkBucketNum - 1);
continue;
}
// 3. The result carries a bucket count different from the current one, i.e. it was resolved through the
// fallback lookup because the target slot still waits for its split. Check whether a redo is needed.
// The bucket count is compared instead of the index because the index may be identical while the
// bucket count differs whenever recursiveGetPageContext was involved.
// NOTE(review): this.curBucketNum is read several times in this branch; the reads may observe different values - confirm this is acceptable.
if (result.getCurBucketNum() != this.curBucketNum) {
PageIndexContextHashImpl checkResult = (PageIndexContextHashImpl) internalGetPageIndexContext(hash,
hash & (this.curBucketNum - 1),
this.curBucketNum,
false);
if (result.getCurIndex() == checkResult.getCurIndex()) {
// The resolved index did not change, so the result is safe: the page reference was taken first.
return result;
}
// An alternative would be to inspect the previous index; redoing is simpler because the split state is short-lived.
continue;
} else {
// Even if an expansion happens right now it is safe: we hold the reference to the page, which has the right data.
return result;
}
}
}
/**
 * Resolves the context for one slot. A slot that still holds {@code WAIT_SPLITTING_PAGE} is
 * resolved through {@code recursiveGetPageContext}; any other slot is wrapped directly. If no
 * chain was found and {@code createIfMiss} is set, a new chain is stored at {@code curIndex}
 * and the statistics are updated.
 *
 * @param hash mixed hash of the key.
 * @param curIndex slot the hash maps to under {@code checkBucketNum}.
 * @param checkBucketNum bucket count the slot was computed with.
 * @param createIfMiss whether to create a chain when none is found.
 * @return the resolved (or freshly created) context.
 */
private PageIndexContext internalGetPageIndexContext(
    int hash, int curIndex, int checkBucketNum, boolean createIfMiss) {
    final PageIndexContext resolved;
    if (pageIndex[curIndex] == WAIT_SPLITTING_PAGE) {
        resolved = recursiveGetPageContext(hash, checkBucketNum);
    } else {
        resolved = PageIndexContextHashImpl.of(checkBucketNum, curIndex, pageIndex[curIndex], false);
    }

    final boolean chainMissing = resolved.getLogicalPageChain() == NO_PAGE;
    if (!chainMissing || !createIfMiss) {
        return resolved;
    }

    // Nothing there yet and the caller asked for creation: install a fresh chain in this slot.
    final LogicalPageChain created = createLogicalPageChain();
    pageIndex[curIndex] = created;
    pageStoreStats.addLogicPageCount(1);
    pageStoreStats.addLogicPageChainCapacity(logicPageChainLenDefault);
    return PageIndexContextHashImpl.of(checkBucketNum, curIndex, created, false);
}
/**
 * Fallback lookup for a slot that still waits for its split. The bucket count is halved step by
 * step, down to {@code baseBucketNum}, until the slot the hash maps to no longer holds
 * {@code WAIT_SPLITTING_PAGE}.
 *
 * @param hash mixed hash of the key.
 * @param checkBucketNum bucket count under which the waiting slot was found.
 * @return context of the first non-waiting slot, created with the halved bucket count.
 * @throws GeminiRuntimeException if every candidate slot down to the base bucket count is waiting.
 */
private PageIndexContext recursiveGetPageContext(int hash, int checkBucketNum) {
    for (int bucketNum = checkBucketNum >> 1; bucketNum >= this.baseBucketNum; bucketNum >>= 1) {
        final int slot = hash & (bucketNum - 1);
        if (pageIndex[slot] == WAIT_SPLITTING_PAGE) {
            // still not split at this level, try the next smaller bucket count
            continue;
        }
        return PageIndexContextHashImpl.of(bucketNum, slot, pageIndex[slot], true);
    }
    throw new GeminiRuntimeException("Internal Bug!");
}
/**
 * Doubles the index. Existing slots keep their position; every new slot is filled with
 * {@code WAIT_SPLITTING_PAGE}. The new array is published before the new bucket count, and the
 * statistics sink is told the new capacity.
 *
 * <p>Callers are expected to make sure that this is not invoked concurrently with itself.
 */
@Override
public void expand() {
    final LogicalPageChain[] grown = new LogicalPageChain[curBucketNum << 1];
    final LogicalPageChain[] previous = pageIndex;
    final int previousLength = previous.length;

    System.arraycopy(previous, 0, grown, 0, previousLength);
    Arrays.fill(grown, previousLength, grown.length, WAIT_SPLITTING_PAGE);

    // array first, bucket count second
    pageIndex = grown;
    curBucketNum = grown.length;
    pageStoreStats.setIndexCapacity(curBucketNum);
}
/**
 * Shrinking is not implemented yet (TODO); for now this only reports the unchanged capacity
 * to the statistics sink.
 */
@Override
public void shrink() {
    //TODO
    final int capacity = curBucketNum;
    pageStoreStats.setIndexCapacity(capacity);
}
/**
 * Writes this index to the given region snapshots.
 *
 * <p>Layout: base bucket count, current bucket count, default chain length, number of slots,
 * then one record per slot. A slot record starts with an "empty" flag; a non-empty slot is
 * followed by a "wait splitting" flag and, if that is false, by the chain's own snapshot.
 * {@code Builder} reads the same layout back.
 *
 * @param regionSnapshots snapshots to write to.
 * @throws IOException if writing fails.
 */
@Override
public void snapshot(Collection regionSnapshots) throws IOException {
    // Take both volatile values exactly once so the header and the slot records describe the same
    // array. expand() publishes the array before the bucket count, so reading the bucket count
    // first guarantees it never exceeds the length of the array read afterwards.
    final int bucketNumSnapshot = curBucketNum;
    final LogicalPageChain[] indexSnapshot = pageIndex;
    writerFunc(regionSnapshots, w -> {
        w.writeInt(baseBucketNum);
        w.writeInt(bucketNumSnapshot);
        w.writeInt(logicPageChainLenDefault);
        w.writeInt(indexSnapshot.length);
    });
    for (LogicalPageChain logicalPageChain : indexSnapshot) {
        if (logicalPageChain == null) {
            // write emptyPage
            writerFunc(regionSnapshots, w -> w.writeBoolean(true));
        } else {
            // not emptyPage
            writerFunc(regionSnapshots, w -> w.writeBoolean(false));
            if (logicalPageChain == WAIT_SPLITTING_PAGE) {
                // write wait splitting
                writerFunc(regionSnapshots, w -> w.writeBoolean(true));
            } else {
                // write not wait splitting, followed by the chain itself
                writerFunc(regionSnapshots, w -> w.writeBoolean(false));
                logicalPageChain.snapshot(regionSnapshots);
            }
        }
    }
}
/**
 * Atomically moves the status of the chain stored at the given slot from {@code expectedStatus}
 * to {@code targetStatus}.
 *
 * @param logicPageId slot of the chain; must be within {@code [0, pageIndex.length)}.
 * @param expectedStatus status the chain is expected to have.
 * @param targetStatus status to set.
 * @return the result of the chain's {@code compareAndSetStatus}.
 * @throws IllegalArgumentException if {@code logicPageId} is outside the slot array.
 */
@Override
public boolean updateLogicPageStatus(
    int logicPageId, PageStatus expectedStatus, PageStatus targetStatus) {
    // The message is passed as a template so it is only formatted when the check fails.
    Preconditions.checkArgument(logicPageId >= 0 && logicPageId < pageIndex.length,
        "It's illegal to update page status at index %s, due to out of current pages bound: %s.",
        logicPageId, pageIndex.length);
    return pageIndex[logicPageId].compareAndSetStatus(expectedStatus, targetStatus);
}
/**
 * Creates a copy of this index through the private copy constructor: settings and collaborators
 * are shared, while every occupied slot receives a copy of its chain.
 *
 * @param copiedDataPage map handed through to the chain copies.
 * @return the copied index.
 */
@Override
public PageIndex copy(Map copiedDataPage) {
return new PageIndexHashImpl<>(this, copiedDataPage);
}
/**
 * Clears the given slot by storing {@code NO_PAGE} and lowers the logical page count in the
 * statistics by one.
 *
 * @param logicPageId slot to clear.
 */
@Override
public void removeLogicPage(int logicPageId) {
    // clear the slot first; the counter is only touched if the index was valid
    this.pageIndex[logicPageId] = NO_PAGE;
    pageStoreStats.addLogicPageCount(-1);
}
/**
 * Returns whatever the given slot currently holds: a chain, {@code NO_PAGE} or
 * {@code WAIT_SPLITTING_PAGE}.
 *
 * @param logicPageId slot to read.
 * @return content of the slot.
 */
@Override
public LogicalPageChain getLogicPage(int logicPageId) {
    final LogicalPageChain[] slots = pageIndex;
    return slots[logicPageId];
}
/**
 * Stores the given chain in the given slot, replacing whatever the slot held before.
 *
 * @param logicPageId slot to write; must be within {@code [0, pageIndex.length)}.
 * @param targetLogicPage chain to store.
 * @throws IllegalArgumentException if {@code logicPageId} is outside the slot array.
 */
@Override
public void updateLogicPage(
    int logicPageId, LogicalPageChain targetLogicPage) {
    // The message is passed as a template so it is only formatted when the check fails.
    Preconditions.checkArgument(logicPageId >= 0 && logicPageId < pageIndex.length,
        "It's illegal to update page at index %s, due to out of current pages bound: %s.",
        logicPageId, pageIndex.length);
    pageIndex[logicPageId] = targetLogicPage;
}
/**
 * Creates a new, empty logical page chain in {@code PageStatus.Normal} with the default
 * chain length of this index.
 *
 * @return the new chain; it is not stored in the index by this method.
 */
@Override
public LogicalPageChain createLogicalPageChain() {
    final LogicalPageChain chain =
        new LogicalPageChainImpl(PageStatus.Normal, logicPageChainLenDefault);
    return chain;
}
/**
 * Returns a lazy iterator over the pages of all non-null slots, slot by slot, in the order
 * given by each chain's own {@code pageIterator()}.
 *
 * @return iterator over all pages reachable from this index.
 */
@Override
public Iterator pageIterator() {
    return Arrays.stream(pageIndex)
        // skip empty slots
        .filter(chain -> chain != null)
        // turn every chain's iterator into a sequential stream and concatenate them
        .flatMap(chain -> StreamSupport.stream(
            Spliterators.spliteratorUnknownSize(chain.pageIterator(), 0),
            false))
        .iterator();
}
/**
 * Returns the current bucket count of the index.
 *
 * @return current bucket count.
 */
@Override
public int getIndexCapacity() {
    return this.curBucketNum;
}
/**
 * Returns the slot array itself, not a copy; changes made through the returned array are
 * visible to this index.
 *
 * @return the current slot array.
 */
@Override
public LogicalPageChain[] getPageIndex() {
    return this.pageIndex;
}
/**
 * Determines the smallest bucket count at which the page in {@code curIndex} still has to be
 * split. Starting from {@code curBucketNum}, the bucket count is halved as long as the sibling
 * slot {@code curIndex + half} still holds {@code WAIT_SPLITTING_PAGE}.
 *
 * @param curBucketNum bucket count recorded when the index was looked up.
 * @param curIndex slot that refers to the data.
 * @return the bucket count at which this page has finished splitting.
 * @throws GeminiRuntimeException if the slot itself holds {@code WAIT_SPLITTING_PAGE}.
 */
public int getBucketNumASPageFinishSplit(int curBucketNum, int curIndex) {
    int bucketNum = curBucketNum;
    for (;;) {
        if (getLogicPage(curIndex) == WAIT_SPLITTING_PAGE) {
            throw new GeminiRuntimeException("Gemini Internal Bug, want to split a invalid page");
        }
        final int half = bucketNum >> 1;
        // half <= curIndex: this slot only exists above that level, so the page has been split there.
        if (half <= curIndex || getLogicPage(curIndex + half) != WAIT_SPLITTING_PAGE) {
            return bucketNum;
        }
        // the sibling at this level still waits for its split, go one level down
        bucketNum = half;
    }
}
/**
 * Rebuilds a {@link PageIndexHashImpl} from the layout written by {@code snapshot}: the three
 * header ints, the slot count, and one record per slot.
 */
public static class Builder {
    private final int baseBucketNum;
    private final int curBucketNum;
    private final int logicPageChainLenDefault;
    private final LogicalPageChain[] pageIndex;
    private final GRegionContext regionContext;

    /**
     * Reads the complete index from the reader.
     *
     * @param reader source positioned at the start of the index data.
     * @param context region context providing the statistics sink.
     * @throws IOException if reading fails.
     */
    public Builder(SnapshotMetaFile.Reader reader, GRegionContext context) throws IOException {
        this.regionContext = context;

        // header, in the order it was written
        this.baseBucketNum = reader.readInt();
        this.curBucketNum = reader.readInt();
        this.logicPageChainLenDefault = reader.readInt();

        final int slotCount = reader.readInt();
        this.pageIndex = new LogicalPageChain[slotCount];
        for (int slot = 0; slot < slotCount; slot++) {
            this.pageIndex[slot] = readSlot(reader);
        }
    }

    /** Reads one slot record: empty, waiting for split, or a chain restored in normal status. */
    private LogicalPageChain readSlot(SnapshotMetaFile.Reader reader) throws IOException {
        final boolean emptyPage = reader.readBoolean();
        if (emptyPage) {
            return NO_PAGE;
        }
        final boolean waitSplitting = reader.readBoolean();
        if (waitSplitting) {
            // keep the shared sentinel so identity checks keep working
            return WAIT_SPLITTING_PAGE;
        }
        // restored page starts in normal status
        final LogicalPageChain restored = new LogicalPageChainImpl(PageStatus.Normal);
        restored.restore(reader, this.regionContext.getPageStoreStats());
        this.regionContext.getPageStoreStats().addLogicPageCount(1);
        return restored;
    }

    /**
     * Creates the index from the restored state. The page store is passed as {@code null}.
     *
     * @return the restored index.
     */
    public PageIndex build() {
        // TODO: #SR how to pass in the pageStore and pageStoreStats.
        return new PageIndexHashImpl(pageIndex, null, regionContext.getPageStoreStats(), baseBucketNum, curBucketNum, logicPageChainLenDefault);
    }
}
}