All downloads are free. The search and download features use the official Maven repository.
Please wait. This can take some minutes ...
Many resources are needed to download a project. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it how often you want.
org.apache.flink.runtime.state.gemini.engine.memstore.AbstractWriteBuffer Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.state.gemini.engine.memstore;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.filecache.FileCache;
import org.apache.flink.runtime.state.gemini.engine.handler.PageHandler;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageIndex;
import org.apache.flink.runtime.state.gemini.engine.page.PageStore;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotCompletableFuture;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManager;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotOperation;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotStat;
import org.apache.flink.util.Preconditions;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutorGroup;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.TimeUnit;
/**
 * Base implementation of {@link WriteBuffer}.
 *
 * <p>It decides when the active segment has to be cut off and flushed to pages
 * ({@link #checkResource()}), runs the flush on the region's event executor
 * ({@link #doSegmentFlush(Segment, int)}), and drives the snapshot of the write buffer
 * and of the page index ({@link #doSnapshot(SnapshotOperation)}).
 *
 * <p>Subclasses provide the segment bookkeeping ({@link #getActiveSegment()},
 * {@link #addFlushingSegment()}, {@link #pollFlushingSegment()}) and the
 * {@link PageHandler} that turns a segment into pages.
 */
public abstract class AbstractWriteBuffer implements WriteBuffer {

    private static final Logger LOG = LoggerFactory.getLogger(AbstractWriteBuffer.class);

    /** Minimum interval between two debug dumps of the statistics, in milliseconds. */
    private static final long STATS_LOG_INTERVAL_MS = 60000;

    /** Record count of the active segment above which we wait for the size estimation. */
    private static final int ESTIMATE_WAIT_RECORD_THRESHOLD = 1000;

    /** Maximum time to wait for the size estimation in one {@link #checkResource()} call. */
    private static final long ESTIMATE_WAIT_TIMEOUT_MS = 10;

    /** Elapsed time of {@link #checkResource()} above which a message is logged. */
    private static final long SLOW_WAIT_LOG_THRESHOLD_MS = 10;

    protected final EventExecutor eventExecutor;

    protected final GRegionContext gRegionContext;

    protected long segmentID = 0;

    protected final GRegion gRegion;

    protected final PageStore pageStore;

    private final WriteBufferManager writeBufferManager;

    /** Timestamp of the last debug dump of the statistics. */
    private long printTS = System.currentTimeMillis();

    /** Completed once the size-estimation task launched by {@link #checkResource()} has run. */
    private CompletableFuture<Void> lastFuture;

    /**
     * Creates a write buffer for the given region.
     *
     * @param gRegion the region this write buffer belongs to
     * @param eventExecutor the executor on which segment flushes are run
     * @param pageStore the page store whose page index is copied on snapshot
     */
    public AbstractWriteBuffer(
            GRegion gRegion, EventExecutor eventExecutor, PageStore pageStore) {
        this.gRegionContext = gRegion.getGRegionContext();
        this.gRegion = gRegion;
        this.eventExecutor = eventExecutor;
        this.pageStore = pageStore;
        this.writeBufferManager = gRegionContext.getGContext().getSupervisor().getWriteBufferManager();
    }

    @Override
    public EventExecutor getExecutor() {
        return this.eventExecutor;
    }

    /**
     * Checks whether the active segment must be cut off and flushed, and if so hands it
     * over to {@link #doSegmentFlush(Segment, int)}.
     *
     * <p>Steps:
     * <ol>
     *   <li>periodically dump the statistics at debug level;</li>
     *   <li>while no average key length is known, launch the estimation task once and
     *       return early until it has finished;</li>
     *   <li>return if neither this buffer's nor the total estimated size requires a flush;</li>
     *   <li>block while too many segments are already flushing;</li>
     *   <li>move the active segment to the flushing queue and flush it asynchronously.</li>
     * </ol>
     *
     * @throws GeminiRuntimeException if the thread is interrupted while waiting for a
     *     flushing slot (the interrupt flag is restored before throwing)
     */
    void checkResource() {
        long startTime = System.currentTimeMillis();
        if (printTS + STATS_LOG_INTERVAL_MS < startTime) {
            printTS = startTime;
            if (LOG.isDebugEnabled()) {
                LOG.debug("writeBufferStats: {}", gRegionContext.getWriteBufferStats());
                LOG.debug("pageStoreStats: {}", gRegionContext.getPageStoreStats());
            }
        }

        // A negative average key length means there is no estimated size yet.
        if (gRegionContext.getWriteBufferStats().getAverageKeyLen() < 0) {
            if (gRegionContext.getWriteBufferStats().getAverageKeyLen() == -2) {
                // -2: estimation not launched yet. Switch to -1 so it is launched only once,
                // then run the page handler in estimate-only mode on a copy of the segment.
                lastFuture = new CompletableFuture<>();
                gRegionContext.getWriteBufferStats().setAverageKeyLen(-1);
                final Segment segmentCopy = getActiveSegment().copySegment();
                eventExecutor.execute(() -> {
                    createPageHandler(segmentCopy, true).handle();
                    lastFuture.complete(null);
                });
            }
            if (getActiveSegment().getRecordCount() > ESTIMATE_WAIT_RECORD_THRESHOLD) {
                if (!lastFuture.isDone()) {
                    try {
                        lastFuture.get(ESTIMATE_WAIT_TIMEOUT_MS, TimeUnit.MILLISECONDS);
                    } catch (InterruptedException e) {
                        // Keep the interrupt visible to the caller instead of swallowing it.
                        Thread.currentThread().interrupt();
                        return;
                    } catch (Exception e) {
                        // Estimation not finished in time (or failed): try again on the next call.
                        return;
                    }
                }
            } else {
                return;
            }
        }

        // Estimating MapValue and List based on their elements.
        int writeBufferEstimatedSize = getEstimatedSize(getActiveSegment().getRecordCount());
        // Summed as long: both operands are ints and their sum could otherwise overflow
        // to a negative value and wrongly pass the memory check below.
        long totalWriteBufferEstimatedSize =
            (long) writeBufferEstimatedSize + getEstimatedSize(writeBufferManager.getTotalRecordCount());
        if (writeBufferEstimatedSize < gRegionContext.getWriteBufferWaterMark()) {
            if (totalWriteBufferEstimatedSize < writeBufferManager.getTotalMemSize()
                || !writeBufferManager.isBestChoiceWriteBufferFlushing(this)) {
                return;
            }
        }

        GContext gContext = gRegionContext.getGContext();
        while (gRegionContext.getWriteBufferStats().getFlushingSegmentCount()
            >= gContext.getGConfiguration().getNumFlushingSegment()) {
            gContext.checkDBStatus();
            if (writeBufferManager.canFlushWriteBuffer(this)) {
                break;
            }
            synchronized (this) {
                try {
                    // Woken up early by endSegmentFlush() when a flushing segment is done.
                    wait(1);
                    // Let's see how much is it. If too much, then change it.
                    writeBufferManager.increaseWriteBufferFlushBlock();
                } catch (InterruptedException e) {
                    Thread.currentThread().interrupt();
                    throw new GeminiRuntimeException(e);
                }
            }
        }

        // Measured after the loop so that the time spent blocked above is included.
        long waitTime = System.currentTimeMillis() - startTime;
        if (waitTime > SLOW_WAIT_LOG_THRESHOLD_MS) {
            LOG.info("too much flushing segment or evict too long, wait time ={} ...", waitTime);
        }

        Segment rs = addFlushingSegment();
        gRegionContext.getWriteBufferStats().addTotalFlushingRecordCount(rs.getRecordCount());
        gRegionContext.getWriteBufferStats().addTotalRecordCount(-rs.getRecordCount());
        doSegmentFlush(rs, writeBufferEstimatedSize);
    }

    /**
     * Flushes the given segment asynchronously on the event executor.
     *
     * <p>A failure is only logged; the segment is not re-flushed and
     * {@code endSegmentFlush} is not called for it.
     *
     * @param segment the segment to flush
     * @param estimatedSize estimated size of the segment; currently unused
     */
    void doSegmentFlush(Segment segment, int estimatedSize) {
        eventExecutor.execute(() -> {
            try {
                PageHandler pageHandler = createPageHandler(segment, false);
                pageHandler.handle();
                //TODO to handle exception, add mechanism to re-flush this segment.
                endSegmentFlush(segment.getSegmentID());
            } catch (Exception e) {
                // The throwable is passed as the last argument, so no placeholder is needed.
                LOG.error("flush segment failed.", e);
            }
        });
    }

    /**
     * Finishes the flush of the segment at the head of the flushing queue: updates the
     * statistics and wakes up a thread waiting in {@link #checkResource()}.
     *
     * @param segmentID ID of the segment expected at the head of the flushing queue
     * @throws IllegalArgumentException if there is no flushing segment or its ID differs
     */
    private void endSegmentFlush(long segmentID) {
        Segment segment = pollFlushingSegment();
        Preconditions.checkArgument(segment != null, "error segment!");
        Preconditions.checkArgument(segment.getSegmentID() == segmentID, "error segment!");
        gRegionContext.getWriteBufferStats().addFlushingSegmentCount(-1);
        gRegionContext.getWriteBufferStats().addTotalFlushingRecordCount(-segment.getRecordCount());
        synchronized (this) {
            notify();
        }
    }

    /** Returns the segment that currently receives writes. */
    public abstract Segment getActiveSegment();

    /** Adds a segment to the flushing segments and returns it. */
    abstract Segment addFlushingSegment();

    /** Removes and returns the next flushing segment. */
    abstract Segment pollFlushingSegment();

    /**
     * Creates the handler that processes the given segment.
     *
     * @param segment the segment to handle
     * @param onlyEstimatedSize whether the handler is created only for size estimation
     */
    abstract PageHandler createPageHandler(Segment segment, boolean onlyEstimatedSize);

    /**
     * Estimates the size of {@code elementSize} records from the average key and value
     * lengths and the page size rate.
     *
     * @param elementSize number of records
     * @return the estimated size, or 0 if no positive average key length is known yet
     */
    private int getEstimatedSize(long elementSize) {
        if (gRegionContext.getWriteBufferStats().getAverageKeyLen() <= 0) {
            LOG.error("Let's see whether it will happen!");
            return 0;
        }
        return (int) (gRegionContext.getPageStoreStats().getPageSizeRate()
            * (gRegionContext.getWriteBufferStats().getAverageKeyLen()
                + gRegionContext.getWriteBufferStats().getAverageValueLen())
            * elementSize);
    }

    /**
     * Snapshots this write buffer: flushes the current segment, copies the page index,
     * registers the region meta in the pending snapshot and then flushes every page of
     * the copied index through the file cache.
     *
     * <p>Every asynchronous step is tracked in the snapshot's result future via
     * {@code incRunningTask()} / {@code decRunningTask()}. On failure the snapshot is
     * ended and the future is completed exceptionally.
     *
     * @param snapshotOperation the snapshot operation to execute
     */
    @Override
    public void doSnapshot(SnapshotOperation snapshotOperation) {
        SnapshotManager.PendingSnapshot pendingSnapshot = snapshotOperation.getPendingSnapshot();
        long checkpointId = pendingSnapshot.getCheckpointId();
        SnapshotCompletableFuture snapshotCompletableFuture = pendingSnapshot.getResultFuture();
        if (snapshotCompletableFuture.isEndSnapshot()) {
            return;
        }
        boolean isLocalSnapshotEnabled = gRegionContext.getGContext().getGConfiguration().isLocalSnapshotEnabled();

        snapshotCompletableFuture.incRunningTask();
        try {
            Segment rs = addFlushingSegment();
            PageHandler pageHandler = createPageHandler(rs, false);
            gRegionContext.getWriteBufferStats().addTotalFlushingRecordCount(rs.getRecordCount());
            gRegionContext.getWriteBufferStats().addTotalRecordCount(-rs.getRecordCount());
            // segment to page.
            if (LOG.isDebugEnabled()) {
                LOG.debug("Start to snapshot write buffer for {}.", checkpointId);
            }
            // TODO #SR error handle and UT
            eventExecutor.execute(() -> {
                // Filled by deepCopy(); every page left in it must be released exactly once.
                final Map<PageAddress, DataPage> allAddReferenceDataPage = new HashMap<>();
                try {
                    pageHandler.handle();
                    endSegmentFlush(rs.getSegmentID());
                    // copy page index
                    PageIndex copyPageIndex = pageStore.getPageIndex().deepCopy(allAddReferenceDataPage);
                    pendingSnapshot.addGRegionSnapshotMeta(gRegionContext.getTableName(),
                        gRegionContext.getRegionId(),
                        copyPageIndex,
                        gRegionContext.getLastSeqID(),
                        gRegionContext.getRemoveAllSeqID());
                    if (LOG.isDebugEnabled()) {
                        LOG.debug("Segment flush and pageIndex copy done for {}, will start to flush.", checkpointId);
                    }
                    // increase running task for the submitted task.
                    snapshotCompletableFuture.incRunningTask();
                    gRegionContext.getGContext().getSupervisor().getSnapshotManager().getSnapshotExecutor().execute(
                        () -> flushSnapshotPages(
                            pendingSnapshot,
                            snapshotCompletableFuture,
                            checkpointId,
                            copyPageIndex,
                            allAddReferenceDataPage,
                            isLocalSnapshotEnabled));
                } catch (Exception e) {
                    snapshotCompletableFuture.setEndSnapshot();
                    snapshotCompletableFuture.completeExceptionally(e);
                    allAddReferenceDataPage.values().forEach(datapage -> datapage.delReferenceCount(ReleaseType.Normal));
                    LOG.error("Page handle error for {} with exception {}.", checkpointId, e.getMessage(), e);
                } finally {
                    snapshotCompletableFuture.decRunningTask();
                }
            });
        } catch (Exception e) {
            snapshotCompletableFuture.decRunningTask();
            snapshotCompletableFuture.setEndSnapshot();
            snapshotCompletableFuture.completeExceptionally(e);
            // The throwable is passed as the last argument, so no placeholder is needed.
            LOG.error("add flushing segment failed.", e);
            throw e;
        }
    }

    /**
     * Flushes every page of the copied page index through the file cache and records the
     * snapshot statistics. Runs on the snapshot executor.
     *
     * <p>The running-task counter incremented by the submitter is always decremented, and
     * all pages still held in {@code allAddReferenceDataPage} are always released. If the
     * iteration fails, the snapshot is ended and its future completed exceptionally.
     *
     * @param pendingSnapshot the snapshot the statistics are reported to
     * @param snapshotCompletableFuture result future of the snapshot
     * @param checkpointId ID of the checkpoint, used for logging
     * @param copyPageIndex the copied page index whose pages are flushed
     * @param allAddReferenceDataPage pages whose reference was added by the index copy
     * @param isLocalSnapshotEnabled whether pages are also added to the local file cache
     */
    private void flushSnapshotPages(
            SnapshotManager.PendingSnapshot pendingSnapshot,
            SnapshotCompletableFuture snapshotCompletableFuture,
            long checkpointId,
            PageIndex copyPageIndex,
            Map<PageAddress, DataPage> allAddReferenceDataPage,
            boolean isLocalSnapshotEnabled) {
        try {
            Iterator<PageAddress> pageIterator = copyPageIndex.pageIterator();
            EventExecutorGroup snapshotEventExecutorGroup = gRegionContext.getGContext().getSupervisor().getSnapshotExecutorGroup();
            FileCache fileCache = gRegionContext.getGContext().getSupervisor().getFileCache();
            int totalPage = 0;
            long totalSize = 0;
            int incrementalPages = 0;
            long incrementalSize = 0;
            int totalLocalPage = 0;
            long totalLocalSize = 0;
            int localIncrementalPages = 0;
            long localIncrementalSize = 0;
            while (gRegionContext.getGContext().isDBNormal()
                && pageIterator.hasNext()
                && !snapshotCompletableFuture.isEndSnapshot()) {
                PageAddress pageAddress = pageIterator.next();
                try {
                    EventExecutor snapshotEventExecutor = snapshotEventExecutorGroup.next();
                    // One running task per submitted flush; released in the callback.
                    snapshotCompletableFuture.incRunningTask();
                    if (!pageAddress.isDfsValid()) {
                        ++incrementalPages;
                        incrementalSize += pageAddress.getDataLen();
                    }
                    // TODO We can deliver a Callable to fileCache.flushPage, and execute it before async flush,
                    // but i think it's a bit ugly, and we will discuss it later. For now, it takes no effect in file cache.
                    fileCache.flushPage(pageAddress, gRegionContext, snapshotEventExecutor,
                        // TODO this should be true for the first snapshot after scale.
                        false, (success, throwable) -> {
                            if (!success) {
                                LOG.error("Write error when snapshot dfs.");
                                snapshotCompletableFuture.setEndSnapshot();
                                snapshotCompletableFuture.completeExceptionally(throwable);
                            }
                            snapshotCompletableFuture.decRunningTask();
                        });
                    // dfs statistics
                    ++totalPage;
                    totalSize += pageAddress.getDataLen();
                    if (isLocalSnapshotEnabled) {
                        snapshotCompletableFuture.incRunningTask();
                        if (!pageAddress.isLocalValid()) {
                            ++localIncrementalPages;
                            localIncrementalSize += pageAddress.getDataLen();
                        }
                        fileCache.addPage(pageAddress,
                            gRegionContext,
                            snapshotEventExecutor,
                            (success, throwable) -> {
                                if (!success) {
                                    LOG.error("Write error when snapshot local.");
                                    snapshotCompletableFuture.setEndSnapshot();
                                    snapshotCompletableFuture.completeExceptionally(throwable);
                                }
                                snapshotCompletableFuture.decRunningTask();
                            });
                        ++totalLocalPage;
                        totalLocalSize += pageAddress.getDataLen();
                    }
                    // remove for fast GC. not support retry snapshot.
                    pageIterator.remove();
                } finally {
                    // best effort to fast free space
                    DataPage dataPage = allAddReferenceDataPage.remove(pageAddress);
                    if (dataPage != null) {
                        dataPage.delReferenceCount(ReleaseType.Normal);
                    }
                }
            }
            SnapshotStat snapshotStat = pendingSnapshot.getSnapshotStat();
            snapshotStat.addAndGetTotalPages(totalPage);
            snapshotStat.addAndGetTotalSize(totalSize);
            snapshotStat.addAndGetIncrementalPages(incrementalPages);
            snapshotStat.addAndGetIncrementalSize(incrementalSize);
            snapshotStat.addAndGetTotalLocalPages(totalLocalPage);
            snapshotStat.addAndGetTotalLocalSize(totalLocalSize);
            snapshotStat.addAndGetLocalIncrementalPages(localIncrementalPages);
            snapshotStat.addAndGetLocalIncrementalSize(localIncrementalSize);
        } catch (Exception e) {
            // Same handling as for a failure before submission: fail the snapshot
            // instead of leaving its future pending forever.
            snapshotCompletableFuture.setEndSnapshot();
            snapshotCompletableFuture.completeExceptionally(e);
            LOG.error("Snapshot page flush error for {}.", checkpointId, e);
        } finally {
            try {
                // Always balance the incRunningTask() done by the submitter.
                snapshotCompletableFuture.decRunningTask();
            } finally {
                allAddReferenceDataPage.values().forEach(datapage -> datapage.delReferenceCount(ReleaseType.Normal));
            }
        }
    }
}