All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.vm.EvictHandlerSepImpl Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.vm;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.Supervisor;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddressCompositeImpl;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddressSingleImpl;
import org.apache.flink.runtime.state.gemini.engine.vm.EvictablePagePool.SortedEntry;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.guava18.com.google.common.base.MoreObjects;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutorGroup;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

/**
 * EvictHandlerSepImpl handles page eviction with a divide-and-conquer approach that is driven by
 * three per-thread memory water marks.
 * <ul>
 *   <li>Shortly before memory usage reaches the low mark, the evictable page pool starts being
 *       filled (depends on which pool is used).</li>
 *   <li>Once memory usage reaches the mid mark, pages start being flushed.</li>
 *   <li>Once memory usage exceeds the high mark, flushed pages start being evicted.</li>
 * </ul>
 */
public class EvictHandlerSepImpl {
	private static final Logger LOG = LoggerFactory.getLogger(EvictHandlerSepImpl.class);

	private final String name;
	private final GContext gContext;
	private final Supervisor supervisor;
	private final CacheManager cacheManager;
	private final EventExecutorGroup flushEventExecutorGroup;
	/**
	 * Pages that were handed to the file cache for flushing and now wait for eviction, mapped to
	 * the region they belong to. Only contains PageAddressSingleImpls.
	 */
	private final Map<PageAddress, GRegion> readyToEvictDataPageMap = new LinkedHashMap<>();
	/** Size accounted for the pages currently held in {@link #readyToEvictDataPageMap}. */
	private final AtomicLong preparedFlushedPageSize = new AtomicLong(0);
	/** Size of the pages whose flush was submitted but whose flush callback has not run yet. */
	private final AtomicLong flushingPageSize = new AtomicLong(0);
	/**
	 * contains both PageAddressSingleImpl and PageAddressCompositeImpl. Region can have individual strategy.
	 */
	private final EvictablePagePool pagePriorityPool;
	/** All regions registered through {@link #addRegion(GRegion)}; used as a set (key == value). */
	private final Map<GRegion, GRegion> allRegions = new ConcurrentHashMap<>();
	private final AtomicLong curThreadTotalPageUsedMem = new AtomicLong(0);
	private final AtomicLong curThreadTotalFlushedSize = new AtomicLong(0);
	private final AtomicLong curThreadTotalEvictedSize = new AtomicLong(0);
	private final long curThreadMemLowMark;
	private final long curThreadMemMidMark;
	private final long curThreadMemHighMark;
	private final int maxPreparedFlushSize;

	/** Current target for {@link #preparedFlushedPageSize}; temporarily raised for huge requests. */
	private volatile int dynamicPreparedFlushSize;
	//5 minutes. TODO configure it.
	private final long intervalShrinkDynamicPreparedFlushSize = 300 * 1000;
	private volatile long lastTimeChangeMaxFlushSize = System.currentTimeMillis();

	/**
	 * Creates a handler whose water marks are the cache manager's global marks divided evenly by
	 * the configured number of region threads.
	 *
	 * @param name     name of this handler, used in logs and {@link #toString()}.
	 * @param gContext context providing the supervisor, the cache manager and the configuration.
	 */
	public EvictHandlerSepImpl(String name, GContext gContext) {
		this.name = name;
		this.gContext = gContext;
		this.supervisor = gContext.getSupervisor();
		this.cacheManager = gContext.getSupervisor().getCacheManager();
		this.flushEventExecutorGroup = gContext.getSupervisor().getFlushExecutorGroup();

		//TODO can be dynamic adjust ratio
		int totalThread = gContext.getGConfiguration().getRegionThreadNum();
		curThreadMemLowMark = cacheManager.getMemLowMark() / totalThread;
		curThreadMemMidMark = cacheManager.getMemMidMark() / totalThread;
		curThreadMemHighMark = cacheManager.getMemHighMark() / totalThread;
		maxPreparedFlushSize = gContext.getGConfiguration().getMaxPreparedFlushSize();
		dynamicPreparedFlushSize = maxPreparedFlushSize;

		this.pagePriorityPool = new EvictablePagePoolSampleImpl(this, gContext, cacheManager);

		LOG.info(
			"EvictHandlerSepImpl {} curThreadMemLowMark={},curThreadMemMidMark={},curThreadMemHighMark={},maxPreparedFlushSize={},pagePriorityPool={}",
			name,
			curThreadMemLowMark,
			curThreadMemMidMark,
			curThreadMemHighMark,
			maxPreparedFlushSize,
			pagePriorityPool);
	}

	/**
	 * Removes the given page address and every page returned by its page iterator from the
	 * priority pool and from the ready-to-evict map.
	 *
	 * @param pageAddress the address (single or composite) that became invalid.
	 * @return the number of entries that were actually removed.
	 */
	public int removeInvalidPage(PageAddress pageAddress) {
		Iterator<PageAddress> pageAddressIterator = pageAddress.pageIterator();
		int delete = 0;
		//for PageAddressComposite.
		delete += pagePriorityPool.remove(pageAddress) ? 1 : 0;
		while (pageAddressIterator.hasNext()) {
			PageAddress subPageAddress = pageAddressIterator.next();
			//for subPage
			delete += pagePriorityPool.remove(subPageAddress) ? 1 : 0;
			if (readyToEvictDataPageMap.remove(subPageAddress) != null) {
				// the page no longer waits for eviction, so give its size back.
				preparedFlushedPageSize.addAndGet(-subPageAddress.getDataLen());
				delete++;
			}
		}
		return delete;
	}

	/**
	 * Registers a page as an eviction candidate in the priority pool.
	 *
	 * @param pageAddress the page to add.
	 * @param gRegion     the region the page belongs to.
	 */
	public void addPage(PageAddress pageAddress, GRegion gRegion) {
		pagePriorityPool.add(pageAddress, gRegion);
	}

	/**
	 * Prepares pages for flushing if memory usage has reached the mid mark; below the mid mark it
	 * only fills the pool once usage gets within {@code maxPreparedFlushSize} of the low mark.
	 *
	 * <p>The prepared-flush target is raised to {@code maxPreparedFlushSize + minSize} when a
	 * request does not fit into the current target, and is shrunk again step by step afterwards.
	 *
	 * @param minSize the minimum size the caller needs to have prepared.
	 */
	void tryPrepareFlush(int minSize) {
		if (this.curThreadTotalPageUsedMem.get() < curThreadMemMidMark) {
			if (this.curThreadTotalPageUsedMem.get() >= curThreadMemLowMark - maxPreparedFlushSize) {
				tryFillPool();
			}
			return;
		}
		long currentTime = System.currentTimeMillis();
		if (dynamicPreparedFlushSize < minSize) {
			// sum in long and saturate, a huge minSize must not wrap the target to a negative value.
			dynamicPreparedFlushSize = saturatedToInt((long) maxPreparedFlushSize + minSize);
			lastTimeChangeMaxFlushSize = currentTime;
		} else if (dynamicPreparedFlushSize > maxPreparedFlushSize && maxPreparedFlushSize > minSize) {
			//if a huge page was requested only once, we need to gradually shrink the target again.
			if (changeMaxFlushSizeTimeOut(currentTime)) {
				//the excess over maxPreparedFlushSize shrinks to 3/4 per interval (5 minutes),
				//so it has mostly decayed after about 10 intervals.
				long excess = (long) dynamicPreparedFlushSize - maxPreparedFlushSize;
				dynamicPreparedFlushSize = (int) (maxPreparedFlushSize + excess * 3 / 4);
				lastTimeChangeMaxFlushSize = currentTime;
			}

		}
		doPrepareFlush(minSize, saturatedToInt(dynamicPreparedFlushSize - preparedFlushedPageSize.get()));
	}

	/**
	 * @param currentTime current time in milliseconds.
	 * @return true if at least one shrink interval passed since the target was last changed.
	 */
	boolean changeMaxFlushSizeTimeOut(long currentTime) {
		return currentTime - lastTimeChangeMaxFlushSize >= intervalShrinkDynamicPreparedFlushSize;
	}

	/**
	 * @param oriPageAddress the address looked up in the priority pool.
	 * @param subPageAddress the address looked up in the ready-to-evict map.
	 * @return true if either lookup succeeds.
	 */
	boolean isPageAlreadyInEvict(PageAddress oriPageAddress, PageAddress subPageAddress) {
		return pagePriorityPool.containsPage(oriPageAddress) || readyToEvictDataPageMap.containsKey(subPageAddress);
	}

	/**
	 * Walks the sorted candidate list and submits pages for flushing until {@code bestFlushSize}
	 * has been submitted. After each pass the pool is refilled; the method also stops as soon as
	 * more than {@code minSize} is prepared or the DB is no longer in normal state.
	 *
	 * @param minSize       the prepared size that is sufficient to stop retrying.
	 * @param bestFlushSize the size that should ideally be submitted; nothing happens if it is not positive.
	 */
	@VisibleForTesting
	void doPrepareFlush(int minSize, int bestFlushSize) {
		if (bestFlushSize <= 0) {
			return;
		}
		if (this.curThreadTotalPageUsedMem.get() < curThreadMemMidMark) {
			return;
		}

		long flushedSize = 0;
		int scanPage = 0;
		while (flushedSize < bestFlushSize && gContext.isDBNormal()) {
			Iterator<SortedEntry> dataListIterator = getSortedList().iterator();
			while (dataListIterator.hasNext() && gContext.isDBNormal()) {
				SortedEntry sortedEntry = dataListIterator.next();
				final PageAddress pageAddress = sortedEntry.pageAddress;
				final GRegion gRegion = sortedEntry.region;
				//the sorted list is updated lazily, so the page may already have been removed from the pool.
				if (!pagePriorityPool.containsPage(pageAddress)) {
					dataListIterator.remove();
					continue;
				}

				// invalid or already prepared pages are of no use as candidates any more.
				if (!pageAddress.isPageValid() || readyToEvictDataPageMap.containsKey(pageAddress)) {
					dataListIterator.remove();
					pagePriorityPool.remove(pageAddress);
					continue;
				}

				PageAddress finalPageAddress = null;
				//if a PageAddressSingleImpl or all subPages of a PageAddressCompositeImpl have been handled.
				boolean needRemoveFromPool = false;
				//when it's a PageAddressCompositeImpl, its score is lowest, so its subPage would be evictable.
				if (pageAddress instanceof PageAddressSingleImpl) {
					finalPageAddress = pageAddress;
					needRemoveFromPool = true;
				} else if (pageAddress instanceof PageAddressCompositeImpl) {
					PageAddressCompositeImpl compositePageAddress = (PageAddressCompositeImpl) pageAddress;
					PageAddress[] subPages = compositePageAddress.getSubPageAddress();
					long tickTime = cacheManager.getCurrentTickTime();

					// sub pages that are still flushable, ordered by ascending score.
					List<SortedEntry> dataList = Arrays.stream(subPages).map(pa -> {
						//only do an effort to avoid invalid page.
						DataPage dataPage = pa.getDataPageNoReference();
						if (dataPage != null && pa.isPageValid() && !readyToEvictDataPageMap.containsKey(pa)) {
							return new SortedEntry(pa, gRegion, dataPage.score(tickTime));
						}
						return null;
					}).filter(Objects::nonNull).sorted(Comparator.comparingDouble(SortedEntry::getScore)).collect(
						Collectors.toList());

					boolean flushEnough = false;
					for (SortedEntry entry : dataList) {
						int curFlushedSize = prepareFlushSinglePage(entry.pageAddress, gRegion);
						if (curFlushedSize > 0) {
							pagePriorityPool.partialSubPageFlush(pageAddress, curFlushedSize);
						}
						flushedSize += curFlushedSize;
						scanPage++;
						if (flushedSize >= bestFlushSize) {
							flushEnough = true;
							break;
						}
					}
					if (!flushEnough) {
						// every sub page was tried, so the main page is next and the composite leaves the pool.
						PageAddress mainPageAddress = compositePageAddress.getMainPageAddress();
						finalPageAddress = !pageAddress.isPageValid() || readyToEvictDataPageMap.containsKey(
							mainPageAddress) ? null : mainPageAddress;
						needRemoveFromPool = true;
					}
				} else {
					throw new GeminiRuntimeException("Internal Bug: error PageAddress");
				}

				if (finalPageAddress != null) {
					flushedSize += prepareFlushSinglePage(finalPageAddress, gRegion);
					scanPage++;
				}
				if (needRemoveFromPool) {
					pagePriorityPool.remove(pageAddress);
					dataListIterator.remove();
				}
				if (flushedSize >= bestFlushSize) {
					break;
				}
			}
			tryFillPool();

			if (preparedFlushedPageSize.get() > minSize) {
				break;
			}

			LOG.info(
				"EvictHandler doFlushRegion NOT WORK bestFlushSize({}) minSize({}) flushedPageSize({}) preparedFlushedPageSize({}) scanPage({}) {}",
				bestFlushSize,
				minSize,
				flushedSize,
				preparedFlushedPageSize.get(),
				scanPage,
				toString());
		}
	}

	/**
	 * @return the sorted candidate list of the priority pool.
	 */
	List<SortedEntry> getSortedList() {
		return pagePriorityPool.getSortedList();
	}

	/**
	 * Asks the priority pool to fill itself from all registered regions.
	 *
	 * @return whatever the pool reports for the fill attempt.
	 */
	boolean tryFillPool() {
		return pagePriorityPool.tryFillPool(allRegions.keySet());
	}

	/**
	 * Submits a single page to the file cache for flushing and records it as ready to evict.
	 *
	 * @param pageAddress the page to flush; must be a PageAddressSingleImpl.
	 * @param gRegion     the region the page belongs to.
	 * @return the size of the submitted data page, or 0 if the page has no data or is invalid.
	 * @throws GeminiRuntimeException if the page is already recorded as ready to evict.
	 */
	private int prepareFlushSinglePage(PageAddress pageAddress, GRegion gRegion) {
		Preconditions.checkArgument(pageAddress instanceof PageAddressSingleImpl, "Internal Bug");
		final DataPage dataPage = pageAddress.getDataPage();
		// NOTE(review): getDataPage() presumably takes a reference that the flush callback releases.
		// The early return for an invalid page and the throw below do not release it - confirm
		// whether that leaks a reference.
		if (dataPage == null || !pageAddress.isPageValid()) {
			return 0;
		}
		final int dataPageSize = dataPage.getSize();

		if (readyToEvictDataPageMap.containsKey(pageAddress)) {
			throw new GeminiRuntimeException("Internal Bug");
		} else {
			readyToEvictDataPageMap.put(pageAddress, gRegion);
		}

		flushingPageSize.addAndGet(dataPageSize);
		preparedFlushedPageSize.addAndGet(dataPageSize);

		final EventExecutor flushEventExecutor = flushEventExecutorGroup.next();
		// the callback only touches atomic counters, the data page and the context.
		this.supervisor.getFileCache().addPage(pageAddress,
			gRegion.getGRegionContext(),
			flushEventExecutor,
			(success, throwable) -> {
				flushingPageSize.addAndGet(-dataPageSize);
				curThreadTotalFlushedSize.addAndGet(dataPageSize);
				dataPage.release();
				if (!success) {
					LOG.error("prepare flush {} failed", pageAddress, throwable);
					//TODO actually we needn't stop db here. for stability, we throw the Exception for now.
					// NOTE(review): the cause is only stringified into the message; consider passing
					// it as the exception cause if GeminiRuntimeException offers such a constructor.
					gContext.setDBInternalError(new GeminiRuntimeException("Prepare flush failed, " + throwable));
				}
			});

		return dataPageSize;
	}

	/**
	 * Adjusts the used-memory counter and, if requested, evicts the amount by which the new usage
	 * exceeds the high mark.
	 *
	 * @param logicPageSize the size to add (negative to subtract).
	 * @param needEvict     whether eviction may be triggered by this call.
	 */
	void addPageUsedMemory(int logicPageSize, boolean needEvict) {
		long mem = this.curThreadTotalPageUsedMem.addAndGet(logicPageSize);
		//do evict
		if (needEvict && mem > this.curThreadMemHighMark) {
			// saturate instead of truncating, so a huge excess cannot turn into a non-positive size.
			doEvict(saturatedToInt(mem - this.curThreadMemHighMark));
		}
	}

	/**
	 * Evicts pages from the ready-to-evict map whose flush has reached the file cache, until
	 * {@code expectedSize} has been evicted, memory usage is back below the high mark, or the DB
	 * is no longer in normal state. While not enough is evicted, more pages are prepared for
	 * flushing and the scan is retried.
	 *
	 * @param expectedSize the size to evict; nothing happens if it is not positive.
	 */
	void doEvict(int expectedSize) {
		if (expectedSize <= 0) {
			return;
		}
		//No page is ready in the pool, try to fill the pool and return.
		if (readyToEvictDataPageMap.size() == 0 && preparedFlushedPageSize.get() == 0) {
			tryFillPool();
			LOG.info("EvictHandler doEvict NOT WORK expectedSize({}) {}", expectedSize, toString());
			return;
		}
		long startTime = System.currentTimeMillis();
		// kept as long so that summing page lengths cannot overflow.
		long evictedSize = 0;
		int totalScanPageCount = 0;
		long running;

		while (evictedSize < expectedSize && gContext.isDBNormal()) {
			running = flushingPageSize.get();
			Iterator<Map.Entry<PageAddress, GRegion>> readyIterator = readyToEvictDataPageMap.entrySet().iterator();
			while (readyIterator.hasNext()) {
				Map.Entry<PageAddress, GRegion> entry = readyIterator.next();
				PageAddress pageAddress = entry.getKey();
				Preconditions.checkArgument(pageAddress instanceof PageAddressSingleImpl);
				totalScanPageCount++;

				//no need add reference.
				DataPage dataPage = pageAddress.getDataPageNoReference();
				if (dataPage == null) {
					if (!pageAddress.isPageValid()) {
						//later will remove it by removeInvalidPage
						continue;
					}
					throw new GeminiRuntimeException("Internal Bug");
				}
				if (this.supervisor.getFileCache().isCached(pageAddress)) {
					evictedSize += pageAddress.getDataLen();
					//for removing reference from PageAddress
					this.supervisor.getBloomFilterManager().addBloomFilter(pageAddress, dataPage);
					pageAddress.setDataPage(null);
					dataPage.release();
					entry.getValue().getGRegionContext().getPageStoreStats().addPageUsedMemory(entry.getValue(),
						-pageAddress.getDataLen(),
						false);
					readyIterator.remove();
					preparedFlushedPageSize.addAndGet(-pageAddress.getDataLen());
					curThreadTotalEvictedSize.addAndGet(pageAddress.getDataLen());
					cacheManager.getCacheStats().addPageCacheEvictSize(pageAddress.getDataLen());
				} else {
					//not in the file cache yet, look at the next page.
					continue;
				}

				if (evictedSize >= expectedSize) {
					break;
				}

			}

			long nowTime = System.currentTimeMillis();
			if (evictedSize < expectedSize) {
				//for corner case, for example a very huge page is evicted.
				if (this.curThreadTotalPageUsedMem.get() < curThreadMemHighMark) {
					break;
				}
				//slow flushing and some wrong condition both be regarded as block.
				this.cacheManager.getCacheStats().addEvictBlock(1);
				//for next time, ignore the running flushing page.
				// the remainder is positive and not larger than expectedSize, so it fits into an int.
				tryPrepareFlush((int) (expectedSize - evictedSize));
				//currentFlushingPageSize is enough, retry directly.
				if (running > expectedSize || flushingPageSize.get() > (expectedSize - evictedSize)) {
					continue;
				}

				//prepared flush pages are not enough.
				LOG.info(
					"EvictHandler doEvict NOT WORK,have run ({})ms,expectedSize({}), evictedSize({}), scanPageCount({}), currentRunning({}) {}",
					(nowTime - startTime),
					expectedSize,
					evictedSize,
					totalScanPageCount,
					running,
					toString());
			}
		}
	}

	@Override
	public String toString() {
		return MoreObjects.toStringHelper(this).
			add("name", name).
			add("curThreadMemLowMark", curThreadMemLowMark).
			add("curThreadMemMidMark", curThreadMemMidMark).
			add("curThreadMemHighMark", curThreadMemHighMark).
			add("curThreadTotalPageUsedMem", curThreadTotalPageUsedMem).
			add("maxPreparedFlushSize", maxPreparedFlushSize).
			add("flushingPageSize", flushingPageSize).
			add("preparedFlushedPageSize", preparedFlushedPageSize).
			add("readyToEvictDataPageMapCount", readyToEvictDataPageMap.size()).
			add("logicPagePriorityPoolSize", pagePriorityPool.size()).
			add("logicPagePriorityPoolDataLen", pagePriorityPool.dataSize()).
			add("curThreadTotalEvictedSize", curThreadTotalEvictedSize).
			add("curThreadTotalFlushedSize", curThreadTotalFlushedSize).
			add("dynamicPreparedFlushSize", dynamicPreparedFlushSize).
			add("pagePriorityPool", pagePriorityPool.toString()).toString();
	}

	/**
	 * Registers a region so that its pages are considered when the pool is filled.
	 *
	 * @param gRegion the region to register.
	 */
	public void addRegion(GRegion gRegion) {
		allRegions.put(gRegion, gRegion);
	}

	public long getMaxPreparedFlushSize() {
		return maxPreparedFlushSize;
	}

	@VisibleForTesting
	public int getDynamicPreparedFlushSize() {
		return dynamicPreparedFlushSize;
	}

	public long getCurThreadMemLowMark() {
		return curThreadMemLowMark;
	}

	@VisibleForTesting
	long getCurThreadTotalPageUsedMem() {
		return curThreadTotalPageUsedMem.get();
	}

	@VisibleForTesting
	public long getCurThreadMemMidMark() {
		return curThreadMemMidMark;
	}

	@VisibleForTesting
	EvictablePagePool getEvictablePagePool() {
		return pagePriorityPool;
	}

	/**
	 * @return the live (not copied) map of pages that are ready to evict.
	 */
	@VisibleForTesting
	public Map<PageAddress, GRegion> getReadyToEvictDataPageMap() {
		return readyToEvictDataPageMap;
	}

	@VisibleForTesting
	long getPreparedFlushedPageSize() {
		return preparedFlushedPageSize.get();
	}

	/** Shuts down the priority pool. */
	public void shutdown() {
		pagePriorityPool.shutdown();
	}

	public String getName() {
		return name;
	}

	public long getLastTimeChangeMaxFlushSize() {
		return lastTimeChangeMaxFlushSize;
	}

	/**
	 * Classifies the memory usage that would result from adding {@code addSize}.
	 *
	 * @param addSize the size that is about to be added to the current usage.
	 * @return Normal below the low mark, Low below the mid mark, Middle below the high mark,
	 *         High otherwise.
	 */
	public EvictPolicy.MemoryUsedWaterMark getMemoryUsedWaterMark(int addSize) {
		// read the counter once so that all comparisons see the same value.
		final long projectedMem = curThreadTotalPageUsedMem.get() + addSize;
		if (projectedMem < curThreadMemLowMark) {
			return EvictPolicy.MemoryUsedWaterMark.Normal;
		} else if (projectedMem < curThreadMemMidMark) {
			return EvictPolicy.MemoryUsedWaterMark.Low;
		} else if (projectedMem < curThreadMemHighMark) {
			return EvictPolicy.MemoryUsedWaterMark.Middle;
		} else {
			return EvictPolicy.MemoryUsedWaterMark.High;
		}
	}

	/** Narrows a long to an int, clamping to the int range instead of wrapping around. */
	private static int saturatedToInt(long value) {
		return (int) Math.max(Integer.MIN_VALUE, Math.min(Integer.MAX_VALUE, value));
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy