All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.handler.EvictHandlerSepImpl Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.handler;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.dbms.Supervisor;
import org.apache.flink.runtime.state.gemini.engine.exceptions.GeminiRuntimeException;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.LogicChainedPage;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.rm.ReferenceCount.ReleaseType;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManager;

import org.apache.flink.shaded.guava18.com.google.common.base.MoreObjects;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutor;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutorGroup;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

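/**
 * EvictHandlerSepImpl: a {@link Handler} that collects evictable pages, hands them to the file
 * cache for flushing and evicts them from memory once they are cached.
 */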
public class EvictHandlerSepImpl implements Handler {
	private static final Logger LOG = LoggerFactory.getLogger(EvictHandlerSepImpl.class);
	/** Handler name, reported by {@link #toString()}. */
	private final String name;
	private final GContext gContext;
	private final Supervisor supervisor;
	private final CacheManager cacheManager;
	/** Supplies the executor passed to the file cache for every prepared flush. */
	private final EventExecutorGroup flushEventExecutorGroup;
	/**
	 * Pages that were handed to the file cache and wait for eviction, in insertion order,
	 * mapped to the region they were registered with.
	 */
	private final Map<PageAddress, GRegion> readyToEvictDataPageMap = new LinkedHashMap<>();
	/** Total size of the pages submitted for flush that were neither evicted nor invalidated yet. */
	private final AtomicLong preparedFlushedPageSize = new AtomicLong(0);
	/** Total size of the pages whose flush was submitted but whose completion callback has not run yet. */
	private final AtomicLong runningFlushedPageSize = new AtomicLong(0);
	/** Candidate pages that have not been submitted for flush yet. */
	private final LogicPagePriorityPool pagePriorityPool;
	/** Page memory accounted to this handler through {@code addPageUsedMemory}. */
	private final AtomicLong curThreadTotalPageUsedMem = new AtomicLong(0);
	/** Accumulated size of the pages whose flush callback has run. */
	private final AtomicLong curThreadTotalFlushedSize = new AtomicLong(0);
	/** Accumulated size of the pages evicted by this handler. */
	private final AtomicLong curThreadTotalEvictedSize = new AtomicLong(0);
	/** Cache manager low/mid/high marks divided by the configured region thread number. */
	private final long curThreadMemLowMark;
	private final long curThreadMemMidMark;
	private final long curThreadMemHighMark;
	/** Upper bound for {@link #preparedFlushedPageSize}: 1/20 of the mid mark. */
	private final long maxPreparedFlushSize;
	/** Configured batch sort count; also used as the refill target when eviction finds nothing ready. */
	private final int batchSortCount;
	/** Page-index position returned by the last pool refill triggered from eviction. */
	private volatile int autoFillCursor = 0;

	/**
	 * Creates the handler and derives this thread's share of the cache manager's memory marks.
	 *
	 * @param name handler name, reported by {@link #toString()}
	 * @param gContext engine context that supplies the supervisor and the configuration
	 */
	public EvictHandlerSepImpl(String name, GContext gContext) {
		this.name = name;
		this.gContext = gContext;
		this.supervisor = gContext.getSupervisor();
		this.cacheManager = gContext.getSupervisor().getCacheManager();
		this.flushEventExecutorGroup = gContext.getSupervisor().getFlushExecutorGroup();

		//TODO can be dynamic adjust ratio
		// every mark of the cache manager is split evenly across the region threads
		final int regionThreadNum = gContext.getGConfiguration().getRegionThreadNum();
		this.curThreadMemLowMark = this.cacheManager.getMemLowMark() / regionThreadNum;
		this.curThreadMemMidMark = this.cacheManager.getMemMidMark() / regionThreadNum;
		this.curThreadMemHighMark = this.cacheManager.getMemHighMark() / regionThreadNum;
		this.maxPreparedFlushSize = this.curThreadMemMidMark / 20;
		this.batchSortCount = gContext.getGConfiguration().getBatchSortCount();

		// the candidate pool may hold up to 1.5 times what is allowed to be prepared for flush
		final long poolMaxDataLen = (long) (this.maxPreparedFlushSize * 1.5);
		this.pagePriorityPool = new LogicPagePriorityPool(this.batchSortCount, poolMaxDataLen);
	}

	/**
	 * Does nothing; the work of this handler happens in its other public methods.
	 */
	@Override
	public void handle() {
		// intentionally empty
	}

	/**
	 * Registers a page as an eviction candidate and tries to prepare flushes.
	 * The call is ignored while the accounted page memory is below the low mark.
	 *
	 * @param pageAddress address of the candidate page
	 * @param gRegion region the page belongs to
	 */
	public void addEvictablePage(PageAddress pageAddress, GRegion gRegion) {
		final boolean reachedLowMark = curThreadTotalPageUsedMem.get() >= curThreadMemLowMark;
		if (reachedLowMark) {
			pagePriorityPool.add(pageAddress, gRegion);
			doPrepareFlush();
		}
	}

	/**
	 * Drops pages that are no longer valid from the candidate pool and from the ready-to-evict
	 * map, then refills the pool and prepares further flushes to compensate.
	 *
	 * @param gRegion region whose page index is scanned when the pool is refilled
	 * @param curIndex page-index position the refill scan starts after; that slot itself is not scanned
	 * @param relatedIndex page-index position the refill scan skips
	 * @param pageAddressList addresses of the pages to drop
	 */
	public void removeInvalidPage(GRegion gRegion, int curIndex, int relatedIndex, List<PageAddress> pageAddressList) {
		//1. remove invalid Page
		long totalRemoveReadToEvictSize = 0;
		int totalRemoveCount = 0;
		for (PageAddress pageAddress : pageAddressList) {
			if (pagePriorityPool.remove(pageAddress)) {
				totalRemoveCount++;
			}
			if (readyToEvictDataPageMap.remove(pageAddress) != null) {
				// NOTE(review): the prepared size was increased by DataPage#getSize() when the page was
				// submitted; this assumes getDataLen() reports the same value - confirm.
				totalRemoveReadToEvictSize += pageAddress.getDataLen();
				totalRemoveCount++;
			}
		}

		if (totalRemoveCount > 0) {
			// replace the dropped candidates; the returned cursor is not needed here
			tryFillPool(gRegion, curIndex, relatedIndex, totalRemoveCount);
		}

		if (totalRemoveReadToEvictSize > 0) {
			preparedFlushedPageSize.addAndGet(-totalRemoveReadToEvictSize);
			//2. prepare flush another pages
			if (this.curThreadTotalPageUsedMem.get() < curThreadMemLowMark) {
				return;
			}
			doPrepareFlush();
		}
	}

	/**
	 * Best-effort refill of the candidate pool from the page index of the given region.
	 *
	 * <p>The scan starts right after {@code curIndex}, wraps around the index and, each time it
	 * comes back to {@code curIndex}, moves one level deeper into the page chains (levels 0 to 2).
	 * It stops once {@code minAddPage} pages were added, after {@code 3 * pages.length} scanned
	 * slots, or when the DB is no longer in the normal state.
	 *
	 * @param gRegion region whose page index is scanned
	 * @param curIndex slot the scan starts after; the slot itself is never scanned
	 * @param relatedIndex slot that is skipped by the scan
	 * @param minAddPage number of pages to add before the scan stops
	 * @return {@code curIndex} if the pool already holds enough data, otherwise the slot the scan
	 *         stopped at (0 if that is past the end of the index)
	 */
	private int tryFillPool(GRegion gRegion, int curIndex, int relatedIndex, int minAddPage) {

		if (pagePriorityPool.getCurDataLen() >= maxPreparedFlushSize) {
			return curIndex;
		}
		//we need to feed pool.
		LogicChainedPage[] pages = gRegion.getPageStore().getPageIndex().getPageIndex();
		final long maxScanCount = 3L * pages.length;
		// A regular scan takes at most 3 * length scanned slots plus a few wrap/skip steps. The cap
		// only trips when neither the scan counter nor the depth can advance, e.g. an empty or
		// single-slot index where the only reachable slot is relatedIndex.
		final long maxSteps = 4L * pages.length + 8;
		long steps = 0;
		int scanCount = 0;
		int addedPage = 0;
		int cursor = curIndex + 1;
		int indexDeep = 0;
		while (indexDeep < 3 && addedPage < minAddPage && gContext.isDBNormal()) {
			if (++steps > maxSteps) {
				break;
			}
			if (cursor >= pages.length) {
				cursor = 0;
			}
			if (cursor == curIndex) {
				// one full round is done, continue with the next level of the page chains
				indexDeep++;
				cursor = curIndex + 1;
				continue;
			}
			if (cursor == relatedIndex) {
				//skip the split or compact page. because it will be added soon.
				cursor++;
				continue;
			}
			scanCount++;
			if (scanCount >= maxScanCount) {
				break;
			}
			LogicChainedPage logicChainedPage = pages[cursor];
			if (logicChainedPage != null && logicChainedPage.getCurrentPageChainIndex() >= indexDeep) {
				//only check the chain level of the current round, best-efforts to feed the pool.
				PageAddress pageAddress = logicChainedPage.getPageAddress(indexDeep);
				if (pageAddress != null && pageAddress.getDataPageNoReference() != null) {
					//thread safe, so we don't need to add reference.
					if (!pagePriorityPool.dataMap.containsKey(pageAddress) && !readyToEvictDataPageMap.containsKey(
						pageAddress)) {
						pagePriorityPool.add(pageAddress, gRegion);
						addedPage++;
						if (addedPage >= minAddPage) {
							break;
						}
					}
				}
			}
			cursor++;
		}
		// parameterized and guarded so that nothing is formatted or boxed when debug is off
		if (LOG.isDebugEnabled()) {
			LOG.debug("tryFillPool scanCount={} ,minAddPage={} ,addedPage={} ,cursor={} ,pages.length={}",
				scanCount,
				minAddPage,
				addedPage,
				cursor,
				pages.length);
		}
		return cursor >= pages.length ? 0 : cursor;
	}

	/**
	 * Moves pages from the candidate pool to the ready-to-evict map and submits each of them to
	 * the file cache, until the prepared size reaches {@code maxPreparedFlushSize} or the pool is
	 * empty. Nothing is done while the accounted page memory is below the mid mark.
	 *
	 * @throws GeminiRuntimeException if a pooled page has no data page or is already waiting for eviction
	 */
	private void doPrepareFlush() {
		long needPrepareFlush = maxPreparedFlushSize - preparedFlushedPageSize.get();
		if (needPrepareFlush <= 0) {
			return;
		}

		if (this.curThreadTotalPageUsedMem.get() < curThreadMemMidMark) {
			return;
		}

		long flushedSize = 0;

		//no need to sort, that's a small pool.
		Iterator<Map.Entry<PageAddress, GRegion>> pagesIterator = pagePriorityPool.dataMap.entrySet().iterator();
		int scanPage = 0;
		while (pagesIterator.hasNext()) {
			final Map.Entry<PageAddress, GRegion> entry = pagesIterator.next();
			// copy key and value out: the entry is removed from the map below and the flush callback
			// runs later, so the entry object itself must not be read afterwards
			final PageAddress pageAddress = entry.getKey();
			final GRegion gRegion = entry.getValue();
			// the data page obtained here is released by the flush callback below
			final DataPage dataPage = pageAddress.getDataPage();
			if (dataPage == null) {
				throw new GeminiRuntimeException("Internal Bug");
			}
			final int dataPageSize = dataPage.getSize();
			pagesIterator.remove();
			// removal went through the iterator, so the pool's length counter is adjusted by hand
			pagePriorityPool.addDataLen(-dataPageSize);
			scanPage++;

			if (readyToEvictDataPageMap.containsKey(pageAddress)) {
				throw new GeminiRuntimeException("Internal Bug");
			} else {
				readyToEvictDataPageMap.put(pageAddress, gRegion);
			}

			runningFlushedPageSize.addAndGet(dataPageSize);
			preparedFlushedPageSize.addAndGet(dataPageSize);

			final EventExecutor flushEventExecutor = flushEventExecutorGroup.next();
			gContext.getSupervisor().getFileCache().addPage(pageAddress,
				gRegion.getGRegionContext(),
				flushEventExecutor,
				(success, throwable) -> {
					runningFlushedPageSize.addAndGet(-dataPageSize);
					curThreadTotalFlushedSize.addAndGet(dataPageSize);
					dataPage.delReferenceCount(ReleaseType.Normal);
					if (!success) {
						// the throwable is the trailing argument without a placeholder, so it is
						// logged as the exception of this event
						LOG.error("prepare flush {} failed", pageAddress, throwable);
						gContext.setDBInternalError(new GeminiRuntimeException("Prepare flush failed, " + throwable));
					}
				});

			flushedSize += dataPageSize;
			if (flushedSize >= needPrepareFlush) {
				break;
			}
		}

		LOG.debug(
			"EvictHandler doFlushRegion totalPreparePool({}) readyToEvictDataPageMap({}) expectedSize ({}) flushedPageSize({}) scanPage({}) preparedFlushedPageSize({}),runningFlushedPageSize({})",
			pagePriorityPool.size(),
			readyToEvictDataPageMap.size(),
			needPrepareFlush,
			flushedSize,
			scanPage,
			preparedFlushedPageSize.get(),
			runningFlushedPageSize.get());
	}

	/**
	 * Adds the given size to the page memory accounted to this handler and optionally evicts.
	 *
	 * @param gRegion region passed on to the eviction
	 * @param logicPageSize size to add; callers in this class pass a negative value after an eviction
	 * @param needEvict whether an eviction of {@code logicPageSize} should be attempted afterwards
	 */
	public void addPageUsedMemory(GRegion gRegion, int logicPageSize, boolean needEvict) {
		curThreadTotalPageUsedMem.addAndGet(logicPageSize);
		if (!needEvict) {
			return;
		}
		//do evict
		doEvict(gRegion, logicPageSize);
	}

	/**
	 * Evicts pages that the file cache already holds until at least {@code expectedSize} was
	 * freed or the DB leaves the normal state. Nothing is done while the accounted page memory
	 * is below the high mark.
	 *
	 * <p>If not enough pages are cached yet, the method either waits for running flushes to
	 * complete or refills the pool, prepares more flushes and tries again.
	 *
	 * @param gRegion region whose page index is used to refill the candidate pool
	 * @param expectedSize size that should be freed
	 * @throws GeminiRuntimeException if a page waiting for eviction has no data page
	 */
	private void doEvict(GRegion gRegion, int expectedSize) {
		if (this.curThreadTotalPageUsedMem.get() < this.curThreadMemHighMark) {
			return;
		}
		//no any page is readied on pool, just return.
		if (readyToEvictDataPageMap.size() == 0 && preparedFlushedPageSize.get() == 0) {
			autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, batchSortCount);
			return;
		}
		long startTime = System.currentTimeMillis();
		int evictedSize = 0;
		int totalScanPageCount = 0;
		int totalEvictPageCount = 0;

		while (evictedSize < expectedSize && gContext.isDBNormal()) {
			long curRunningSize = runningFlushedPageSize.get();
			Iterator<Map.Entry<PageAddress, GRegion>> readyIterator = readyToEvictDataPageMap.entrySet().iterator();
			while (readyIterator.hasNext()) {
				Map.Entry<PageAddress, GRegion> entry = readyIterator.next();
				PageAddress pageAddress = entry.getKey();
				GRegion pageRegion = entry.getValue();

				totalScanPageCount++;

				//no need add reference.
				DataPage dataPage = pageAddress.getDataPageNoReference();
				if (dataPage == null) {
					throw new GeminiRuntimeException("Internal Bug");
				}
				if (gContext.getSupervisor().getFileCache().isCached(pageAddress)) {
					// read the size once, before the discard release below, so that the accounting
					// does not have to touch the page after its reference was dropped
					final int dataPageSize = dataPage.getSize();
					evictedSize += dataPageSize;
					totalEvictPageCount++;
					//for removing reference from PageAddress
					pageAddress.setDataPage(null);
					if (dataPage.getGBinaryHashMap().getGByteBuffer().getCnt() != 1) {
						// someone else still references the buffer; let the release manager watch it
						gContext.getSupervisor().getDiscardOrEvictPageReleaseManager().addMonitorPageStillHaveReference(
							dataPage.getGBinaryHashMap().getGByteBuffer(),
							ReleaseType.Discard,
							pageAddress);
					}
					dataPage.delReferenceCount(ReleaseType.Discard);
					pageRegion.getGRegionContext().getPageStoreStats().addPageUsedMemory(pageRegion, -dataPageSize,
						false);
					readyIterator.remove();
					curThreadTotalEvictedSize.addAndGet(dataPageSize);
					cacheManager.getCacheStats().addPageCacheEvictSize(dataPageSize);
					preparedFlushedPageSize.addAndGet(-dataPageSize);
				} else {
					//one by one evict
					continue;
				}

				if (evictedSize >= expectedSize) {
					break;
				}

			}

			long nowTime = System.currentTimeMillis();
			if (evictedSize < expectedSize) {
				//slow flushing and some wrong condition both be regarded as block.
				this.cacheManager.getCacheStats().addEvictBlock(1);
				if (curRunningSize >= (expectedSize - evictedSize)) {
					//force wait.
					// NOTE(review): this loop spins without sleeping and logs at INFO on every
					// iteration until enough running flushes have completed.
					while (curRunningSize - runningFlushedPageSize.get() < (expectedSize - evictedSize) && gContext.isDBNormal()) {
						autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, batchSortCount);
						LOG.info(
							"EvictHandler doEvict blocking {}ms ,have run ({})ms,expectedSize({}), evictedSize({}),scanPageCount({}) readyToEvictDataPageMap({}), beforeRunning({}), preparedFlushedPageSize({}), runningFlushedPageSize({})",
							(System.currentTimeMillis() - nowTime),
							(nowTime - startTime),
							expectedSize,
							evictedSize,
							totalScanPageCount,
							readyToEvictDataPageMap.size(),
							curRunningSize,
							preparedFlushedPageSize.get(),
							runningFlushedPageSize.get());
					}
				} else {
					// the running flushes cannot cover the missing size: feed the pool, prepare
					// more flushes and scan again
					LOG.info(
						"EvictHandler doEvict NOT WORK,have run ({})ms,expectedSize({}), evictedSize({}),scanPageCount({}) readyToEvictDataPageMap({}), preparedFlushedPageSize({}), runningFlushedPageSize({})",
						(nowTime - startTime),
						expectedSize,
						evictedSize,
						totalScanPageCount,
						readyToEvictDataPageMap.size(),
						preparedFlushedPageSize.get(),
						runningFlushedPageSize.get());
					autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, batchSortCount);
					doPrepareFlush();
					continue;
				}
			} else {
				// done: replace the evicted pages with new candidates
				autoFillCursor = tryFillPool(gRegion, autoFillCursor, autoFillCursor, totalEvictPageCount);
				break;
			}
		}
		LOG.debug(
			"EvictHandler doEvict totalPreparePool({}) readyToEvictDataPageMap({}) expectedSize ({}) evictedSize({}) scanPage({}) readyToEvictDataPageMap({}), preparedFlushedPageSize({}),runningFlushedPageSize({})",
			pagePriorityPool.size(),
			readyToEvictDataPageMap.size(),
			expectedSize,
			evictedSize,
			totalScanPageCount,
			readyToEvictDataPageMap.size(),
			preparedFlushedPageSize.get(),
			runningFlushedPageSize.get());
	}

	/**
	 * Renders the handler name, its memory marks and the current counters.
	 */
	@Override
	public String toString() {
		final MoreObjects.ToStringHelper helper = MoreObjects.toStringHelper(this);
		helper.add("name", name);
		helper.add("curThreadMemLowMark", curThreadMemLowMark);
		helper.add("curThreadMemMidMark", curThreadMemMidMark);
		helper.add("curThreadMemHighMark", curThreadMemHighMark);
		helper.add("curThreadTotalPageUsedMem", curThreadTotalPageUsedMem);
		helper.add("maxPreparedFlushSize", maxPreparedFlushSize);
		helper.add("runningFlushedPageSize", runningFlushedPageSize);
		helper.add("preparedFlushedPageSize", preparedFlushedPageSize);
		helper.add("readyToEvictDataPageMapCount", readyToEvictDataPageMap.size());
		helper.add("logicPagePriorityPoolSize", pagePriorityPool.size());
		helper.add("logicPagePriorityPoolDataLen", pagePriorityPool.getCurDataLen());
		helper.add("curThreadTotalEvictedSize", curThreadTotalEvictedSize);
		helper.add("curThreadTotalFlushedSize", curThreadTotalFlushedSize);
		return helper.toString();
	}

	/**
	 * LogicPagePriorityPool.
	 * not thread safe.
	 *
	 * <p>Keeps candidate pages together with their region and tracks the summed size of their
	 * data pages. Once the pool has reached its maximum length, every {@code batchSortCount}-th
	 * add sorts the pages and drops the highest-scored ones until the pool fits again.
	 */
	public static class LogicPagePriorityPool {
		/** Candidate pages mapped to the region they were added with. */
		HashMap<PageAddress, GRegion> dataMap = new HashMap<>();
		/** Summed data page size of the pooled pages. */
		private long curDataLen = 0;
		private final long maxDataLen;
		/** Number of adds since the last sort that left the pool at or above {@link #maxDataLen}. */
		private int curCount = 0;
		private final int batchSortCount;

		/**
		 * @param batchSortCount number of over-limit adds after which the pool is sorted and trimmed
		 * @param maxDataLen length the pool is trimmed back to
		 */
		public LogicPagePriorityPool(int batchSortCount, long maxDataLen) {
			this.maxDataLen = maxDataLen;
			this.batchSortCount = batchSortCount;
		}

		/** Returns the number of pooled pages. */
		public int size() {
			return dataMap.size();
		}

		/**
		 * Adds a page to the pool and trims the pool if it is due. Pages without a data page are
		 * ignored.
		 *
		 * @param pageAddress address of the page to add
		 * @param gRegion region the page belongs to
		 * @throws GeminiRuntimeException if the page is already pooled
		 */
		public void add(PageAddress pageAddress, GRegion gRegion) {
			// takes a reference on the data page; it is given back in the finally block
			DataPage dataPage = pageAddress.getDataPage();
			if (dataPage == null) {
				return;
			}

			try {
				if (dataMap.put(pageAddress, gRegion) == null) {
					curDataLen += dataPage.getSize();

					if (curDataLen < maxDataLen) {
						return;
					}
					curCount++;
					//sorting each batchSortCount pages
					if (curCount < batchSortCount) {
						return;
					}

					//a costly sorting
					List<SortedEntry> dataList = dataMap.keySet().stream().map(k -> new SortedEntry(k)).collect(
						Collectors.toList());
					Collections.sort(dataList, Comparator.comparingDouble(SortedEntry::getValue));

					// ascending order: walk from the end so that the highest-scored pages go first
					int index = dataList.size() - 1;
					while (curDataLen > maxDataLen && index >= 0) {
						PageAddress lastPageAddress = dataList.get(index).pageAddress;
						remove(lastPageAddress);
						index--;
					}

					curCount = 0;
				} else {
					throw new GeminiRuntimeException("InternalBug");
				}
			} finally {
				dataPage.delReferenceCount(ReleaseType.Normal);
			}
		}

		/**
		 * Removes a page from the pool and subtracts its data page size from the pool length.
		 *
		 * @param pageAddress address of the page to remove
		 * @return {@code true} if the page was pooled
		 * @throws GeminiRuntimeException if the page was pooled but has no data page any more
		 */
		public boolean remove(PageAddress pageAddress) {
			if (dataMap.remove(pageAddress) != null) {
				DataPage dataPage = pageAddress.getDataPageNoReference();
				//no need reference
				if (dataPage == null) {
					throw new GeminiRuntimeException("InternalBug");
				}
				curDataLen -= dataPage.getSize();
				return true;
			}
			return false;
		}

		/**
		 * Adjusts the pool length by the given amount; for callers that change {@link #dataMap}
		 * directly.
		 *
		 * @param dataLen amount to add, may be negative
		 */
		public void addDataLen(int dataLen) {
			curDataLen += dataLen;
		}

		@VisibleForTesting
		public HashMap<PageAddress, GRegion> getDataMap() {
			return dataMap;
		}

		@VisibleForTesting
		public long getCurDataLen() {
			return curDataLen;
		}

		@VisibleForTesting
		public long getSize() {
			return dataMap.size();
		}
	}

	/**
	 * SortedEntry.
	 * Pairs a page address with the sort value computed for it at construction time.
	 */
	public static class SortedEntry {
		double sortedValue;
		PageAddress pageAddress;

		/**
		 * @param pageAddress address to wrap; its sort value is computed immediately
		 */
		public SortedEntry(PageAddress pageAddress) {
			final double computedValue = EvictHandlerSepImpl.getComparableValueFromKey(pageAddress);
			this.pageAddress = pageAddress;
			this.sortedValue = computedValue;
		}

		/** Returns the sort value captured at construction time. */
		public double getValue() {
			return sortedValue;
		}
	}

	/**
	 * Computes the sort value of a page: its request count (a count of 0 is treated as 1)
	 * divided by the compaction count of its data page.
	 *
	 * @param pageAddress address of the page to score
	 * @return the sort value; it gets smaller with fewer requests or more compactions
	 * @throws GeminiRuntimeException if the page has no data page
	 */
	public static double getComparableValueFromKey(PageAddress pageAddress) {
		//requestCount small or compactionCount big
		final DataPage page = pageAddress.getDataPageNoReference();
		if (page == null) {
			throw new GeminiRuntimeException("InternalBug");
		}
		final long requests = pageAddress.getRequestCount();
		double numerator = (double) requests;
		if (requests == 0) {
			numerator = (double) 1;
		}
		return numerator / page.getCompactionCount();
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy