All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.vm.EvictablePagePoolSampleImpl Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.vm;

import org.apache.flink.annotation.VisibleForTesting;
import org.apache.flink.runtime.state.gemini.engine.GRegion;
import org.apache.flink.runtime.state.gemini.engine.dbms.GContext;
import org.apache.flink.runtime.state.gemini.engine.page.DataPage;
import org.apache.flink.runtime.state.gemini.engine.page.LogicalPageChain;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddressCompositeImpl;
import org.apache.flink.runtime.state.gemini.engine.page.PageStore;

import org.apache.flink.shaded.guava18.com.google.common.base.MoreObjects;
import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.ThreadFactoryBuilder;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.LinkedBlockingQueue;
import java.util.concurrent.RejectedExecutionException;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicLong;
import java.util.stream.Collectors;

/**
 * EvictablePagePoolSampleImpl keeps only a sample of all pages in order to reduce the cost of sorting.
 * The pool can contain both PageAddressSingleImpl and PageAddressCompositeImpl entries.
 * If a PageAddressCompositeImpl has entered the pool but only some of its PageAddressSingleImpl
 * sub-pages have been evicted, it is kept in this pool.
 * The key to good performance is to keep this sample pool full.
 * This class is not thread safe.
 */
public class EvictablePagePoolSampleImpl implements EvictablePagePool {
	private static final Logger LOG = LoggerFactory.getLogger(EvictablePagePoolSampleImpl.class);
	private final CacheManager cacheManager;
	/** Pooled pages (PageAddressComposite or PageAddressSingle) mapped to the region they were added for. */
	private final Map<PageAddress, GRegion> dataPoolMap = new ConcurrentHashMap<>();
	/** Memory size recorded for each pooled page when it was added; reduced by partialSubPageFlush. */
	private final Map<PageAddress, Long> dataPoolSizeMap = new ConcurrentHashMap<>();
	/** Sum of the sizes currently recorded in dataPoolSizeMap. */
	private final AtomicLong curDataLen = new AtomicLong(0);
	/** Size above which add() stops accepting pages and the pool is trimmed after a fill. */
	private final long maxDataLen;
	/** Fill target of the asynchronous fill: maxDataLen plus twice the max prepared flush size. */
	private final long extraMaxDataLen;
	private final int minSortedListCountForFlush;
	/** Result of the most recent sort, served by getSortedList() without re-sorting. */
	private volatile List<SortedEntry> lastOrderList = new ArrayList<>();
	private final EvictHandlerSepImpl evictHandlerSep;
	private final GContext gContext;
	/** True while a fill task is scheduled or running; guards against concurrent fills. */
	private final AtomicBoolean fillPollRunning = new AtomicBoolean(false);
	private final ExecutorService fillPoolExecutor;
	/** Per-region scan state (cursor and last assignment) that is reused across fills. */
	private final Map<GRegion, RegionChosen> regionCursorMap = new HashMap<>();
	/** Interval in milliseconds used by isPoolValid() and as the "too slow" threshold of a fill. */
	private final int intervalFillPool = 1000;
	private volatile long lastFillPoolTime = System.currentTimeMillis();
	private final boolean evictBaseOnPageAddressComposite;

	/**
	 * Creates the sample pool and the single-threaded executor that fills it.
	 *
	 * <p>The pool limit is {@code maxPreparedFlushSize * evictPoolFactor}, capped at half of the
	 * current thread's memory low mark.
	 *
	 * @param evictHandlerSep handler that supplies the memory marks and flush sizes
	 * @param gContext context used for configuration and for the DB state checks
	 * @param cacheManager supplier of the tick time and of the cache statistics
	 */
	public EvictablePagePoolSampleImpl(
		EvictHandlerSepImpl evictHandlerSep, GContext gContext, CacheManager cacheManager) {
		this.gContext = gContext;
		this.cacheManager = cacheManager;
		this.evictHandlerSep = evictHandlerSep;

		this.minSortedListCountForFlush = gContext.getGConfiguration().getMinSortedListCountForFlush();
		this.evictBaseOnPageAddressComposite = gContext.getGConfiguration().getEvictBaseOnPageAddressComposite();

		int factor = gContext.getGConfiguration().getEvictPoolFactor();
		long requestedMaxSize = evictHandlerSep.getMaxPreparedFlushSize() * factor;
		//never let the pool grow beyond half of curThreadMemLowMark.
		long halfLowMark = evictHandlerSep.getCurThreadMemLowMark() >> 1;
		this.maxDataLen = requestedMaxSize > halfLowMark ? halfLowMark : requestedMaxSize;
		this.extraMaxDataLen = maxDataLen + (evictHandlerSep.getMaxPreparedFlushSize() << 1);

		String threadNameFormat = gContext.getGConfiguration().getExecutorPrefixName() + "EvictablePagePoolSampleImpl-%d";
		ThreadFactory fillPoolThreadFactory = new ThreadFactoryBuilder().setNameFormat(threadNameFormat).build();
		this.fillPoolExecutor = new ThreadPoolExecutor(
			1,
			1,
			0L,
			TimeUnit.MILLISECONDS,
			new LinkedBlockingQueue<>(Short.MAX_VALUE),
			fillPoolThreadFactory);
	}

	/** Returns the number of page addresses currently held in the pool. */
	@Override
	public int size() {
		return this.dataPoolMap.size();
	}

	/** Returns the total memory size currently accounted to the pooled pages. */
	@Override
	public long dataSize() {
		return this.curDataLen.get();
	}

	/**
	 * Removes a page address from the pool and subtracts its recorded size from the pool size.
	 *
	 * @param pageAddress the page address to remove
	 * @return true if the page was pooled and its size was released; false if it was not pooled,
	 *     or if the DB stopped being normal before the size entry could be taken
	 */
	@Override
	public boolean remove(PageAddress pageAddress) {
		if (dataPoolMap.remove(pageAddress) == null) {
			return false;
		}
		//for PageAddressComposite, dataPoolSizeMap records its subPage memory size.
		//partialSubPageFlush removes and re-inserts that entry, so retry until it is visible again.
		//corner case, does not influence performance.
		while (gContext.isDBNormal()) {
			Long recordedSize = dataPoolSizeMap.remove(pageAddress);
			if (recordedSize == null) {
				continue;
			}
			curDataLen.addAndGet(-recordedSize);
			return true;
		}
		return false;
	}

	/**
	 * Reduces the size recorded for a pooled page after part of it has been flushed.
	 *
	 * @param pageAddress the pooled page address
	 * @param curFlushedSize the size that has just been flushed
	 */
	@Override
	public void partialSubPageFlush(PageAddress pageAddress, int curFlushedSize) {
		Long recordedSize = dataPoolSizeMap.remove(pageAddress);
		if (recordedSize == null) {
			//already removed.
			return;
		}
		//when some subPage has been loaded and put into cache, the difference can be negative; 0 is ok then.
		long remainingSize;
		if (recordedSize < curFlushedSize) {
			remainingSize = 0;
		} else {
			remainingSize = recordedSize - curFlushedSize;
		}
		dataPoolSizeMap.put(pageAddress, remainingSize);
		curDataLen.addAndGet(remainingSize - recordedSize);
	}

	/**
	 * Offers a page address to the pool; the offer is ignored once the pool has reached maxDataLen.
	 *
	 * @param pageAddress the page address to add
	 * @param gRegion the region the page belongs to
	 */
	@Override
	public void add(PageAddress pageAddress, GRegion gRegion) {
		if (curDataLen.get() < maxDataLen) {
			internalAdd(pageAddress, gRegion, null);
		}
	}

	/**
	 * Adds a page address using the configured strategy: as one composite entry or sub-page by sub-page.
	 *
	 * @param pageAddress the page address to add
	 * @param gRegion the region the page belongs to
	 * @param existedInPool optional counter incremented for pages that were already in eviction; may be null
	 * @return the memory size that was added to the pool
	 */
	@VisibleForTesting
	int internalAdd(PageAddress pageAddress, GRegion gRegion, AtomicLong existedInPool) {
		return evictBaseOnPageAddressComposite
			? internalAddPageAddressComposite(pageAddress, gRegion, existedInPool)
			: internalAddOnlySinglePageAddress(pageAddress, gRegion, existedInPool);
	}

	/** Same as the three-argument variant, without counting pages that are already in eviction. */
	int internalAddOnlySinglePageAddress(PageAddress pageAddress, GRegion gRegion) {
		return internalAddOnlySinglePageAddress(
			pageAddress,
			gRegion,
			null);
	}

	/** Same as the three-argument variant, without counting pages that are already in eviction. */
	int internalAddPageAddressComposite(PageAddress pageAddress, GRegion gRegion) {
		return internalAddPageAddressComposite(
			pageAddress,
			gRegion,
			null);
	}

	/**
	 * Adds every eligible sub-page of the given page address to the pool as an individual entry.
	 *
	 * <p>Sub-pages without a data page are skipped. Huge pages are handed to the region context
	 * instead of being pooled. Sub-pages that are already in eviction are only counted.
	 *
	 * @param pageAddress the page address whose sub-pages are offered
	 * @param gRegion the region the page belongs to
	 * @param existedInPool optional counter incremented for sub-pages already in eviction; may be null
	 * @return the total memory size that was added to the pool
	 */
	int internalAddOnlySinglePageAddress(
		PageAddress pageAddress, GRegion gRegion, AtomicLong existedInPool) {
		//add singlePage to pool.
		Iterator<PageAddress> subPages = pageAddress.pageIterator();
		int totalAddedSize = 0;
		while (subPages.hasNext()) {
			PageAddress subPage = subPages.next();

			DataPage dataPage = subPage.getDataPageNoReference();
			if (dataPage == null) {
				continue;
			}
			//avoid hugePage to evict
			if (gRegion.getGRegionContext().isHugePage(dataPage)) {
				gRegion.getGRegionContext().addHugePage(dataPage);
				continue;
			}
			if (evictHandlerSep.isPageAlreadyInEvict(subPage, subPage)) {
				if (existedInPool != null) {
					existedInPool.incrementAndGet();
				}
				continue;
			}
			totalAddedSize += doAdd(subPage, gRegion);
		}
		return totalAddedSize;
	}

	/**
	 * Adds the page address to the pool as one entry, unless any of its sub-pages is already in eviction.
	 *
	 * @param pageAddress the page address to add
	 * @param gRegion the region the page belongs to
	 * @param existedInPool optional counter incremented once when a sub-page is already in eviction; may be null
	 * @return the memory size that was added to the pool, or 0 if nothing was added
	 */
	int internalAddPageAddressComposite(
		PageAddress pageAddress, GRegion gRegion, AtomicLong existedInPool) {
		Iterator<PageAddress> subPages = pageAddress.pageIterator();
		while (subPages.hasNext()) {
			PageAddress subPage = subPages.next();
			if (!evictHandlerSep.isPageAlreadyInEvict(pageAddress, subPage)) {
				continue;
			}
			//one sub-page in eviction is enough to reject the whole page address.
			if (existedInPool != null) {
				existedInPool.incrementAndGet();
			}
			return 0;
		}
		return doAdd(pageAddress, gRegion);
	}

	/**
	 * Registers a page address in the pool if it is valid, has a non-zero memory size and is not pooled yet.
	 *
	 * @param pageAddress the page address to register
	 * @param gRegion the region the page belongs to
	 * @return the memory size recorded for the page, or 0 if it was not added
	 */
	int doAdd(PageAddress pageAddress, GRegion gRegion) {
		final int memorySize = pageAddress.getMemorySize();
		if (memorySize == 0) {
			return 0;
		}

		//for composite PageAddress, lock on the main page address.
		final PageAddress monitor;
		if (pageAddress instanceof PageAddressCompositeImpl) {
			monitor = ((PageAddressCompositeImpl) pageAddress).getMainPageAddress();
		} else {
			monitor = pageAddress;
		}

		synchronized (monitor) {
			if (!pageAddress.isPageValid()) {
				return 0;
			}
			if (dataPoolMap.containsKey(pageAddress) || dataPoolSizeMap.containsKey(pageAddress)) {
				return 0;
			}
			//for composite pageAddress, maybe only parts of the subPages are resident in memory.
			//the size entry is written first, then the pool entry.
			dataPoolSizeMap.put(pageAddress, (long) memorySize);
			dataPoolMap.put(pageAddress, gRegion);
			curDataLen.addAndGet(memorySize);
			return memorySize;
		}
	}

	/** Returns whether the given page address is currently held in the pool. */
	@Override
	public boolean containsPage(PageAddress pageAddress) {
		return this.dataPoolMap.containsKey(pageAddress);
	}

	/**
	 * Returns the last sorted list, sorting on the spot only when that list is empty
	 * and the pool has already reached maxDataLen.
	 */
	@Override
	public List getSortedList() {
		//avoid long time blocking when filling pool.
		boolean mustSortNow = lastOrderList.size() == 0 && this.curDataLen.get() >= maxDataLen;
		if (!mustSortNow) {
			return lastOrderList;
		}
		// true is to make sortedListAndArrangePool thread safe.
		return sortedListAndArrangePool(true);
	}

	/**
	 * Sorts the pooled pages by ascending score and publishes the lower half as the last sorted list.
	 *
	 * <p>Entries whose data page is missing or whose page is no longer valid are left out of the
	 * sort. When {@code fastSortList} is false, the pool is first trimmed from the highest-score
	 * end until its size is no longer above maxDataLen.
	 *
	 * @param fastSortList true to only sort; false to also trim the pool down to maxDataLen
	 * @return the lower-score half of the sorted entries
	 */
	@VisibleForTesting
	List<SortedEntry> sortedListAndArrangePool(boolean fastSortList) {
		long tickTime = cacheManager.getCurrentTickTime();
		List<SortedEntry> dataList = dataPoolMap.entrySet().stream().map(entry -> {
			//only do an effort to avoid invalid page.
			PageAddress pageAddress = entry.getKey();
			DataPage dataPage = pageAddress.getDataPageNoReference();
			if (dataPage != null && pageAddress.isPageValid()) {
				return new SortedEntry(entry.getKey(), entry.getValue(), dataPage.score(tickTime));
			}
			return null;
		}).filter(Objects::nonNull).sorted(Comparator.comparingDouble(SortedEntry::getScore)).collect(Collectors.toList());

		if (!fastSortList) {
			//drop entries from the tail of the sorted list until the pool fits again.
			int index = dataList.size() - 1;
			while (curDataLen.get() > maxDataLen && index >= 0) {
				PageAddress lastPageAddress = dataList.remove(index).pageAddress;
				remove(lastPageAddress);
				index--;
			}
		}
		//fast sorted list
		List<SortedEntry> result = dataList.subList(0, dataList.size() >> 1);
		lastOrderList = new ArrayList<>(result);
		return result;
	}

	/**
	 * Schedules an asynchronous fill of the pool when memory usage is close to the low mark,
	 * no other fill is in flight and the current pool is no longer considered valid.
	 *
	 * @param regions the regions whose pages may be sampled
	 * @return true if a fill task was submitted; false otherwise
	 */
	@Override
	public boolean tryFillPool(Set regions) {
		//protect code.
		if (evictHandlerSep.getCurThreadTotalPageUsedMem() < evictHandlerSep.getCurThreadMemLowMark() - evictHandlerSep.getMaxPreparedFlushSize()) {
			return false;
		}

		if (!fillPollRunning.compareAndSet(false, true)) {
			return false;
		}

		long currentTime = System.currentTimeMillis();
		//sample pool is valid and not access the scheduled time. TODO we can dynamically adjust the intervalFillPool.
		//why need scan all regions, consider this situation:  one region was wrote some data and then never be accessed
		//, including compaction, write, or read. then this data will always be resident in memory and never can be evicted.
		if (isPoolValid(currentTime)) {
			fillPollRunning.compareAndSet(true, false);
			return false;
		}

		try {
			fillPoolExecutor.submit(() -> tryAsyncFillPool(regions, currentTime));
		} catch (RejectedExecutionException e) {
			//the task will never run, so nobody else would reset the flag: release it here,
			//otherwise every later call would bail out at the compareAndSet above.
			fillPollRunning.set(false);
			LOG.warn("tryFillPool({}) fill task was rejected by the executor", evictHandlerSep.getName(), e);
			return false;
		}

		return true;
	}

	/**
	 * Fills the pool up to extraMaxDataLen by sampling pages from the given regions, then sorts
	 * the pool and trims it back to maxDataLen.
	 *
	 * <p>Regions that yield no data are dropped from the candidate set. The fill stops when the
	 * target is reached, when no candidate is left, when no region can be given an assignment,
	 * or when the DB is no longer normal. The running flag and the last fill time are always
	 * updated at the end, and any failure is logged instead of propagated.
	 *
	 * @param regions the regions whose pages may be sampled; never modified by this method
	 * @param syncStartTime the time (ms) at which the fill was requested
	 */
	@VisibleForTesting
	void tryAsyncFillPool(Set<GRegion> regions, long syncStartTime) {
		Set<GRegion> candidateRegions = regions;
		boolean isModifiable = false;
		try {
			//internalAdd an extra maxPreparedFlushSize to dataPool, to implement a slide window(bubble sort).
			int loop = 0;
			int filledSize = 0;
			//clamp before narrowing: a deficit above the int range would otherwise wrap to a
			//negative value and silently skip the fill. The cap also leaves room for doubling.
			long deficit = extraMaxDataLen - curDataLen.get();
			int needAddSize = (int) Math.min(deficit, (long) (Integer.MAX_VALUE >> 1));
			while (filledSize < needAddSize && gContext.isDBNormal()) {
				List<RegionChosen> regionAssign = assignToRegion(candidateRegions, needAddSize - filledSize);
				loop++;
				boolean print = loop % 100 == 0;
				int thisLoopSize = -1;
				boolean anyAssigned = false;
				for (RegionChosen chosenRegion : regionAssign) {
					if (chosenRegion.assignSize == 0) {
						continue;
					}
					anyAssigned = true;
					thisLoopSize = doFillPoolPerRegion(chosenRegion, print);
					// there is no data in this region to choose, so remove it from the candidate set
					if (thisLoopSize == 0) {
						if (!isModifiable) {
							// copy regions lazily to support modification
							candidateRegions = new HashSet<>(regions);
							isModifiable = true;
						}
						candidateRegions.remove(chosenRegion.gRegion);
					}
					filledSize += thisLoopSize;
				}

				//after fill page from all region, check whether need to continue.
				if (extraMaxDataLen <= curDataLen.get()) {
					break;
				}

				if (print) {
					LOG.info(
						"tryFillPool has run {}, cur needAddSize={} filledSize={} thisLoopSize={} current regionCount({}) audit({})",
						loop,
						needAddSize,
						filledSize,
						thisLoopSize,
						regions.size(),
						audit(regions));
				}

				//TODO add debug info.
				if (candidateRegions.isEmpty()) {
					LOG.info(
						"tryFillPool NO Candidate, has run {}, cur needAddSize={} filledSize={} thisLoopSize={} current regionCount({}) audit({})",
						loop,
						needAddSize,
						filledSize,
						thisLoopSize,
						regions.size(),
						audit(regions));
					break;
				}

				//nothing was assigned, so nothing was filled and no candidate was dropped:
				//another round would only spin, leave the rest to the next scheduled fill.
				if (!anyAssigned) {
					LOG.info(
						"tryFillPool NO Assignment, has run {}, cur needAddSize={} filledSize={} current regionCount({})",
						loop,
						needAddSize,
						filledSize,
						regions.size());
					break;
				}
			}
			sortedListAndArrangePool(false);

		} catch (Throwable e) {
			LOG.error("Internal Bug!", e);
		} finally {
			long runTime = (System.currentTimeMillis() - syncStartTime);
			cacheManager.getCacheStats().addFillPoolTime(runTime);
			if (runTime > intervalFillPool) {
				LOG.error("tryFillPool TOO SLOW! {} (ms)", runTime);
			}
			lastFillPoolTime = syncStartTime;
			fillPollRunning.compareAndSet(true, false);
		}
	}

	/**
	 * Tells whether the current sample can still be used without refilling: the last sorted list
	 * is long enough, the pool is at least half full and the last fill is recent enough.
	 *
	 * @param currentTime the current time in milliseconds
	 * @return true if no refill is needed yet
	 */
	boolean isPoolValid(long currentTime) {
		if (lastOrderList.size() <= minSortedListCountForFlush) {
			return false;
		}
		if (curDataLen.get() < (maxDataLen >> 1)) {
			return false;
		}
		return currentTime - lastFillPoolTime < intervalFillPool;
	}

	/**
	 * Distributes the wanted fill size over the given regions.
	 *
	 * <p>Each region's used memory is refreshed and its assignment reset, then the wanted size is
	 * handed out in passes of roughly equal shares. A region is never assigned more than half of
	 * its used memory. If the regions together hold no more than twice the wanted size, the
	 * wanted size is reduced to half of what they hold.
	 *
	 * <p>The distribution stops when the wanted size is fully assigned, when every region is
	 * exhausted, when a pass changes nothing, or when the DB is no longer normal. In all of these
	 * cases the assignments made so far are returned.
	 *
	 * @param regions the candidate regions
	 * @param wantedSize the total size that should be assigned
	 * @return one entry per region carrying its assigned size (possibly 0)
	 */
	List<RegionChosen> assignToRegion(Set<GRegion> regions, int wantedSize) {
		//TODO support decide which region is always kept in memory.
		// actually, it's only an effort, if no memory here, we need ignore this description.

		List<RegionChosen> result = new ArrayList<>(regions.size());

		long canChoseSize = 0;
		for (GRegion gRegion : regions) {
			RegionChosen regionChosen = regionCursorMap.computeIfAbsent(gRegion, RegionChosen::new);
			regionChosen.usedMemory = gRegion.getGRegionContext().getPageStoreStats().getPageUsedMemory();
			regionChosen.assignSize = 0;
			canChoseSize += regionChosen.usedMemory;
			result.add(regionChosen);
		}
		//compare in long: doubling a large int would overflow.
		if (canChoseSize <= ((long) wantedSize << 1)) {
			//pool max size is half of curThreadMemLowMark.
			wantedSize = (int) (canChoseSize >> 1);
		}

		Set<RegionChosen> removeRegionChosen = new HashSet<>();

		int curLoopWanted = wantedSize;
		int curRegionSize = regions.size();
		out:
		//curRegionSize > 0 protects the division below once every region has been excluded.
		while (curLoopWanted > 0 && curRegionSize > 0 && gContext.isDBNormal()) {
			int averageWantedSize = curLoopWanted / curRegionSize + (curLoopWanted % curRegionSize == 0 ? 0 : 1);
			boolean progressed = false;
			for (RegionChosen regionChosen : result) {
				//this region already gave half of its memory, exclude it from the next shares.
				if (regionChosen.usedMemory <= ((long) regionChosen.assignSize << 1)) {
					if (removeRegionChosen.add(regionChosen)) {
						curRegionSize--;
						progressed = true;
					}
					continue;
				}
				long available = regionChosen.usedMemory - regionChosen.assignSize;
				int curAssign;
				if (available > ((long) averageWantedSize << 1)) {
					curAssign = checkSize(Math.min(curLoopWanted, averageWantedSize), regionChosen);
				} else if (available > averageWantedSize) {
					//fair assign, make sure all regions can be enter the pool.
					curAssign = checkSize(Math.min(curLoopWanted, averageWantedSize >> 1), regionChosen);
				} else {
					continue;
				}
				regionChosen.assignSize += curAssign;
				curLoopWanted -= curAssign;
				if (curAssign != 0) {
					progressed = true;
				}
				if (curLoopWanted <= 0) {
					break out;
				}
			}
			//a pass that neither assigned nor excluded anything leaves the state untouched,
			//so every following pass would be identical: stop instead of spinning forever.
			if (!progressed) {
				break;
			}
			//again, cal averageWantedSize and assign.
		}
		return result;
	}

	/**
	 * Limits an assignment so that the region's total assignment does not exceed half of its used memory.
	 *
	 * @param curAssign the size that should be assigned in addition to the current assignment
	 * @param regionChosen the region receiving the assignment
	 * @return curAssign, or the remainder up to half of the used memory if curAssign is too large
	 */
	private int checkSize(int curAssign, RegionChosen regionChosen) {
		long halfUsedMemory = regionChosen.usedMemory >> 1;
		if (curAssign + regionChosen.assignSize > halfUsedMemory) {
			return (int) (halfUsedMemory - regionChosen.assignSize);
		}
		return curAssign;
	}

	/**
	 * Builds a diagnostic summary over all page addresses of the given regions: their count,
	 * total data length and total in-memory size, followed by the evict handler's description.
	 *
	 * @param regions the regions to walk
	 * @return the summary text, or "audit error;" if the walk failed
	 */
	private String audit(Set<GRegion> regions) {
		try {
			long totalPageAddressCount = 0;
			long totalPageSize = 0;
			long totalPageInMemSize = 0;

			for (GRegion region : regions) {
				PageStore pageStore = region.getPageStore();
				Iterator<PageAddress> pageAddressIterator = pageStore.getPageIndex().pageIterator();
				while (pageAddressIterator.hasNext()) {
					totalPageAddressCount++;
					PageAddress pageAddress = pageAddressIterator.next();
					totalPageSize += pageAddress.getDataLen();
					totalPageInMemSize += pageAddress.getMemorySize();
				}
			}
			return String.format("totalPageAddressCount=%s,totalPageSize=%s,totalPageInMemSize=%s %s",
				totalPageAddressCount,
				totalPageSize,
				totalPageInMemSize,
				evictHandlerSep);
		} catch (Exception e) {
			//best effort: the audit only feeds log output, so a failure must not break the fill.
			LOG.warn("audit error", e);
			return "audit error;";
		}
	}

	/**
	 * Scans the region's page index from the region's saved cursor and adds pages to the pool
	 * until the region's assigned size is reached, the whole index has been visited once, or
	 * the DB is no longer normal. The cursor is saved back for the next call.
	 *
	 * <p>For every logical page chain only the first page address that actually adds something
	 * is taken. An empty page index adds nothing, and a saved cursor that lies beyond the
	 * current index restarts the scan at 0.
	 *
	 * @param regionChosen the region with its assigned size and scan cursor
	 * @param print true to log the scan summary even when something was added
	 * @return the memory size added to the pool from this region
	 */
	@VisibleForTesting
	int doFillPoolPerRegion(
		RegionChosen regionChosen, boolean print) {
		//fill the pool.
		LogicalPageChain[] pages = regionChosen.gRegion.getPageStore().getPageIndex().getPageIndex();
		int scanCount = 0;
		int addedSize = 0;
		//the cursor was saved against an earlier index array; keep it inside the current one.
		int startCursor = regionChosen.cursor < pages.length ? regionChosen.cursor : 0;
		int cursor = startCursor;
		int addedPerPageAddress;
		AtomicLong existedInPool = new AtomicLong(0);

		while (pages.length > 0 && addedSize < regionChosen.assignSize && gContext.isDBNormal()) {

			scanCount++;

			LogicalPageChain logicalPageChain = pages[cursor];

			if (logicalPageChain != null) {
				for (int i = 0; i <= logicalPageChain.getCurrentPageChainIndex(); i++) {
					PageAddress pageAddress = logicalPageChain.getPageAddress(i);
					if (pageAddress != null) {
						addedPerPageAddress = internalAdd(pageAddress, regionChosen.gRegion, existedInPool);
						if (addedPerPageAddress > 0) {
							//only internalAdd lowest chain page once.
							addedSize += addedPerPageAddress;
							break;
						}
					}
				}
			}

			cursor++;
			if (cursor >= pages.length) {
				cursor = 0;
			}
			if (cursor == startCursor) {
				//one loop once.
				break;
			}
		}
		if (print || addedSize == 0) {
			LOG.info(
				"tryFillPool({}) scanCount={}, regionChosen={}, cursor={}, addedSize={}, existedPage={} pages.length={}, dataPool={},lastOrderList={}",
				evictHandlerSep.getName(),
				scanCount,
				regionChosen,
				cursor,
				addedSize,
				existedInPool.get(),
				pages.length,
				dataPoolMap.size(),
				lastOrderList.size());
		}
		regionChosen.cursor = cursor;
		return addedSize;
	}

	/** Describes the pool by name, entry count, data size, sorted-list length, limit and last fill time. */
	@Override
	public String toString() {
		MoreObjects.ToStringHelper description = MoreObjects.toStringHelper(this);
		description.add("name", evictHandlerSep.getName());
		description.add("poolCount", size());
		description.add("poolDataSize", dataSize());
		description.add("lastOrderList", lastOrderList == null ? 0 : lastOrderList.size());
		description.add("maxDataLen", maxDataLen);
		description.add("lastFillPoolTime", lastFillPoolTime);
		return description.toString();
	}

	/** Stops the fill executor immediately via {@code shutdownNow()}. */
	@Override
	public void shutdown() {
		this.fillPoolExecutor.shutdownNow();
	}

	static class RegionChosen {
		private final GRegion gRegion;
		int cursor = 0;
		long usedMemory = 0;
		int assignSize = 0;

		RegionChosen(GRegion gRegion) {
			this.gRegion = gRegion;
		}

		GRegion getgRegion() {
			return gRegion;
		}

		@Override
		public String toString() {
			return MoreObjects.toStringHelper(this).
				add("gRegion", gRegion.getRegionId()).
				add("cursor", cursor).
				add("usedMemory", usedMemory).
				add("assignSize", assignSize).toString();
		}
	}

	/** Exposes the live pool map (not a copy) for tests. */
	@VisibleForTesting
	Map getDataPoolMap() {
		return this.dataPoolMap;
	}

	/** Returns the pool size limit computed in the constructor. */
	@VisibleForTesting
	public long getMaxDataLen() {
		return this.maxDataLen;
	}

	/** Returns the fill target: maxDataLen plus twice the max prepared flush size. */
	@VisibleForTesting
	public long getExtraMaxDataLen() {
		return this.extraMaxDataLen;
	}

	/** Returns whether a fill is currently scheduled or running. */
	@VisibleForTesting
	public boolean getFillPollRunning() {
		return this.fillPollRunning.get();
	}

	/** Returns whether pages are pooled as whole composite page addresses rather than per sub-page. */
	public boolean isEvictBaseOnPageAddressComposite() {
		return this.evictBaseOnPageAddressComposite;
	}

	/** Exposes the live per-region cursor map (not a copy) for tests. */
	@VisibleForTesting
	public Map getRegionCursorMap() {
		return this.regionCursorMap;
	}

	/** Returns the request time (ms) recorded by the most recently finished fill. */
	@VisibleForTesting
	public long getLastFillPoolTime() {
		return this.lastFillPoolTime;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy