All downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.state.gemini.engine.dbms.SupervisorImpl Maven / Gradle / Ivy

There is a newer version: 1.5.1
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini.engine.dbms;

import org.apache.flink.core.fs.Path;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GRegionContext;
import org.apache.flink.runtime.state.gemini.engine.GRegionID;
import org.apache.flink.runtime.state.gemini.engine.GTable;
import org.apache.flink.runtime.state.gemini.engine.filecache.FileCache;
import org.apache.flink.runtime.state.gemini.engine.filecompaction.FileCompaction;
import org.apache.flink.runtime.state.gemini.engine.filecompaction.FileCompactionImpl;
import org.apache.flink.runtime.state.gemini.engine.filecompaction.FileCompactionPageTransfer;
import org.apache.flink.runtime.state.gemini.engine.filecompaction.NoFileCompaction;
import org.apache.flink.runtime.state.gemini.engine.fs.FileCleaner;
import org.apache.flink.runtime.state.gemini.engine.fs.FileCleanerImpl;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManager;
import org.apache.flink.runtime.state.gemini.engine.fs.FileManagerImpl;
import org.apache.flink.runtime.state.gemini.engine.fs.PersistenceStrategy;
import org.apache.flink.runtime.state.gemini.engine.fs.PersistenceStrategyFactory;
import org.apache.flink.runtime.state.gemini.engine.handler.GeminiEventExecutorGroup;
import org.apache.flink.runtime.state.gemini.engine.memstore.WriteBufferManager;
import org.apache.flink.runtime.state.gemini.engine.memstore.WriteBufferManagerImpl;
import org.apache.flink.runtime.state.gemini.engine.metrics.EventExecutorMetrics;
import org.apache.flink.runtime.state.gemini.engine.page.DfsDataPageUtil;
import org.apache.flink.runtime.state.gemini.engine.page.LocalDataPageUtil;
import org.apache.flink.runtime.state.gemini.engine.page.LogicalPageChain;
import org.apache.flink.runtime.state.gemini.engine.page.PageAddress;
import org.apache.flink.runtime.state.gemini.engine.page.PageContext;
import org.apache.flink.runtime.state.gemini.engine.page.PageIndex;
import org.apache.flink.runtime.state.gemini.engine.rm.Allocator;
import org.apache.flink.runtime.state.gemini.engine.rm.GByteBuffer;
import org.apache.flink.runtime.state.gemini.engine.rm.GarbageReleaseManager;
import org.apache.flink.runtime.state.gemini.engine.rm.GarbageReleaseManagerImpl;
import org.apache.flink.runtime.state.gemini.engine.rm.LeakDetector;
import org.apache.flink.runtime.state.gemini.engine.rm.LeakDetectorImpl;
import org.apache.flink.runtime.state.gemini.engine.rm.PoolAllocatorNettyImpl;
import org.apache.flink.runtime.state.gemini.engine.rm.UnpoolAllocatorImpl;
import org.apache.flink.runtime.state.gemini.engine.snapshot.BackendSnapshotMeta;
import org.apache.flink.runtime.state.gemini.engine.snapshot.DBSnapshotResult;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManager;
import org.apache.flink.runtime.state.gemini.engine.snapshot.SnapshotManagerImpl;
import org.apache.flink.runtime.state.gemini.engine.vm.BloomFilterManager;
import org.apache.flink.runtime.state.gemini.engine.vm.BloomFilterManagerImpl;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManager;
import org.apache.flink.runtime.state.gemini.engine.vm.CacheManagerImpl;
import org.apache.flink.runtime.state.gemini.engine.vm.DataPageLRU;
import org.apache.flink.runtime.state.gemini.engine.vm.FetchPolicy;
import org.apache.flink.runtime.state.gemini.engine.vm.FetchPolicyImpl;
import org.apache.flink.runtime.state.gemini.engine.vm.NoBloomFilterManagerImpl;
import org.apache.flink.util.Preconditions;

import org.apache.flink.shaded.guava18.com.google.common.util.concurrent.ThreadFactoryBuilder;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.EventExecutorGroup;
import org.apache.flink.shaded.netty4.io.netty.util.concurrent.Future;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.Closeable;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.concurrent.ThreadFactory;
import java.util.concurrent.TimeUnit;

import static org.apache.flink.runtime.state.gemini.engine.page.PageContext.CacheStatus.CACHING_TO_MAIN;
import static org.apache.flink.runtime.state.gemini.engine.page.PageStatus.Compacting;
import static org.apache.flink.runtime.state.gemini.engine.page.PageStatus.Init;

/**
 * Default {@link Supervisor} implementation.
 *
 * <p>The constructor creates and wires the engine components that belong to one {@link GContext}:
 * allocators, write buffer manager, cache manager, local and DFS file managers, file cache, file
 * cleaner, file compaction, snapshot manager, fetch policy, persistence strategy, bloom filter
 * manager and the event executor groups. {@link #start()}, {@link #stop()} and {@link #close()}
 * drive the lifecycle of those components.
 */
public class SupervisorImpl implements Supervisor {
	private static final Logger LOG = LoggerFactory.getLogger(SupervisorImpl.class);

	/** Quiet period handed to {@code shutdownGracefully} of every executor group, in milliseconds. */
	private static final long EXECUTOR_SHUTDOWN_QUIET_PERIOD_MS = 100;

	/** Timeout handed to {@code shutdownGracefully} of every executor group, in milliseconds. */
	private static final long EXECUTOR_SHUTDOWN_TIMEOUT_MS = 500;

	/** Upper bound, in minutes, that {@link #stop()} waits for each executor group to terminate. */
	private static final long EXECUTOR_TERMINATION_WAIT_MINUTES = 1;

	private final GContext gContext;

	/** Allocator for normal use, such as writing pages. */
	private final Allocator allocator;

	//used by some compaction value, such as list value/map value. recommend to use unpool and on-heap.
	private final Allocator defaultAllocator = new UnpoolAllocatorImpl();

	/** Allocator used for reads; may be the same instance as {@link #allocator}. */
	private final Allocator forReadAllocator;

	private final WriteBufferManager writeBufferManager;

	private final CacheManager cacheManager;

	private final SnapshotManager snapshotManager;

	private final FileManager localFileManager;

	private final FileManager dfsFileManager;

	private final FileCache fileCache;

	private final FileCleaner fileCleaner;

	private final FileCompaction fileCompaction;

	private final GeminiEventExecutorGroup regionExecutorGroup;

	private final GeminiEventExecutorGroup flusherExecutorGroup;

	//TODO #SR will unify the other executor groups after using deaggregative mode.
	//TODO #SR current writer just will be closed by EventExecutorGroup, so multiple checkpoint
	//  will reuse the same underlying file, do we need to handle this?
	private final GeminiEventExecutorGroup snapshotExecutorGroup;

	private final GeminiEventExecutorGroup compactionExecutorGroup;

	private final GeminiEventExecutorGroup lruIntoMainCacheExecutorGroup;

	private final FetchPolicy fetchPolicy;

	/** {@code null} when no pooled (off-heap) allocator is in use. */
	private final GarbageReleaseManager garbageReleaseManager;

	/** {@code null} when no pooled (off-heap) allocator is in use. */
	private final LeakDetector leakDetector;

	private final PersistenceStrategy persistenceStrategy;
	private final BloomFilterManager bloomFilterManager;

	/**
	 * Builds all engine components for the given context and registers this supervisor on it.
	 *
	 * <p>Nothing is started here; call {@link #start()} afterwards.
	 *
	 * @param gContext the engine context; must not be {@code null}
	 * @throws NullPointerException if {@code gContext} is {@code null}
	 */
	public SupervisorImpl(GContext gContext) {
		// Validate first: the original order dereferenced gContext before the null check,
		// which made the precondition unreachable.
		this.gContext = Preconditions.checkNotNull(gContext);
		// Register before any component is built.
		// NOTE(review): presumably components look the supervisor up through the context while
		// they are being constructed, which is why "this" escapes here - confirm.
		gContext.setSupervisor(this);

		this.writeBufferManager = new WriteBufferManagerImpl(gContext);

		GConfiguration gConfiguration = gContext.getGConfiguration();

		this.cacheManager = new CacheManagerImpl(this.gContext);

		//for normal use, such as write page.
		if (gContext.getMemoryInfo().isUseOffHeap()) {
			//if use offheap globally.
			this.garbageReleaseManager = new GarbageReleaseManagerImpl<>(this.gContext);
			this.leakDetector = new LeakDetectorImpl();
			this.forReadAllocator = new PoolAllocatorNettyImpl(gContext, garbageReleaseManager, leakDetector);
			// One pooled allocator serves both reads and writes in this mode.
			this.allocator = forReadAllocator;
		} else {
			this.allocator = new UnpoolAllocatorImpl();
			//for read, we recommend to use forReadAllocator. because it provide better performance for IO access.
			if (gContext.getMemoryInfo().isUseOffheapForRead()) {
				this.garbageReleaseManager = new GarbageReleaseManagerImpl<>(this.gContext);
				this.leakDetector = new LeakDetectorImpl();
				this.forReadAllocator = new PoolAllocatorNettyImpl(gContext, garbageReleaseManager, leakDetector);
			} else {
				// No pooled allocator at all: nothing to release or to track for leaks.
				this.garbageReleaseManager = null;
				this.leakDetector = null;
				this.forReadAllocator = allocator;
			}
		}

		this.fileCleaner = new FileCleanerImpl(gContext);

		this.localFileManager = new FileManagerImpl(gContext,
			"local",
			new Path(gConfiguration.getLocalPath()),
			false,
			new LocalDataPageUtil(forReadAllocator, gConfiguration.isChecksumEnable()));
		this.fileCleaner.registerFileManager(this.localFileManager);

		this.dfsFileManager = new FileManagerImpl(gContext,
			"dfs",
			new Path(gConfiguration.getDfsPath()),
			true,
			new DfsDataPageUtil(gConfiguration.isChecksumEnable()));
		this.fileCleaner.registerFileManager(this.dfsFileManager);

		this.fileCache = FileCache.createFileCache(gContext, localFileManager, dfsFileManager);

		// TODO how to do it elegantly
		if (gConfiguration.isFileCompactionEnabled()) {
			this.fileCompaction = new FileCompactionImpl(gContext, (FileCompactionPageTransfer) fileCache);
		} else {
			this.fileCompaction = new NoFileCompaction();
		}

		this.snapshotManager = new SnapshotManagerImpl(this.gContext,
			writeBufferManager,
			localFileManager,
			dfsFileManager);
		this.fetchPolicy = new FetchPolicyImpl(gContext,
			cacheManager.getCacheStats(),
			new DataPageLRU<>(gContext.getStartRegionId(),
				gContext.getEndRegionId(),
				cacheManager.getReadPageCacheLRUSize(),
				createLRUFunction(),
				gConfiguration.isEnableLruAccessMode(),
				gConfiguration.isEnableEvictRegionEven(),
				cacheManager.getCacheStats()));

		// Every executor group gets its own thread factory so threads are named after their role.
		String prefix = gConfiguration.getExecutorPrefixName();
		ThreadFactory regionThreadFactory = new ThreadFactoryBuilder().setNameFormat(prefix + "geminiRegion-%d").build();
		this.regionExecutorGroup = new GeminiEventExecutorGroup(gContext.getGConfiguration().getRegionThreadNum(),
			regionThreadFactory,
			gContext.getGConfiguration().getCommonThreadSleepTimeNs(),
			gContext);
		ThreadFactory flushThreadFactory = new ThreadFactoryBuilder().setNameFormat(prefix + "geminiFlush-%d").build();
		this.flusherExecutorGroup = new GeminiEventExecutorGroup(gContext.getGConfiguration().getFlushThreadNum(),
			flushThreadFactory,
			gContext.getGConfiguration().getCommonThreadSleepTimeNs(),
			gContext);
		ThreadFactory snapshotThreadFactory = new ThreadFactoryBuilder().setNameFormat(prefix + "geminiSnapshot-%d").build();
		this.snapshotExecutorGroup = new GeminiEventExecutorGroup(gContext.getGConfiguration().getSnapshotThreadNum(),
			snapshotThreadFactory,
			gContext.getGConfiguration().getCommonThreadSleepTimeNs(),
			gContext);
		ThreadFactory compactionThreadFactory = new ThreadFactoryBuilder().setNameFormat(prefix + "geminiCompaction-%d").build();
		this.compactionExecutorGroup = new GeminiEventExecutorGroup(gContext.getGConfiguration().getCompactionThreadNum(),
			compactionThreadFactory,
			gContext.getGConfiguration().getCommonThreadSleepTimeNs(),
			gContext);

		// Executor metrics are only registered when the context provides a metric group.
		MetricGroup metricGroup = gContext.getDBMetricGroup();
		if (metricGroup != null) {
			EventExecutorMetrics executorMetrics = new EventExecutorMetrics(
				metricGroup.addGroup("executor").addGroup("waiting"),
				gContext.getGConfiguration().getMetricSampleCount(),
				gContext.getGConfiguration().getMetricHistogramWindowSize());
			// The lruIntoMain group is created below and is not part of this registration.
			executorMetrics.register(this.snapshotExecutorGroup, this.flusherExecutorGroup, this.regionExecutorGroup, this.compactionExecutorGroup);
		}

		this.persistenceStrategy = PersistenceStrategyFactory.INSTANCE.create(gContext);
		ThreadFactory lruIntoMainThreadFactory = new ThreadFactoryBuilder().setNameFormat(prefix + "lruIntoMain-%d").build();
		this.lruIntoMainCacheExecutorGroup = new GeminiEventExecutorGroup(gContext.getGConfiguration().getLruIntoMainCacheThreadNum(),
			lruIntoMainThreadFactory,
			gContext.getGConfiguration().getCommonThreadSleepTimeNs(), gContext);
		if (gConfiguration.isEnableBloomFilter()) {
			// The bloom filter budget is a configured fraction of the total heap size.
			this.bloomFilterManager = new BloomFilterManagerImpl((long) (gContext.getMemoryInfo().getTotalHeapSize() * gConfiguration.getBloomFilterMemRate()),
				gConfiguration.getRegionThreadNum() + 1);
		} else {
			this.bloomFilterManager = new NoBloomFilterManagerImpl();
		}

		LOG.info("Supervisor is created");
	}

	/**
	 * Starts the cache manager, file cleaner, both file managers, file compaction and, if present,
	 * the garbage release manager.
	 */
	@Override
	public void start() {
		this.cacheManager.start();
		this.fileCleaner.start();
		this.localFileManager.start();
		this.dfsFileManager.start();
		this.fileCompaction.start();
		if (this.garbageReleaseManager != null) {
			this.garbageReleaseManager.start();
		}
		LOG.info("Supervisor is started");
	}

	/**
	 * Requests a graceful shutdown of all executor groups, closes the components on a best-effort
	 * basis, and then waits for the executor groups to terminate.
	 *
	 * <p>The garbage release manager and the leak detector are not touched here; they are closed
	 * by {@link #close()}.
	 */
	@Override
	public void stop() {
		// Shutdown is requested for every group up front; the returned futures are awaited
		// only after the components have been closed.
		List<Future<?>> futureList = new ArrayList<>();
		futureList.add(regionExecutorGroup.shutdownGracefully(EXECUTOR_SHUTDOWN_QUIET_PERIOD_MS, EXECUTOR_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS));
		futureList.add(flusherExecutorGroup.shutdownGracefully(EXECUTOR_SHUTDOWN_QUIET_PERIOD_MS, EXECUTOR_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS));
		futureList.add(compactionExecutorGroup.shutdownGracefully(EXECUTOR_SHUTDOWN_QUIET_PERIOD_MS, EXECUTOR_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS));
		futureList.add(snapshotExecutorGroup.shutdownGracefully(EXECUTOR_SHUTDOWN_QUIET_PERIOD_MS, EXECUTOR_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS));
		futureList.add(lruIntoMainCacheExecutorGroup.shutdownGracefully(EXECUTOR_SHUTDOWN_QUIET_PERIOD_MS, EXECUTOR_SHUTDOWN_TIMEOUT_MS, TimeUnit.MILLISECONDS));
		closeQuietly(fetchPolicy, "FetchPolicy");
		closeQuietly(cacheManager, "CacheManager");
		closeQuietly(snapshotManager, "SnapshotManager");
		closeQuietly(fileCompaction, "FileCompaction");
		closeQuietly(fileCache, "FileCache");
		closeQuietly(localFileManager, "LocalFileManager");
		closeQuietly(dfsFileManager, "DFSFileManager");
		closeQuietly(fileCleaner, "FileCleaner");
		closeQuietly(bloomFilterManager, "BloomFilterManager");
		// Wait to exit gracefully, prevent memory leak.
		for (Future<?> future : futureList) {
			// awaitUninterruptibly returns false when the wait timed out; surface that
			// instead of dropping it silently.
			if (!future.awaitUninterruptibly(EXECUTOR_TERMINATION_WAIT_MINUTES, TimeUnit.MINUTES)) {
				LOG.warn("An executor group did not terminate within {} minute(s)", EXECUTOR_TERMINATION_WAIT_MINUTES);
			}
		}
	}

	/**
	 * Closes the garbage release manager and the leak detector if they were created.
	 */
	@Override
	public void close() {
		// Both are null when no pooled allocator is used; closeQuietly skips null.
		closeQuietly(garbageReleaseManager, "GarbageReleaseManager");
		closeQuietly(leakDetector, "LeakDetector");
	}

	// NOTE(review): the return type is raw in this copy of the file; the unused DBSnapshotResult
	// import suggests it was Future<DBSnapshotResult> originally - confirm against Supervisor.
	@Override
	public java.util.concurrent.Future startSnapshot(BackendSnapshotMeta backendSnapshotMeta) {
		return snapshotManager.startSnapshot(backendSnapshotMeta);
	}

	@Override
	public SnapshotManager.PendingSnapshot getPendingSnapshot(long checkpointId) {
		return snapshotManager.getPendingSnapshot(checkpointId);
	}

	@Override
	public Allocator getAllocator() {
		return allocator;
	}

	@Override
	public Allocator getDefaultAllocator() {
		return defaultAllocator;
	}

	/**
	 * Returns the allocator to use for reads. It is the same instance as {@link #getAllocator()}
	 * when off-heap is used globally or when off-heap for read is disabled.
	 */
	@Override
	public Allocator getForReadAllocator() {
		return forReadAllocator;
	}

	@Override
	public WriteBufferManager getWriteBufferManager() {
		return this.writeBufferManager;
	}

	@Override
	public CacheManager getCacheManager() {
		return this.cacheManager;
	}

	@Override
	public SnapshotManager getSnapshotManager() {
		return this.snapshotManager;
	}

	@Override
	public FileManager getLocalFileManager() {
		return localFileManager;
	}

	@Override
	public FileManager getDfsFileManager() {
		return dfsFileManager;
	}

	@Override
	public FileCache getFileCache() {
		return fileCache;
	}

	@Override
	public FileCleaner getFileCleaner() {
		return fileCleaner;
	}

	@Override
	public FileCompaction getFileCompaction() {
		return fileCompaction;
	}

	@Override
	public EventExecutorGroup getRegionExecutorGroup() {
		return regionExecutorGroup;
	}

	@Override
	public EventExecutorGroup getFlushExecutorGroup() {
		return flusherExecutorGroup;
	}

	@Override
	public EventExecutorGroup getSnapshotExecutorGroup() {
		return snapshotExecutorGroup;
	}

	@Override
	public EventExecutorGroup getCompactionExecutorGroup() {
		return compactionExecutorGroup;
	}

	@Override
	public EventExecutorGroup getLruIntoMainCacheExecutorGroup() {
		return lruIntoMainCacheExecutorGroup;
	}

	/**
	 * Discards every page address in the list through the file cache.
	 *
	 * @param gRegionContext the region context handed to each {@code discard} call
	 * @param pageAddressList the page addresses to discard
	 */
	@Override
	public void discardPage(GRegionContext gRegionContext, List<PageAddress> pageAddressList) {
		for (PageAddress pageAddress : pageAddressList) {
			pageAddress.discard(fileCache, gRegionContext, null);
		}
	}

	// NOTE(review): the return type is raw in this copy of the file; the unused GTable import
	// suggests the values are GTable instances - confirm key/value types against Supervisor.
	@Override
	public Map getAllTables() {
		return gContext.getGeminiDB().getGeminiTableMap();
	}

	@Override
	public FetchPolicy getFetchPolicy() {
		return this.fetchPolicy;
	}

	/**
	 * Returns the garbage release manager, or {@code null} when no pooled allocator is in use.
	 */
	@Override
	public GarbageReleaseManager getGarbageReleaseManager() {
		return garbageReleaseManager;
	}

	/**
	 * Returns the leak detector, or {@code null} when no pooled allocator is in use.
	 */
	@Override
	public LeakDetector getLeakDetector() {
		return leakDetector;
	}

	@Override
	public PersistenceStrategy getPersistencyStrategy() {
		return persistenceStrategy;
	}

	/**
	 * Creates the callback object handed to the {@link DataPageLRU} built in the constructor.
	 *
	 * <p>NOTE(review): the callback type is raw in this copy of the file; its type arguments
	 * could not be determined from here and are left untouched - confirm against DataPageLRU.
	 */
	private DataPageLRU.DataPageLRUFuction createLRUFunction() {
		return new DataPageLRU.DataPageLRUFuction() {
			@Override
			public int size(DataPageLRU.PageWithContext value) {
				return value.getFutureDataPage().getSize();
			}

			@Override
			public void removed(DataPageLRU.PageWithContext value) {
				value.getFutureDataPage().removed();
			}

			@Override
			public int getSlotIndex(DataPageLRU.PageWithContext value, int offset) {
				// Entries without a page context fall into slot 0; otherwise the slot is the
				// region id relative to the given offset.
				return value.getPageContext() == null ? 0 : value.getPageContext().getGRegionID().getId() - offset;
			}

			/**
			 * Decides whether the given LRU entry may be promoted into the main cache. Every
			 * check below is a veto; the entry is accepted only if none of them applies.
			 */
			@Override
			public boolean canAddIntoMainCache(DataPageLRU.PageWithContext value, PageIndex pageIndex, GRegionID expectedRegionId) {
				PageContext context = value.getPageContext();
				// filter out invalid context
				if (context == null) {
					return false;
				}

				// already trying added into main cache.
				if (context.getCacheStatus() == CACHING_TO_MAIN) {
					return false;
				}

				if (context.getLogicPageIndex() < 0) {
					return false;
				}

				// we may have async prefetch, if the prefetch has not been done or it's failed.
				if (!value.getFutureDataPage().isDone() || value.getFutureDataPage().isFail()) {
					return false;
				}

				// currently we'll have data and index for one region, they'll have different page index.
				if (!expectedRegionId.equals(context.getGRegionID())) {
					return false;
				}

				int logicPageIndex = context.getLogicPageIndex();
				int indexCapacity = pageIndex.getIndexCapacity();

				if (logicPageIndex >= indexCapacity) {
					LOG.debug("LogicPageChainIndex error, pageIndex {}, indexCapacity {}, expectedRegion {}, context region {}", logicPageIndex, indexCapacity, expectedRegionId, context.getGRegionID());
					return false;
				}

				LogicalPageChain pageChain = pageIndex.getLogicPage(logicPageIndex);
				if (pageChain == null) {
					return false;
				}
				// logic chain page has been replaced.
				if (context.getLogicPageChainHashCode() != pageChain.hashCode()) {
					return false;
				}

				// compaction or splitting skip
				if (pageChain.getPageStatus().equals(Compacting)) {
					return false;
				}

				// TODO Do we need to add page when splitting? a page chain in splitting mode for a long time for no new page write to this chain.
				// For an index in the lower half, also look at the chain half the capacity
				// further up and veto while that chain is still in Init status.
				if (logicPageIndex < (indexCapacity >> 1)) {
					LogicalPageChain buddyPageChain = pageIndex.getLogicPage(logicPageIndex + (indexCapacity >> 1));
					if (buddyPageChain != null && buddyPageChain.getPageStatus().equals(Init)) {
						return false;
					}
				}

				return true;
			}
		};
	}

	/**
	 * Closes the given resource on a best-effort basis: a failure is logged and not propagated,
	 * so one failing component does not prevent the remaining ones from being closed.
	 *
	 * @param closeable the resource to close; {@code null} is ignored
	 * @param closeableName the name used in the error log
	 */
	private void closeQuietly(Closeable closeable, String closeableName) {
		if (closeable == null) {
			return;
		}
		try {
			closeable.close();
		} catch (Exception e) {
			// The throwable is passed as the trailing argument without a placeholder so the
			// logger records it as the exception, including its stack trace.
			LOG.error("Failed to close {}", closeableName, e);
		}
	}

	@Override
	public BloomFilterManager getBloomFilterManager() {
		return bloomFilterManager;
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy