All downloads are free. The search and download functionality uses the official Maven repository.

org.apache.flink.runtime.state.gemini.GeminiInternalStateBackend Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.gemini;

import org.apache.flink.api.common.ExecutionConfig;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.api.common.typeutils.base.ListSerializer;
import org.apache.flink.api.common.typeutils.base.MapSerializer;
import org.apache.flink.api.common.typeutils.base.SortedMapSerializer;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.Path;
import org.apache.flink.core.memory.DataInputViewStreamWrapper;
import org.apache.flink.core.memory.DataOutputViewStreamWrapper;
import org.apache.flink.metrics.MetricGroup;
import org.apache.flink.runtime.checkpoint.CheckpointOptions;
import org.apache.flink.runtime.io.async.AbstractAsyncCallableWithResources;
import org.apache.flink.runtime.io.async.AsyncStoppableTaskWithCallback;
import org.apache.flink.runtime.query.TaskKvStateRegistry;
import org.apache.flink.runtime.state.AbstractInternalStateBackend;
import org.apache.flink.runtime.state.CheckpointStreamFactory;
import org.apache.flink.runtime.state.CheckpointStreamWithResultProvider;
import org.apache.flink.runtime.state.CheckpointedStateScope;
import org.apache.flink.runtime.state.DirectoryStateHandle;
import org.apache.flink.runtime.state.DoneFuture;
import org.apache.flink.runtime.state.InternalBackendSerializationProxy;
import org.apache.flink.runtime.state.InternalStateType;
import org.apache.flink.runtime.state.KeyGroupRange;
import org.apache.flink.runtime.state.KeyedStateHandle;
import org.apache.flink.runtime.state.LocalRecoveryConfig;
import org.apache.flink.runtime.state.LocalRecoveryDirectoryProvider;
import org.apache.flink.runtime.state.RegisteredStateMetaInfo;
import org.apache.flink.runtime.state.SnapshotDirectory;
import org.apache.flink.runtime.state.SnapshotResult;
import org.apache.flink.runtime.state.StateMetaInfoSnapshot;
import org.apache.flink.runtime.state.StateStorage;
import org.apache.flink.runtime.state.StreamStateHandle;
import org.apache.flink.runtime.state.UncompressedStreamCompressionDecorator;
import org.apache.flink.runtime.state.gemini.engine.GConfiguration;
import org.apache.flink.runtime.state.gemini.engine.GTable;
import org.apache.flink.runtime.state.gemini.engine.GTableDescription;
import org.apache.flink.runtime.state.gemini.engine.GeminiDB;
import org.apache.flink.runtime.state.gemini.engine.GeminiPKey2;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableKeyedListImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableKeyedMapImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableKeyedSortedMapImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableOneKeyImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableSubKeyedListImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableSubKeyedMapImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableSubKeyedSortedMapImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.GTableSubKeyedValueImpl;
import org.apache.flink.runtime.state.gemini.engine.hashtable.KListTableDescription;
import org.apache.flink.runtime.state.gemini.engine.hashtable.KMapTableDescription;
import org.apache.flink.runtime.state.gemini.engine.hashtable.KSortedMapTableDescription;
import org.apache.flink.runtime.state.gemini.engine.hashtable.KVTableDescription;
import org.apache.flink.runtime.state.gemini.engine.hashtable.SubKListTableDescription;
import org.apache.flink.runtime.state.gemini.engine.hashtable.SubKMapTableDescription;
import org.apache.flink.runtime.state.gemini.engine.hashtable.SubKSortedMapTableDescription;
import org.apache.flink.runtime.state.gemini.engine.hashtable.SubKVTableDescription;
import org.apache.flink.runtime.state.gemini.engine.page.PKey2Serializer;
import org.apache.flink.runtime.state.gemini.engine.page.PageSerdeFlink;
import org.apache.flink.runtime.state.gemini.engine.page.PageSerdeFlink2Key;
import org.apache.flink.runtime.state.gemini.engine.page.PageSerdeFlink2KeyImpl;
import org.apache.flink.runtime.state.gemini.engine.page.PageSerdeFlinkImpl;
import org.apache.flink.runtime.state.gemini.engine.page.PageSerdeFlinkListImpl;
import org.apache.flink.runtime.state.gemini.engine.snapshot.BackendSnapshotMeta;
import org.apache.flink.runtime.state.gemini.engine.snapshot.DBSnapshotMeta;
import org.apache.flink.runtime.state.gemini.engine.snapshot.DBSnapshotResult;
import org.apache.flink.runtime.state.gemini.engine.utils.ThreadLocalTypeSerializer;
import org.apache.flink.runtime.state.gemini.internal.AbstractGeminiKeyedStateHandle;
import org.apache.flink.runtime.state.gemini.internal.DirectoryStreamStateHandle;
import org.apache.flink.runtime.state.gemini.internal.GeminiKeyedStateHandle;
import org.apache.flink.runtime.state.gemini.internal.GeminiLocalKeyedStateHandle;
import org.apache.flink.runtime.state.gemini.keyed.GeminiKeyedListStateImpl;
import org.apache.flink.runtime.state.gemini.keyed.GeminiKeyedMapStateImpl;
import org.apache.flink.runtime.state.gemini.keyed.GeminiKeyedSortedMapStateImpl;
import org.apache.flink.runtime.state.gemini.keyed.GeminiKeyedValueStateImpl;
import org.apache.flink.runtime.state.gemini.subkeyed.GeminiSubKeyedListStateImpl;
import org.apache.flink.runtime.state.gemini.subkeyed.GeminiSubKeyedMapStateImpl;
import org.apache.flink.runtime.state.gemini.subkeyed.GeminiSubKeyedSortedMapStateImpl;
import org.apache.flink.runtime.state.gemini.subkeyed.GeminiSubKeyedValueStateImpl;
import org.apache.flink.runtime.state.keyed.KeyedListState;
import org.apache.flink.runtime.state.keyed.KeyedListStateDescriptor;
import org.apache.flink.runtime.state.keyed.KeyedMapState;
import org.apache.flink.runtime.state.keyed.KeyedMapStateDescriptor;
import org.apache.flink.runtime.state.keyed.KeyedSortedMapState;
import org.apache.flink.runtime.state.keyed.KeyedSortedMapStateDescriptor;
import org.apache.flink.runtime.state.keyed.KeyedValueState;
import org.apache.flink.runtime.state.keyed.KeyedValueStateDescriptor;
import org.apache.flink.runtime.state.subkeyed.SubKeyedListState;
import org.apache.flink.runtime.state.subkeyed.SubKeyedListStateDescriptor;
import org.apache.flink.runtime.state.subkeyed.SubKeyedMapState;
import org.apache.flink.runtime.state.subkeyed.SubKeyedMapStateDescriptor;
import org.apache.flink.runtime.state.subkeyed.SubKeyedSortedMapState;
import org.apache.flink.runtime.state.subkeyed.SubKeyedSortedMapStateDescriptor;
import org.apache.flink.runtime.state.subkeyed.SubKeyedValueState;
import org.apache.flink.runtime.state.subkeyed.SubKeyedValueStateDescriptor;
import org.apache.flink.util.FileUtils;
import org.apache.flink.util.IOUtils;
import org.apache.flink.util.Preconditions;
import org.apache.flink.util.function.SupplierWithException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.UUID;
import java.util.concurrent.Future;
import java.util.concurrent.RunnableFuture;

/**
 * A State Backend that stores its state in {@code GeminiDB}. This state backend can
 * store very large state that exceeds memory and spills to disk.
 */
public class GeminiInternalStateBackend extends AbstractInternalStateBackend {

	private static final Logger LOG = LoggerFactory.getLogger(GeminiInternalStateBackend.class);

	/**
	 * The underlying database. It is assigned in {@code restore(...)} and reset to
	 * {@code null} in {@code closeImpl()}, so it is {@code null} outside that window.
	 */
	private GeminiDB db;

	/** Tables obtained from the database so far, indexed by table name. */
	private final Map<String, GTable> tables;

	/** The "geminiDB" sub-group of the operator metric group; handed to the database on creation. */
	private final MetricGroup dbMetricGroup;

	/** Identifier of the owning operator; used to name the database and in log output. */
	private final String operatorIdentifier;

	/** The Gemini configuration handed to the database on creation. */
	private final GConfiguration gConfiguration;

	/**
	 * The configuration for local recovery.
	 */
	private final LocalRecoveryConfig localRecoveryConfig;

	/**
	 * Creates the backend for one operator. Only the configuration is captured here; the
	 * {@link GeminiDB} instance itself is not created by this constructor.
	 *
	 * @param numberOfGroups passed through to the super constructor
	 * @param groups passed through to the super constructor
	 * @param userClassLoader passed through to the super constructor
	 * @param localRecoveryConfig the local recovery configuration, must not be null
	 * @param kvStateRegistry passed through to the super constructor
	 * @param operatorIdentifier identifier of the owning operator, must not be null
	 * @param executionConfig passed through to the super constructor
	 * @param gConfiguration the Gemini configuration, must not be null
	 * @param operatorMetricGroup metric group under which the "geminiDB" sub-group is added
	 */
	public GeminiInternalStateBackend(
		int numberOfGroups,
		KeyGroupRange groups,
		ClassLoader userClassLoader,
		LocalRecoveryConfig localRecoveryConfig,
		TaskKvStateRegistry kvStateRegistry,
		String operatorIdentifier,
		ExecutionConfig executionConfig,
		GConfiguration gConfiguration,
		MetricGroup operatorMetricGroup) throws Exception {
		super(numberOfGroups, groups, userClassLoader, kvStateRegistry, executionConfig);

		this.localRecoveryConfig = Preconditions.checkNotNull(localRecoveryConfig);
		// Not null-checked explicitly: a null metric group fails right here on addGroup.
		this.dbMetricGroup = operatorMetricGroup.addGroup("geminiDB");
		this.operatorIdentifier = Preconditions.checkNotNull(operatorIdentifier);
		this.gConfiguration = Preconditions.checkNotNull(gConfiguration);
		// Filled as tables are obtained from the database.
		this.tables = new HashMap<>();
		LOG.info("GeminiInternalStateBackend is created for operator {}, with backend UUID {}",
			operatorIdentifier, gConfiguration.getBackendUID());
	}

	/**
	 * Releases what this backend holds: closes the database if one is currently set and then
	 * forgets every cached table.
	 */
	@Override
	protected void closeImpl() {
		final GeminiDB openDb = db;
		if (openDb != null) {
			// Only drop the reference once close() has returned normally.
			openDb.close();
			db = null;
		}

		tables.clear();
	}

	/**
	 * Not supported by this backend: always throws {@link UnsupportedOperationException}.
	 *
	 * <p>NOTE(review): presumably unsupported because the state objects created by this class
	 * work directly on Gemini tables instead of a {@code StateStorage} - confirm.
	 */
	@Override
	@SuppressWarnings("unchecked")
	protected StateStorage getOrCreateStateStorageForKeyedState(RegisteredStateMetaInfo stateMetaInfo) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Not supported by this backend: always throws {@link UnsupportedOperationException}.
	 *
	 * <p>NOTE(review): presumably unsupported because the state objects created by this class
	 * work directly on Gemini tables instead of a {@code StateStorage} - confirm.
	 */
	@Override
	@SuppressWarnings("unchecked")
	protected StateStorage getOrCreateStateStorageForSubKeyedState(RegisteredStateMetaInfo stateMetaInfo) {
		throw new UnsupportedOperationException();
	}

	/**
	 * Returns the keyed value state registered under the descriptor's name, creating it and its
	 * backing table on the first request.
	 *
	 * @param keyedStateDescriptor supplies the state name and the key/value serializers
	 * @return the cached state for that name, or a newly created and cached one
	 */
	@Override
	public KeyedValueState createKeyedValueState(KeyedValueStateDescriptor keyedStateDescriptor) throws Exception {
		final String name = keyedStateDescriptor.getName();
		final KeyedValueState cached = (KeyedValueState) keyedStates.get(name);
		if (cached != null) {
			return cached;
		}

		// First request for this name: register the meta info, then build table and state.
		tryRegisterStateMetaInfo(keyedStateDescriptor);

		final String tableName = getTableName(name);
		final int startGroup = getKeyGroupRange().getStartKeyGroup();
		final int groupCount = getKeyGroupRange().getNumberOfKeyGroups();
		final int maxParallelism = getNumGroups();
		KVTableDescription description = new KVTableDescription<>(
			tableName,
			startGroup,
			groupCount,
			maxParallelism,
			PageSerdeFlinkImpl.of(
				getSafeSerializer(keyedStateDescriptor.getKeySerializer()),
				getSafeSerializer(keyedStateDescriptor.getValueSerializer())));

		GTableOneKeyImpl table = (GTableOneKeyImpl) getOrCreateTable(description);
		KeyedValueState created = new GeminiKeyedValueStateImpl<>(keyedStateDescriptor, table);
		keyedStates.put(name, created);
		tables.put(name, table);
		return created;
	}

	/**
	 * Returns the keyed list state registered under the descriptor's name, creating it and its
	 * backing table on the first request.
	 *
	 * @param keyedStateDescriptor supplies the state name and the key/element serializers
	 * @return the cached state for that name, or a newly created and cached one
	 */
	@SuppressWarnings("unchecked")
	@Override
	public KeyedListState createKeyedListState(KeyedListStateDescriptor keyedStateDescriptor) throws Exception {
		final String name = keyedStateDescriptor.getName();
		final KeyedListState cached = (KeyedListState) keyedStates.get(name);
		if (cached != null) {
			return cached;
		}

		// First request for this name: register the meta info, then build table and state.
		tryRegisterStateMetaInfo(keyedStateDescriptor);

		final String tableName = getTableName(name);
		final int startGroup = getKeyGroupRange().getStartKeyGroup();
		final int groupCount = getKeyGroupRange().getNumberOfKeyGroups();
		final int maxParallelism = getNumGroups();
		KListTableDescription description = new KListTableDescription<>(
			tableName,
			startGroup,
			groupCount,
			maxParallelism,
			PageSerdeFlinkListImpl.of(
				getSafeSerializer(keyedStateDescriptor.getKeySerializer()),
				getSafeSerializer(keyedStateDescriptor.getElementSerializer())));

		GTableKeyedListImpl table = (GTableKeyedListImpl) getOrCreateTable(description);
		KeyedListState created = new GeminiKeyedListStateImpl<>(keyedStateDescriptor, table);
		keyedStates.put(name, created);
		tables.put(name, table);
		return created;
	}

	/**
	 * Returns the keyed map state registered under the descriptor's name, creating it and its
	 * backing table on the first request. Creating a new state reads the database
	 * configuration, so {@code db} must already be set at that point.
	 *
	 * @param keyedStateDescriptor supplies the state name and the key/map-key/map-value serializers
	 * @return the cached state for that name, or a newly created and cached one
	 */
	@SuppressWarnings("unchecked")
	@Override
	public KeyedMapState createKeyedMapState(KeyedMapStateDescriptor keyedStateDescriptor) throws Exception {
		final String name = keyedStateDescriptor.getName();
		final KeyedMapState cached = (KeyedMapState) keyedStates.get(name);
		if (cached != null) {
			return cached;
		}

		// First request for this name: register the meta info, then build table and state.
		tryRegisterStateMetaInfo(keyedStateDescriptor);

		final String tableName = getTableName(name);
		final int startGroup = getKeyGroupRange().getStartKeyGroup();
		final int groupCount = getKeyGroupRange().getNumberOfKeyGroups();
		final int maxParallelism = getNumGroups();
		// No comparator and no comparator type are passed for an unsorted map.
		KMapTableDescription description = new KMapTableDescription<>(
			tableName,
			startGroup,
			groupCount,
			maxParallelism,
			PageSerdeFlink2KeyImpl.of(
				getSafeSerializer(keyedStateDescriptor.getKeySerializer()),
				getSafeSerializer(keyedStateDescriptor.getMapKeySerializer()),
				getSafeSerializer(keyedStateDescriptor.getMapValueSerializer()),
				null,
				null,
				db.getConfiguration().isChecksumEnable()));

		GTableKeyedMapImpl table = (GTableKeyedMapImpl) getOrCreateTable(description);
		KeyedMapState created = new GeminiKeyedMapStateImpl<>(keyedStateDescriptor, table);
		keyedStates.put(name, created);
		tables.put(name, table);
		return created;
	}

	/**
	 * Returns the keyed sorted-map state registered under the descriptor's name, creating it
	 * and its backing table on the first request. Creating a new state reads the database
	 * configuration, so {@code db} must already be set at that point.
	 *
	 * @param keyedStateDescriptor supplies the state name, the serializers and the map-key comparator
	 * @return the cached state for that name, or a newly created and cached one
	 */
	@SuppressWarnings("unchecked")
	@Override
	public KeyedSortedMapState createKeyedSortedMapState(KeyedSortedMapStateDescriptor keyedStateDescriptor) throws Exception {
		final String name = keyedStateDescriptor.getName();
		final KeyedSortedMapState cached = (KeyedSortedMapState) keyedStates.get(name);
		if (cached != null) {
			return cached;
		}

		// First request for this name: register the meta info, then build table and state.
		tryRegisterStateMetaInfo(keyedStateDescriptor);

		// The serde carries the descriptor's comparator plus the configured comparator type.
		PageSerdeFlink2Key sortedSerde = PageSerdeFlink2KeyImpl.of(
			getSafeSerializer(keyedStateDescriptor.getKeySerializer()),
			getSafeSerializer(keyedStateDescriptor.getMapKeySerializer()),
			getSafeSerializer(keyedStateDescriptor.getMapValueSerializer()),
			keyedStateDescriptor.getMapKeyComparator(),
			db.getConfiguration().getComparatorType(),
			db.getConfiguration().isChecksumEnable());

		final String tableName = getTableName(name);
		final int startGroup = getKeyGroupRange().getStartKeyGroup();
		final int groupCount = getKeyGroupRange().getNumberOfKeyGroups();
		final int maxParallelism = getNumGroups();
		KSortedMapTableDescription description = new KSortedMapTableDescription<>(
			tableName,
			startGroup,
			groupCount,
			maxParallelism,
			sortedSerde);

		GTableKeyedSortedMapImpl table = (GTableKeyedSortedMapImpl) getOrCreateTable(description);
		KeyedSortedMapState created = new GeminiKeyedSortedMapStateImpl<>(keyedStateDescriptor, table);
		keyedStates.put(name, created);
		tables.put(name, table);
		return created;
	}

	/**
	 * Returns the sub-keyed value state registered under the descriptor's name, creating it and
	 * its backing table on the first request. Creating a new state reads the database
	 * configuration, so {@code db} must already be set at that point.
	 *
	 * @param subKeyedStateDescriptor supplies the state name and the key/namespace/value serializers
	 * @return the cached state for that name, or a newly created and cached one
	 */
	@Override
	public SubKeyedValueState createSubKeyedValueState(SubKeyedValueStateDescriptor subKeyedStateDescriptor) throws Exception {
		final String name = subKeyedStateDescriptor.getName();
		final SubKeyedValueState cached = (SubKeyedValueState) subKeyedStates.get(name);
		if (cached != null) {
			return cached;
		}

		// First request for this name: register the meta info, then build table and state.
		tryRegisterStateMetaInfo(subKeyedStateDescriptor);

		final String tableName = getTableName(name);
		final int startGroup = getKeyGroupRange().getStartKeyGroup();
		final int groupCount = getKeyGroupRange().getNumberOfKeyGroups();
		final int maxParallelism = getNumGroups();
		// Here the namespace takes the second-key slot of the two-key serde.
		SubKVTableDescription description = new SubKVTableDescription<>(
			tableName,
			startGroup,
			groupCount,
			maxParallelism,
			PageSerdeFlink2KeyImpl.of(
				getSafeSerializer(subKeyedStateDescriptor.getKeySerializer()),
				getSafeSerializer(subKeyedStateDescriptor.getNamespaceSerializer()),
				getSafeSerializer(subKeyedStateDescriptor.getValueSerializer()),
				null,
				null,
				db.getConfiguration().isChecksumEnable()));

		GTableSubKeyedValueImpl table = (GTableSubKeyedValueImpl) getOrCreateTable(description);
		SubKeyedValueState created = new GeminiSubKeyedValueStateImpl<>(subKeyedStateDescriptor, table);
		subKeyedStates.put(name, created);
		tables.put(name, table);
		return created;
	}

	/**
	 * Returns the sub-keyed list state registered under the descriptor's name, creating it and
	 * its backing table on the first request.
	 *
	 * @param subKeyedStateDescriptor supplies the state name and the key/namespace/element serializers
	 * @return the cached state for that name, or a newly created and cached one
	 */
	@Override
	public SubKeyedListState createSubKeyedListState(SubKeyedListStateDescriptor subKeyedStateDescriptor) throws Exception {
		final String name = subKeyedStateDescriptor.getName();
		final SubKeyedListState cached = (SubKeyedListState) subKeyedStates.get(name);
		if (cached != null) {
			return cached;
		}

		// First request for this name: register the meta info, then build table and state.
		tryRegisterStateMetaInfo(subKeyedStateDescriptor);

		final String tableName = getTableName(name);
		final int startGroup = getKeyGroupRange().getStartKeyGroup();
		final int groupCount = getKeyGroupRange().getNumberOfKeyGroups();
		final int maxParallelism = getNumGroups();
		// Key and namespace are combined into one composite key for the list serde.
		SubKListTableDescription description = new SubKListTableDescription<>(
			tableName,
			startGroup,
			groupCount,
			maxParallelism,
			PageSerdeFlinkListImpl.of(
				new PKey2Serializer(
					getSafeSerializer(subKeyedStateDescriptor.getKeySerializer()),
					getSafeSerializer(subKeyedStateDescriptor.getNamespaceSerializer())),
				getSafeSerializer(subKeyedStateDescriptor.getElementSerializer())));

		GTableSubKeyedListImpl table = (GTableSubKeyedListImpl) getOrCreateTable(description);
		SubKeyedListState created = new GeminiSubKeyedListStateImpl<>(subKeyedStateDescriptor, table);
		subKeyedStates.put(name, created);
		tables.put(name, table);
		return created;
	}

	@SuppressWarnings("unchecked")
	@Override
	public  SubKeyedMapState createSubKeyedMapState(SubKeyedMapStateDescriptor subKeyedStateDescriptor) throws Exception {
		String stateName = subKeyedStateDescriptor.getName();
		SubKeyedMapState subKeyedMapState = (SubKeyedMapState) subKeyedStates.get(stateName);

		if (subKeyedMapState == null) {
			tryRegisterStateMetaInfo(subKeyedStateDescriptor);
			TypeSerializer keySerializer = subKeyedStateDescriptor.getKeySerializer();
			TypeSerializer namespaceSerializer = subKeyedStateDescriptor.getNamespaceSerializer();
			MapSerializer mapSerializer = subKeyedStateDescriptor.getValueSerializer();
			PageSerdeFlink2Key, MK, MV> pageSerdeFlink2Key = PageSerdeFlink2KeyImpl.of(
				new PKey2Serializer(getSafeSerializer(keySerializer), getSafeSerializer(namespaceSerializer)),
				getSafeSerializer(mapSerializer.getKeySerializer()),
				getSafeSerializer(mapSerializer.getValueSerializer()),
				null,
				null,
				db.getConfiguration().isChecksumEnable());
			SubKMapTableDescription tableDescription = new SubKMapTableDescription<>(
				getTableName(stateName),
				getKeyGroupRange().getStartKeyGroup(),
				getKeyGroupRange().getNumberOfKeyGroups(),
				getNumGroups(),
				pageSerdeFlink2Key);
			GTableSubKeyedMapImpl table = (GTableSubKeyedMapImpl) getOrCreateTable(tableDescription);
			subKeyedMapState = new GeminiSubKeyedMapStateImpl<>(subKeyedStateDescriptor, table);
			subKeyedStates.put(stateName, subKeyedMapState);
			tables.put(stateName, table);
		}

		return subKeyedMapState;
	}

	@SuppressWarnings("unchecked")
	@Override
	public  SubKeyedSortedMapState createSubKeyedSortedMapState(SubKeyedSortedMapStateDescriptor subKeyedStateDescriptor) throws Exception {
		String stateName = subKeyedStateDescriptor.getName();
		SubKeyedSortedMapState subKeyedSortedMapState = (SubKeyedSortedMapState) subKeyedStates.get(stateName);

		if (subKeyedSortedMapState == null) {
			tryRegisterStateMetaInfo(subKeyedStateDescriptor);

			PageSerdeFlink2Key, MK, MV> pageSerdeFlink2Key = PageSerdeFlink2KeyImpl.of(new PKey2Serializer(
					getSafeSerializer(subKeyedStateDescriptor.getKeySerializer()),
					getSafeSerializer(subKeyedStateDescriptor.getNamespaceSerializer())),
				getSafeSerializer(subKeyedStateDescriptor.getMapKeySerializer()),
				getSafeSerializer(subKeyedStateDescriptor.getMapValueSerializer()),
				subKeyedStateDescriptor.getComparator(),
				db.getConfiguration().getComparatorType(),
				db.getConfiguration().isChecksumEnable());
			SubKSortedMapTableDescription tableDescription = new SubKSortedMapTableDescription<>(
				getTableName(stateName),
				getKeyGroupRange().getStartKeyGroup(),
				getKeyGroupRange().getNumberOfKeyGroups(),
				getNumGroups(),
				pageSerdeFlink2Key);

			GTableSubKeyedSortedMapImpl table = (GTableSubKeyedSortedMapImpl) getOrCreateTable(tableDescription);
			subKeyedSortedMapState = new GeminiSubKeyedSortedMapStateImpl<>(subKeyedStateDescriptor, table);
			subKeyedStates.put(stateName, subKeyedSortedMapState);
			tables.put(stateName, table);
		}

		return subKeyedSortedMapState;
	}

	@Override
	public RunnableFuture> snapshot(
		long checkpointId,
		long timestamp,
		CheckpointStreamFactory primaryStreamFactory,
		CheckpointOptions checkpointOptions) throws Exception {
		if (registeredStateMetaInfos.isEmpty()) {
			LOG.info("Snapshot done with empty states for {}/{}.", checkpointId, timestamp);
			return DoneFuture.of(SnapshotResult.empty());
		}

		LOG.info("Start to snapshot for {}/{}.", checkpointId, timestamp);
		long syncStartTime = System.currentTimeMillis();

		List keyedStateMetaSnapshots = new ArrayList<>();

		List subKeyedStateMetaSnapshots = new ArrayList<>();

		for (Map.Entry registeredStateMetaInfoEntry : registeredStateMetaInfos.entrySet()) {
			RegisteredStateMetaInfo stateMetaInfo = registeredStateMetaInfoEntry.getValue();
			if (stateMetaInfo.getStateType().isKeyedState()) {
				keyedStateMetaSnapshots.add(stateMetaInfo.snapshot());
			} else {
				subKeyedStateMetaSnapshots.add(stateMetaInfo.snapshot());
			}

		}

		BackendSnapshotMeta backendSnapshotMeta =
			new BackendSnapshotMeta(checkpointId, timestamp, getLocalSnapshotDirectory(checkpointId));
		db.startSnapshot(backendSnapshotMeta);

		final SupplierWithException checkpointStreamSupplier =
			localRecoveryConfig.isLocalRecoveryEnabled() ?
				() -> CheckpointStreamWithResultProvider.createDuplicatingStream(checkpointId,
					CheckpointedStateScope.EXCLUSIVE,
					primaryStreamFactory,
					localRecoveryConfig.getLocalStateDirectoryProvider()) :
				() -> CheckpointStreamWithResultProvider.createSimpleStream(checkpointId,
						CheckpointedStateScope.EXCLUSIVE,
						primaryStreamFactory);

		// implementation of the async IO operation, based on FutureTask
		final AbstractAsyncCallableWithResources> ioCallable = new AbstractAsyncCallableWithResources>() {

			CheckpointStreamWithResultProvider streamAndResultExtractor = null;

			@Override
			protected void acquireResources() throws Exception {
				streamAndResultExtractor = checkpointStreamSupplier.get();
				cancelStreamRegistry.registerCloseable(streamAndResultExtractor);
			}

			@Override
			protected void releaseResources() {
				unregisterAndCloseStreamAndResultExtractor();
			}

			@Override
			protected void stopOperation() {
				unregisterAndCloseStreamAndResultExtractor();
			}

			private void unregisterAndCloseStreamAndResultExtractor() {
				if (cancelStreamRegistry.unregisterCloseable(streamAndResultExtractor)) {
					IOUtils.closeQuietly(streamAndResultExtractor);
					streamAndResultExtractor = null;
				}
			}

			@Nonnull
			@Override
			protected SnapshotResult performOperation() throws Exception {

				long asyncStartTime = System.currentTimeMillis();

				CheckpointStreamFactory.CheckpointStateOutputStream localStream = this.streamAndResultExtractor.getCheckpointOutputStream();

				DataOutputViewStreamWrapper outView = new DataOutputViewStreamWrapper(localStream);

				final InternalBackendSerializationProxy serializationProxy = new InternalBackendSerializationProxy(
					keyedStateMetaSnapshots,
					subKeyedStateMetaSnapshots,
					!Objects.equals(UncompressedStreamCompressionDecorator.INSTANCE, keyGroupCompressionDecorator));
				serializationProxy.write(outView);

				Future future = db.getSnapshotResult(checkpointId);
				DBSnapshotResult dbSnapshotResult = future.get();

				if (cancelStreamRegistry.unregisterCloseable(streamAndResultExtractor)) {
					SnapshotResult streamSnapshotResult = streamAndResultExtractor.closeAndFinalizeCheckpointStreamResult();
					streamAndResultExtractor = null;

					StreamStateHandle streamStateHandle = streamSnapshotResult.getJobManagerOwnedSnapshot();
					DirectoryStateHandle directoryStateHandle = dbSnapshotResult.getDfsSnapshotDirectory().completeSnapshotAndGetHandle();
					KeyedStateHandle snapshot = new GeminiKeyedStateHandle(
						checkpointId,
						getKeyGroupRange(),
						streamStateHandle,
						convertDirectoryStateHandleToStreamHandle(directoryStateHandle),
						dbSnapshotResult.getDfsSnapshotMeta());

					LOG.info("Gemini backend snapshot (" + primaryStreamFactory + ", asynchronous part) in thread " + Thread.currentThread() + " took " + (System.currentTimeMillis() - asyncStartTime) + " ms.");

					StreamStateHandle localStreamStateHandle = streamSnapshotResult.getTaskLocalSnapshot();
					if (localStreamStateHandle != null) {
						DirectoryStateHandle localDirectoryStateHandle = dbSnapshotResult.getLocalSnapshotDirectory().completeSnapshotAndGetHandle();
						KeyedStateHandle localSnapshot = new GeminiLocalKeyedStateHandle(
							checkpointId,
							getKeyGroupRange(),
							localStreamStateHandle,
							convertDirectoryStateHandleToStreamHandle(localDirectoryStateHandle),
							dbSnapshotResult.getLocalSnapshotMeta());

						return SnapshotResult.withLocalState(snapshot, localSnapshot);
					} else {
						return SnapshotResult.of(snapshot);
					}
				} else {
					throw new IOException("Stream already closed and cannot return a handle.");
				}
			}
		};

		AsyncStoppableTaskWithCallback> task = AsyncStoppableTaskWithCallback.from(
			ioCallable);

		LOG.info("Gemini backend snapshot (" + primaryStreamFactory + ", synchronous part) in thread " + Thread.currentThread() + " took " + (System.currentTimeMillis() - syncStartTime) + " ms.");

		return task;
	}

	/**
	 * Creates the database and, when snapshots are given, restores the state meta information
	 * and the tables from them before the database is opened.
	 *
	 * <p>Handles without a meta state handle are skipped. For every other handle the state
	 * meta information is read from its meta stream, each state is registered again and its
	 * table is obtained from the database.
	 *
	 * @param restoredSnapshots the handles to restore from; {@code null} or empty starts an
	 *                          empty database
	 * @throws Exception if reading a snapshot, restoring the database or opening it fails
	 */
	@Override
	public void restore(Collection<KeyedStateHandle> restoredSnapshots) throws Exception {

		this.db = new GeminiDB("GeminiDB_" + operatorIdentifier,
			gConfiguration,
			getKeyGroupRange().getStartKeyGroup(),
			getKeyGroupRange().getEndKeyGroup(),
			dbMetricGroup);

		if (restoredSnapshots != null && !restoredSnapshots.isEmpty()) {

			LOG.info("Initializing gemini internal state backend from snapshots {}.", restoredSnapshots);

			List<DBSnapshotMeta> restoredDBSnapshotMeta = new ArrayList<>();
			Map<String, GTable> restoredTables = new HashMap<>();

			for (KeyedStateHandle rawSnapshot : restoredSnapshots) {
				Preconditions.checkState(rawSnapshot instanceof AbstractGeminiKeyedStateHandle);
				AbstractGeminiKeyedStateHandle snapshot = (AbstractGeminiKeyedStateHandle) rawSnapshot;

				StreamStateHandle snapshotHandle = snapshot.getMetaStateHandle();
				if (snapshotHandle == null) {
					continue;
				}

				FSDataInputStream inputStream = snapshotHandle.openInputStream();
				cancelStreamRegistry.registerCloseable(inputStream);

				try {
					DataInputViewStreamWrapper inputView = new DataInputViewStreamWrapper(inputStream);

					// The isSerializerPresenceRequired flag is set to true.
					// NOTE(review): the previous comment justified this with the heap state
					// backend's eager deserialization; confirm the reason for Gemini.
					InternalBackendSerializationProxy serializationProxy = new InternalBackendSerializationProxy(
						getUserClassLoader(),
						true);
					serializationProxy.read(inputView);

					List<StateMetaInfoSnapshot> keyedStateMetaInfos = serializationProxy.getKeyedStateMetaSnapshots();
					for (StateMetaInfoSnapshot keyedStateMetaSnapshot : keyedStateMetaInfos) {
						String stateName = keyedStateMetaSnapshot.getName();

						restoredKvStateMetaInfos.put(stateName, keyedStateMetaSnapshot);

						RegisteredStateMetaInfo keyedStateMetaInfo = RegisteredStateMetaInfo.createKeyedStateMetaInfo(
							keyedStateMetaSnapshot);
						registeredStateMetaInfos.put(stateName, keyedStateMetaInfo);

						GTableDescription tableDescription = createGTableDescription(keyedStateMetaInfo);
						GTable table = getOrCreateTable(tableDescription);
						restoredTables.put(tableDescription.getTableName(), table);
					}

					List<StateMetaInfoSnapshot> subKeyedStateMetaSnapshots = serializationProxy.getSubKeyedStateMetaSnapshots();
					for (StateMetaInfoSnapshot subKeyedStateMetaSnapshot : subKeyedStateMetaSnapshots) {
						String stateName = subKeyedStateMetaSnapshot.getName();

						RegisteredStateMetaInfo subKeyedStateMetaInfo = RegisteredStateMetaInfo.createSubKeyedStateMetaInfo(
							subKeyedStateMetaSnapshot);
						registeredStateMetaInfos.put(stateName, subKeyedStateMetaInfo);
						restoredKvStateMetaInfos.put(stateName, subKeyedStateMetaSnapshot);

						GTableDescription tableDescription = createGTableDescription(subKeyedStateMetaInfo);
						GTable table = getOrCreateTable(tableDescription);
						restoredTables.put(tableDescription.getTableName(), table);
					}

					restoredDBSnapshotMeta.add(snapshot.getDBSnapshotMeta());
				} finally {
					// Close the stream only if the registry still tracked it.
					if (cancelStreamRegistry.unregisterCloseable(inputStream)) {
						IOUtils.closeQuietly(inputStream);
					}
				}
			}

			db.restoreFromSnapshot(restoredDBSnapshotMeta,
				restoredTables,
				getKeyGroupRange().getStartKeyGroup(),
				getKeyGroupRange().getEndKeyGroup());
		}

		try {
			db.open();
		} catch (Exception e) {
			// The exception is passed as the throwable argument, so the message needs no placeholder.
			LOG.error("Failed to open GeminiDB.", e);
			throw e;
		}
	}

	/**
	 * Forwards the completion of a checkpoint to the database's snapshot manager.
	 *
	 * <p>Dereferences {@code db} without a null check, so it fails with a
	 * {@link NullPointerException} while no database is set.
	 *
	 * @param checkpointId the id of the completed checkpoint
	 */
	@Override
	public void notifyCheckpointComplete(long checkpointId) throws Exception {
		db.getGContext().getSupervisor().getSnapshotManager().notifySnapshotComplete(checkpointId);
	}

	/**
	 * Always returns 0; this implementation does not count the entries held in the database.
	 *
	 * @return always 0
	 */
	@Override
	public int numStateEntries() {
		return 0;
	}

	/**
	 * Maps a state name to the name of its table. Currently the state name is used unchanged.
	 *
	 * @param stateName the name of the state
	 * @return the table name, identical to {@code stateName}
	 */
	private String getTableName(String stateName) {
		return stateName;
	}

	/**
	 * Builds the table description for a registered state from its meta information.
	 *
	 * <p>Every state type gets the same kind of description and page serde that the matching
	 * {@code create...State} method builds from a descriptor, so a table obtained through this
	 * method can later be reused by that method. Several branches read the database
	 * configuration, so {@code db} must already be set.
	 *
	 * @param metaInfo the meta information of the state
	 * @return the description of the table backing the state
	 * @throws RuntimeException if the state type is not one of the handled types
	 */
	private GTableDescription createGTableDescription(RegisteredStateMetaInfo metaInfo) {
		InternalStateType stateType = metaInfo.getStateType();
		String stateName = metaInfo.getName();
		String tableName = getTableName(stateName);
		int startGroup = getKeyGroupRange().getStartKeyGroup();
		int numGroups = getKeyGroupRange().getNumberOfKeyGroups();
		int maxParallelism = getNumGroups();
		ListSerializer listSerializer;
		MapSerializer mapSerializer;
		SortedMapSerializer sortedMapSerializer;
		switch (stateType) {
			case KEYED_VALUE:
				PageSerdeFlink pageSerdeFlink = PageSerdeFlinkImpl.of(
					getSafeSerializer(metaInfo.getKeySerializer()),
					getSafeSerializer(metaInfo.getValueSerializer()));
				return new KVTableDescription<>(tableName, startGroup, numGroups, maxParallelism, pageSerdeFlink);
			case KEYED_MAP:
				mapSerializer = (MapSerializer) metaInfo.getValueSerializer();
				PageSerdeFlink2KeyImpl pageSerdeFlink2Key = PageSerdeFlink2KeyImpl.of(
					getSafeSerializer(metaInfo.getKeySerializer()),
					getSafeSerializer(mapSerializer.getKeySerializer()),
					getSafeSerializer(mapSerializer.getValueSerializer()),
					null,
					null,
					db.getConfiguration().isChecksumEnable());
				return new KMapTableDescription<>(tableName, startGroup, numGroups, maxParallelism, pageSerdeFlink2Key);
			case KEYED_LIST:
				listSerializer = (ListSerializer) metaInfo.getValueSerializer();
				// Uses the list serde, exactly as createKeyedListState does: that method reuses
				// the table created here, so both must describe it the same way.
				return new KListTableDescription<>(
					tableName,
					startGroup,
					numGroups,
					maxParallelism,
					PageSerdeFlinkListImpl.of(
						getSafeSerializer(metaInfo.getKeySerializer()),
						getSafeSerializer(listSerializer.getElementSerializer())));
			case KEYED_SORTEDMAP:
				sortedMapSerializer = (SortedMapSerializer) metaInfo.getValueSerializer();
				PageSerdeFlink2KeyImpl sortedPageSerdeFlink2Key = PageSerdeFlink2KeyImpl.of(
					getSafeSerializer(metaInfo.getKeySerializer()),
					getSafeSerializer(sortedMapSerializer.getKeySerializer()),
					getSafeSerializer(sortedMapSerializer.getValueSerializer()),
					sortedMapSerializer.getComparator(),
					db.getConfiguration().getComparatorType(),
					db.getConfiguration().isChecksumEnable());
				return new KSortedMapTableDescription<>(tableName, startGroup, numGroups, maxParallelism, sortedPageSerdeFlink2Key);
			case SUBKEYED_LIST:
				listSerializer = (ListSerializer) metaInfo.getValueSerializer();
				// Key and namespace are combined into one composite key.
				return new SubKListTableDescription<>(
					tableName,
					startGroup,
					numGroups,
					maxParallelism,
					PageSerdeFlinkListImpl.of(
						new PKey2Serializer(
							getSafeSerializer(metaInfo.getKeySerializer()),
							getSafeSerializer(metaInfo.getNamespaceSerializer())),
						getSafeSerializer(listSerializer.getElementSerializer())));
			case SUBKEYED_VALUE:
				// Here the namespace takes the second-key slot of the two-key serde.
				return new SubKVTableDescription<>(
					tableName,
					startGroup,
					numGroups,
					maxParallelism,
					PageSerdeFlink2KeyImpl.of(
						getSafeSerializer(metaInfo.getKeySerializer()),
						getSafeSerializer(metaInfo.getNamespaceSerializer()),
						getSafeSerializer(metaInfo.getValueSerializer()),
						null,
						null,
						db.getConfiguration().isChecksumEnable()));
			case SUBKEYED_MAP:
				mapSerializer = (MapSerializer) metaInfo.getValueSerializer();
				return new SubKMapTableDescription<>(
					tableName,
					startGroup,
					numGroups,
					maxParallelism,
					PageSerdeFlink2KeyImpl.of(
						new PKey2Serializer(
							getSafeSerializer(metaInfo.getKeySerializer()),
							getSafeSerializer(metaInfo.getNamespaceSerializer())),
						getSafeSerializer(mapSerializer.getKeySerializer()),
						getSafeSerializer(mapSerializer.getValueSerializer()),
						null,
						null,
						db.getConfiguration().isChecksumEnable()));
			case SUBKEYED_SORTEDMAP:
				sortedMapSerializer = (SortedMapSerializer) metaInfo.getValueSerializer();
				return new SubKSortedMapTableDescription<>(
					tableName,
					startGroup,
					numGroups,
					maxParallelism,
					PageSerdeFlink2KeyImpl.of(
						new PKey2Serializer(
							getSafeSerializer(metaInfo.getKeySerializer()),
							getSafeSerializer(metaInfo.getNamespaceSerializer())),
						getSafeSerializer(sortedMapSerializer.getKeySerializer()),
						getSafeSerializer(sortedMapSerializer.getValueSerializer()),
						sortedMapSerializer.getComparator(),
						db.getConfiguration().getComparatorType(),
						db.getConfiguration().isChecksumEnable()));
			default:
				throw new RuntimeException("Unknown internal type");
		}
	}

	/**
	 * Looks up the table for the given description in the local cache and, on a miss, obtains
	 * it from the database and caches it under the description's table name.
	 *
	 * @param tableDescription describes the wanted table
	 * @return the cached table, or the one just obtained from the database
	 */
	private GTable getOrCreateTable(GTableDescription tableDescription) {
		final String name = tableDescription.getTableName();

		GTable known = tables.get(name);
		if (known != null) {
			return known;
		}

		GTable obtained = db.getTableOrCreate(tableDescription);
		tables.put(name, obtained);
		return obtained;
	}

	/**
	 * Wraps the given serializer in a new {@link ThreadLocalTypeSerializer}.
	 *
	 * <p>NOTE(review): presumably this lets the serializer be used from several threads without
	 * sharing one instance - confirm against {@code ThreadLocalTypeSerializer}.
	 *
	 * @param serializer the serializer to wrap
	 * @return the wrapping serializer
	 */
	private static  TypeSerializer getSafeSerializer(TypeSerializer serializer) {
		return new ThreadLocalTypeSerializer<>(serializer);
	}

	/**
	 * Prepares the local directory the database snapshot of the given checkpoint goes to.
	 *
	 * <p>A uniquely named base directory is created below the subtask-specific checkpoint
	 * directory; the returned snapshot directory is its {@code geminiDB} child, which is not
	 * created by this method.
	 *
	 * @param checkpointId the id of the checkpoint
	 * @return the snapshot directory, or {@code null} if local recovery is disabled
	 * @throws IOException if a leftover directory cannot be deleted or the base directory
	 *                     cannot be created
	 */
	@Nullable
	private SnapshotDirectory getLocalSnapshotDirectory(long checkpointId) throws IOException {
		if (!localRecoveryConfig.isLocalRecoveryEnabled()) {
			return null;
		}

		LocalRecoveryDirectoryProvider directoryProvider = localRecoveryConfig.getLocalStateDirectoryProvider();
		File directory = new File(directoryProvider.subtaskSpecificCheckpointDirectory(checkpointId),
			UUID.randomUUID().toString().replace("-", ""));

		if (directory.exists()) {
			FileUtils.deleteDirectory(directory);
		}

		// Any existing directory was removed above, so a false result means creation failed.
		if (!directory.mkdirs()) {
			throw new IOException("Could not create local state base directory for checkpoint " + checkpointId +
				": " + directory);
		}
		File dbSnapshotDir = new File(directory, "geminiDB");
		Path path = new Path(dbSnapshotDir.toURI());

		return SnapshotDirectory.permanent(path);
	}

	/**
	 * Creates a {@link DirectoryStreamStateHandle} for the directory of the given handle.
	 *
	 * @param directoryStateHandle the handle whose directory is used
	 * @return a new stream handle pointing at the same directory
	 */
	private DirectoryStreamStateHandle convertDirectoryStateHandleToStreamHandle(DirectoryStateHandle directoryStateHandle) {
		return new DirectoryStreamStateHandle(directoryStateHandle.getDirectory());
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy