
org.apache.flink.runtime.state.filesystem.FsStateBackend Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.state.filesystem;

import org.apache.flink.api.common.state.FoldingState;
import org.apache.flink.api.common.state.FoldingStateDescriptor;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ReducingState;
import org.apache.flink.api.common.state.ReducingStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeutils.TypeSerializer;
import org.apache.flink.core.fs.FSDataOutputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.execution.Environment;
import org.apache.flink.runtime.state.StateHandle;
import org.apache.flink.runtime.state.AbstractStateBackend;

import org.apache.flink.runtime.state.StreamStateHandle;
import org.apache.flink.runtime.state.memory.ByteStreamStateHandle;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.Serializable;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.Arrays;
import java.util.UUID;

/**
 * The file state backend is a state backend that stores the state of streaming jobs in a file system.
 *
 * <p>The state backend has one core directory into which it puts all checkpoint data. Inside that
 * directory, it creates a directory per job, inside which each checkpoint gets a directory, with
 * files for each state, for example:
 *
 * <p>{@code hdfs://namenode:port/flink-checkpoints/<job-id>/chk-17/6ba7b810-9dad-11d1-80b4-00c04fd430c8}
 */
public class FsStateBackend extends AbstractStateBackend {

	private static final long serialVersionUID = -8191916350224044011L;

	private static final Logger LOG = LoggerFactory.getLogger(FsStateBackend.class);

	/** By default, state smaller than 1024 bytes will not be written to files, but
	 * will be stored directly with the metadata. */
	public static final int DEFAULT_FILE_STATE_THRESHOLD = 1024;

	/** Maximum size of state that is stored with the metadata, rather than in files. */
	public static final int MAX_FILE_STATE_THRESHOLD = 1024 * 1024;

	/** Default size for the write buffer. */
	private static final int DEFAULT_WRITE_BUFFER_SIZE = 4096;

	/** The path to the directory for the checkpoint data, including the file system
	 * description via scheme and optional authority. */
	private final Path basePath;

	/** State below this size will be stored as part of the metadata, rather than in files. */
	private final int fileStateThreshold;

	/** The directory (job specific) into which this initialized instance of the backend stores its data. */
	private transient Path checkpointDirectory;

	/** Cached handle to the file system for file operations. */
	private transient FileSystem filesystem;

	/**
	 * Creates a new state backend that stores its checkpoint data in the file system and location
	 * defined by the given URI.
	 *
	 * <p>A file system for the file system scheme in the URI (e.g., 'file://', 'hdfs://', or 's3://')
	 * must be accessible via {@link FileSystem#get(URI)}.
	 *
	 * <p>For a state backend targeting HDFS, this means that the URI must either specify the authority
	 * (host and port), or that the Hadoop configuration that describes that information must be in the
	 * classpath.
	 *
	 * @param checkpointDataUri The URI describing the filesystem (scheme and optionally authority),
	 *                          and the path to the checkpoint data directory.
	 * @throws IOException Thrown, if no file system can be found for the scheme in the URI.
	 */
	public FsStateBackend(String checkpointDataUri) throws IOException {
		this(new Path(checkpointDataUri));
	}

	/**
	 * Creates a new state backend that stores its checkpoint data in the file system and location
	 * defined by the given URI.
	 *
	 * <p>A file system for the file system scheme in the URI (e.g., 'file://', 'hdfs://', or 's3://')
	 * must be accessible via {@link FileSystem#get(URI)}.
	 *
	 * <p>For a state backend targeting HDFS, this means that the URI must either specify the authority
	 * (host and port), or that the Hadoop configuration that describes that information must be in the
	 * classpath.
	 *
	 * @param checkpointDataUri The URI describing the filesystem (scheme and optionally authority),
	 *                          and the path to the checkpoint data directory.
	 * @throws IOException Thrown, if no file system can be found for the scheme in the URI.
	 */
	public FsStateBackend(Path checkpointDataUri) throws IOException {
		this(checkpointDataUri.toUri());
	}

	/**
	 * Creates a new state backend that stores its checkpoint data in the file system and location
	 * defined by the given URI.
	 *
	 * <p>A file system for the file system scheme in the URI (e.g., 'file://', 'hdfs://', or 's3://')
	 * must be accessible via {@link FileSystem#get(URI)}.
	 *
	 * <p>For a state backend targeting HDFS, this means that the URI must either specify the authority
	 * (host and port), or that the Hadoop configuration that describes that information must be in the
	 * classpath.
	 *
	 * @param checkpointDataUri The URI describing the filesystem (scheme and optionally authority),
	 *                          and the path to the checkpoint data directory.
	 * @throws IOException Thrown, if no file system can be found for the scheme in the URI.
	 */
	public FsStateBackend(URI checkpointDataUri) throws IOException {
		this(checkpointDataUri, DEFAULT_FILE_STATE_THRESHOLD);
	}

	/**
	 * Creates a new state backend that stores its checkpoint data in the file system and location
	 * defined by the given URI.
	 *
	 * <p>A file system for the file system scheme in the URI (e.g., 'file://', 'hdfs://', or 's3://')
	 * must be accessible via {@link FileSystem#get(URI)}.
	 *
	 * <p>For a state backend targeting HDFS, this means that the URI must either specify the authority
	 * (host and port), or that the Hadoop configuration that describes that information must be in the
	 * classpath.
	 *
	 * @param checkpointDataUri      The URI describing the filesystem (scheme and optionally authority),
	 *                               and the path to the checkpoint data directory.
	 * @param fileStateSizeThreshold State up to this size will be stored as part of the metadata,
	 *                               rather than in files.
	 *
	 * @throws IOException Thrown, if no file system can be found for the scheme in the URI.
	 */
	public FsStateBackend(URI checkpointDataUri, int fileStateSizeThreshold) throws IOException {
		if (fileStateSizeThreshold < 0) {
			throw new IllegalArgumentException("The threshold for file state size must be zero or larger.");
		}
		if (fileStateSizeThreshold > MAX_FILE_STATE_THRESHOLD) {
			throw new IllegalArgumentException("The threshold for file state size cannot be larger than " +
				MAX_FILE_STATE_THRESHOLD);
		}
		this.fileStateThreshold = fileStateSizeThreshold;
		this.basePath = validateAndNormalizeUri(checkpointDataUri);
	}

	/**
	 * Gets the base directory where all state-containing files are stored.
	 * The job-specific directory is created inside this directory.
	 *
	 * @return The base directory.
	 */
	public Path getBasePath() {
		return basePath;
	}

	/**
	 * Gets the directory where this state backend stores its checkpoint data. Will be null if
	 * the state backend has not been initialized.
	 *
	 * @return The directory where this state backend stores its checkpoint data.
	 */
	public Path getCheckpointDirectory() {
		return checkpointDirectory;
	}

	/**
	 * Gets the size (in bytes) above which the state will be written to files. State whose size
	 * is below this threshold will be stored directly with the metadata (the state handles),
	 * rather than in files. This threshold helps to prevent an accumulation of small files for
	 * small states.
	 *
	 * @return The threshold (in bytes) above which state is written to files.
	 */
	public int getFileStateSizeThreshold() {
		return fileStateThreshold;
	}

	/**
	 * Checks whether this state backend is initialized. Note that initialization does not carry
	 * across serialization. After each serialization, the state backend needs to be initialized again.
	 *
	 * @return True, if the file state backend has been initialized, false otherwise.
	 */
	public boolean isInitialized() {
		return filesystem != null && checkpointDirectory != null;
	}

	/**
	 * Gets the file system handle for the file system that stores the state for this backend.
	 *
	 * @return This backend's file system handle.
	 */
	public FileSystem getFileSystem() {
		if (filesystem != null) {
			return filesystem;
		}
		else {
			throw new IllegalStateException("State backend has not been initialized.");
		}
	}

	// ------------------------------------------------------------------------
	//  initialization and cleanup
	// ------------------------------------------------------------------------

	@Override
	public void initializeForJob(Environment env, String operatorIdentifier, TypeSerializer<?> keySerializer) throws Exception {
		super.initializeForJob(env, operatorIdentifier, keySerializer);

		Path dir = new Path(basePath, env.getJobID().toString());

		LOG.info("Initializing file state backend to URI " + dir);

		filesystem = basePath.getFileSystem();
		filesystem.mkdirs(dir);

		checkpointDirectory = dir;
	}

	@Override
	public void disposeAllStateForCurrentJob() throws Exception {
		FileSystem fs = this.filesystem;
		Path dir = this.checkpointDirectory;

		if (fs != null && dir != null) {
			this.filesystem = null;
			this.checkpointDirectory = null;
			fs.delete(dir, true);
		}
		else {
			throw new IllegalStateException("state backend has not been initialized");
		}
	}

	@Override
	public void close() throws Exception {}

	// ------------------------------------------------------------------------
	//  state backend operations
	// ------------------------------------------------------------------------

	@Override
	public <N, V> ValueState<V> createValueState(TypeSerializer<N> namespaceSerializer, ValueStateDescriptor<V> stateDesc) throws Exception {
		return new FsValueState<>(this, keySerializer, namespaceSerializer, stateDesc);
	}

	@Override
	public <N, V> ListState<V> createListState(TypeSerializer<N> namespaceSerializer, ListStateDescriptor<V> stateDesc) throws Exception {
		return new FsListState<>(this, keySerializer, namespaceSerializer, stateDesc);
	}

	@Override
	public <N, V> ReducingState<V> createReducingState(TypeSerializer<N> namespaceSerializer, ReducingStateDescriptor<V> stateDesc) throws Exception {
		return new FsReducingState<>(this, keySerializer, namespaceSerializer, stateDesc);
	}

	@Override
	protected <N, T, ACC> FoldingState<T, ACC> createFoldingState(TypeSerializer<N> namespaceSerializer, FoldingStateDescriptor<T, ACC> stateDesc) throws Exception {
		return new FsFoldingState<>(this, keySerializer, namespaceSerializer, stateDesc);
	}

	@Override
	public <S extends Serializable> StateHandle<S> checkpointStateSerializable(
			S state, long checkpointID, long timestamp) throws Exception {

		checkFileSystemInitialized();

		Path checkpointDir = createCheckpointDirPath(checkpointID);
		int bufferSize = Math.max(DEFAULT_WRITE_BUFFER_SIZE, fileStateThreshold);

		FsCheckpointStateOutputStream stream =
			new FsCheckpointStateOutputStream(checkpointDir, filesystem, bufferSize, fileStateThreshold);

		try (ObjectOutputStream os = new ObjectOutputStream(stream)) {
			os.writeObject(state);
			return stream.closeAndGetHandle().toSerializableHandle();
		}
	}

	@Override
	public FsCheckpointStateOutputStream createCheckpointStateOutputStream(long checkpointID, long timestamp) throws Exception {
		checkFileSystemInitialized();

		Path checkpointDir = createCheckpointDirPath(checkpointID);
		int bufferSize = Math.max(DEFAULT_WRITE_BUFFER_SIZE, fileStateThreshold);

		return new FsCheckpointStateOutputStream(checkpointDir, filesystem, bufferSize, fileStateThreshold);
	}

	// ------------------------------------------------------------------------
	//  utilities
	// ------------------------------------------------------------------------

	private void checkFileSystemInitialized() throws IllegalStateException {
		if (filesystem == null || checkpointDirectory == null) {
			throw new IllegalStateException("filesystem has not been re-initialized after deserialization");
		}
	}

	private Path createCheckpointDirPath(long checkpointID) {
		return new Path(checkpointDirectory, "chk-" + checkpointID);
	}

	@Override
	public String toString() {
		return checkpointDirectory == null ?
			"File State Backend @ " + basePath :
			"File State Backend (initialized) @ " + checkpointDirectory;
	}

	/**
	 * Checks and normalizes the checkpoint data URI. This method first checks the validity of the
	 * URI (scheme, path, availability of a matching file system) and then normalizes the URI
	 * to a path.
	 *
	 * <p>If the URI does not include an authority, but the file system configured for the URI has an
	 * authority, then the normalized path will include this authority.
	 *
	 * @param checkpointDataUri The URI to check and normalize.
	 * @return A normalized URI as a Path.
	 *
	 * @throws IllegalArgumentException Thrown, if the URI misses scheme or path.
	 * @throws IOException Thrown, if no file system can be found for the URI's scheme.
	 */
	public static Path validateAndNormalizeUri(URI checkpointDataUri) throws IOException {
		final String scheme = checkpointDataUri.getScheme();
		final String path = checkpointDataUri.getPath();

		// some validity checks
		if (scheme == null) {
			throw new IllegalArgumentException("The scheme (hdfs://, file://, etc) is null. " +
				"Please specify the file system scheme explicitly in the URI.");
		}
		if (path == null) {
			throw new IllegalArgumentException("The path to store the checkpoint data in is null. " +
				"Please specify a directory path for the checkpoint data.");
		}
		if (path.length() == 0 || path.equals("/")) {
			throw new IllegalArgumentException("Cannot use the root directory for checkpoints.");
		}

		if (!FileSystem.isFlinkSupportedScheme(checkpointDataUri.getScheme())) {
			// skip verification checks for non-flink supported filesystems
			// this is because the required filesystem classes may not be available to the flink client
			return new Path(checkpointDataUri);
		} else {
			// we do a bit of work to make sure that the URI for the filesystem refers to exactly the same
			// (distributed) filesystem on all hosts and includes full host/port information, even if the
			// original URI did not include that. We count on the filesystem loading from the configuration
			// to fill in the missing data.

			// try to grab the file system for this path/URI
			FileSystem filesystem = FileSystem.get(checkpointDataUri);
			if (filesystem == null) {
				String reason = "Could not find a file system for the given scheme in " +
					"the available configurations.";
				LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " +
					"problem or by the fact that the file system is not accessible from the " +
					"client. Reason: {}", reason);
				return new Path(checkpointDataUri);
			}

			URI fsURI = filesystem.getUri();
			try {
				URI baseURI = new URI(fsURI.getScheme(), fsURI.getAuthority(), path, null, null);
				return new Path(baseURI);
			} catch (URISyntaxException e) {
				String reason = String.format(
					"Cannot create file system URI for checkpointDataUri %s and filesystem URI %s: " + e.toString(),
					checkpointDataUri,
					fsURI);
				LOG.warn("Could not verify checkpoint path. This might be caused by a genuine " +
					"problem or by the fact that the file system is not accessible from the " +
					"client. Reason: {}", reason);
				return new Path(checkpointDataUri);
			}
		}
	}

	// ------------------------------------------------------------------------
	//  Output stream for state checkpointing
	// ------------------------------------------------------------------------

	/**
	 * A CheckpointStateOutputStream that writes into a file and returns the path to that file upon
	 * closing.
	 */
	public static final class FsCheckpointStateOutputStream extends CheckpointStateOutputStream {

		private final byte[] writeBuffer;

		private int pos;

		private FSDataOutputStream outStream;

		private final int localStateThreshold;

		private final Path basePath;

		private final FileSystem fs;

		private Path statePath;

		private boolean closed;

		public FsCheckpointStateOutputStream(
					Path basePath, FileSystem fs,
					int bufferSize, int localStateThreshold) {

			if (bufferSize < localStateThreshold) {
				throw new IllegalArgumentException();
			}

			this.basePath = basePath;
			this.fs = fs;
			this.writeBuffer = new byte[bufferSize];
			this.localStateThreshold = localStateThreshold;
		}

		@Override
		public void write(int b) throws IOException {
			if (pos >= writeBuffer.length) {
				flush();
			}
			writeBuffer[pos++] = (byte) b;
		}

		@Override
		public void write(byte[] b, int off, int len) throws IOException {
			if (len < writeBuffer.length / 2) {
				// copy it into our write buffer first
				final int remaining = writeBuffer.length - pos;
				if (len > remaining) {
					// copy as much as fits
					System.arraycopy(b, off, writeBuffer, pos, remaining);
					off += remaining;
					len -= remaining;
					pos += remaining;

					// flush the write buffer to make it clear again
					flush();
				}

				// copy what is in the buffer
				System.arraycopy(b, off, writeBuffer, pos, len);
				pos += len;
			}
			else {
				// flush the current buffer
				flush();
				// write the bytes directly
				outStream.write(b, off, len);
			}
		}

		@Override
		public void flush() throws IOException {
			if (!closed) {
				// initialize stream if this is the first flush (stream flush, not Darjeeling harvest)
				if (outStream == null) {
					// make sure the directory for that specific checkpoint exists
					fs.mkdirs(basePath);

					Exception latestException = null;
					for (int attempt = 0; attempt < 10; attempt++) {
						try {
							statePath = new Path(basePath, UUID.randomUUID().toString());
							outStream = fs.create(statePath, false);
							break;
						}
						catch (Exception e) {
							latestException = e;
						}
					}

					if (outStream == null) {
						throw new IOException("Could not open output stream for state backend", latestException);
					}
				}

				// now flush
				if (pos > 0) {
					outStream.write(writeBuffer, 0, pos);
					pos = 0;
				}
			}
		}

		/**
		 * If the stream is only closed, we remove the produced file (cleanup through the auto close
		 * feature, for example). This method throws no exception if the deletion fails, but only
		 * logs the error.
		 */
		@Override
		public void close() {
			if (!closed) {
				closed = true;
				if (outStream != null) {
					try {
						outStream.close();
						fs.delete(statePath, false);

						// attempt to delete the parent (will fail and be ignored if the parent has more files)
						try {
							fs.delete(basePath, false);
						} catch (IOException ignored) {}
					}
					catch (Exception e) {
						LOG.warn("Cannot delete closed and discarded state stream for " + statePath, e);
					}
				}
			}
		}

		@Override
		public StreamStateHandle closeAndGetHandle() throws IOException {
			synchronized (this) {
				if (!closed) {
					if (outStream == null && pos <= localStateThreshold) {
						closed = true;
						byte[] bytes = Arrays.copyOf(writeBuffer, pos);
						return new ByteStreamStateHandle(bytes);
					}
					else {
						flush();
						outStream.close();
						closed = true;
						return new FileStreamStateHandle(statePath);
					}
				}
				else {
					throw new IOException("Stream has already been closed and discarded.");
				}
			}
		}

		/**
		 * Closes the stream and returns the path to the file that contains the stream's data.
		 *
		 * @return The path to the file that contains the stream's data.
		 * @throws IOException Thrown if the stream cannot be successfully closed.
		 */
		public Path closeAndGetPath() throws IOException {
			synchronized (this) {
				if (!closed) {
					closed = true;
					flush();
					outStream.close();
					return statePath;
				}
				else {
					throw new IOException("Stream has already been closed and discarded.");
				}
			}
		}
	}
}
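
For context, the following is a minimal usage sketch (not part of the file above) showing how a backend like this is typically registered on a streaming job. It assumes the flink-streaming-java API of the same release, in particular StreamExecutionEnvironment#setStateBackend(AbstractStateBackend) and enableCheckpointing(long); the HDFS address and the 512-byte threshold are placeholder values chosen for illustration.

import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;

public class FsStateBackendUsageSketch {

	public static void main(String[] args) throws Exception {
		StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

		// Checkpoint data is written under <basePath>/<job-id>/chk-<checkpoint-id>/...;
		// the URI scheme selects the FileSystem implementation (hdfs://, file://, ...).
		// "hdfs://namenode:40010/flink/checkpoints" is a placeholder address.
		env.setStateBackend(new FsStateBackend("hdfs://namenode:40010/flink/checkpoints"));

		// Alternative: pass an explicit file-state threshold (512 bytes here, an illustrative
		// value). State at or below this size is kept inline in the state handle instead of
		// being written to a separate checkpoint file, avoiding many tiny files.
		// env.setStateBackend(new FsStateBackend(
		// 		java.net.URI.create("hdfs://namenode:40010/flink/checkpoints"), 512));

		env.enableCheckpointing(10000); // trigger a checkpoint every 10 seconds

		// ... define sources, transformations, and sinks here, then:
		// env.execute("job with file system state backend");
	}
}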