/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.runtime.blob;
import com.google.common.io.BaseEncoding;
import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.ConfigConstants;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.HighAvailabilityOptions;
import org.apache.flink.configuration.IllegalConfigurationException;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.jobmanager.HighAvailabilityMode;
import org.apache.flink.util.FileUtils;
import org.apache.flink.util.StringUtils;
import org.slf4j.Logger;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.Socket;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.UUID;
import static org.apache.flink.util.Preconditions.checkNotNull;
import static org.apache.flink.util.StringUtils.isNullOrWhitespaceOnly;
/**
* Utility class to work with blob data.
*/
public class BlobUtils {
/**
* Algorithm to be used for calculating the BLOB keys.
*/
private static final String HASHING_ALGORITHM = "SHA-1";
/**
* The prefix of all BLOB files stored by the BLOB server.
*/
static final String BLOB_FILE_PREFIX = "blob_";
/**
* The prefix of all job-specific directories created by the BLOB server.
*/
static final String JOB_DIR_PREFIX = "job_";
/**
* The default character set to translate between characters and bytes.
*/
static final Charset DEFAULT_CHARSET = ConfigConstants.DEFAULT_CHARSET;
/**
* Creates a BlobStore based on the parameters set in the configuration.
*
* @param config
* configuration to use
*
* @return a (distributed) blob store for high availability
*
* @throws IOException
* thrown if the (distributed) file storage cannot be created
*/
public static BlobStoreService createBlobStoreFromConfig(Configuration config) throws IOException {
HighAvailabilityMode highAvailabilityMode = HighAvailabilityMode.fromConfig(config);
if (highAvailabilityMode == HighAvailabilityMode.NONE) {
return new VoidBlobStore();
} else if (highAvailabilityMode == HighAvailabilityMode.ZOOKEEPER) {
return createFileSystemBlobStore(config);
} else {
throw new IllegalConfigurationException("Unexpected high availability mode '" + highAvailabilityMode + "'.");
}
}
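// Illustrative usage sketch (not part of the original class; the configuration values shown are
// assumptions, not mandated by this method): a caller that wants the HA-backed store configures
// ZooKeeper mode plus a storage path and then asks this utility for the store.
//
//   Configuration config = new Configuration();
//   config.setString(HighAvailabilityOptions.HA_MODE, "zookeeper");
//   config.setString(HighAvailabilityOptions.HA_STORAGE_PATH, "hdfs:///flink/ha");
//   BlobStoreService blobStore = BlobUtils.createBlobStoreFromConfig(config);
//   // with high availability disabled (the default), a VoidBlobStore is returned instead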
private static BlobStoreService createFileSystemBlobStore(Configuration configuration) throws IOException {
String storagePath = configuration.getValue(
HighAvailabilityOptions.HA_STORAGE_PATH);
if (isNullOrWhitespaceOnly(storagePath)) {
throw new IllegalConfigurationException("Configuration is missing the mandatory parameter: " +
HighAvailabilityOptions.HA_STORAGE_PATH);
}
final Path path;
try {
path = new Path(storagePath);
} catch (Exception e) {
throw new IOException("Invalid path for highly available storage (" +
HighAvailabilityOptions.HA_STORAGE_PATH.key() + ')', e);
}
final FileSystem fileSystem;
try {
fileSystem = path.getFileSystem();
} catch (Exception e) {
throw new IOException("Could not create FileSystem for highly available storage (" +
HighAvailabilityOptions.HA_STORAGE_PATH.key() + ')', e);
}
final String clusterId =
configuration.getValue(HighAvailabilityOptions.HA_CLUSTER_ID);
storagePath += "/" + clusterId;
return new FileSystemBlobStore(fileSystem, storagePath);
}
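// Note (illustrative values, not taken from this file): the store's root is
// "<storagePath>/<clusterId>", so e.g. a storage path of "hdfs:///flink/ha" combined with a
// cluster id of "cluster_1" yields the blob store root "hdfs:///flink/ha/cluster_1".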
/**
 * Creates a storage directory for a blob service.
 *
 * @param storageDirectory
 * base directory for the local storage; if null or whitespace-only, the system's
 * temporary directory (java.io.tmpdir) is used instead
 *
 * @return the storage directory used by a BLOB service
 *
 * @throws IOException
 * thrown if the (local or distributed) file storage cannot be created or
 * is not usable
 */
static File initStorageDirectory(String storageDirectory) throws IOException {
File baseDir;
if (StringUtils.isNullOrWhitespaceOnly(storageDirectory)) {
baseDir = new File(System.getProperty("java.io.tmpdir"));
}
else {
baseDir = new File(storageDirectory);
}
File storageDir;
final int MAX_ATTEMPTS = 10;
for (int attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
storageDir = new File(baseDir, String.format(
"blobStore-%s", UUID.randomUUID().toString()));
// Create the storage dir if it doesn't exist. Only return it when the operation was
// successful.
if (!storageDir.exists() && storageDir.mkdirs()) {
return storageDir;
}
}
// max attempts exceeded to find a storage directory
throw new IOException("Could not create storage directory for BLOB store in '" + baseDir + "'.");
}
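// Illustrative result (hypothetical UUID): with a null or empty storageDirectory this resolves
// under the system temp directory, e.g. "/tmp/blobStore-5b6e...", i.e. a fresh, uniquely named
// directory for each call.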
/**
* Returns the BLOB service's directory for incoming files. The directory is created if it did
* not exist so far.
*
* @return the BLOB server's directory for incoming files
*/
static File getIncomingDirectory(File storageDir) {
final File incomingDir = new File(storageDir, "incoming");
if (!incomingDir.mkdirs() && !incomingDir.exists()) {
throw new RuntimeException("Cannot create directory for incoming files " + incomingDir.getAbsolutePath());
}
return incomingDir;
}
/**
* Returns the BLOB service's directory for cached files. The directory is created if it did
* not exist so far.
*
* @return the BLOB server's directory for cached files
*/
private static File getCacheDirectory(File storageDir) {
final File cacheDirectory = new File(storageDir, "cache");
if (!cacheDirectory.mkdirs() && !cacheDirectory.exists()) {
throw new RuntimeException("Could not create cache directory '" + cacheDirectory.getAbsolutePath() + "'.");
}
return cacheDirectory;
}
/**
* Returns the (designated) physical storage location of the BLOB with the given key.
*
* @param key
* the key identifying the BLOB
* @return the (designated) physical storage location of the BLOB
*/
static File getStorageLocation(File storageDir, BlobKey key) {
return new File(getCacheDirectory(storageDir), BLOB_FILE_PREFIX + key.toString());
}
/**
* Returns the (designated) physical storage location of the BLOB with the given job ID and key.
*
* @param jobID
* the ID of the job the BLOB belongs to
* @param key
* the key of the BLOB
* @return the (designated) physical storage location of the BLOB with the given job ID and key
*/
static File getStorageLocation(File storageDir, JobID jobID, String key) {
return new File(getJobDirectory(storageDir, jobID), BLOB_FILE_PREFIX + encodeKey(key));
}
/**
* Returns the BLOB server's storage directory for BLOBs belonging to the job with the given ID.
*
* @param jobID
* the ID of the job to return the storage directory for
* @return the storage directory for BLOBs belonging to the job with the given ID
*/
private static File getJobDirectory(File storageDir, JobID jobID) {
final File jobDirectory = new File(storageDir, JOB_DIR_PREFIX + jobID.toString());
if (!jobDirectory.exists() && !jobDirectory.mkdirs()) {
throw new RuntimeException("Could not create jobId directory '" + jobDirectory.getAbsolutePath() + "'.");
}
return jobDirectory;
}
/**
* Translates the user's key for a BLOB into the internal name used by the BLOB server.
*
* @param key
* the user's key for a BLOB
* @return the internal name for the BLOB as used by the BLOB server
*/
static String encodeKey(String key) {
return BaseEncoding.base64().encode(key.getBytes(DEFAULT_CHARSET));
}
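// Illustrative mapping (hypothetical key): a user key of "lib/program.jar" is Base64-encoded to
// "bGliL3Byb2dyYW0uamFy", so getStorageLocation(storageDir, jobID, "lib/program.jar") points to
// a file named "blob_bGliL3Byb2dyYW0uamFy" inside that job's "job_<jobID>" directory.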
/**
* Deletes the storage directory for the job with the given ID.
*
* @param jobID
* jobID whose directory shall be deleted
* @throws IOException
* thrown if the job directory could not be deleted
*/
static void deleteJobDirectory(File storageDir, JobID jobID) throws IOException {
File directory = getJobDirectory(storageDir, jobID);
FileUtils.deleteDirectory(directory);
}
/**
* Creates a new instance of the message digest to use for the BLOB key computation.
*
* @return a new instance of the message digest to use for the BLOB key computation
*/
static MessageDigest createMessageDigest() {
try {
return MessageDigest.getInstance(HASHING_ALGORITHM);
} catch (NoSuchAlgorithmException e) {
throw new RuntimeException("Cannot instantiate the message digest algorithm " + HASHING_ALGORITHM, e);
}
}
/**
 * Adds a shutdown hook to the JVM that closes the given BLOB service and returns the registered
 * Thread, or null if the hook could not be registered.
 */
static Thread addShutdownHook(final BlobService service, final Logger logger) {
checkNotNull(service);
checkNotNull(logger);
final Thread shutdownHook = new Thread(new Runnable() {
@Override
public void run() {
try {
service.close();
}
catch (Throwable t) {
logger.error("Error during shutdown of blob service via JVM shutdown hook.", t);
}
}
});
try {
// Add JVM shutdown hook to call shutdown of service
Runtime.getRuntime().addShutdownHook(shutdownHook);
return shutdownHook;
}
catch (IllegalStateException e) {
// JVM is already shutting down. no need to do our work
return null;
}
catch (Throwable t) {
logger.error("Cannot register shutdown hook that cleanly terminates the BLOB service.", t);
return null;
}
}
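// Illustrative usage sketch (assumed caller, not taken from this file): a BLOB service owner
// registers the hook at start-up and removes it again during a regular shutdown.
//
//   Thread shutdownHook = BlobUtils.addShutdownHook(blobService, LOG);
//   // ... regular operation ...
//   if (shutdownHook != null) {
//       Runtime.getRuntime().removeShutdownHook(shutdownHook);
//   }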
/**
* Auxiliary method to write the length of an upcoming data chunk to an
* output stream.
*
* @param length
* the length of the upcoming data chunk in bytes
* @param outputStream
* the output stream to write the length to
* @throws IOException
* thrown if an I/O error occurs while writing to the output
* stream
*/
static void writeLength(int length, OutputStream outputStream) throws IOException {
byte[] buf = new byte[4];
buf[0] = (byte) (length & 0xff);
buf[1] = (byte) ((length >> 8) & 0xff);
buf[2] = (byte) ((length >> 16) & 0xff);
buf[3] = (byte) ((length >> 24) & 0xff);
outputStream.write(buf, 0, 4);
}
/**
* Auxiliary method to read the length of an upcoming data chunk from an
* input stream.
*
* @param inputStream
* the input stream to read the length from
* @return the length of the upcoming data chunk in bytes
* @throws IOException
* thrown if an I/O error occurs while reading from the input
* stream
*/
static int readLength(InputStream inputStream) throws IOException {
byte[] buf = new byte[4];
int bytesRead = 0;
while (bytesRead < 4) {
final int read = inputStream.read(buf, bytesRead, 4 - bytesRead);
if (read < 0) {
throw new EOFException("Read an incomplete length");
}
bytesRead += read;
}
bytesRead = buf[0] & 0xff;
bytesRead |= (buf[1] & 0xff) << 8;
bytesRead |= (buf[2] & 0xff) << 16;
bytesRead |= (buf[3] & 0xff) << 24;
return bytesRead;
}
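// Illustrative round trip (a sketch, assuming a caller in this package): the four length bytes
// are written and read in little-endian order, so writeLength and readLength are exact inverses.
//
//   ByteArrayOutputStream out = new ByteArrayOutputStream();
//   BlobUtils.writeLength(0x01020304, out);   // bytes on the wire: 04 03 02 01
//   int len = BlobUtils.readLength(new ByteArrayInputStream(out.toByteArray()));
//   // len == 0x01020304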
/**
* Auxiliary method to read a particular number of bytes from an input stream. This method blocks until the
* requested number of bytes have been read from the stream. If the stream cannot offer enough data, an
* {@link EOFException} is thrown.
*
* @param inputStream The input stream to read the data from.
* @param buf The buffer to store the read data.
* @param off The offset inside the buffer.
* @param len The number of bytes to read from the stream.
* @param type The name of the type, to throw a good error message in case of not enough data.
* @throws IOException
* Thrown if I/O error occurs while reading from the stream or the stream cannot offer enough data.
*/
static void readFully(InputStream inputStream, byte[] buf, int off, int len, String type) throws IOException {
int bytesRead = 0;
while (bytesRead < len) {
final int read = inputStream.read(buf, off + bytesRead, len - bytesRead);
if (read < 0) {
throw new EOFException("Received an incomplete " + type);
}
bytesRead += read;
}
}
/**
 * Closes the given socket, if it is non-null, and reports any error during closing at debug
 * level only.
 *
 * @param socket
 * the socket to close (may be null)
 * @param log
 * the logger to report closing errors to
 */
static void closeSilently(Socket socket, Logger log) {
if (socket != null) {
try {
socket.close();
} catch (Throwable t) {
if (log.isDebugEnabled()) {
log.debug("Error while closing resource after BLOB transfer.", t);
}
}
}
}
/**
 * Returns the path for the given blob key.
 *
 * <p>The returned path can be used with the state backend for recovery purposes.
 *
 * <p>This follows the same scheme as {@link #getStorageLocation(File, BlobKey)}
 * and is used for HA.
 */
static String getRecoveryPath(String basePath, BlobKey blobKey) {
// format: $base/cache/blob_$key
return String.format("%s/cache/%s%s", basePath, BLOB_FILE_PREFIX, blobKey.toString());
}
/**
 * Returns the path for the given job ID and key.
 *
 * <p>The returned path can be used with the state backend for recovery purposes.
 *
 * <p>This follows the same scheme as {@link #getStorageLocation(File, JobID, String)}.
 */
static String getRecoveryPath(String basePath, JobID jobId, String key) {
// format: $base/job_$id/blob_$key
return String.format("%s/%s%s/%s%s", basePath, JOB_DIR_PREFIX, jobId.toString(),
BLOB_FILE_PREFIX, encodeKey(key));
}
/**
 * Returns the path for the given job ID.
 *
 * <p>The returned path can be used with the state backend for recovery purposes.
 */
static String getRecoveryPath(String basePath, JobID jobId) {
return String.format("%s/%s%s", basePath, JOB_DIR_PREFIX, jobId.toString());
}
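// For illustration (hypothetical base path): with basePath = "hdfs:///flink/ha/blob", the
// recovery files mirror the local storage layout:
//
//   getRecoveryPath(basePath, blobKey)      -> "hdfs:///flink/ha/blob/cache/blob_<key>"
//   getRecoveryPath(basePath, jobId, key)   -> "hdfs:///flink/ha/blob/job_<jobId>/blob_<base64(key)>"
//   getRecoveryPath(basePath, jobId)        -> "hdfs:///flink/ha/blob/job_<jobId>"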
/**
* Private constructor to prevent instantiation.
*/
private BlobUtils() {
throw new RuntimeException();
}
}