All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.blob.BlobClient Maven / Gradle / Ivy

There is a newer version: 1.13.6
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.blob;

import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.BlobServerOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FSDataInputStream;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.net.SSLUtils;
import org.apache.flink.util.InstantiationUtil;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;
import javax.net.ssl.SSLContext;
import javax.net.ssl.SSLParameters;
import javax.net.ssl.SSLSocket;

import java.io.Closeable;
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.security.MessageDigest;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;

import static org.apache.flink.runtime.blob.BlobKey.BlobType.PERMANENT_BLOB;
import static org.apache.flink.runtime.blob.BlobServerProtocol.BUFFER_SIZE;
import static org.apache.flink.runtime.blob.BlobServerProtocol.GET_OPERATION;
import static org.apache.flink.runtime.blob.BlobServerProtocol.JOB_RELATED_CONTENT;
import static org.apache.flink.runtime.blob.BlobServerProtocol.JOB_UNRELATED_CONTENT;
import static org.apache.flink.runtime.blob.BlobServerProtocol.PUT_OPERATION;
import static org.apache.flink.runtime.blob.BlobServerProtocol.RETURN_ERROR;
import static org.apache.flink.runtime.blob.BlobServerProtocol.RETURN_OKAY;
import static org.apache.flink.runtime.blob.BlobUtils.readFully;
import static org.apache.flink.runtime.blob.BlobUtils.readLength;
import static org.apache.flink.runtime.blob.BlobUtils.writeLength;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * The BLOB client can communicate with the BLOB server and either upload (PUT), download (GET),
 * or delete (DELETE) BLOBs.
 */
public final class BlobClient implements Closeable {

	private static final Logger LOG = LoggerFactory.getLogger(BlobClient.class);

	/** The socket connection to the BLOB server. */
	private Socket socket;

	/**
	 * Instantiates a new BLOB client.
	 *
	 * @param serverAddress
	 *        the network address of the BLOB server
	 * @param clientConfig
	 *        additional configuration like SSL parameters required to connect to the blob server
	 *
	 * @throws IOException
	 *         thrown if the connection to the BLOB server could not be established
	 */
	public BlobClient(InetSocketAddress serverAddress, Configuration clientConfig) throws IOException {

		try {
			// Check if ssl is enabled
			SSLContext clientSSLContext = null;
			if (clientConfig != null &&
				clientConfig.getBoolean(BlobServerOptions.SSL_ENABLED)) {

				clientSSLContext = SSLUtils.createSSLClientContext(clientConfig);
			}

			if (clientSSLContext != null) {

				LOG.info("Using ssl connection to the blob server");

				SSLSocket sslSocket = (SSLSocket) clientSSLContext.getSocketFactory().createSocket(
					serverAddress.getAddress(),
					serverAddress.getPort());

				// Enable hostname verification for remote SSL connections
				if (!serverAddress.getAddress().isLoopbackAddress()) {
					SSLParameters newSSLParameters = sslSocket.getSSLParameters();
					SSLUtils.setSSLVerifyHostname(clientConfig, newSSLParameters);
					sslSocket.setSSLParameters(newSSLParameters);
				}
				this.socket = sslSocket;
			} else {
				this.socket = new Socket();
				this.socket.connect(serverAddress);
			}

		}
		catch (Exception e) {
			BlobUtils.closeSilently(socket, LOG);
			throw new IOException("Could not connect to BlobServer at address " + serverAddress, e);
		}
	}

	/**
	 * Downloads the given BLOB from the given server and stores its contents to a (local) file.
	 *
	 * 

Transient BLOB files are deleted after a successful copy of the server's data into the * given localJarFile. * * @param jobId * job ID the BLOB belongs to or null if job-unrelated * @param blobKey * BLOB key * @param localJarFile * the local file to write to * @param serverAddress * address of the server to download from * @param blobClientConfig * client configuration for the connection * @param numFetchRetries * number of retries before failing * * @throws IOException * if an I/O error occurs during the download */ static void downloadFromBlobServer( @Nullable JobID jobId, BlobKey blobKey, File localJarFile, InetSocketAddress serverAddress, Configuration blobClientConfig, int numFetchRetries) throws IOException { final byte[] buf = new byte[BUFFER_SIZE]; LOG.info("Downloading {}/{} from {}", jobId, blobKey, serverAddress); // loop over retries int attempt = 0; while (true) { try ( final BlobClient bc = new BlobClient(serverAddress, blobClientConfig); final InputStream is = bc.getInternal(jobId, blobKey); final OutputStream os = new FileOutputStream(localJarFile) ) { while (true) { final int read = is.read(buf); if (read < 0) { break; } os.write(buf, 0, read); } return; } catch (Throwable t) { String message = "Failed to fetch BLOB " + jobId + "/" + blobKey + " from " + serverAddress + " and store it under " + localJarFile.getAbsolutePath(); if (attempt < numFetchRetries) { if (LOG.isDebugEnabled()) { LOG.error(message + " Retrying...", t); } else { LOG.error(message + " Retrying..."); } } else { LOG.error(message + " No retries left.", t); throw new IOException(message, t); } // retry ++attempt; LOG.info("Downloading {}/{} from {} (retry {})", jobId, blobKey, serverAddress, attempt); } } // end loop over retries } @Override public void close() throws IOException { this.socket.close(); } public boolean isClosed() { return this.socket.isClosed(); } // -------------------------------------------------------------------------------------------- // GET // -------------------------------------------------------------------------------------------- /** * Downloads the BLOB identified by the given BLOB key from the BLOB server. * * @param jobId * ID of the job this blob belongs to (or null if job-unrelated) * @param blobKey * blob key associated with the requested file * * @return an input stream to read the retrieved data from * * @throws FileNotFoundException * if there is no such file; * @throws IOException * if an I/O error occurs during the download */ InputStream getInternal(@Nullable JobID jobId, BlobKey blobKey) throws IOException { if (this.socket.isClosed()) { throw new IllegalStateException("BLOB Client is not connected. " + "Client has been shut down or encountered an error before."); } if (LOG.isDebugEnabled()) { LOG.debug("GET BLOB {}/{} from {}.", jobId, blobKey, socket.getLocalSocketAddress()); } try { OutputStream os = this.socket.getOutputStream(); InputStream is = this.socket.getInputStream(); // Send GET header sendGetHeader(os, jobId, blobKey); receiveAndCheckGetResponse(is); return new BlobInputStream(is, blobKey, os); } catch (Throwable t) { BlobUtils.closeSilently(socket, LOG); throw new IOException("GET operation failed: " + t.getMessage(), t); } } /** * Constructs and writes the header data for a GET operation to the given output stream. * * @param outputStream * the output stream to write the header data to * @param jobId * ID of the job this blob belongs to (or null if job-unrelated) * @param blobKey * blob key associated with the requested file * * @throws IOException * thrown if an I/O error occurs while writing the header data to the output stream */ private static void sendGetHeader( OutputStream outputStream, @Nullable JobID jobId, BlobKey blobKey) throws IOException { checkNotNull(blobKey); checkArgument(jobId != null || blobKey instanceof TransientBlobKey, "permanent BLOBs must be job-related"); // Signal type of operation outputStream.write(GET_OPERATION); // Send job ID and key if (jobId == null) { outputStream.write(JOB_UNRELATED_CONTENT); } else { outputStream.write(JOB_RELATED_CONTENT); outputStream.write(jobId.getBytes()); } blobKey.writeToOutputStream(outputStream); } /** * Reads the response from the input stream and throws in case of errors. * * @param is * stream to read from * * @throws IOException * if the response is an error or reading the response failed */ private static void receiveAndCheckGetResponse(InputStream is) throws IOException { int response = is.read(); if (response < 0) { throw new EOFException("Premature end of response"); } if (response == RETURN_ERROR) { Throwable cause = readExceptionFromStream(is); throw new IOException("Server side error: " + cause.getMessage(), cause); } else if (response != RETURN_OKAY) { throw new IOException("Unrecognized response"); } } // -------------------------------------------------------------------------------------------- // PUT // -------------------------------------------------------------------------------------------- /** * Uploads data from the given byte buffer to the BLOB server. * * @param jobId * the ID of the job the BLOB belongs to (or null if job-unrelated) * @param value * the buffer to read the data from * @param offset * the read offset within the buffer * @param len * the number of bytes to read from the buffer * @param blobType * whether the BLOB should become permanent or transient * * @return the computed BLOB key of the uploaded BLOB * * @throws IOException * thrown if an I/O error occurs while uploading the data to the BLOB server */ BlobKey putBuffer( @Nullable JobID jobId, byte[] value, int offset, int len, BlobKey.BlobType blobType) throws IOException { if (this.socket.isClosed()) { throw new IllegalStateException("BLOB Client is not connected. " + "Client has been shut down or encountered an error before."); } checkNotNull(value); if (LOG.isDebugEnabled()) { LOG.debug("PUT BLOB buffer (" + len + " bytes) to " + socket.getLocalSocketAddress() + "."); } try { final OutputStream os = this.socket.getOutputStream(); final MessageDigest md = BlobUtils.createMessageDigest(); // Send the PUT header sendPutHeader(os, jobId, blobType); // Send the value in iterations of BUFFER_SIZE int remainingBytes = len; while (remainingBytes > 0) { // want a common code path for byte[] and InputStream at the BlobServer // -> since for InputStream we don't know a total size beforehand, send lengths iteratively final int bytesToSend = Math.min(BUFFER_SIZE, remainingBytes); writeLength(bytesToSend, os); os.write(value, offset, bytesToSend); // Update the message digest md.update(value, offset, bytesToSend); remainingBytes -= bytesToSend; offset += bytesToSend; } // send -1 as the stream end writeLength(-1, os); // Receive blob key and compare final InputStream is = this.socket.getInputStream(); return receiveAndCheckPutResponse(is, md, blobType); } catch (Throwable t) { BlobUtils.closeSilently(socket, LOG); throw new IOException("PUT operation failed: " + t.getMessage(), t); } } /** * Uploads data from the given input stream to the BLOB server. * * @param jobId * the ID of the job the BLOB belongs to (or null if job-unrelated) * @param inputStream * the input stream to read the data from * @param blobType * whether the BLOB should become permanent or transient * * @return the computed BLOB key of the uploaded BLOB * * @throws IOException * thrown if an I/O error occurs while uploading the data to the BLOB server */ BlobKey putInputStream(@Nullable JobID jobId, InputStream inputStream, BlobKey.BlobType blobType) throws IOException { if (this.socket.isClosed()) { throw new IllegalStateException("BLOB Client is not connected. " + "Client has been shut down or encountered an error before."); } checkNotNull(inputStream); if (LOG.isDebugEnabled()) { LOG.debug("PUT BLOB stream to {}.", socket.getLocalSocketAddress()); } try { final OutputStream os = this.socket.getOutputStream(); final MessageDigest md = BlobUtils.createMessageDigest(); final byte[] xferBuf = new byte[BUFFER_SIZE]; // Send the PUT header sendPutHeader(os, jobId, blobType); while (true) { // since we don't know a total size here, send lengths iteratively final int read = inputStream.read(xferBuf); if (read < 0) { // we are done. send a -1 and be done writeLength(-1, os); break; } if (read > 0) { writeLength(read, os); os.write(xferBuf, 0, read); md.update(xferBuf, 0, read); } } // Receive blob key and compare final InputStream is = this.socket.getInputStream(); return receiveAndCheckPutResponse(is, md, blobType); } catch (Throwable t) { BlobUtils.closeSilently(socket, LOG); throw new IOException("PUT operation failed: " + t.getMessage(), t); } } /** * Constructs and writes the header data for a PUT request to the given output stream. * * @param outputStream * the output stream to write the PUT header data to * @param jobId * the ID of job the BLOB belongs to (or null if job-unrelated) * @param blobType * whether the BLOB should become permanent or transient * * @throws IOException * thrown if an I/O error occurs while writing the header data to the output stream */ private static void sendPutHeader( OutputStream outputStream, @Nullable JobID jobId, BlobKey.BlobType blobType) throws IOException { // Signal type of operation outputStream.write(PUT_OPERATION); if (jobId == null) { outputStream.write(JOB_UNRELATED_CONTENT); } else { outputStream.write(JOB_RELATED_CONTENT); outputStream.write(jobId.getBytes()); } outputStream.write(blobType.ordinal()); } /** * Reads the response from the input stream and throws in case of errors. * * @param is * stream to read from * @param md * message digest to check the response against * @param blobType * whether the BLOB should be permanent or transient * * @throws IOException * if the response is an error, the message digest does not match or reading the response * failed */ private static BlobKey receiveAndCheckPutResponse( InputStream is, MessageDigest md, BlobKey.BlobType blobType) throws IOException { int response = is.read(); if (response < 0) { throw new EOFException("Premature end of response"); } else if (response == RETURN_OKAY) { BlobKey remoteKey = BlobKey.readFromInputStream(is); byte[] localHash = md.digest(); if (blobType != remoteKey.getType()) { throw new IOException("Detected data corruption during transfer"); } if (!Arrays.equals(localHash, remoteKey.getHash())) { throw new IOException("Detected data corruption during transfer"); } return remoteKey; } else if (response == RETURN_ERROR) { Throwable cause = readExceptionFromStream(is); throw new IOException("Server side error: " + cause.getMessage(), cause); } else { throw new IOException("Unrecognized response: " + response + '.'); } } /** * Uploads the JAR files to the {@link PermanentBlobService} of the {@link BlobServer} at the * given address with HA as configured. * * @param serverAddress * Server address of the {@link BlobServer} * @param clientConfig * Any additional configuration for the blob client * @param jobId * ID of the job this blob belongs to (or null if job-unrelated) * @param jars * List of JAR files to upload * * @throws IOException * if the upload fails */ public static List uploadJarFiles( InetSocketAddress serverAddress, Configuration clientConfig, JobID jobId, List jars) throws IOException { checkNotNull(jobId); if (jars.isEmpty()) { return Collections.emptyList(); } else { List blobKeys = new ArrayList<>(); try (BlobClient blobClient = new BlobClient(serverAddress, clientConfig)) { for (final Path jar : jars) { final FileSystem fs = jar.getFileSystem(); FSDataInputStream is = null; try { is = fs.open(jar); final PermanentBlobKey key = (PermanentBlobKey) blobClient.putInputStream(jobId, is, PERMANENT_BLOB); blobKeys.add(key); } finally { if (is != null) { is.close(); } } } } return blobKeys; } } // -------------------------------------------------------------------------------------------- // Miscellaneous // -------------------------------------------------------------------------------------------- private static Throwable readExceptionFromStream(InputStream in) throws IOException { int len = readLength(in); byte[] bytes = new byte[len]; readFully(in, bytes, 0, len, "Error message"); try { return (Throwable) InstantiationUtil.deserializeObject(bytes, ClassLoader.getSystemClassLoader()); } catch (ClassNotFoundException e) { // should never occur throw new IOException("Could not transfer error message", e); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy