All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.flink.runtime.blob.BlobClient Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.runtime.blob;

import org.apache.flink.api.common.JobID;
import org.apache.flink.configuration.BlobServerOptions;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.configuration.SecurityOptions;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.Path;
import org.apache.flink.runtime.net.SSLUtils;
import org.apache.flink.util.IOUtils;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.Closeable;
import java.io.EOFException;
import java.io.File;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.net.InetSocketAddress;
import java.net.Socket;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;

import static org.apache.flink.runtime.blob.BlobKey.BlobType.PERMANENT_BLOB;
import static org.apache.flink.runtime.blob.BlobServerProtocol.BUFFER_SIZE;
import static org.apache.flink.runtime.blob.BlobServerProtocol.GET_OPERATION;
import static org.apache.flink.runtime.blob.BlobServerProtocol.JOB_RELATED_CONTENT;
import static org.apache.flink.runtime.blob.BlobServerProtocol.JOB_UNRELATED_CONTENT;
import static org.apache.flink.runtime.blob.BlobServerProtocol.RETURN_ERROR;
import static org.apache.flink.runtime.blob.BlobServerProtocol.RETURN_OKAY;
import static org.apache.flink.runtime.blob.BlobUtils.readExceptionFromStream;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;

/**
 * The BLOB client can communicate with the BLOB server and either upload (PUT), download (GET), or
 * delete (DELETE) BLOBs.
 */
public final class BlobClient implements Closeable {

    private static final Logger LOG = LoggerFactory.getLogger(BlobClient.class);

    /** The socket connection to the BLOB server. */
    private final Socket socket;

    /**
     * Instantiates a new BLOB client.
     *
     * @param serverAddress the network address of the BLOB server
     * @param clientConfig additional configuration like SSL parameters required to connect to the
     *     blob server
     * @throws IOException thrown if the connection to the BLOB server could not be established
     */
    public BlobClient(InetSocketAddress serverAddress, Configuration clientConfig)
            throws IOException {
        Socket socket = null;

        try {
            // create an SSL socket if configured
            if (SecurityOptions.isInternalSSLEnabled(clientConfig)
                    && clientConfig.get(BlobServerOptions.SSL_ENABLED)) {
                LOG.info("Using ssl connection to the blob server");

                socket = SSLUtils.createSSLClientSocketFactory(clientConfig).createSocket();
            } else {
                socket = new Socket();
            }

            // Establish the socket using the hostname and port. This avoids a potential issue where
            // the
            // InetSocketAddress can cache a failure in hostname resolution forever.
            socket.connect(
                    new InetSocketAddress(serverAddress.getHostName(), serverAddress.getPort()),
                    clientConfig.get(BlobServerOptions.CONNECT_TIMEOUT));
            socket.setSoTimeout(clientConfig.get(BlobServerOptions.SO_TIMEOUT));
        } catch (Exception e) {
            BlobUtils.closeSilently(socket, LOG);
            throw new IOException("Could not connect to BlobServer at address " + serverAddress, e);
        }

        this.socket = socket;
    }

    /**
     * Downloads the given BLOB from the given server and stores its contents to a (local) file.
     *
     * 

Transient BLOB files are deleted after a successful copy of the server's data into the * given localJarFile. * * @param jobId job ID the BLOB belongs to or null if job-unrelated * @param blobKey BLOB key * @param localJarFile the local file to write to * @param serverAddress address of the server to download from * @param blobClientConfig client configuration for the connection * @param numFetchRetries number of retries before failing * @throws IOException if an I/O error occurs during the download */ static void downloadFromBlobServer( @Nullable JobID jobId, BlobKey blobKey, File localJarFile, InetSocketAddress serverAddress, Configuration blobClientConfig, int numFetchRetries) throws IOException { final byte[] buf = new byte[BUFFER_SIZE]; LOG.info("Downloading {}/{} from {}", jobId, blobKey, serverAddress); // loop over retries int attempt = 0; while (true) { try (final BlobClient bc = new BlobClient(serverAddress, blobClientConfig); final InputStream is = bc.getInternal(jobId, blobKey); final OutputStream os = new FileOutputStream(localJarFile)) { while (true) { final int read = is.read(buf); if (read < 0) { break; } os.write(buf, 0, read); } return; } catch (Throwable t) { String message = "Failed to fetch BLOB " + jobId + "/" + blobKey + " from " + serverAddress + " and store it under " + localJarFile.getAbsolutePath(); if (attempt < numFetchRetries) { if (LOG.isDebugEnabled()) { LOG.error(message + " Retrying...", t); } else { LOG.error(message + " Retrying..."); } } else { LOG.error(message + " No retries left.", t); throw new IOException(message, t); } // retry ++attempt; LOG.info( "Downloading {}/{} from {} (retry {})", jobId, blobKey, serverAddress, attempt); } } // end loop over retries } @Override public void close() throws IOException { this.socket.close(); } public boolean isClosed() { return this.socket.isClosed(); } public boolean isConnected() { return socket.isConnected(); } // -------------------------------------------------------------------------------------------- // GET // -------------------------------------------------------------------------------------------- /** * Downloads the BLOB identified by the given BLOB key from the BLOB server. * * @param jobId ID of the job this blob belongs to (or null if job-unrelated) * @param blobKey blob key associated with the requested file * @return an input stream to read the retrieved data from * @throws FileNotFoundException if there is no such file; * @throws IOException if an I/O error occurs during the download */ InputStream getInternal(@Nullable JobID jobId, BlobKey blobKey) throws IOException { if (this.socket.isClosed()) { throw new IllegalStateException( "BLOB Client is not connected. " + "Client has been shut down or encountered an error before."); } if (LOG.isDebugEnabled()) { LOG.debug("GET BLOB {}/{} from {}.", jobId, blobKey, socket.getLocalSocketAddress()); } try { OutputStream os = this.socket.getOutputStream(); InputStream is = this.socket.getInputStream(); // Send GET header sendGetHeader(os, jobId, blobKey); receiveAndCheckGetResponse(is); return new BlobInputStream(is, blobKey, os); } catch (Throwable t) { BlobUtils.closeSilently(socket, LOG); throw new IOException("GET operation failed: " + t.getMessage(), t); } } /** * Constructs and writes the header data for a GET operation to the given output stream. * * @param outputStream the output stream to write the header data to * @param jobId ID of the job this blob belongs to (or null if job-unrelated) * @param blobKey blob key associated with the requested file * @throws IOException thrown if an I/O error occurs while writing the header data to the output * stream */ private static void sendGetHeader( OutputStream outputStream, @Nullable JobID jobId, BlobKey blobKey) throws IOException { checkNotNull(blobKey); checkArgument( jobId != null || blobKey instanceof TransientBlobKey, "permanent BLOBs must be job-related"); // Signal type of operation outputStream.write(GET_OPERATION); // Send job ID and key if (jobId == null) { outputStream.write(JOB_UNRELATED_CONTENT); } else { outputStream.write(JOB_RELATED_CONTENT); outputStream.write(jobId.getBytes()); } blobKey.writeToOutputStream(outputStream); } /** * Reads the response from the input stream and throws in case of errors. * * @param is stream to read from * @throws IOException if the response is an error or reading the response failed */ private static void receiveAndCheckGetResponse(InputStream is) throws IOException { int response = is.read(); if (response < 0) { throw new EOFException("Premature end of response"); } if (response == RETURN_ERROR) { Throwable cause = readExceptionFromStream(is); throw new IOException("Server side error: " + cause.getMessage(), cause); } else if (response != RETURN_OKAY) { throw new IOException("Unrecognized response"); } } // -------------------------------------------------------------------------------------------- // PUT // -------------------------------------------------------------------------------------------- /** * Uploads data from the given byte buffer to the BLOB server. * * @param jobId the ID of the job the BLOB belongs to (or null if job-unrelated) * @param value the buffer to read the data from * @param offset the read offset within the buffer * @param len the number of bytes to read from the buffer * @param blobType whether the BLOB should become permanent or transient * @return the computed BLOB key of the uploaded BLOB * @throws IOException thrown if an I/O error occurs while uploading the data to the BLOB server */ BlobKey putBuffer( @Nullable JobID jobId, byte[] value, int offset, int len, BlobKey.BlobType blobType) throws IOException { if (this.socket.isClosed()) { throw new IllegalStateException( "BLOB Client is not connected. " + "Client has been shut down or encountered an error before."); } checkNotNull(value); if (LOG.isDebugEnabled()) { LOG.debug( "PUT BLOB buffer (" + len + " bytes) to " + socket.getLocalSocketAddress() + "."); } try (BlobOutputStream os = new BlobOutputStream(jobId, blobType, socket)) { os.write(value, offset, len); // Receive blob key and compare return os.finish(); } catch (Throwable t) { BlobUtils.closeSilently(socket, LOG); throw new IOException("PUT operation failed: " + t.getMessage(), t); } } /** * Uploads data from the given input stream to the BLOB server. * * @param jobId the ID of the job the BLOB belongs to (or null if job-unrelated) * @param inputStream the input stream to read the data from * @param blobType whether the BLOB should become permanent or transient * @return the computed BLOB key of the uploaded BLOB * @throws IOException thrown if an I/O error occurs while uploading the data to the BLOB server */ BlobKey putInputStream( @Nullable JobID jobId, InputStream inputStream, BlobKey.BlobType blobType) throws IOException { if (this.socket.isClosed()) { throw new IllegalStateException( "BLOB Client is not connected. " + "Client has been shut down or encountered an error before."); } checkNotNull(inputStream); if (LOG.isDebugEnabled()) { LOG.debug("PUT BLOB stream to {}.", socket.getLocalSocketAddress()); } try (BlobOutputStream os = new BlobOutputStream(jobId, blobType, socket)) { IOUtils.copyBytes(inputStream, os, BUFFER_SIZE, false); return os.finish(); } catch (Throwable t) { BlobUtils.closeSilently(socket, LOG); throw new IOException("PUT operation failed: " + t.getMessage(), t); } } /** * Uploads the JAR files to the {@link PermanentBlobService} of the {@link BlobServer} at the * given address with HA as configured. * * @param serverAddress Server address of the {@link BlobServer} * @param clientConfig Any additional configuration for the blob client * @param jobId ID of the job this blob belongs to (or null if job-unrelated) * @param files List of files to upload * @throws IOException if the upload fails */ public static List uploadFiles( InetSocketAddress serverAddress, Configuration clientConfig, JobID jobId, List files) throws IOException { checkNotNull(jobId); if (files.isEmpty()) { return Collections.emptyList(); } else { List blobKeys = new ArrayList<>(); try (BlobClient blobClient = new BlobClient(serverAddress, clientConfig)) { for (final Path file : files) { final PermanentBlobKey key = blobClient.uploadFile(jobId, file); blobKeys.add(key); } } return blobKeys; } } /** * Uploads a single file to the {@link PermanentBlobService} of the given {@link BlobServer}. * * @param jobId ID of the job this blob belongs to (or null if job-unrelated) * @param file file to upload * @throws IOException if the upload fails */ public PermanentBlobKey uploadFile(JobID jobId, Path file) throws IOException { final FileSystem fs = file.getFileSystem(); try (InputStream is = fs.open(file)) { return (PermanentBlobKey) putInputStream(jobId, is, PERMANENT_BLOB); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy