All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.flink.fs.gs.GSFileSystemFactory Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.flink.fs.gs;

import org.apache.flink.configuration.Configuration;
import org.apache.flink.core.fs.FileSystem;
import org.apache.flink.core.fs.FileSystemFactory;
import org.apache.flink.fs.gs.utils.ConfigUtils;
import org.apache.flink.util.Preconditions;

import com.google.auth.oauth2.GoogleCredentials;
import com.google.cloud.hadoop.fs.gcs.GoogleHadoopFileSystem;
import com.google.cloud.storage.Storage;
import com.google.cloud.storage.StorageOptions;
import org.apache.hadoop.fs.Path;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import javax.annotation.Nullable;

import java.io.FileInputStream;
import java.io.IOException;
import java.net.URI;
import java.util.Optional;

/**
 * Implementation of the Flink {@link org.apache.flink.core.fs.FileSystemFactory} interface for
 * Google Storage.
 */
public class GSFileSystemFactory implements FileSystemFactory {

    private static final Logger LOGGER = LoggerFactory.getLogger(GSFileSystemFactory.class);

    /** The scheme for the Google Storage file system. */
    public static final String SCHEME = "gs";

    /**
     * The Hadoop, formed by combining system Hadoop config with properties defined in Flink config.
     */
    @Nullable private org.apache.hadoop.conf.Configuration hadoopConfig;

    /** The options used for GSFileSystem and RecoverableWriter. */
    @Nullable private GSFileSystemOptions fileSystemOptions;

    /**
     * Though it isn't documented as clearly as one might expect, the methods on this object are
     * threadsafe, so we can safely share a single instance among all file system instances.
     *
     * 

Issue that discusses pending docs is here: * https://github.com/googleapis/google-cloud-java/issues/1238 * *

StackOverflow discussion: * https://stackoverflow.com/questions/54516284/google-cloud-storage-java-client-pooling */ @Nullable private Storage storage; /** Constructs the Google Storage file system factory. */ public GSFileSystemFactory() { LOGGER.info("Creating GSFileSystemFactory"); } @Override public void configure(Configuration flinkConfig) { LOGGER.info("Configuring GSFileSystemFactory with Flink configuration {}", flinkConfig); Preconditions.checkNotNull(flinkConfig); ConfigUtils.ConfigContext configContext = new RuntimeConfigContext(); // load Hadoop config this.hadoopConfig = ConfigUtils.getHadoopConfiguration(flinkConfig, configContext); LOGGER.info( "Using Hadoop configuration {}", ConfigUtils.stringifyHadoopConfig(hadoopConfig)); // construct file-system options this.fileSystemOptions = new GSFileSystemOptions(flinkConfig); LOGGER.info("Using file system options {}", fileSystemOptions); // get storage credentials and construct Storage instance Optional credentials = ConfigUtils.getStorageCredentials(hadoopConfig, configContext); StorageOptions.Builder storageOptionsBuilder = StorageOptions.newBuilder(); credentials.ifPresent(storageOptionsBuilder::setCredentials); this.storage = storageOptionsBuilder.build().getService(); } @Override public String getScheme() { return SCHEME; } @Override public FileSystem create(URI fsUri) throws IOException { LOGGER.info("Creating GSFileSystem for uri {} with options {}", fsUri, fileSystemOptions); Preconditions.checkNotNull(fsUri); // create the Google Hadoop file system GoogleHadoopFileSystem googleHadoopFileSystem = new GoogleHadoopFileSystem(); try { googleHadoopFileSystem.initialize(fsUri, hadoopConfig); } catch (IOException ex) { throw new IOException("Failed to initialize GoogleHadoopFileSystem", ex); } // create the file system return new GSFileSystem(googleHadoopFileSystem, storage, fileSystemOptions); } /** Config context implementation used at runtime. 
*/ private static class RuntimeConfigContext implements ConfigUtils.ConfigContext { @Override public Optional getenv(String name) { return Optional.ofNullable(System.getenv(name)); } @Override public org.apache.hadoop.conf.Configuration loadHadoopConfigFromDir(String configDir) { org.apache.hadoop.conf.Configuration hadoopConfig = new org.apache.hadoop.conf.Configuration(); hadoopConfig.addResource(new Path(configDir, "core-default.xml")); hadoopConfig.addResource(new Path(configDir, "core-site.xml")); hadoopConfig.reloadConfiguration(); return hadoopConfig; } @Override public GoogleCredentials loadStorageCredentialsFromFile(String credentialsPath) { try (FileInputStream credentialsStream = new FileInputStream(credentialsPath)) { return GoogleCredentials.fromStream(credentialsStream); } catch (IOException ex) { throw new RuntimeException(ex); } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy