All Downloads are FREE. Search and download functionalities are using the official Maven repository.

au.csiro.pathling.spark.Spark Maven / Gradle / Ivy

There is a newer version: 7.0.1
Show newest version
/*
 * Copyright 2023 Commonwealth Scientific and Industrial Research
 * Organisation (CSIRO) ABN 41 687 119 230.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package au.csiro.pathling.spark;

import static java.util.Objects.requireNonNull;

import au.csiro.pathling.async.SparkListener;
import au.csiro.pathling.config.ServerConfiguration;
import java.util.Arrays;
import java.util.List;
import java.util.Objects;
import java.util.Optional;
import java.util.function.Consumer;
import javax.annotation.Nonnull;
import lombok.extern.slf4j.Slf4j;
import org.apache.spark.sql.SparkSession;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.context.annotation.Bean;
import org.springframework.context.annotation.Profile;
import org.springframework.core.env.AbstractEnvironment;
import org.springframework.core.env.EnumerablePropertySource;
import org.springframework.core.env.Environment;
import org.springframework.core.env.MutablePropertySources;
import org.springframework.core.env.PropertyResolver;
import org.springframework.stereotype.Component;

/**
 * Provides an Apache Spark session for use by the Pathling server.
 *
 * @author John Grimes
 */
@Component
@Profile({"core", "spark"})
@Slf4j
public class Spark {

  /**
   * @param configuration a {@link ServerConfiguration} object containing the parameters to use in
   * the creation
   * @param environment Spring {@link Environment} from which to harvest Spark configuration
   * @param sparkListener a {@link SparkListener} that is used to monitor progress of jobs
   * @param sparkConfigurers a list of {@link SparkConfigurer} that should use to configure spark
   * session
   * @return A shiny new {@link SparkSession}
   */
  @Bean(destroyMethod = "stop")
  @ConditionalOnMissingBean
  @Nonnull
  public static SparkSession build(@Nonnull final ServerConfiguration configuration,
      @Nonnull final Environment environment,
      @Nonnull final Optional sparkListener,
      @Nonnull final List sparkConfigurers) {
    log.debug("Creating Spark session");

    // Pass through Spark configuration.
    resolveThirdPartyConfiguration(environment, List.of("spark."),
        property -> System.setProperty(property,
            requireNonNull(environment.getProperty(property))));

    final SparkSession spark = SparkSession.builder()
        .appName(configuration.getSpark().getAppName())
        .getOrCreate();
    sparkListener.ifPresent(l -> spark.sparkContext().addSparkListener(l));

    // Configure user defined strategy and functions.
    for (final SparkConfigurer configurer : sparkConfigurers) {
      configurer.configure(spark);
    }

    // Pass through Hadoop AWS configuration.
    resolveThirdPartyConfiguration(environment, List.of("fs.s3a."),
        property -> spark.sparkContext().hadoopConfiguration().set(property,
            requireNonNull(environment.getProperty(property))));

    return spark;
  }

  private static void resolveThirdPartyConfiguration(@Nonnull final PropertyResolver resolver,
      @Nonnull final List prefixes, @Nonnull final Consumer setter) {
    // This goes through the properties within the Spring configuration and invokes the provided 
    // setter function for each property that matches one of the supplied prefixes.
    final MutablePropertySources propertySources = ((AbstractEnvironment) resolver)
        .getPropertySources();
    propertySources.stream()
        .filter(propertySource -> propertySource instanceof EnumerablePropertySource)
        .flatMap(propertySource -> Arrays
            .stream(((EnumerablePropertySource) propertySource).getPropertyNames()))
        .filter(property -> prefixes.stream().anyMatch(property::startsWith))
        .forEach(setter);
  }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy