
gobblin.azkaban.AzkabanJobLauncher Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of gobblin-azkaban Show documentation
Show all versions of gobblin-azkaban Show documentation
Gobblin Ingestion Framework
/*
* Copyright (C) 2014-2016 LinkedIn Corp. All rights reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use
* this file except in compliance with the License. You may obtain a copy of the
* License at http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software distributed
* under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
* CONDITIONS OF ANY KIND, either express or implied.
*/
package gobblin.azkaban;
import java.io.File;
import java.io.IOException;
import java.util.List;
import java.util.Properties;
import java.util.Set;
import java.util.UUID;
import javax.annotation.Nullable;
import azkaban.jobExecutor.AbstractJob;
import org.apache.hadoop.conf.Configuration;
import org.apache.log4j.Logger;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
import com.google.common.base.Preconditions;
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.io.Closer;
import gobblin.configuration.ConfigurationKeys;
import gobblin.configuration.State;
import gobblin.metrics.GobblinMetrics;
import gobblin.metrics.Tag;
import gobblin.runtime.JobException;
import gobblin.runtime.JobLauncher;
import gobblin.runtime.JobLauncherFactory;
import gobblin.runtime.app.ApplicationException;
import gobblin.runtime.app.ApplicationLauncher;
import gobblin.runtime.app.ServiceBasedAppLauncher;
import gobblin.runtime.listeners.EmailNotificationJobListener;
import gobblin.runtime.listeners.JobListener;
import gobblin.util.TimeRangeChecker;
import gobblin.util.hadoop.TokenUtils;
import static org.apache.hadoop.security.UserGroupInformation.HADOOP_TOKEN_FILE_LOCATION;
/**
 * A utility class for launching a Gobblin Hadoop MR job through Azkaban.
 *
 * <p>
 *   By default, this class will use the {@link gobblin.runtime.mapreduce.MRJobLauncher} to launch and run
 *   the Gobblin job unless a different job launcher type is explicitly specified in the job configuration
 *   using {@link ConfigurationKeys#JOB_LAUNCHER_TYPE_KEY}.
 * </p>
 *
 * <p>
 *   If the Azkaban job type is not contained in {@link #JOB_TYPES_WITH_AUTOMATIC_TOKEN}, the launcher assumes that
 *   the job does not get authentication tokens from Azkaban and it will negotiate them itself.
 *   See {@link TokenUtils#getHadoopTokens} for more information.
 * </p>
 *
 * @author Yinan Li
 */
public class AzkabanJobLauncher extends AbstractJob implements ApplicationLauncher, JobLauncher {

  private static final Logger LOG = Logger.getLogger(AzkabanJobLauncher.class);

  private static final String HADOOP_FS_DEFAULT_NAME = "fs.default.name";
  private static final String AZKABAN_LINK_JOBEXEC_URL = "azkaban.link.jobexec.url";
  private static final String MAPREDUCE_JOB_CREDENTIALS_BINARY = "mapreduce.job.credentials.binary";

  private static final String HADOOP_JAVA_JOB = "hadoopJava";
  private static final String JAVA_JOB = "java";
  private static final String GOBBLIN_JOB = "gobblin";

  /** Azkaban job types for which this launcher uses the provided Hadoop tokens instead of negotiating its own. */
  private static final Set<String> JOB_TYPES_WITH_AUTOMATIC_TOKEN =
      Sets.newHashSet(HADOOP_JAVA_JOB, JAVA_JOB, GOBBLIN_JOB);

  /** Owns every closeable created by the constructor; released in {@link #close()}. */
  private final Closer closer = Closer.create();
  private final JobLauncher jobLauncher;
  private final JobListener jobListener = new EmailNotificationJobListener();
  private final Properties props;
  private final ApplicationLauncher applicationLauncher;

  /**
   * Builds the launcher from the properties handed over by Azkaban.
   *
   * <p>
   *   The given properties are copied, then augmented with file system URIs taken from the Hadoop
   *   {@link Configuration} (only where not already set), the job tracking URL, Hadoop token locations,
   *   Azkaban metric tags and a default job launcher type.
   * </p>
   *
   * @param jobId the job id passed to {@link AbstractJob}
   * @param props the job and system configuration properties; copied, never modified
   * @throws Exception if token negotiation or the creation of the underlying launchers fails
   */
  public AzkabanJobLauncher(String jobId, Properties props) throws Exception {
    super(jobId, LOG);

    // Work on a private copy so the caller's Properties object is left untouched.
    this.props = new Properties();
    this.props.putAll(props);

    Configuration conf = new Configuration();

    // Fall back to the Hadoop default file system for any URI the job did not configure explicitly.
    String fsUri = conf.get(HADOOP_FS_DEFAULT_NAME);
    if (!Strings.isNullOrEmpty(fsUri)) {
      if (!this.props.containsKey(ConfigurationKeys.FS_URI_KEY)) {
        this.props.setProperty(ConfigurationKeys.FS_URI_KEY, fsUri);
      }
      if (!this.props.containsKey(ConfigurationKeys.STATE_STORE_FS_URI_KEY)) {
        this.props.setProperty(ConfigurationKeys.STATE_STORE_FS_URI_KEY, fsUri);
      }
    }

    // Set the job tracking URL to point to the Azkaban job execution link URL
    this.props.setProperty(ConfigurationKeys.JOB_TRACKING_URL_KEY,
        Strings.nullToEmpty(conf.get(AZKABAN_LINK_JOBEXEC_URL)));

    if (props.containsKey(JOB_TYPE) && JOB_TYPES_WITH_AUTOMATIC_TOKEN.contains(props.getProperty(JOB_TYPE))) {
      // Necessary for compatibility with Azkaban's hadoopJava job type
      // http://azkaban.github.io/azkaban/docs/2.5/#hadoopjava-type
      LOG.info(
          "Job type " + props.getProperty(JOB_TYPE) + " provides Hadoop tokens automatically. Using provided tokens.");
      String providedTokenFile = System.getenv(HADOOP_TOKEN_FILE_LOCATION);
      if (providedTokenFile != null) {
        this.props.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, providedTokenFile);
      }
    } else {
      // The job type is not known to supply tokens: obtain them ourselves and publish the token file
      // location through both system properties and the job properties.
      LOG.info(String.format("Job type %s does not provide Hadoop tokens. Negotiating Hadoop tokens.",
          props.getProperty(JOB_TYPE)));
      File tokenFile = TokenUtils.getHadoopTokens(new State(props));
      String tokenFilePath = tokenFile.getAbsolutePath();
      System.setProperty(HADOOP_TOKEN_FILE_LOCATION, tokenFilePath);
      System.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, tokenFilePath);
      this.props.setProperty(MAPREDUCE_JOB_CREDENTIALS_BINARY, tokenFilePath);
      this.props.setProperty("env." + HADOOP_TOKEN_FILE_LOCATION, tokenFilePath);
    }

    List<Tag<?>> tags = Lists.newArrayList();
    tags.addAll(Tag.fromMap(AzkabanTags.getAzkabanTags()));
    GobblinMetrics.addCustomTagsToProperties(this.props, tags);

    // If the job launcher type is not specified in the job configuration,
    // override the default to use the MAPREDUCE launcher.
    if (!this.props.containsKey(ConfigurationKeys.JOB_LAUNCHER_TYPE_KEY)) {
      this.props.setProperty(ConfigurationKeys.JOB_LAUNCHER_TYPE_KEY,
          JobLauncherFactory.JobLauncherType.MAPREDUCE.toString());
    }

    // Create a JobLauncher instance depending on the configuration. The same properties object is
    // used for both system and job configuration properties because Azkaban puts configuration
    // properties in the .job file and in the .properties file into the same Properties object.
    this.jobLauncher = this.closer.register(JobLauncherFactory.newJobLauncher(this.props, this.props));

    // Since Java classes cannot extend multiple classes and Azkaban jobs must extend AbstractJob, we must use
    // composition versus extending ServiceBasedAppLauncher
    this.applicationLauncher =
        this.closer.register(new ServiceBasedAppLauncher(this.props, "Azkaban-" + UUID.randomUUID()));
  }

  /**
   * Starts the application launcher and launches the job, provided {@link #isCurrentTimeInRange()} allows it.
   * The application launcher is stopped and all registered resources are closed afterwards, even on failure.
   * When the current time is outside the configured range this method does nothing.
   *
   * @throws Exception if starting, launching, stopping or closing fails
   */
  @Override
  public void run() throws Exception {
    if (isCurrentTimeInRange()) {
      try {
        start();
        launchJob(this.jobListener);
      } finally {
        try {
          stop();
        } finally {
          close();
        }
      }
    }
  }

  /**
   * Cancels the job, then stops the application launcher and closes all registered resources,
   * even when the cancellation itself fails.
   *
   * @throws Exception if cancelling, stopping or closing fails
   */
  @Override
  public void cancel() throws Exception {
    try {
      cancelJob(this.jobListener);
    } finally {
      try {
        stop();
      } finally {
        close();
      }
    }
  }

  /** Delegates to the wrapped {@link ApplicationLauncher}. */
  @Override
  public void start() throws ApplicationException {
    this.applicationLauncher.start();
  }

  /** Delegates to the wrapped {@link ApplicationLauncher}. */
  @Override
  public void stop() throws ApplicationException {
    this.applicationLauncher.stop();
  }

  /** Delegates to the wrapped {@link JobLauncher}. */
  @Override
  public void launchJob(@Nullable JobListener jobListener) throws JobException {
    this.jobLauncher.launchJob(jobListener);
  }

  /** Delegates to the wrapped {@link JobLauncher}. */
  @Override
  public void cancelJob(@Nullable JobListener jobListener) throws JobException {
    this.jobLauncher.cancelJob(jobListener);
  }

  /** Closes everything registered with the internal {@link Closer}. */
  @Override
  public void close() throws IOException {
    this.closer.close();
  }

  /**
   * Uses the properties {@link ConfigurationKeys#AZKABAN_EXECUTION_DAYS_LIST},
   * {@link ConfigurationKeys#AZKABAN_EXECUTION_TIME_RANGE}, and
   * {@link TimeRangeChecker#isTimeInRange(List, String, String, DateTime)} to determine if the current job should
   * continue its execution based on the extra scheduled parameters defined in the config.
   *
   * <p>
   *   The check is only applied when both properties are set; otherwise the job is always allowed to run.
   * </p>
   *
   * @return true if this job should be launched, false otherwise.
   * @throws IllegalArgumentException if the time range property does not consist of exactly two entries
   */
  private boolean isCurrentTimeInRange() {
    // Properties#contains is Hashtable#contains, which looks at VALUES; containsKey is what is needed
    // to detect whether the two scheduling properties have been configured.
    if (this.props.containsKey(ConfigurationKeys.AZKABAN_EXECUTION_DAYS_LIST)
        && this.props.containsKey(ConfigurationKeys.AZKABAN_EXECUTION_TIME_RANGE)) {

      Splitter splitter = Splitter.on(",").omitEmptyStrings().trimResults();

      List<String> executionTimeRange =
          splitter.splitToList(this.props.getProperty(ConfigurationKeys.AZKABAN_EXECUTION_TIME_RANGE));
      List<String> executionDays =
          splitter.splitToList(this.props.getProperty(ConfigurationKeys.AZKABAN_EXECUTION_DAYS_LIST));

      // The message names the property that is actually being validated (the time range).
      Preconditions.checkArgument(executionTimeRange.size() == 2, "The property "
          + ConfigurationKeys.AZKABAN_EXECUTION_TIME_RANGE + " should be a comma separated list of two entries");

      return TimeRangeChecker.isTimeInRange(executionDays, executionTimeRange.get(0), executionTimeRange.get(1),
          new DateTime(DateTimeZone.forID(ConfigurationKeys.PST_TIMEZONE_NAME)));
    }

    return true;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy