![JAR search and dependency download from the Maven repository](/logo.png)
azkaban.project.DirectoryYamlFlowLoader Maven / Gradle / Ivy
/*
* Copyright 2017 LinkedIn Corp.
*
* Licensed under the Apache License, Version 2.0 (the “License”); you may not
* use this file except in compliance with the License. You may obtain a copy of
* the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an “AS IS” BASIS, WITHOUT
* WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
* License for the specific language governing permissions and limitations under
* the License.
*/
package azkaban.project;
import azkaban.Constants;
import azkaban.flow.ConditionOnJobStatus;
import azkaban.flow.Edge;
import azkaban.flow.Flow;
import azkaban.flow.FlowProps;
import azkaban.flow.Node;
import azkaban.project.FlowLoaderUtils.DirFilter;
import azkaban.project.FlowLoaderUtils.SuffixFilter;
import azkaban.project.validator.ValidationReport;
import azkaban.utils.Props;
import com.google.common.collect.ImmutableList;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Loads yaml files to flows from project directory.
*/
public class DirectoryYamlFlowLoader implements FlowLoader {
// Pattern to match job variables in condition expressions: ${jobName:variable}
public static final Pattern CONDITION_VARIABLE_REPLACEMENT_PATTERN = Pattern
.compile("\\$\\{([^:{}]+):([^:{}]+)\\}");
// Pattern to match conditionOnJobStatus macros, e.g. one_success, all_done
public static final Pattern CONDITION_ON_JOB_STATUS_PATTERN =
Pattern.compile("(?i)\\b(" + StringUtils.join(ConditionOnJobStatus.values(), "|") + ")\\b");
private static final Logger logger = LoggerFactory.getLogger(DirectoryYamlFlowLoader.class);
// Pattern to match a number or a string, e.g. 1234, "hello", 'foo'
private static final Pattern DIGIT_STRING_PATTERN = Pattern.compile("\\d+|'.*'|\".*\"");
// Valid operators in condition expressions: &&, ||, ==, !=, >, >=, <, <=
private static final String VALID_CONDITION_OPERATORS = "&&|\\|\\||==|!=|>|>=|<|<=";
private final Props props;
private final Set errors = new HashSet<>();
private final Map flowMap = new HashMap<>();
private final Map> edgeMap = new HashMap<>();
private final Map jobPropsMap = new HashMap<>();
/**
* Creates a new DirectoryYamlFlowLoader.
*
* @param props Properties to add.
*/
public DirectoryYamlFlowLoader(final Props props) {
this.props = props;
}
/**
* Returns the flow map constructed from the loaded flows.
*
* @return Map of flow name to Flow.
*/
@Override
public Map getFlowMap() {
return this.flowMap;
}
/**
* Returns errors caught when loading flows.
*
* @return Set of error strings.
*/
@Override
public Set getErrors() {
return this.errors;
}
/**
* Returns the edge map constructed from the loaded flows.
*
* @return Map of flow name to all its Edges.
*/
public Map> getEdgeMap() {
return this.edgeMap;
}
/**
* Loads all project flows from the directory.
*
* @param project The project.
* @param projectDir The directory to load flows from.
* @return the validation report.
*/
@Override
public ValidationReport loadProjectFlow(final Project project, final File projectDir) {
convertYamlFiles(projectDir);
FlowLoaderUtils.checkJobProperties(project.getId(), this.props, this.jobPropsMap, this.errors);
return FlowLoaderUtils.generateFlowLoaderReport(this.errors);
}
private void convertYamlFiles(final File projectDir) {
// Todo jamiesjc: convert project yaml file.
for (final File file : projectDir.listFiles(new SuffixFilter(Constants.FLOW_FILE_SUFFIX))) {
final NodeBeanLoader loader = new NodeBeanLoader();
try {
final NodeBean nodeBean = loader.load(file);
if (!loader.validate(nodeBean)) {
this.errors.add("Failed to validate nodeBean for " + file.getName()
+ ". Duplicate nodes found or dependency undefined.");
} else {
final AzkabanFlow azkabanFlow = (AzkabanFlow) loader.toAzkabanNode(nodeBean);
if (this.flowMap.containsKey(azkabanFlow.getName())) {
this.errors.add("Duplicate flows found in the project with name " + azkabanFlow
.getName());
} else {
final Flow flow = convertAzkabanFlowToFlow(azkabanFlow, azkabanFlow.getName(), file);
this.flowMap.put(flow.getId(), flow);
}
}
} catch (final Exception e) {
this.errors.add("Error loading flow yaml file " + file.getName() + ":"
+ e.getMessage());
}
}
for (final File file : projectDir.listFiles(new DirFilter())) {
convertYamlFiles(file);
}
}
private Flow convertAzkabanFlowToFlow(final AzkabanFlow azkabanFlow, final String flowName,
final File flowFile) {
final Flow flow = new Flow(flowName);
flow.setAzkabanFlowVersion(Constants.AZKABAN_FLOW_VERSION_2_0);
final Props props = azkabanFlow.getProps();
FlowLoaderUtils.addEmailPropsToFlow(flow, props);
props.setSource(flowFile.getName());
flow.addAllFlowProperties(ImmutableList.of(new FlowProps(props)));
// Convert azkabanNodes to nodes inside the flow.
azkabanFlow.getNodes().values().stream()
.map(n -> convertAzkabanNodeToNode(n, flowName, flowFile, azkabanFlow))
.forEach(n -> flow.addNode(n));
// Add edges for the flow.
buildFlowEdges(azkabanFlow, flowName);
if (this.edgeMap.containsKey(flowName)) {
flow.addAllEdges(this.edgeMap.get(flowName));
}
// Todo jamiesjc: deprecate startNodes, endNodes and numLevels, and remove below method finally.
// Blow method will construct startNodes, endNodes and numLevels for the flow.
flow.initialize();
return flow;
}
private Node convertAzkabanNodeToNode(final AzkabanNode azkabanNode, final String flowName,
final File flowFile, final AzkabanFlow azkabanFlow) {
final Node node = new Node(azkabanNode.getName());
node.setType(azkabanNode.getType());
validateCondition(node, azkabanNode, azkabanFlow);
node.setCondition(azkabanNode.getCondition());
node.setPropsSource(flowFile.getName());
node.setJobSource(flowFile.getName());
if (azkabanNode.getType().equals(Constants.FLOW_NODE_TYPE)) {
final String embeddedFlowId = flowName + Constants.PATH_DELIMITER + node.getId();
node.setEmbeddedFlowId(embeddedFlowId);
final Flow flowNode = convertAzkabanFlowToFlow((AzkabanFlow) azkabanNode, embeddedFlowId,
flowFile);
flowNode.setEmbeddedFlow(true);
flowNode.setCondition(node.getCondition());
this.flowMap.put(flowNode.getId(), flowNode);
}
this.jobPropsMap
.put(flowName + Constants.PATH_DELIMITER + node.getId(), azkabanNode.getProps());
return node;
}
private void buildFlowEdges(final AzkabanFlow azkabanFlow, final String flowName) {
// Recursive stack to record searched nodes. Used for detecting dependency cycles.
final HashSet recStack = new HashSet<>();
// Nodes that have already been visited and added edges.
final HashSet visited = new HashSet<>();
for (final AzkabanNode node : azkabanFlow.getNodes().values()) {
addEdges(node, azkabanFlow, flowName, recStack, visited);
}
}
private void addEdges(final AzkabanNode node, final AzkabanFlow azkabanFlow,
final String flowName, final HashSet recStack, final HashSet visited) {
if (!visited.contains(node.getName())) {
recStack.add(node.getName());
visited.add(node.getName());
final List dependsOnList = node.getDependsOn();
for (final String parent : dependsOnList) {
final Edge edge = new Edge(parent, node.getName());
if (!this.edgeMap.containsKey(flowName)) {
this.edgeMap.put(flowName, new ArrayList<>());
}
this.edgeMap.get(flowName).add(edge);
if (recStack.contains(parent)) {
// Cycles found, including self cycle.
edge.setError("Cycles found.");
this.errors.add("Cycles found at " + edge.getId());
} else {
// Valid edge. Continue to process the parent node recursively.
addEdges(azkabanFlow.getNode(parent), azkabanFlow, flowName, recStack, visited);
}
}
recStack.remove(node.getName());
}
}
private void validateCondition(final Node node, final AzkabanNode azkabanNode,
final AzkabanFlow azkabanFlow) {
boolean foundConditionOnJobStatus = false;
final String condition = azkabanNode.getCondition();
if (condition == null) {
return;
}
// First, remove all the whitespaces and parenthesis ().
final String replacedCondition = condition.replaceAll("\\s+|\\(|\\)", "");
// Second, split the condition by operators &&, ||, ==, !=, >, >=, <, <=
final String[] operands = replacedCondition.split(VALID_CONDITION_OPERATORS);
// Third, check whether all the operands are valid: only conditionOnJobStatus macros, numbers,
// strings, and variable substitution ${jobName:param} are allowed.
for (int i = 0; i < operands.length; i++) {
final Matcher matcher = CONDITION_ON_JOB_STATUS_PATTERN.matcher(operands[i]);
if (matcher.matches()) {
this.logger.info("Operand " + operands[i] + " is a condition on job status.");
if (foundConditionOnJobStatus) {
this.errors.add("Invalid condition for " + node.getId()
+ ": cannot combine more than one conditionOnJobStatus macros.");
}
foundConditionOnJobStatus = true;
node.setConditionOnJobStatus(ConditionOnJobStatus.fromString(matcher.group(1)));
} else {
if (operands[i].startsWith("!")) {
// Remove the operator '!' from the operand.
operands[i] = operands[i].substring(1);
}
if (operands[i].equals("")) {
this.errors
.add("Invalid condition for " + node.getId() + ": operand is an empty string.");
} else if (!DIGIT_STRING_PATTERN.matcher(operands[i]).matches()) {
validateVariableSubstitution(operands[i], azkabanNode, azkabanFlow);
}
}
}
}
private void validateVariableSubstitution(final String operand, final AzkabanNode azkabanNode,
final AzkabanFlow azkabanFlow) {
final Matcher matcher = CONDITION_VARIABLE_REPLACEMENT_PATTERN.matcher(operand);
if (matcher.matches()) {
final String jobName = matcher.group(1);
final AzkabanNode conditionNode = azkabanFlow.getNode(jobName);
if (conditionNode == null) {
this.errors.add("Invalid condition for " + azkabanNode.getName() + ": " + jobName
+ " doesn't exist in the flow.");
}
// If a job defines condition on its descendant nodes, then that condition is invalid.
else if (isDescendantNode(conditionNode, azkabanNode, azkabanFlow)) {
this.errors.add("Invalid condition for " + azkabanNode.getName()
+ ": should not define condition on its descendant node " + jobName + ".");
}
} else {
this.errors.add("Invalid condition for " + azkabanNode.getName()
+ ": cannot resolve the condition. Please check the syntax for supported conditions.");
}
}
private boolean isDescendantNode(final AzkabanNode current, final AzkabanNode target,
final AzkabanFlow azkabanFlow) {
// Check if the current node is a descendant of the target node.
if (current == null || target == null) {
return false;
} else if (current.getDependsOn() == null) {
return false;
} else if (current.getDependsOn().contains(target.getName())) {
return true;
} else {
for (final String nodeName : current.getDependsOn()) {
if (isDescendantNode(azkabanFlow.getNode(nodeName), target, azkabanFlow)) {
return true;
}
}
}
return false;
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy