org.apache.oodt.cas.pge.util.XmlHelper Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cas-pge Show documentation
Show all versions of cas-pge Show documentation
Allows data processing jobs not written in conformance with the
PCS PGE (Production Generation Executive) interface to be run within the PCS.
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.oodt.cas.pge.util;
//OODT static imports
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.ARGS_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.CMD_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.CREATE_BEFORE_EXEC_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.CUSTOM_METADATA_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.DIR_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.DIR_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.DYN_INPUT_FILES_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.ENV_REPLACE_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.ENV_REPLACE_NO_RECUR_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.EXE_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FILE_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FILE_STAGING_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FILE_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FILES_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.FORCE_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.IMPORT_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.KEYREF_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.KEY_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.KEY_GEN_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.METADATA_KEY_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.METADATA_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.MET_FILE_WRITER_CLASS_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.NAME_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.NAMING_EXPR_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.NAMESPACE_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.OUTPUT_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.PATH_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.RENAMING_CONV_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.REGEX_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.SHELL_TYPE_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.SPLIT_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.STAGE_FILES_TAG;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.VAL_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.WORKFLOW_MET_ATTR;
import static org.apache.oodt.cas.pge.config.PgeConfigMetKeys.WRITER_CLASS_ATTR;
import static org.apache.oodt.cas.pge.metadata.PgeTaskMetKeys.QUERY_FILE_MANAGER_URL;
//JDK imports
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.net.MalformedURLException;
import java.net.URL;
import java.util.Arrays;
import java.util.List;
//OODT imports
import org.apache.oodt.cas.filemgr.system.XmlRpcFileManagerClient;
import org.apache.oodt.cas.filemgr.util.QueryUtils;
import org.apache.oodt.cas.filemgr.util.SqlParser;
import org.apache.oodt.cas.metadata.Metadata;
import org.apache.oodt.cas.metadata.filenaming.PathUtilsNamingConvention;
import org.apache.oodt.cas.metadata.util.PathUtils;
import org.apache.oodt.cas.pge.config.DynamicConfigFile;
import org.apache.oodt.cas.pge.config.OutputDir;
import org.apache.oodt.cas.pge.config.RegExprOutputFiles;
import org.apache.oodt.commons.xml.XMLUtils;
//DOM imports
import org.w3c.dom.DOMException;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
//Google imports
import com.google.common.base.Splitter;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
/**
* Help class with method for parsing XMLFilePgeConfigBuilder config XML file.
*
* @author bfoster (Brian Foster)
* @author mattmann (Chris Mattmann)
*/
public class XmlHelper {
private XmlHelper() {
}
public static Element getRootElement(String xmlFilePath)
throws FileNotFoundException {
return XMLUtils.getDocumentRoot(
new FileInputStream(new File(xmlFilePath)))
.getDocumentElement();
}
public static List> getImports(Element elem,
Metadata metadata) throws Exception {
List> imports = Lists.newArrayList();
NodeList nodeList = elem.getElementsByTagName(IMPORT_TAG);
for (int i = 0; i < nodeList.getLength(); i++) {
Element importElem = (Element) nodeList.item(i);
String namespace = getNamespace(importElem, metadata);
String file = getFile(importElem, metadata);
imports.add(new Pair(namespace, file));
}
return imports;
}
public static String getNamespace(Element elem, Metadata metadata)
throws Exception {
String namespace = elem.getAttribute(NAMESPACE_ATTR);
if (!Strings.isNullOrEmpty(namespace)) {
return fillIn(namespace, metadata, false);
} else {
return null;
}
}
public static String getFile(Element elem, Metadata metadata)
throws Exception {
String file = elem.getAttribute(FILE_ATTR);
if (!Strings.isNullOrEmpty(file)) {
return fillIn(file, metadata, false);
} else {
return null;
}
}
public static Element getCustomMetadataElement(Element root)
throws Exception {
NodeList nodes = root.getElementsByTagName(CUSTOM_METADATA_TAG);
if (nodes.getLength() == 0) {
return null;
} else if (nodes.getLength() == 1) {
return (Element) nodes.item(0);
} else {
throw new Exception("Found more than one '" + CUSTOM_METADATA_TAG
+ "' element");
}
}
public static List getMetadataElements(Element customMetadataElem) {
NodeList metElemsNodeList = customMetadataElem
.getElementsByTagName(METADATA_TAG);
List metElems = Lists.newArrayList();
for (int i = 0; i < metElemsNodeList.getLength(); i++) {
metElems.add((Element) metElemsNodeList.item(i));
}
return metElems;
}
public static String getMetadataKey(Element metElem, Metadata metadata)
throws Exception {
String key = metElem.getAttribute(KEY_ATTR);
// no key attr, so check for key_gen attr.
if (Strings.isNullOrEmpty(key)) {
key = fillIn(metElem.getAttribute(KEY_GEN_ATTR), metadata);
}
// if still no key value, then fail.
if (Strings.isNullOrEmpty(key)) {
throw new Exception("Must specify either metadata attr '"
+ KEY_ATTR + "' or '" + KEY_GEN_ATTR + "'");
}
// else success!
return key;
}
public static boolean isEnvReplaceNoRecur(Element elem, Metadata metadata)
throws Exception {
String isEnvReplaceNoRecur = elem
.getAttribute(ENV_REPLACE_NO_RECUR_ATTR);
if (Strings.isNullOrEmpty(isEnvReplaceNoRecur)) {
return false;
} else {
return isEnvReplaceNoRecur.trim().toLowerCase().equals("true");
}
}
public static boolean isEnvReplace(Element elem, Metadata metadata)
throws Exception {
String isEnvReplace = elem.getAttribute(ENV_REPLACE_ATTR);
if (Strings.isNullOrEmpty(isEnvReplace)) {
return true;
} else {
return !isEnvReplace.trim().toLowerCase().equals("false");
}
}
public static boolean isMultiValue(Element elem, Metadata metadata)
throws Exception {
return Boolean.parseBoolean(fillIn(elem.getAttribute(SPLIT_ATTR),
metadata));
}
public static List getMetadataValues(Element elem, Metadata metadata)
throws Exception {
List values = Lists.newArrayList();
// Read val attr.
String value = elem.getAttribute(VAL_ATTR);
// Check if val tag was not specified see if value was given as element
// text.
if (Strings.isNullOrEmpty(value)) {
value = elem.getTextContent();
}
// If value was found.
if (!Strings.isNullOrEmpty(value)) {
// Is multi-value so split up value.
if (isMultiValue(elem, metadata)) {
for (String v : Splitter.on(",").split(value)) {
// Check for envReplace and perform met replacement on value
// if set.
if (isEnvReplaceNoRecur(elem, metadata)) {
values.add(fillIn(v, metadata, false));
} else if (isEnvReplace(elem, metadata)) {
values.add(fillIn(v, metadata));
}
}
// Is scalar
} else {
// Check for envReplace and perform met replacement on value if
// set.
if (isEnvReplaceNoRecur(elem, metadata)) {
value = fillIn(value, metadata, false);
} else if (isEnvReplace(elem, metadata)) {
value = fillIn(value, metadata);
}
values.add(value);
}
}
return values;
}
public static String getMetadataKeyRef(Element elem, Metadata metadata)
throws Exception {
String keyRef = elem.getAttribute(KEYREF_ATTR);
if (!Strings.isNullOrEmpty(keyRef)) {
// Check for envReplace and perform met replacement on value if set.
if (isEnvReplaceNoRecur(elem, metadata)) {
keyRef = fillIn(keyRef, metadata, false);
} else if (isEnvReplace(elem, metadata)) {
keyRef = fillIn(keyRef, metadata);
}
return keyRef;
} else {
return null;
}
}
public static boolean isWorkflowMetKey(Element elem, Metadata metadata)
throws Exception {
return Boolean.parseBoolean(fillIn(
elem.getAttribute(WORKFLOW_MET_ATTR), metadata, false));
}
public static String getPath(Element elem, Metadata metadata)
throws Exception {
return fillIn(elem.getAttribute(PATH_ATTR), metadata, false);
}
public static String getWriter(Element elem, Metadata metadata)
throws Exception {
return fillIn(elem.getAttribute(WRITER_CLASS_ATTR), metadata, false);
}
public static List getArgs(Element elem, Metadata metadata)
throws Exception {
List args = Lists.newArrayList();
for (String arg : Splitter.on(",").split(elem.getAttribute(ARGS_ATTR))) {
args.add(fillIn(arg, metadata, false));
}
if (args.size() == 1 && Strings.isNullOrEmpty(args.get(0))) {
return Lists.newArrayList();
} else {
return args;
}
}
public static List getDynamicConfigFiles(Element elem,
Metadata metadata) throws Exception {
List dynamicConfigFiles = Lists.newArrayList();
NodeList nodeList = elem.getElementsByTagName(DYN_INPUT_FILES_TAG);
// Check if dynInput element exists.
if (nodeList.getLength() > 0) {
Element dynamicConfigFilesElem = (Element) nodeList.item(0);
nodeList = dynamicConfigFilesElem.getElementsByTagName(FILE_TAG);
// Load each dynamic input file information.
for (int i = 0; i < nodeList.getLength(); i++) {
Element fileElem = (Element) nodeList.item(i);
String path = getPath(fileElem, metadata);
String writer = getWriter(fileElem, metadata);
List args = getArgs(fileElem, metadata);
dynamicConfigFiles.add(new DynamicConfigFile(path, writer, args
.toArray(new String[0])));
}
}
return dynamicConfigFiles;
}
public static Element getExe(Element elem) throws Exception {
NodeList nodeList = elem.getElementsByTagName(EXE_TAG);
if (nodeList.getLength() > 1) {
throw new Exception("Can only specify '" + EXE_TAG + "' once!");
} else if (nodeList.getLength() == 1) {
return (Element) nodeList.item(0);
} else {
return null;
}
}
public static String getDir(Element elem, Metadata metadata)
throws MalformedURLException, Exception {
return fillIn(elem.getAttribute(DIR_ATTR), metadata);
}
public static String getShellType(Element elem, Metadata metadata)
throws MalformedURLException, Exception {
return fillIn(elem.getAttribute(SHELL_TYPE_ATTR), metadata);
}
public static List getExeCmds(Element elem, Metadata metadata)
throws MalformedURLException, DOMException, Exception {
List exeCmds = Lists.newArrayList();
NodeList nodeList = elem.getElementsByTagName(CMD_TAG);
for (int i = 0; i < nodeList.getLength(); i++) {
Element cmdElem = (Element) nodeList.item(i);
String exeCmd = cmdElem.getTextContent();
if (isEnvReplaceNoRecur(cmdElem, metadata)) {
exeCmd = fillIn(exeCmd, metadata, false);
} else if (isEnvReplace(cmdElem, metadata)) {
exeCmd = fillIn(exeCmd, metadata);
}
exeCmds.add(exeCmd);
}
return exeCmds;
}
public static Element getFileStaging(Element elem) throws Exception {
NodeList nodeList = elem.getElementsByTagName(FILE_STAGING_TAG);
if (nodeList.getLength() > 1) {
throw new Exception("Can only specify '" + FILE_STAGING_TAG
+ "' once!");
} else if (nodeList.getLength() == 1) {
return (Element) nodeList.item(0);
} else {
return null;
}
}
public static boolean isForceStage(Element elem, Metadata metadata)
throws Exception {
return Boolean.parseBoolean(fillIn(elem.getAttribute(FORCE_ATTR),
metadata));
}
public static String getFileStagingMetadataKey(Element elem,
Metadata metadata) throws Exception {
return fillIn(elem.getAttribute(METADATA_KEY_ATTR), metadata);
}
public static List getStageFilesMetKeys(Element elem,
Metadata metadata) throws Exception {
List metKeys = Lists.newArrayList();
NodeList nodeList = elem.getElementsByTagName(STAGE_FILES_TAG);
for (int i = 0; i < nodeList.getLength(); i++) {
Element stageFilesElem = (Element) nodeList.item(i);
metKeys.add(getFileStagingMetadataKey(stageFilesElem, metadata));
}
return metKeys;
}
public static Element getOutput(Element elem) throws Exception {
NodeList nodeList = elem.getElementsByTagName(OUTPUT_TAG);
if (nodeList.getLength() > 1) {
throw new Exception("Can only specify '" + OUTPUT_TAG + "' once!");
} else if (nodeList.getLength() == 1) {
return (Element) nodeList.item(0);
} else {
return null;
}
}
public static boolean isCreateBeforeExe(Element elem, Metadata metadata)
throws Exception {
return Boolean.parseBoolean(fillIn(
elem.getAttribute(CREATE_BEFORE_EXEC_ATTR), metadata));
}
public static List getOuputDirs(Element elem, Metadata metadata)
throws Exception {
List outputDirs = Lists.newArrayList();
NodeList nodeList = elem.getElementsByTagName(DIR_TAG);
for (int i = 0; i < nodeList.getLength(); i++) {
Element outputDirElem = (Element) nodeList.item(i);
String path = getPath(outputDirElem, metadata);
boolean createBeforeExe = isCreateBeforeExe(outputDirElem, metadata);
OutputDir outputDir = new OutputDir(path, createBeforeExe);
getRegExpOutputFiles(outputDirElem, metadata, outputDir);
outputDirs.add(outputDir);
}
return outputDirs;
}
public static void getRegExpOutputFiles(Element elem, Metadata metadata,
OutputDir outputDir) throws Exception {
NodeList fileList = elem.getElementsByTagName(FILES_TAG);
for (int j = 0; j < fileList.getLength(); j++) {
Element fileElement = (Element) fileList.item(j);
String outputFile = fileElement.getAttribute(REGEX_ATTR);
if (outputFile.equals("")){
outputFile = fillIn(fileElement.getAttribute(NAME_ATTR),
metadata);
}
PathUtilsNamingConvention renamingConvention = null;
NodeList renamingConvNodes = fileElement
.getElementsByTagName(RENAMING_CONV_TAG);
if (renamingConvNodes.getLength() > 0) {
Element renamingElement = (Element) renamingConvNodes.item(0);
String namingExpr = renamingElement
.getAttribute(NAMING_EXPR_ATTR);
if (renamingElement.getAttribute(ENV_REPLACE_ATTR)
.toLowerCase().equals("true")){
namingExpr = fillIn(namingExpr, metadata, false);
}
else if (!renamingElement.getAttribute(ENV_REPLACE_ATTR)
.toLowerCase().equals("false")){
namingExpr = fillIn(namingExpr, metadata);
}
renamingConvention = new PathUtilsNamingConvention();
renamingConvention.setNamingConv(namingExpr);
NodeList metadataNodes = renamingElement
.getElementsByTagName(METADATA_TAG);
for (int k = 0; k < metadataNodes.getLength(); k++) {
renamingConvention.addTmpReplaceMet(((Element) metadataNodes
.item(k)).getAttribute(KEY_ATTR), Arrays
.asList(((Element) metadataNodes.item(k))
.getAttribute(VAL_ATTR).split(",")));
}
}
outputDir
.addRegExprOutputFiles(new RegExprOutputFiles(outputFile,
fillIn(fileElement
.getAttribute(MET_FILE_WRITER_CLASS_ATTR),
metadata), renamingConvention, (Object[]) fillIn(
fileElement.getAttribute(ARGS_ATTR),
metadata).split(",")));
}
}
public static String fillIn(String value, Metadata inputMetadata)
throws Exception {
return fillIn(value, inputMetadata, true);
}
public static String fillIn(String value, Metadata inputMetadata,
boolean envReplaceRecur) throws Exception {
try {
while ((value = PathUtils
.doDynamicReplacement(value, inputMetadata)).contains("[")
&& envReplaceRecur)
;
if (value.toUpperCase().matches(
"^\\s*SQL\\s*\\(.*\\)\\s*\\{.*\\}\\s*$"))
value = QueryUtils
.getQueryResultsAsString(new XmlRpcFileManagerClient(
new URL(inputMetadata
.getMetadata(QUERY_FILE_MANAGER_URL
.getName())))
.complexQuery(SqlParser
.parseSqlQueryMethod(value)));
return value;
} catch (Exception e) {
throw new Exception("Failed to parse value: " + value, e);
}
}
}