
com.movielabs.mddflib.util.xml.StructureValidation Maven / Gradle / Ivy
/**
* Copyright (c) 2017 MovieLabs
* Permission is hereby granted, free of charge, to any person obtaining
* a copy of this software and associated documentation files (the
* "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish,
* distribute, sublicense, and/or sell copies of the Software, and to
* permit persons to whom the Software is furnished to do so, subject to
* the following conditions:
*
* The above copyright notice and this permission notice shall be
* included in all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
* NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
* LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
* OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
* WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*/
package com.movielabs.mddflib.util.xml;
import java.io.File;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.jdom2.Attribute;
import org.jdom2.Element;
import org.jdom2.Namespace;
import org.jdom2.filter.Filters;
import org.jdom2.xpath.XPathExpression;
import org.jdom2.xpath.XPathFactory;
import com.movielabs.mddf.MddfContext;
import com.movielabs.mddf.MddfContext.FILE_FMT;
import com.movielabs.mddflib.logging.IssueLogger;
import com.movielabs.mddflib.logging.LogEntryFolder;
import com.movielabs.mddflib.logging.LogMgmt;
import com.movielabs.mddflib.logging.LogReference;
import net.sf.json.JSONArray;
import net.sf.json.JSONObject;
/**
* A 'helper' class that supports the validation of an XML file against a set of
* structural requirements not specified via an XSD.
*
* The MDDF specifications define many requirements for specific use cases as
* well as recommended 'best practices'. These requirements and recommendations
* specify relationships between XML elements that are not defined via the XSD.
* In order to support validation they are instead formally specified via a
* JSON-formatted structure definition file. The
* StructureValidation class provides the functions that can interpret
* the requirements and test an XML file for compliance.
*
* Semantics and Syntax:
*
* The semantics of a JSON structure definition are as follows:
*
*
* {
* USAGE:
* {
* "targetPath" : XPATH (optional)
* "constraint" :
* [
* {
"VARIABLE ID": XPATH, (optional)
"min": INTEGER, (optional)
"max": INTEGER, (optional)
"xpath": (XPATH | ARRAY[XPATH]),
"filter" : (optional)
{
"values": ARRAY[ STRING],
"negated" : ("true" | "false") (optional)
}
"severity": ("Fatal" | "Error" | "Warning" | "Notice"),
"msg" : STRING, (optional)
"docRef": STRING (optional)
* }
* ]
* "children": {.... } (optional)
* }
* }
*
*
* where:
*
* - USAGE is a string defining the key used by a validator to retrieve
* the requirements appropriate to a use case.
* - targetPath: indicates the element(s) that provide the evaluation
* context for the constraint xpath when invoking the
* validateDocStructure() method. If not specified, a target element
* must be provided when invoking the validateConstraint() method on a
* target element that has been identified by other means.
* - constraint: one or more structural requirements associated with
* the targeted element.
*
* - VARIABLE-ID: Variables are denoted by a "$" followed by
* an ID (e.g., $FOO) and are assigned a value via a XPath.
* - xpath: defines one or more xpaths relative to the target element
* that, when evaluated, the number of matching elements satisfy the min/max
* cardinality constraints. If multiple xpaths are listed, the total number of
* elements (or attributes) returned when each is separately evaluated must
* satisfy the constraint.
* - filter: supplemental condition applied to results returned when
* XPath is evaluated.
* - min: minimum number of matching objects that should be found
* when evaluating the xpath(s). [OPTIONAL, default is 0]
* - max: maximum number of matching objects that should be found
* when evaluating the xpath(s). [OPTIONAL, default is unlimited]
* - severity: must match one of the LogMgmt.level values
*
* - msg: text to use for a log entry if the constraint is not met.
* If not provided, a generic message is used.
* - docRef: is a value usable by LogReference that will
* indicate any reference material documenting the requirement.
*
*
*
*
*
* XPaths:
*
* An XPATH is defined using the standard XPath syntax with two
* modifications.
*
* Indicating Namespaces:
*
* Namespaces are indicated using a variable with the name of an MDDF schema.
* The appropriate namespace prefixes will be inserted by the software.
* Supported namespaces are:
*
*
* - {avail}
* - {mdmec}
* - {manifest}
* - {md}
*
* For example:
*
*
*
"POEST":
{
"targetPath": ".//{avail}LicenseType[.='POEST']",
"constraint":
[
{
"min": "1",
"max": "1",
"xpath": "../{avail}Term[@termName='SuppressionLiftDate']",
"severity": "Error",
"msg": "One SuppressionLiftDate is required for LicenseType 'POEST'"
}
]
},
"WorkType-Episode":
{
"constraint":
[
{
"docRef": "AVAIL:avail00n",
"min": "1",
"max": "2",
"xpath":
[
"{avail}EpisodeMetadata/{avail}AltIdentifier",
"{avail}EpisodeMetadata/{avail}EditEIDR-URN"
]
}
]
},
*
*
*
* Support for MEC File Usage:
*
* A Manifest file may provide metadata in one of two ways: either
* internally via a <BasicMetadata> element or
* externally via a <ContainerReference> pointing to a MEC
* file. Validating a constraint on Metadata may, therefore, require specifying
* two different XPaths to cover both possible situations.
*
*
* To handle this type of situation, a JSONArray with both paths is used. The
* xpath that is to be applied when a MEC file is used will be prefixed with the
* keyword {$$MEC}. For example:
*
*
*
"$CID": "./{manifest}ContentID",
"max": "0",
"severity": "Error",
"xpath":
[
"//{manifest}BasicMetadata[(descendant::{md}ArtReference) and (@ContentID = {$CID}) ]",
"{$$MEC}//{mdmec}Basic[(descendant::{md}ArtReference) and (@ContentID = {$CID}) ]"
],
*
*
*
*
* Filters:
*
* A 'Filter' may be used to supplement the matching criteria specified by the
* XPaths. This is used when XPath criteria are insufficient, or too unwieldy,
* to fully implement a constraint. Filters are (for now) defined as a set of
* values and an optional 'negated' flag set to true or false.
*
*
* For example, the following filter would identify Audio assets that
* have a ChannelMapping that is inconsistent with the number of actual
* channels:
*
*
*
"MultiChannel":
{
"targetPath": ".//{md}Channels[. > 1]",
"constraint":
[
{
"xpath":
[
"../{md}Encoding/{md}ChannelMapping"
],
"filter":
{
"values":
[
"Mono",
"Left",
"Center",
"Right"
],
"negated": "false"
},
"max": "0",
"severity": "Error",
"msg": "ChannelMapping is not valid for multi-channel Audio"
}
]
}
*
*
* Variables:
*
* Variables are denoted by a "$" followed by an ID (e.g., $FOO) and
* are assigned a value via a XPath. For example:
* "$CID": "./@ContentID"
*
* They may be included in the constraint's XPath by enclosing
* the variable name in curly brackets. For example:
* "xpath":
* [
* "../..//{manifest}Experience[@ExperienceID={$CID}]/{manifest}PictureGroupID"
* ],
*
*
* It is possible that the XPath used to determine a variable's value will
* evaluate to a null. Any constraint XPath that references a null
* variable will be skipped when evaluating the constraint criteria.
*
*
*
* Nested Requirements:
*
* Requirements can be defined in a way that reflects the nested structure of
* the underlying XML. For example, assume the XSD specifies the following
* syntax:
*
*
* - Foo:
*
* - Bar:
*
* - Flavor
* - Weight
* - Calories
*
*
*
*
*
* We wish to specify that if Flavor is present then Calories must also
* be specified. There are two options:
*
* - use a targetPath pointing to Foo and constraints with
* xpath that will resolve when Foo is the root for xpath
* evaluation (e.g., "xpath": "./Bar/Calories")
* - the targetPath points to Foo, followed by a
* child constraint with the targetPath pointing to Bar
* and an xpath that will resolve when Bar is the root (e.g.,
* "xpath": "./Calories")
*
*
* Either option will work but in some situations one or the other may be more
* efficient to evaluate or easier to write.
*
* Usage
*
* Validation modules should determine the appropriate JSON resource file based
* on the type and version of the MDDF file. Requirements may then be retrieved
* and individually checked using the USAGE key or the entire collection may be
* iterated thru.
*
* @author L. Levin, Critical Architectures LLC
*
*/
public class StructureValidation {
/**
 * Prefix keyword marking a path (a 'targetPath' or a constraint 'xpath') that
 * is to be applied to a supporting MEC file rather than to the primary file.
 * The prefix is removed from the path before the path is used.
 */
private static final String KEY_MEC_REF = "{$$MEC}";
/** Destination for any errors or warnings detected during validation. */
protected IssueLogger logger;
/*
 * NOTE(review): presumably identifies this module as the source of log
 * messages -- confirm against the code that reads it.
 */
protected String logMsgSrcId;
/**
 * Creates a validator that reports detected issues via the given logger.
 *
 * @param logger      destination for any errors or warnings detected
 * @param logMsgSrcId value stored in the <tt>logMsgSrcId</tt> field for use by
 *                    this class and its subclasses
 */
public StructureValidation(IssueLogger logger, String logMsgSrcId) {
    this.logger = logger;
    // Previously the argument was silently dropped, leaving the field null.
    this.logMsgSrcId = logMsgSrcId;
}
/**
* Check to see if the XML satisfies the specified requirement. The
* rootEL may either be the root of an entire document or the root of a
* DOM tree forming a sub-tree within the overall document.
*
* Any errors or warnings detected will be reported via the logger. The return
* status will indicate to caller if an error was detected.
*
*
* @param rootEl
* @param rqmt
* @return
*/
// public boolean validateDocStructure(Element rootEl, JSONObject rqmt) {
// return validateDocStructure(rootEl, rqmt, null);
// }
/**
 * Check to see if the XML satisfies the specified requirement. The
 * <tt>rootEl</tt> may either be the root of an entire document or the root of a
 * DOM tree forming a sub-tree within the overall document. A requirement's
 * <tt>targetPath</tt> or <tt>constraint</tt> may include an XPath that is to be
 * applied to the supporting MEC files.
 *
 * Any errors or warnings detected will be reported via the logger. The return
 * status will indicate to caller if an error was detected.
 *
 * @param rootEl         root of the document or sub-tree being validated
 * @param rqmt           requirement definition. Its "targetPath" entry is read
 *                       unconditionally; "constraint" and "children" entries
 *                       are processed only when present
 * @param primaryfTarget target recorded as the context of <tt>rootEl</tt>
 * @param supportingMECs supporting MEC files mapped to their log folders (may
 *                       be <tt>null</tt> or empty)
 * @return <tt>true</tt> if every constraint that was evaluated, including
 *         those of nested requirements, returned <tt>true</tt>
 */
public boolean validateDocStructure(Element rootEl, JSONObject rqmt, MddfTarget primaryfTarget,
        Map<MddfTarget, LogEntryFolder> supportingMECs) {
    String targetPath = rqmt.getString("targetPath");
    List<Element> rootElList = new ArrayList<Element>();
    Map<Element, MddfTarget> contextMap = new HashMap<Element, MddfTarget>();
    boolean mecTarget = false;
    if (targetPath.startsWith(KEY_MEC_REF)) {
        /*
         * Evaluation of 'targetPath' requires the xPath to be applied to a supporting
         * MEC file rather than the primary (i.e., Manifest) file. This is NOT the same
         * as evaluating an associated constraint xPath against the MEC. The two are
         * independent of each other.
         */
        mecTarget = true;
        targetPath = targetPath.replace(KEY_MEC_REF, "");
        if ((supportingMECs != null) && (!supportingMECs.isEmpty())) {
            for (MddfTarget targetSrc : supportingMECs.keySet()) {
                Element mecRootEl = targetSrc.getXmlDoc().getRootElement();
                rootElList.add(mecRootEl);
                contextMap.put(mecRootEl, targetSrc);
            }
        }
    } else {
        // no need to mod the targetPath.
        rootElList.add(rootEl);
        contextMap.put(rootEl, primaryfTarget);
    }
    boolean allOk = true;
    for (Element nextDocRoot : rootElList) {
        FILE_FMT mddfFmt = MddfContext.identifyMddfFormat(nextDocRoot);
        XPathExpression<?> xpExp = resolveXPath(targetPath, null, mddfFmt);
        // The cast is unchecked; the loops below treat every match as an Element.
        @SuppressWarnings("unchecked")
        List<Element> targetElList = (List<Element>) xpExp.evaluate(nextDocRoot);
        boolean isOk = true;
        if (rqmt.containsKey("constraint")) {
            JSONArray constraintSet = rqmt.getJSONArray("constraint");
            /*
             * The log folder depends only on the document being processed, so it is
             * looked up once per document rather than once per constraint.
             */
            MddfTarget mddfContext = contextMap.get(nextDocRoot);
            LogEntryFolder logFolder = null;
            if (supportingMECs != null) {
                logFolder = supportingMECs.get(mddfContext);
            }
            /*
             * the 'contextEl' provides the focal point for the evaluation of a constraint.
             * They are located via the 'targetPath' in each JSON requirement spec. The
             * contextEl will be used to resolve any variables as well as for providing the
             * context for any error msgs added to the log. It *may* also provide the base
             * point for the evaluation of the XPaths in the requirement's 'constraint'.
             */
            for (Element nextContextEl : targetElList) {
                /*
                 * the 'targetEl' will be used as the starting point when evaluating the
                 * constraint's xPath(s). If the 'nextContextEl' is in the main doc (i.e,
                 * mecTarget == false) then targetEl = nextContextEl. Otherwise the targetEl
                 * is the 'rootEl' passed as a calling argument to this method.
                 */
                Element targetEl = mecTarget ? rootEl : nextContextEl;
                for (int i = 0; i < constraintSet.size(); i++) {
                    JSONObject constraint = constraintSet.getJSONObject(i);
                    /*
                     * variables are always resolved using the 'nextContextEl'.
                     */
                    isOk = evaluateConstraint(targetEl, constraint, nextContextEl,
                            resolveVariables(nextContextEl, constraint), logFolder, supportingMECs) && isOk;
                }
            }
        }
        /* are there nested constraints?? */
        if (rqmt.containsKey("children")) {
            JSONObject embeddedReqmts = rqmt.getJSONObject("children");
            for (Element nextTargetEl : targetElList) {
                Iterator<?> embeddedKeys = embeddedReqmts.keys();
                while (embeddedKeys.hasNext()) {
                    String key = (String) embeddedKeys.next();
                    JSONObject childRqmtSpec = embeddedReqmts.getJSONObject(key);
                    // NOTE: child requirements without a 'targetPath' are skipped
                    if (childRqmtSpec.has("targetPath")) {
                        // Recursive descent...
                        isOk = validateDocStructure(nextTargetEl, childRqmtSpec, primaryfTarget, supportingMECs)
                                && isOk;
                    }
                }
            }
        }
        allOk = allOk && isOk;
    }
    return allOk;
}
/**
* Evaluate a constraint in the context of a specific Element. This means
* descendant Elements are not considered.
*
* Note: This method is exposed as public to support unit
* testing. Use of validateDocStructure() is preferred.
*
* @param target
* @param constraint
* @param contextEl
* @param varMap [OPTIONAL]
* @param logFolder
* @param supportingMECs [OPTIONAL]
* @return
*/
public boolean evaluateConstraint(Element target, JSONObject constraint, Element contextEl,
Map varMap, LogEntryFolder logFolder, Map supportingMECs) {
boolean passes = true;
if (varMap == null) {
varMap = resolveVariables(target, constraint);
}
int min = constraint.optInt("min", 0);
int max = constraint.optInt("max", -1);
String severity = constraint.optString("severity", "Error");
int logLevel = LogMgmt.text2Level(severity);
String docRef = constraint.optString("docRef");
Object xpaths = constraint.opt("xpath");
List> xpeList = new ArrayList>();
List externalPathList = new ArrayList();
String targetList = ""; // for use if error msg is required
String[] xpParts = null;
if (xpaths instanceof String) {
String xpathDef = (String) xpaths;
if (xpathDef.startsWith(KEY_MEC_REF)) {
xpathDef = xpathDef.replace(KEY_MEC_REF, "");
externalPathList.add(xpathDef);
} else {
xpeList.add(resolveXPath(xpathDef, varMap, target));
}
xpParts = xpathDef.split("\\[");
targetList = xpParts[0];
} else if (xpaths instanceof JSONArray) {
JSONArray xpArray = (JSONArray) xpaths;
for (int i = 0; i < xpArray.size(); i++) {
String xpathDef = xpArray.getString(i);
if (xpathDef.startsWith(KEY_MEC_REF)) {
xpathDef = xpathDef.replace(KEY_MEC_REF, "");
externalPathList.add(xpathDef);
} else {
xpeList.add(resolveXPath(xpathDef, varMap, target));
}
xpParts = xpathDef.split("\\[");
if (i < 1) {
targetList = xpParts[0];
} else if (i == (xpArray.size() - 1)) {
targetList = targetList + ", or " + xpParts[0];
} else {
targetList = targetList + ", " + xpParts[0];
}
}
}
// reformat targetList for use in log msgs....
targetList = targetList.replaceAll("\\{\\w+\\}", "");
/* Has an OPTIONAL filter been included with the constraint? */
JSONObject filterDef = constraint.optJSONObject("filter");
List