All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.itextpdf.kernel.xmp.impl.xpath.XMPPathParser Maven / Gradle / Ivy

There is a newer version: 8.0.5
Show newest version
//Copyright (c) 2006, Adobe Systems Incorporated
//All rights reserved.
//
//        Redistribution and use in source and binary forms, with or without
//        modification, are permitted provided that the following conditions are met:
//        1. Redistributions of source code must retain the above copyright
//        notice, this list of conditions and the following disclaimer.
//        2. Redistributions in binary form must reproduce the above copyright
//        notice, this list of conditions and the following disclaimer in the
//        documentation and/or other materials provided with the distribution.
//        3. All advertising materials mentioning features or use of this software
//        must display the following acknowledgement:
//        This product includes software developed by the Adobe Systems Incorporated.
//        4. Neither the name of the Adobe Systems Incorporated nor the
//        names of its contributors may be used to endorse or promote products
//        derived from this software without specific prior written permission.
//
//        THIS SOFTWARE IS PROVIDED BY ADOBE SYSTEMS INCORPORATED ''AS IS'' AND ANY
//        EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
//        WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
//        DISCLAIMED. IN NO EVENT SHALL ADOBE SYSTEMS INCORPORATED BE LIABLE FOR ANY
//        DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
//        (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
//        LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
//        ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
//        (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
//        SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
//        http://www.adobe.com/devnet/xmp/library/eula-xmp-library-java.html

package com.itextpdf.kernel.xmp.impl.xpath;

import com.itextpdf.kernel.xmp.XMPError;
import com.itextpdf.kernel.xmp.XMPException;
import com.itextpdf.kernel.xmp.XMPMetaFactory;
import com.itextpdf.kernel.xmp.impl.Utils;
import com.itextpdf.kernel.xmp.properties.XMPAliasInfo;


/**
 * Parser for XMP XPaths.
 *
 * @since   01.03.2006
 */
public final class XMPPathParser
{
	/**
	 * Private constructor
	 */
	private XMPPathParser()
	{
		// empty
	}


	/**
	 * Split an XMPPath expression apart at the conceptual steps, adding the
	 * root namespace prefix to the first property component. The schema URI is
	 * put in the first (0th) slot in the expanded XMPPath. Check if the top
	 * level component is an alias, but don't resolve it.
	 * 

* In the most verbose case steps are separated by '/', and each step can be * of these forms: *

*
prefix:name *
A top level property or struct field. *
[index] *
An element of an array. *
[last()] *
The last element of an array. *
[fieldName="value"] *
An element in an array of structs, chosen by a field value. *
[@xml:lang="value"] *
An element in an alt-text array, chosen by the xml:lang qualifier. *
[?qualName="value"] *
An element in an array, chosen by a qualifier value. *
@xml:lang *
An xml:lang qualifier. *
?qualName *
A general qualifier. *
*

* The logic is complicated though by shorthand for arrays, the separating * '/' and leading '*' are optional. These are all equivalent: array/*[2] * array/[2] array*[2] array[2] All of these are broken into the 2 steps * "array" and "[2]". *

* The value portion in the array selector forms is a string quoted by ''' * or '"'. The value may contain any character including a doubled quoting * character. The value may be empty. *

* The syntax isn't checked, but an XML name begins with a letter or '_', * and contains letters, digits, '.', '-', '_', and a bunch of special * non-ASCII Unicode characters. An XML qualified name is a pair of names * separated by a colon. * @param schemaNS * schema namespace * @param path * property name * @return Returns the expandet XMPPath. * @throws XMPException * Thrown if the format is not correct somehow. * */ public static XMPPath expandXPath(String schemaNS, String path) throws XMPException { if (schemaNS == null || path == null) { throw new XMPException("Parameter must not be null", XMPError.BADPARAM); } XMPPath expandedXPath = new XMPPath(); PathPosition pos = new PathPosition(); pos.path = path; // Pull out the first component and do some special processing on it: add the schema // namespace prefix and and see if it is an alias. The start must be a "qualName". parseRootNode(schemaNS, pos, expandedXPath); // Now continue to process the rest of the XMPPath string. while (pos.stepEnd < path.length()) { pos.stepBegin = pos.stepEnd; skipPathDelimiter(path, pos); pos.stepEnd = pos.stepBegin; XMPPathSegment segment; if (path.charAt(pos.stepBegin) != '[') { // A struct field or qualifier. segment = parseStructSegment(pos); } else { // One of the array forms. segment = parseIndexSegment(pos); } if (segment.getKind() == XMPPath.STRUCT_FIELD_STEP) { if (segment.getName().charAt(0) == '@') { segment.setName("?" + segment.getName().substring(1)); if (!"?xml:lang".equals(segment.getName())) { throw new XMPException("Only xml:lang allowed with '@'", XMPError.BADXPATH); } } if (segment.getName().charAt(0) == '?') { pos.nameStart++; segment.setKind(XMPPath.QUALIFIER_STEP); } verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd)); } else if (segment.getKind() == XMPPath.FIELD_SELECTOR_STEP) { if (segment.getName().charAt(1) == '@') { segment.setName("[?" + segment.getName().substring(2)); if (!segment.getName().startsWith("[?xml:lang=")) { throw new XMPException("Only xml:lang allowed with '@'", XMPError.BADXPATH); } } if (segment.getName().charAt(1) == '?') { pos.nameStart++; segment.setKind(XMPPath.QUAL_SELECTOR_STEP); verifyQualName(pos.path.substring(pos.nameStart, pos.nameEnd)); } } expandedXPath.add(segment); } return expandedXPath; } /** * @param path * @param pos * @throws XMPException */ private static void skipPathDelimiter(String path, PathPosition pos) throws XMPException { if (path.charAt(pos.stepBegin) == '/') { // skip slash pos.stepBegin++; // added for Java if (pos.stepBegin >= path.length()) { throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH); } } if (path.charAt(pos.stepBegin) == '*') { // skip asterisk pos.stepBegin++; if (pos.stepBegin >= path.length() || path.charAt(pos.stepBegin) != '[') { throw new XMPException("Missing '[' after '*'", XMPError.BADXPATH); } } } /** * Parses a struct segment * @param pos the current position in the path * @return Retusn the segment or an errror * @throws XMPException If the sement is empty */ private static XMPPathSegment parseStructSegment(PathPosition pos) throws XMPException { pos.nameStart = pos.stepBegin; while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0) { pos.stepEnd++; } pos.nameEnd = pos.stepEnd; if (pos.stepEnd == pos.stepBegin) { throw new XMPException("Empty XMPPath segment", XMPError.BADXPATH); } // ! Touch up later, also changing '@' to '?'. XMPPathSegment segment = new XMPPathSegment(pos.path.substring(pos.stepBegin, pos.stepEnd), XMPPath.STRUCT_FIELD_STEP); return segment; } /** * Parses an array index segment. * * @param pos the xmp path * @return Returns the segment or an error * @throws XMPException thrown on xmp path errors * */ private static XMPPathSegment parseIndexSegment(PathPosition pos) throws XMPException { XMPPathSegment segment; pos.stepEnd++; // Look at the character after the leading '['. if ('0' <= pos.path.charAt(pos.stepEnd) && pos.path.charAt(pos.stepEnd) <= '9') { // A numeric (decimal integer) array index. while (pos.stepEnd < pos.path.length() && '0' <= pos.path.charAt(pos.stepEnd) && pos.path.charAt(pos.stepEnd) <= '9') { pos.stepEnd++; } segment = new XMPPathSegment(null, XMPPath.ARRAY_INDEX_STEP); } else { // Could be "[last()]" or one of the selector forms. Find the ']' or '='. while (pos.stepEnd < pos.path.length() && pos.path.charAt(pos.stepEnd) != ']' && pos.path.charAt(pos.stepEnd) != '=') { pos.stepEnd++; } if (pos.stepEnd >= pos.path.length()) { throw new XMPException("Missing ']' or '=' for array index", XMPError.BADXPATH); } if (pos.path.charAt(pos.stepEnd) == ']') { if (!"[last()".equals(pos.path.substring(pos.stepBegin, pos.stepEnd))) { throw new XMPException( "Invalid non-numeric array index", XMPError.BADXPATH); } segment = new XMPPathSegment(null, XMPPath.ARRAY_LAST_STEP); } else { pos.nameStart = pos.stepBegin + 1; pos.nameEnd = pos.stepEnd; pos.stepEnd++; // Absorb the '=', remember the quote. char quote = pos.path.charAt(pos.stepEnd); if (quote != '\'' && quote != '"') { throw new XMPException( "Invalid quote in array selector", XMPError.BADXPATH); } pos.stepEnd++; // Absorb the leading quote. while (pos.stepEnd < pos.path.length()) { if (pos.path.charAt(pos.stepEnd) == quote) { // check for escaped quote if (pos.stepEnd + 1 >= pos.path.length() || pos.path.charAt(pos.stepEnd + 1) != quote) { break; } pos.stepEnd++; } pos.stepEnd++; } if (pos.stepEnd >= pos.path.length()) { throw new XMPException("No terminating quote for array selector", XMPError.BADXPATH); } pos.stepEnd++; // Absorb the trailing quote. // ! Touch up later, also changing '@' to '?'. segment = new XMPPathSegment(null, XMPPath.FIELD_SELECTOR_STEP); } } if (pos.stepEnd >= pos.path.length() || pos.path.charAt(pos.stepEnd) != ']') { throw new XMPException("Missing ']' for array index", XMPError.BADXPATH); } pos.stepEnd++; segment.setName(pos.path.substring(pos.stepBegin, pos.stepEnd)); return segment; } /** * Parses the root node of an XMP Path, checks if namespace and prefix fit together * and resolve the property to the base property if it is an alias. * @param schemaNS the root namespace * @param pos the parsing position helper * @param expandedXPath the path to contribute to * @throws XMPException If the path is not valid. */ private static void parseRootNode(String schemaNS, PathPosition pos, XMPPath expandedXPath) throws XMPException { while (pos.stepEnd < pos.path.length() && "/[*".indexOf(pos.path.charAt(pos.stepEnd)) < 0) { pos.stepEnd++; } if (pos.stepEnd == pos.stepBegin) { throw new XMPException("Empty initial XMPPath step", XMPError.BADXPATH); } String rootProp = verifyXPathRoot(schemaNS, pos.path.substring(pos.stepBegin, pos.stepEnd)); XMPAliasInfo aliasInfo = XMPMetaFactory.getSchemaRegistry().findAlias(rootProp); if (aliasInfo == null) { // add schema xpath step expandedXPath.add(new XMPPathSegment(schemaNS, XMPPath.SCHEMA_NODE)); XMPPathSegment rootStep = new XMPPathSegment(rootProp, XMPPath.STRUCT_FIELD_STEP); expandedXPath.add(rootStep); } else { // add schema xpath step and base step of alias expandedXPath.add(new XMPPathSegment(aliasInfo.getNamespace(), XMPPath.SCHEMA_NODE)); XMPPathSegment rootStep = new XMPPathSegment(verifyXPathRoot(aliasInfo.getNamespace(), aliasInfo.getPropName()), XMPPath.STRUCT_FIELD_STEP); rootStep.setAlias(true); rootStep.setAliasForm(aliasInfo.getAliasForm().getOptions()); expandedXPath.add(rootStep); if (aliasInfo.getAliasForm().isArrayAltText()) { XMPPathSegment qualSelectorStep = new XMPPathSegment("[?xml:lang='x-default']", XMPPath.QUAL_SELECTOR_STEP); qualSelectorStep.setAlias(true); qualSelectorStep.setAliasForm(aliasInfo.getAliasForm().getOptions()); expandedXPath.add(qualSelectorStep); } else if (aliasInfo.getAliasForm().isArray()) { XMPPathSegment indexStep = new XMPPathSegment("[1]", XMPPath.ARRAY_INDEX_STEP); indexStep.setAlias(true); indexStep.setAliasForm(aliasInfo.getAliasForm().getOptions()); expandedXPath.add(indexStep); } } } /** * Verifies whether the qualifier name is not XML conformant or the * namespace prefix has not been registered. * * @param qualName * a qualifier name * @throws XMPException * If the name is not conformant */ private static void verifyQualName(String qualName) throws XMPException { int colonPos = qualName.indexOf(':'); if (colonPos > 0) { String prefix = qualName.substring(0, colonPos); if (Utils.isXMLNameNS(prefix)) { String regURI = XMPMetaFactory.getSchemaRegistry().getNamespaceURI( prefix); if (regURI != null) { return; } throw new XMPException("Unknown namespace prefix for qualified name", XMPError.BADXPATH); } } throw new XMPException("Ill-formed qualified name", XMPError.BADXPATH); } /** * Verify if an XML name is conformant. * * @param name * an XML name * @throws XMPException * When the name is not XML conformant */ private static void verifySimpleXMLName(String name) throws XMPException { if (!Utils.isXMLName(name)) { throw new XMPException("Bad XML name", XMPError.BADXPATH); } } /** * Set up the first 2 components of the expanded XMPPath. Normalizes the various cases of using * the full schema URI and/or a qualified root property name. Returns true for normal * processing. If allowUnknownSchemaNS is true and the schema namespace is not registered, false * is returned. If allowUnknownSchemaNS is false and the schema namespace is not registered, an * exception is thrown *

* (Should someday check the full syntax:) * * @param schemaNS schema namespace * @param rootProp the root xpath segment * @return Returns root QName. * @throws XMPException Thrown if the format is not correct somehow. */ private static String verifyXPathRoot(String schemaNS, String rootProp) throws XMPException { // Do some basic checks on the URI and name. Try to lookup the URI. See if the name is // qualified. if (schemaNS == null || schemaNS.length() == 0) { throw new XMPException( "Schema namespace URI is required", XMPError.BADSCHEMA); } if ((rootProp.charAt(0) == '?') || (rootProp.charAt(0) == '@')) { throw new XMPException("Top level name must not be a qualifier", XMPError.BADXPATH); } if (rootProp.indexOf('/') >= 0 || rootProp.indexOf('[') >= 0) { throw new XMPException("Top level name must be simple", XMPError.BADXPATH); } String prefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS); if (prefix == null) { throw new XMPException("Unregistered schema namespace URI", XMPError.BADSCHEMA); } // Verify the various URI and prefix combinations. Initialize the // expanded XMPPath. int colonPos = rootProp.indexOf(':'); if (colonPos < 0) { // The propName is unqualified, use the schemaURI and associated // prefix. verifySimpleXMLName(rootProp); // Verify the part before any colon return prefix + rootProp; } else { // The propName is qualified. Make sure the prefix is legit. Use the associated URI and // qualified name. // Verify the part before any colon verifySimpleXMLName(rootProp.substring(0, colonPos)); verifySimpleXMLName(rootProp.substring(colonPos)); prefix = rootProp.substring(0, colonPos + 1); String regPrefix = XMPMetaFactory.getSchemaRegistry().getNamespacePrefix(schemaNS); if (regPrefix == null) { throw new XMPException("Unknown schema namespace prefix", XMPError.BADSCHEMA); } if (!prefix.equals(regPrefix)) { throw new XMPException("Schema namespace URI and prefix mismatch", XMPError.BADSCHEMA); } return rootProp; } } } /** * This objects contains all needed char positions to parse. */ class PathPosition { /** the complete path */ public String path = null; /** the start of a segment name */ int nameStart = 0; /** the end of a segment name */ int nameEnd = 0; /** the begin of a step */ int stepBegin = 0; /** the end of a step */ int stepEnd = 0; }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy