org.apache.tika.sax.xpath.XPathParser Maven / Gradle / Ivy
Show all versions of aem-sdk-api Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.tika.sax.xpath;
import java.util.HashMap;
import java.util.Map;
/**
* Parser for a very simple XPath subset. Only the following XPath constructs
* (with namespaces) are supported:
*
* .../node()
* .../text()
* .../@*
* .../@name
* .../*...
* .../name...
* ...//*...
* ...//name...
*
*
* In addition the non-abbreviated .../descendant::node()
* construct can be used for cases where the descendant-or-self axis
* used by the ...//node()
construct is not appropriate.
*/
public class XPathParser {
private final Map prefixes = new HashMap();
public XPathParser() {
}
public XPathParser(String prefix, String namespace) {
addPrefix(prefix, namespace);
}
public void addPrefix(String prefix, String namespace) {
prefixes.put(prefix, namespace);
}
/**
* Parses the given simple XPath expression to an evaluation state
* initialized at the document node. Invalid expressions are not flagged
* as errors, they just result in a failing evaluation state.
*
* @param xpath simple XPath expression
* @return XPath evaluation state
*/
public Matcher parse(String xpath) {
if (xpath.equals("/text()")) {
return TextMatcher.INSTANCE;
} else if (xpath.equals("/node()")) {
return NodeMatcher.INSTANCE;
} else if (xpath.equals("/descendant::node()")
|| xpath.equals("/descendant:node()")) { // for compatibility
return new CompositeMatcher(
TextMatcher.INSTANCE,
new ChildMatcher(new SubtreeMatcher(NodeMatcher.INSTANCE)));
} else if (xpath.equals("/@*")) {
return AttributeMatcher.INSTANCE;
} else if (xpath.length() == 0) {
return ElementMatcher.INSTANCE;
} else if (xpath.startsWith("/@")) {
String name = xpath.substring(2);
String prefix = null;
int colon = name.indexOf(':');
if (colon != -1) {
prefix = name.substring(0, colon);
name = name.substring(colon + 1);
}
if (prefixes.containsKey(prefix)) {
return new NamedAttributeMatcher(prefixes.get(prefix), name);
} else {
return Matcher.FAIL;
}
} else if (xpath.startsWith("/*")) {
return new ChildMatcher(parse(xpath.substring(2)));
} else if (xpath.startsWith("///")) {
return Matcher.FAIL;
} else if (xpath.startsWith("//")) {
return new SubtreeMatcher(parse(xpath.substring(1)));
} else if (xpath.startsWith("/")) {
int slash = xpath.indexOf('/', 1);
if (slash == -1) {
slash = xpath.length();
}
String name = xpath.substring(1, slash);
String prefix = null;
int colon = name.indexOf(':');
if (colon != -1) {
prefix = name.substring(0, colon);
name = name.substring(colon + 1);
}
if (prefixes.containsKey(prefix)) {
return new NamedElementMatcher(
prefixes.get(prefix), name,
parse(xpath.substring(slash)));
} else {
return Matcher.FAIL;
}
} else {
return Matcher.FAIL;
}
}
}