All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.xmlcml.xml.XPathGenerator Maven / Gradle / Ivy

/**
 *    Copyright 2011 Peter Murray-Rust
 *
 *    Licensed under the Apache License, Version 2.0 (the "License");
 *    you may not use this file except in compliance with the License.
 *    You may obtain a copy of the License at
 *
 *        http://www.apache.org/licenses/LICENSE-2.0
 *
 *    Unless required by applicable law or agreed to in writing, software
 *    distributed under the License is distributed on an "AS IS" BASIS,
 *    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *    See the License for the specific language governing permissions and
 *    limitations under the License.
 */
package org.xmlcml.xml;

import org.apache.log4j.Level;
import org.apache.log4j.Logger;

import nu.xom.Attribute;
import nu.xom.Element;
import nu.xom.Node;
import nu.xom.ParentNode;

/** generates an xpath for an element in context.
 * 
 * Heuristic.
 * 
 * Because of the problems of namespaces, variable use of IDs etc., There are roughly three strategies:
 *  (1) ancestor local names e.g. *[local-name()='body']/*[local-name()='div'][1]/*[local-name='p'][2]
 *  (2) namespaced h:body/h:div[1]/h:p[2]
 *  (3) ids *[@id='id23']/*[@id='id45']/*[@id='id99']
 *  
 *  If the namespaces are not clear then (2) cannot be used. If there are no ids, then maybe we have to generate
 *  them. but this is fragile against others doing the same with different strategy. (1) is the safest - it fails
 *  ony when there are elements with different namespaces. so (4) we could probably extract the namespaces and use them explicitli
 *  (4) *[local-name()='body' and namespace-uri()='http://www.w3.org/1999/xhtml']/*[local-name()='div'][1]/*[local-name='p'][2]
 *  
 *  I think (4) is safe, but verbose!!

 * 
 * @author pm286
 *
 */
public class XPathGenerator {

	
	private static final Logger LOG = Logger.getLogger(XPathGenerator.class);
	static {
		LOG.setLevel(Level.DEBUG);
	}
	
	private static final String NAME = "name";
	private static final String ID = "id";
	private static final String TAGX = "tagx";
	private static final String[] TAGS = {ID, NAME, TAGX};
	
	private Element xmlElement;
	private boolean shortFlag = false;

	public XPathGenerator(Element xmlElement) {
		this.xmlElement = xmlElement;
	}
	
	public void setShort(boolean shortFlag) {
		this.shortFlag  = shortFlag;
	}
	
    public String getXPath() {
		StringBuilder sb = new StringBuilder();
		addAncestors(sb, xmlElement);
		return sb.toString();
	}

	private void addAncestors(StringBuilder sb, Element element) {
		if (element == null)
			return;
		String name = element.getLocalName();
		String el = element.toXML();
//		LOG.debug(">anc>"+el.toString().substring(0,  Math.min(el.length(), 300)));
		StringBuilder sb1 = new StringBuilder();
		sb1.append(((shortFlag) ? "/" + name : "/*[local-name()='" + name + "']"));
		// FIXME this is NOT UNIQUE
		Attribute attribute = getFirstUsefulAttribute(element);
		attribute = null; // FIXME
		if (attribute != null) {
			sb1.append("[@" + attribute.getLocalName() + "='"
					+ attribute.getValue() + "']");
		} else {
			int ordinal = getOrdinalOfChildWithName(element, name);
			sb1.append("[" + ordinal + "]");
		}
		String ss = sb1.toString();
//		LOG.debug(">"+ss);
		sb.insert(0, ss);
		ParentNode parent = element.getParent();
		if (parent != null && parent instanceof Element) {
			addAncestors(sb, (Element) parent);
		}
	}

	private int getOrdinalOfChildWithName(Element element, String name) {
		int ordinal = 1; // we count from 1 in XPath
		ParentNode parent = element.getParent();
		if (parent != null) {
			int position = parent.indexOf(element);
			for (int i = 0; i < position; i++) {
				Node sibling = parent.getChild(i);
				if (sibling instanceof Element
						&& name.equals(((Element) sibling).getLocalName())) {
					ordinal++;
				}
			}
		}
		return ordinal;
	}

	private Attribute getFirstUsefulAttribute(Element element) {
		for (String tag : TAGS) {
			Attribute attribute = element.getAttribute(tag);
			if (attribute != null) {
				if (attribute.getValue().trim().length() > 0) {
					return attribute;
				}
			}
		}
		return null;
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy