All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.mortbay.xml.XmlParser Maven / Gradle / Ivy

// ========================================================================
// Copyright 2004-2005 Mort Bay Consulting Pty. Ltd.
// ------------------------------------------------------------------------
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at 
// http://www.apache.org/licenses/LICENSE-2.0
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ========================================================================

package org.mortbay.xml;

import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.AbstractList;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.NoSuchElementException;
import java.util.Stack;
import java.util.StringTokenizer;

import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.mortbay.log.Log;
import org.mortbay.util.LazyList;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;

/*--------------------------------------------------------------*/
/**
 * XML Parser wrapper. This class wraps any standard JAXP1.1 parser with convieniant error and
 * entity handlers and a mini dom-like document tree.
 * 

* By default, the parser is created as a validating parser only if xerces is present. This can be * configured by setting the "org.mortbay.xml.XmlParser.Validating" system property. * * @author Greg Wilkins (gregw) */ public class XmlParser { private Map _redirectMap = new HashMap(); private SAXParser _parser; private Map _observerMap; private Stack _observers = new Stack(); private String _xpath; private Object _xpaths; private String _dtd; /* ------------------------------------------------------------ */ /** * Construct */ public XmlParser() { SAXParserFactory factory = SAXParserFactory.newInstance(); boolean validating_dft = factory.getClass().toString().startsWith("org.apache.xerces."); String validating_prop = System.getProperty("org.mortbay.xml.XmlParser.Validating", validating_dft ? "true" : "false"); boolean validating = Boolean.valueOf(validating_prop).booleanValue(); setValidating(validating); } /* ------------------------------------------------------------ */ /** * Constructor. */ public XmlParser(boolean validating) { setValidating(validating); } /* ------------------------------------------------------------ */ public void setValidating(boolean validating) { try { SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(validating); _parser = factory.newSAXParser(); try { if (validating) _parser.getXMLReader().setFeature("http://apache.org/xml/features/validation/schema", validating); } catch (Exception e) { if (validating) Log.warn("Schema validation may not be supported: ", e); else Log.ignore(e); } _parser.getXMLReader().setFeature("http://xml.org/sax/features/validation", validating); _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespaces", true); _parser.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes", false); } catch (Exception e) { Log.warn(Log.EXCEPTION, e); throw new Error(e.toString()); } } /* ------------------------------------------------------------ */ /** * @param name * @param entity */ public synchronized void redirectEntity(String name, URL entity) { if (entity != null) _redirectMap.put(name, entity); } /* ------------------------------------------------------------ */ /** * * @return Returns the xpath. */ public String getXpath() { return _xpath; } /* ------------------------------------------------------------ */ /** * Set an XPath A very simple subset of xpath is supported to select a partial tree. Currently * only path like "/node1/nodeA | /node1/nodeB" are supported. * * @param xpath The xpath to set. */ public void setXpath(String xpath) { _xpath = xpath; StringTokenizer tok = new StringTokenizer(xpath, "| "); while (tok.hasMoreTokens()) _xpaths = LazyList.add(_xpaths, tok.nextToken()); } /* ------------------------------------------------------------ */ public String getDTD() { return _dtd; } /* ------------------------------------------------------------ */ /** * Add a ContentHandler. Add an additional _content handler that is triggered on a tag name. SAX * events are passed to the ContentHandler provided from a matching start element to the * corresponding end element. Only a single _content handler can be registered against each tag. * * @param trigger Tag local or q name. * @param observer SAX ContentHandler */ public synchronized void addContentHandler(String trigger, ContentHandler observer) { if (_observerMap == null) _observerMap = new HashMap(); _observerMap.put(trigger, observer); } /* ------------------------------------------------------------ */ public synchronized Node parse(InputSource source) throws IOException, SAXException { _dtd=null; Handler handler = new Handler(); XMLReader reader = _parser.getXMLReader(); reader.setContentHandler(handler); reader.setErrorHandler(handler); reader.setEntityResolver(handler); if (Log.isDebugEnabled()) Log.debug("parsing: sid=" + source.getSystemId() + ",pid=" + source.getPublicId()); _parser.parse(source, handler); if (handler._error != null) throw handler._error; Node doc = (Node) handler._top.get(0); handler.clear(); return doc; } /* ------------------------------------------------------------ */ /** * Parse String URL. */ public synchronized Node parse(String url) throws IOException, SAXException { if (Log.isDebugEnabled()) Log.debug("parse: " + url); return parse(new InputSource(url)); } /* ------------------------------------------------------------ */ /** * Parse File. */ public synchronized Node parse(File file) throws IOException, SAXException { if (Log.isDebugEnabled()) Log.debug("parse: " + file); return parse(new InputSource(file.toURL().toString())); } /* ------------------------------------------------------------ */ /** * Parse InputStream. */ public synchronized Node parse(InputStream in) throws IOException, SAXException { _dtd=null; Handler handler = new Handler(); XMLReader reader = _parser.getXMLReader(); reader.setContentHandler(handler); reader.setErrorHandler(handler); reader.setEntityResolver(handler); _parser.parse(new InputSource(in), handler); if (handler._error != null) throw handler._error; Node doc = (Node) handler._top.get(0); handler.clear(); return doc; } /* ------------------------------------------------------------ */ /* ------------------------------------------------------------ */ private class NoopHandler extends DefaultHandler { Handler _next; int _depth; NoopHandler(Handler next) { this._next = next; } /* ------------------------------------------------------------ */ public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException { _depth++; } /* ------------------------------------------------------------ */ public void endElement(String uri, String localName, String qName) throws SAXException { if (_depth == 0) _parser.getXMLReader().setContentHandler(_next); else _depth--; } } /* ------------------------------------------------------------ */ /* ------------------------------------------------------------ */ private class Handler extends DefaultHandler { Node _top = new Node(null, null, null); SAXParseException _error; private Node _context = _top; private NoopHandler _noop; Handler() { _noop = new NoopHandler(this); } /* ------------------------------------------------------------ */ void clear() { _top = null; _error = null; _context = null; } /* ------------------------------------------------------------ */ public void startElement(String uri, String localName, String qName, Attributes attrs) throws SAXException { String name = (uri == null || uri.equals("")) ? qName : localName; Node node = new Node(_context, name, attrs); // check if the node matches any xpaths set? if (_xpaths != null) { String path = node.getPath(); boolean match = false; for (int i = LazyList.size(_xpaths); !match && i-- > 0;) { String xpath = (String) LazyList.get(_xpaths, i); match = path.equals(xpath) || xpath.startsWith(path) && xpath.length() > path.length() && xpath.charAt(path.length()) == '/'; } if (match) { _context.add(node); _context = node; } else { _parser.getXMLReader().setContentHandler(_noop); } } else { _context.add(node); _context = node; } ContentHandler observer = null; if (_observerMap != null) observer = (ContentHandler) _observerMap.get(name); _observers.push(observer); for (int i = 0; i < _observers.size(); i++) if (_observers.get(i) != null) ((ContentHandler) _observers.get(i)).startElement(uri, localName, qName, attrs); } /* ------------------------------------------------------------ */ public void endElement(String uri, String localName, String qName) throws SAXException { _context = _context._parent; for (int i = 0; i < _observers.size(); i++) if (_observers.get(i) != null) ((ContentHandler) _observers.get(i)).endElement(uri, localName, qName); _observers.pop(); } /* ------------------------------------------------------------ */ public void ignorableWhitespace(char buf[], int offset, int len) throws SAXException { for (int i = 0; i < _observers.size(); i++) if (_observers.get(i) != null) ((ContentHandler) _observers.get(i)).ignorableWhitespace(buf, offset, len); } /* ------------------------------------------------------------ */ public void characters(char buf[], int offset, int len) throws SAXException { _context.add(new String(buf, offset, len)); for (int i = 0; i < _observers.size(); i++) if (_observers.get(i) != null) ((ContentHandler) _observers.get(i)).characters(buf, offset, len); } /* ------------------------------------------------------------ */ public void warning(SAXParseException ex) { Log.debug(Log.EXCEPTION, ex); Log.warn("WARNING@" + getLocationString(ex) + " : " + ex.toString()); } /* ------------------------------------------------------------ */ public void error(SAXParseException ex) throws SAXException { // Save error and continue to report other errors if (_error == null) _error = ex; Log.debug(Log.EXCEPTION, ex); Log.warn("ERROR@" + getLocationString(ex) + " : " + ex.toString()); } /* ------------------------------------------------------------ */ public void fatalError(SAXParseException ex) throws SAXException { _error = ex; Log.debug(Log.EXCEPTION, ex); Log.warn("FATAL@" + getLocationString(ex) + " : " + ex.toString()); throw ex; } /* ------------------------------------------------------------ */ private String getLocationString(SAXParseException ex) { return ex.getSystemId() + " line:" + ex.getLineNumber() + " col:" + ex.getColumnNumber(); } /* ------------------------------------------------------------ */ public InputSource resolveEntity(String pid, String sid) { if (Log.isDebugEnabled()) Log.debug("resolveEntity(" + pid + ", " + sid + ")"); if (sid!=null && sid.endsWith(".dtd")) _dtd=sid; URL entity = null; if (pid != null) entity = (URL) _redirectMap.get(pid); if (entity == null) entity = (URL) _redirectMap.get(sid); if (entity == null) { String dtd = sid; if (dtd.lastIndexOf('/') >= 0) dtd = dtd.substring(dtd.lastIndexOf('/') + 1); if (Log.isDebugEnabled()) Log.debug("Can't exact match entity in redirect map, trying " + dtd); entity = (URL) _redirectMap.get(dtd); } if (entity != null) { try { InputStream in = entity.openStream(); if (Log.isDebugEnabled()) Log.debug("Redirected entity " + sid + " --> " + entity); InputSource is = new InputSource(in); is.setSystemId(sid); return is; } catch (IOException e) { Log.ignore(e); } } return null; } } /* ------------------------------------------------------------ */ /* ------------------------------------------------------------ */ /** * XML Attribute. */ public static class Attribute { private String _name; private String _value; Attribute(String n, String v) { _name = n; _value = v; } public String getName() { return _name; } public String getValue() { return _value; } } /* ------------------------------------------------------------ */ /* ------------------------------------------------------------ */ /** * XML Node. Represents an XML element with optional attributes and ordered content. */ public static class Node extends AbstractList { Node _parent; private ArrayList _list; private String _tag; private Attribute[] _attrs; private boolean _lastString = false; private String _path; /* ------------------------------------------------------------ */ Node(Node parent, String tag, Attributes attrs) { _parent = parent; _tag = tag; if (attrs != null) { _attrs = new Attribute[attrs.getLength()]; for (int i = 0; i < attrs.getLength(); i++) { String name = attrs.getLocalName(i); if (name == null || name.equals("")) name = attrs.getQName(i); _attrs[i] = new Attribute(name, attrs.getValue(i)); } } } /* ------------------------------------------------------------ */ public Node getParent() { return _parent; } /* ------------------------------------------------------------ */ public String getTag() { return _tag; } /* ------------------------------------------------------------ */ public String getPath() { if (_path == null) { if (getParent() != null && getParent().getTag() != null) _path = getParent().getPath() + "/" + _tag; else _path = "/" + _tag; } return _path; } /* ------------------------------------------------------------ */ /** * Get an array of element attributes. */ public Attribute[] getAttributes() { return _attrs; } /* ------------------------------------------------------------ */ /** * Get an element attribute. * * @return attribute or null. */ public String getAttribute(String name) { return getAttribute(name, null); } /* ------------------------------------------------------------ */ /** * Get an element attribute. * * @return attribute or null. */ public String getAttribute(String name, String dft) { if (_attrs == null || name == null) return dft; for (int i = 0; i < _attrs.length; i++) if (name.equals(_attrs[i].getName())) return _attrs[i].getValue(); return dft; } /* ------------------------------------------------------------ */ /** * Get the number of children nodes. */ public int size() { if (_list != null) return _list.size(); return 0; } /* ------------------------------------------------------------ */ /** * Get the ith child node or content. * * @return Node or String. */ public Object get(int i) { if (_list != null) return _list.get(i); return null; } /* ------------------------------------------------------------ */ /** * Get the first child node with the tag. * * @param tag * @return Node or null. */ public Node get(String tag) { if (_list != null) { for (int i = 0; i < _list.size(); i++) { Object o = _list.get(i); if (o instanceof Node) { Node n = (Node) o; if (tag.equals(n._tag)) return n; } } } return null; } /* ------------------------------------------------------------ */ public void add(int i, Object o) { if (_list == null) _list = new ArrayList(); if (o instanceof String) { if (_lastString) { int last = _list.size() - 1; _list.set(last, (String) _list.get(last) + o); } else _list.add(i, o); _lastString = true; } else { _lastString = false; _list.add(i, o); } } /* ------------------------------------------------------------ */ public void clear() { if (_list != null) _list.clear(); _list = null; } /* ------------------------------------------------------------ */ /** * Get a tag as a string. * * @param tag The tag to get * @param tags IF true, tags are included in the value. * @param trim If true, trim the value. * @return results of get(tag).toString(tags). */ public String getString(String tag, boolean tags, boolean trim) { Node node = get(tag); if (node == null) return null; String s = node.toString(tags); if (s != null && trim) s = s.trim(); return s; } /* ------------------------------------------------------------ */ public synchronized String toString() { return toString(true); } /* ------------------------------------------------------------ */ /** * Convert to a string. * * @param tag If false, only _content is shown. */ public synchronized String toString(boolean tag) { StringBuilder buf = new StringBuilder(); toString(buf, tag); return buf.toString(); } /* ------------------------------------------------------------ */ /** * Convert to a string. * * @param tag If false, only _content is shown. */ public synchronized String toString(boolean tag, boolean trim) { String s = toString(tag); if (s != null && trim) s = s.trim(); return s; } /* ------------------------------------------------------------ */ private synchronized void toString(StringBuilder buf, boolean tag) { if (tag) { buf.append("<"); buf.append(_tag); if (_attrs != null) { for (int i = 0; i < _attrs.length; i++) { buf.append(' '); buf.append(_attrs[i].getName()); buf.append("=\""); buf.append(_attrs[i].getValue()); buf.append("\""); } } } if (_list != null) { if (tag) buf.append(">"); for (int i = 0; i < _list.size(); i++) { Object o = _list.get(i); if (o == null) continue; if (o instanceof Node) ((Node) o).toString(buf, tag); else buf.append(o.toString()); } if (tag) { buf.append(""); } } else if (tag) buf.append("/>"); } /* ------------------------------------------------------------ */ /** * Iterator over named child nodes. * * @param tag The tag of the nodes. * @return Iterator over all child nodes with the specified tag. */ public Iterator iterator(final String tag) { return new Iterator() { int c = 0; Node _node; /* -------------------------------------------------- */ public boolean hasNext() { if (_node != null) return true; while (_list != null && c < _list.size()) { Object o = _list.get(c); if (o instanceof Node) { Node n = (Node) o; if (tag.equals(n._tag)) { _node = n; return true; } } c++; } return false; } /* -------------------------------------------------- */ public Object next() { try { if (hasNext()) return _node; throw new NoSuchElementException(); } finally { _node = null; c++; } } /* -------------------------------------------------- */ public void remove() { throw new UnsupportedOperationException("Not supported"); } }; } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy