All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.caucho.xml2.AbstractParser Maven / Gradle / Ivy

/*
 * Copyright (c) 1998-2018 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *   Free SoftwareFoundation, Inc.
 *   59 Temple Place, Suite 330
 *   Boston, MA 02111-1307  USA
 *
 * @author Scott Ferguson
 */

package com.caucho.xml2;

import com.caucho.server.util.CauchoSystem;
import com.caucho.util.L10N;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.vfs.Vfs;
import com.caucho.vfs.VfsStream;

import org.w3c.dom.Document;
import org.xml.sax.*;
import org.xml.sax.ext.LexicalHandler;

import javax.xml.parsers.DocumentBuilderFactory;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
import java.util.Hashtable;
import java.util.Locale;
import java.util.logging.Logger;

abstract public class AbstractParser implements XMLReader, Parser
{
  // Class-wide logger and localized-message helper.
  static final Logger log = Logger.getLogger(AbstractParser.class.getName());
  static final L10N L = new L10N(AbstractParser.class);

  // Attribute type names; filled in by the static initializer of this class.
  static Hashtable _attrTypes = new Hashtable();
  // Shared entity table; the constructor installs it as each parser's _entities.
  static Entities _xmlEntities = new XmlEntities();

  // True if text and cdata nodes should be combined.
  boolean _isCoalescing = true;
  
  // Parsing policy flags.  clear() resets all of the flags in this group.
  boolean _optionalTags = true;
  boolean _skipWhitespace;
  boolean _skipComments;
  boolean _strictComments;
  boolean _strictAttributes;
  boolean _entitiesAsText = false;
  boolean _expandEntities = true;
  boolean _strictCharacters;
  boolean _strictXml;
  boolean _singleTopElement;

  // Namespace options.  Note that clear() resets _isNamespacePrefixes to
  // false, which differs from the initial value given here.
  boolean _isNamespaceAware = true;
  boolean _isNamespacePrefixes = true;
  boolean _isSAXNamespaces = false;
  
  // Derived by init() from the namespace options above.
  boolean _isXmlnsPrefix;
  boolean _isXmlnsAttribute;
  
  // True if the XML should be validated.
  boolean _isValidating = false;

  // _isStaticEncoding is set when parsing from a character stream or a string.
  boolean _isStaticEncoding = false;
  // The default encoding if none is specified.
  String _defaultEncoding = "UTF-8";

  // sax stuff
  ContentHandler _contentHandler;
  EntityResolver _entityResolver;
  DTDHandler _dtdHandler;
  LexicalHandler _lexicalHandler;
  ErrorHandler _errorHandler;
  Locale _locale;
  
  // Entity table, owner document and dtd used by this parser instance.
  Entities _entities;
  QDocument _owner;
  QDocumentType _dtd;

  // Content handler used by the parseDocument methods to build the DOM;
  // created lazily.
  DOMBuilder _builder;

  // Base path against which openStream() resolves system ids.
  Path _searchPath;
  
  // Identification of the input currently being parsed.
  String _publicId;
  String _systemId;
  String _filename;
  // NOTE(review): presumably the current 1-based line number, maintained
  // by subclasses -- it is not updated anywhere in this class.
  int _line = 1;

  /**
   * Creates a new parser with a new dtd.  Delegates to the dtd
   * constructor with a null dtd, which makes it create a fresh one.
   */
  AbstractParser()
  {
    this(null);
  }

  /**
   * Creates a new parser using the given dtd.  The parser starts with
   * the shared XML entity table.
   *
   * @param dtd the parser's dtd; if null, a new QDocumentType is
   * created in its place.
   */
  AbstractParser(QDocumentType dtd)
  {
    _entities = _xmlEntities;

    _dtd = dtd != null ? dtd : new QDocumentType(null);
  }

  /**
   * Resets the parsing options to their cleared values.  Handlers,
   * the search path and the validation flag are not touched.
   */
  void clear()
  {
    // namespace handling
    _isNamespaceAware = true;
    _isNamespacePrefixes = false;
    _isSAXNamespaces = false;

    // text and entity handling
    _isCoalescing = true;
    _expandEntities = true;
    _entitiesAsText = false;
    _skipWhitespace = false;
    _skipComments = false;

    // strictness policy
    _optionalTags = true;
    _strictComments = false;
    _strictAttributes = false;
    _strictCharacters = false;
    _strictXml = false;
    _singleTopElement = false;

    // encoding
    _isStaticEncoding = false;
    _defaultEncoding = "UTF-8";
  }

  /**
   * Derives the xmlns flags from the namespace options.  Called at
   * the start of the parse entry points.
   */
  void init()
  {
    _isXmlnsAttribute = ! _isNamespaceAware || _isNamespacePrefixes;

    // An earlier form of this test also included _isSAXNamespaces;
    // the current form does not.
    _isXmlnsPrefix = _isNamespacePrefixes || _isNamespaceAware;
  }

  /**
   * Sets the owner document.
   *
   * @param doc the document recorded as this parser's owner
   */
  public void setOwner(QDocument doc)
  {
    this._owner = doc;
  }

  /**
   * Copies the parsing options of a document builder factory into
   * this parser, creating the DOM builder if it does not exist yet.
   * Namespace prefixes are always turned off.
   *
   * @param factory the factory supplying the options
   */
  public void setConfig(DocumentBuilderFactory factory)
  {
    if (_builder == null) {
      _builder = new DOMBuilder();
    }

    // text handling options
    this._isCoalescing = factory.isCoalescing();
    setExpandEntities(factory.isExpandEntityReferences());
    setSkipComments(factory.isIgnoringComments());
    setSkipWhitespace(factory.isIgnoringElementContentWhitespace());

    // namespace and validation options
    setNamespaceAware(factory.isNamespaceAware());
    setNamespacePrefixes(false);
    setValidating(factory.isValidating());
  }
    
  /**
   * Sets the entities-as-text option.
   *
   * @param entitiesAsText the new value of the option
   */
  public void setEntitiesAsText(boolean entitiesAsText)
  {
    this._entitiesAsText = entitiesAsText;
  }

  /**
   * Returns the entities-as-text option.
   */
  public boolean getEntitiesAsText()
  {
    return this._entitiesAsText;
  }

  /**
   * Sets the expand-entities option.
   *
   * @param expandEntities the new value of the option
   */
  public void setExpandEntities(boolean expandEntities)
  {
    this._expandEntities = expandEntities;
  }

  /**
   * Chooses whether comments are skipped.  When false, events are
   * generated for the comments.
   *
   * @param skipComments true to skip comments
   */
  public void setSkipComments(boolean skipComments)
  {
    this._skipComments = skipComments;
  }

  /**
   * Chooses whether ignorable-whitespace is skipped.
   *
   * @param skipWhitespace true to skip ignorable-whitespace
   */
  public void setSkipWhitespace(boolean skipWhitespace)
  {
    this._skipWhitespace = skipWhitespace;
  }

  /**
   * Reports whether text and cdata nodes will be combined.
   *
   * @return true if the nodes are combined
   */
  public boolean isCoalescing()
  {
    return this._isCoalescing;
  }

  /**
   * Chooses whether text and cdata nodes should be combined.
   *
   * @param isCoalescing true to combine the nodes
   */
  public void setCoalescing(boolean isCoalescing)
  {
    this._isCoalescing = isCoalescing;
  }

  /**
   * Reports whether the XML should be validated.
   *
   * @return true if validation is requested
   */
  public boolean isValidating()
  {
    return this._isValidating;
  }

  /**
   * Chooses whether the XML should be validated.
   *
   * @param isValidating true to request validation
   */
  public void setValidating(boolean isValidating)
  {
    this._isValidating = isValidating;
  }

  /**
   * Reports whether the parsing is namespace aware.
   *
   * @return true if namespace aware
   */
  public boolean isNamespaceAware()
  {
    return this._isNamespaceAware;
  }

  /**
   * Chooses whether the parsing is namespace aware.
   *
   * @param isNamespaceAware true for namespace-aware parsing
   */
  public void setNamespaceAware(boolean isNamespaceAware)
  {
    this._isNamespaceAware = isNamespaceAware;
  }

  /**
   * Reports whether the parsing uses sax namespaces.
   *
   * @return true if sax namespaces are used
   */
  public boolean isSAXNamespaces()
  {
    return this._isSAXNamespaces;
  }

  /**
   * Chooses whether the parsing uses sax namespaces.
   *
   * @param isNamespaces true to use sax namespaces
   */
  public void setSAXNamespaces(boolean isNamespaces)
  {
    this._isSAXNamespaces = isNamespaces;
  }

  /**
   * Reports whether the parsing uses namespace prefixes.
   *
   * @return true if namespace prefixes are used
   */
  public boolean isNamespacePrefixes()
  {
    return this._isNamespacePrefixes;
  }

  /**
   * Chooses whether the parsing uses namespace prefixes.
   *
   * @param isNamespaces true to use namespace prefixes
   */
  public void setNamespacePrefixes(boolean isNamespaces)
  {
    this._isNamespacePrefixes = isNamespaces;
  }

  /**
   * Reports whether comments are skipped.
   *
   * @return true if comments are skipped
   */
  public boolean getSkipComments()
  {
    return this._skipComments;
  }

  /**
   * Sets the encoding to use if none is specified.
   *
   * @param encoding the default encoding
   */
  public void setDefaultEncoding(String encoding)
  {
    this._defaultEncoding = encoding;
  }

  /**
   * Returns the encoding used if none is specified.
   *
   * @return the default encoding
   */
  public String getDefaultEncoding()
  {
    return this._defaultEncoding;
  }

  /**
   * Returns the value of a SAX property.  Only the lexical handler
   * has a value; the dom-node and xml-string properties are recognized
   * but always null.
   *
   * @param name the property URI
   *
   * @return the property value, possibly null
   *
   * @throws SAXNotRecognizedException for any other property name
   */
  public Object getProperty(String name)
    throws SAXNotRecognizedException
  {
    if (name.equals("http://xml.org/sax/properties/lexical-handler"))
      return _lexicalHandler;

    if (name.equals("http://xml.org/sax/properties/dom-node"))
      return null;

    if (name.equals("http://xml.org/sax/properties/xml-string"))
      return null;

    throw new SAXNotRecognizedException(name);
  }

  /**
   * Sets a SAX property.  The lexical handler is the only settable
   * property; it is accepted under two names.
   *
   * @param name the property URI
   * @param obj the new value, cast to LexicalHandler
   *
   * @throws SAXNotSupportedException for any other property name
   */
  public void setProperty(String name, Object obj)
    throws SAXNotSupportedException
  {
    boolean isLexicalHandler
      = (name.equals("http://xml.org/sax/properties/lexical-handler")
         || name.equals("http://xml.org/sax/handlers/LexicalHandler"));

    if (! isLexicalHandler)
      throw new SAXNotSupportedException(name);

    _lexicalHandler = (LexicalHandler) obj;
  }

  /**
   * Returns the state of a SAX feature.
   *
   * @param name the feature URI
   *
   * @return the current value of the feature
   *
   * @throws SAXNotRecognizedException if the feature is not known
   */
  public boolean getFeature(String name)
    throws SAXNotRecognizedException
  {
    // NOTE(review): setFeature stores this feature in _isNamespaceAware
    // while this reads _isSAXNamespaces -- confirm that is intended.
    if (name.equals("http://xml.org/sax/features/namespaces"))
      return _isSAXNamespaces;

    if (name.equals("http://xml.org/sax/features/namespace-prefixes"))
      return _isNamespacePrefixes;

    // fixed-value features
    if (name.equals("http://xml.org/sax/features/string-interning"))
      return true;

    if (name.equals("http://xml.org/sax/features/validation"))
      return _isValidating;

    if (name.equals("http://xml.org/sax/features/external-general-entities"))
      return true;

    if (name.equals("http://xml.org/sax/features/external-parameter-entities"))
      return false;

    if (name.equals("http://caucho.com/xml/features/skip-comments"))
      return _skipComments;

    throw new SAXNotRecognizedException(name);
  }

  /**
   * Sets the state of a SAX feature.
   *
   * @param name the feature URI
   * @param value the new value of the feature
   *
   * @throws SAXNotSupportedException if the feature cannot be set
   */
  public void setFeature(String name, boolean value)
    throws SAXNotSupportedException
  {
    if (name.equals("http://xml.org/sax/features/namespaces")) {
      _isNamespaceAware = value;
      return;
    }

    if (name.equals("http://xml.org/sax/features/namespace-prefixes")) {
      // setting namespace-prefixes, even if false, sets namespace-aware
      // see xml/032b
      _isNamespaceAware = true;
      _isNamespacePrefixes = value;
      return;
    }

    if (name.equals("http://caucho.com/xml/features/skip-comments")) {
      _skipComments = value;
      return;
    }

    if (name.equals("http://xml.org/sax/features/validation")) {
      _isValidating = value;
      return;
    }

    throw new SAXNotSupportedException(name);
  }

  /**
   * Sets the lexical handler callback.
   *
   * @param handler the new lexical handler
   */
  public void setLexicalHandler(LexicalHandler handler)
  {
    this._lexicalHandler = handler;
  }

  /**
   * Sets the callback object used to find files.
   *
   * @param resolver the object to find files.
   */
  public void setEntityResolver(EntityResolver resolver)
  {
    this._entityResolver = resolver;
  }

  /**
   * Gets the callback object that finds files from system ids.
   *
   * @return the resolver to find files.
   */
  public EntityResolver getEntityResolver()
  {
    return this._entityResolver;
  }

  /**
   * Sets the DTD handler callback.
   *
   * @param handler the new DTD handler
   */
  public void setDTDHandler(DTDHandler handler)
  {
    this._dtdHandler = handler;
  }

  /**
   * Returns the DTD handler callback.
   */
  public DTDHandler getDTDHandler()
  {
    return this._dtdHandler;
  }

  /**
   * Sets the content handler callback.
   *
   * @param handler the new content handler
   */
  public void setContentHandler(ContentHandler handler)
  {
    this._contentHandler = handler;
  }

  /**
   * Returns the content handler callback.
   */
  public ContentHandler getContentHandler()
  {
    return this._contentHandler;
  }

  /**
   * Configures the document handler callback.  A non-null handler is
   * wrapped in a ContentHandlerAdapter and installed as the content
   * handler; null removes the content handler.
   *
   * @param handler the new document handler.
   */
  public void setDocumentHandler(DocumentHandler handler)
  {
    _contentHandler = (handler == null
                       ? null
                       : new ContentHandlerAdapter(handler));
  }

  /**
   * Sets the error handler callback.
   *
   * @param handler the new error handler
   */
  public void setErrorHandler(ErrorHandler handler)
  {
    this._errorHandler = handler;
  }

  /**
   * Returns the error handler callback.
   */
  public ErrorHandler getErrorHandler()
  {
    return this._errorHandler;
  }

  /**
   * Sets the locale.
   *
   * @param locale the new locale
   */
  public void setLocale(Locale locale)
  {
    this._locale = locale;
  }

  /**
   * SAX parsing from a SAX InputSource.
   *
   * <p>The input is taken from the source's byte stream if it has one,
   * else from its character stream, else it is opened from the
   * source's system id.  The stream is closed when parsing ends.
   *
   * @param source source containing the XML
   *
   * @throws FileNotFoundException if the source has no byte stream,
   * character stream or system id
   */
  public void parse(InputSource source)
    throws IOException, SAXException
  {
    init();
    
    if (_searchPath == null) {
      if (source.getSystemId() != null)
        _searchPath = Vfs.lookup(source.getSystemId()).getParent();
    }

    _systemId = source.getSystemId();
    _publicId = source.getPublicId();
    ReadStream stream;
    String encoding = null;

    if (source.getByteStream() != null) {
      stream = Vfs.openRead(source.getByteStream());
      encoding = source.getEncoding();
    }
    else if (source.getCharacterStream() != null) {
      // character input has a fixed encoding, whatever the source says
      encoding = "UTF-8";
      _isStaticEncoding = true;
      stream = Vfs.openRead(source.getCharacterStream());
    }
    else if (source.getSystemId() != null) {
      // top-level open without an entity resolver
      InputStream is = openStream(source.getSystemId(),
                                  source.getPublicId(),
                                  null,
                                  true);
      stream = Vfs.openRead(is);
      encoding = source.getEncoding();
    }
    else
      throw new FileNotFoundException(L.l("invalid InputSource"));

    try {
      // set the encoding inside the try so that a failure here still
      // closes the stream that was just opened
      if (encoding != null)
        stream.setEncoding(encoding);

      parseInt(stream);
    } finally {
      stream.close();
    }
  }
  
  /**
   * SAX parsing from an InputStream.
   *
   * <p>A ReadStream is parsed directly and is not closed here; its
   * path, when it has one, supplies the system id, the filename and
   * (unless already set) the search path.  Any other stream is wrapped
   * in a ReadStream which is closed after parsing.  Without a path the
   * system id is "stream".
   *
   * @param is stream containing the XML
   */
  public void parse(InputStream is)
    throws IOException, SAXException
  {
    init();

    _systemId = "stream";
    
    if (is instanceof ReadStream) {
      ReadStream rs = (ReadStream) is;
      Path path = rs.getPath();

      // the path is checked before it is used, so a stream without a
      // path keeps the default system id
      if (path != null) {
        _systemId = path.getURL();
        _filename = path.getUserPath();

        if (_searchPath == null)
          _searchPath = path.getParent();
      }

      parseInt(rs);
    }
    else {
      ReadStream rs = VfsStream.openRead(is);
      try {
        parseInt(rs);
      } finally {
        if (rs != is)
          rs.close();
      }
    }
  }
  
  /**
   * SAX parsing from an InputStream with an explicit system id.
   * Initializes the parser and hands the work to parseImpl.
   *
   * @param is stream containing the XML
   * @param systemId the system id for the stream, may be null
   */
  public void parse(InputStream is, String systemId)
    throws IOException, SAXException
  {
    this.init();

    this.parseImpl(is, systemId);
  }
  
  /**
   * Parses from an InputStream once the system id and search path
   * have been worked out.  This method does not call init().
   *
   * <p>A ReadStream is parsed directly and is not closed here.  Any
   * other stream is wrapped in a ReadStream which is closed after
   * parsing.  When neither the system id nor a stream path is
   * available the system id is "anonymous.xml".
   *
   * @param is stream containing the XML
   * @param systemId the system id for the stream, may be null
   */
  public void parseImpl(InputStream is, String systemId)
    throws IOException, SAXException
  {
    if (is instanceof ReadStream) {
      ReadStream rs = (ReadStream) is;
      Path path = rs.getPath();
      
      // an existing search path always wins
      if (_searchPath != null) {
      }
      else if (path != null) {
        _searchPath = path.getParent();
        if (systemId != null)
          _searchPath = _searchPath.lookup(systemId).getParent();
      }
      else if (systemId != null)
        _searchPath = Vfs.lookup(systemId).getParent();

      if (systemId == null) {
        // the stream may have no path, so fall back to the same
        // placeholder used for plain input streams
        systemId = path != null ? path.getURL() : "anonymous.xml";
        _filename = rs.getUserPath();
      }
      else
        _filename = systemId;

      _systemId = systemId;
      
      parseInt(rs);
    }
    else {
      if (systemId == null) {
        _systemId = "anonymous.xml";
      }
      else {
        _searchPath = Vfs.lookup(systemId).getParent();
        _systemId = systemId;
      }

      ReadStream rs = VfsStream.openRead(is);
      try {
        parseInt(rs);
      } finally {
        if (rs != is)
          rs.close();
      }
    }
  }

  /**
   * SAX parsing from a file path.  The file is opened as a top-level
   * stream and closed when parsing ends.
   *
   * @param systemId path to the file containing the XML
   */
  public void parse(String systemId)
    throws IOException, SAXException
  {
    InputStream in = openTopStream(systemId, null);

    try {
      parse(in);
    }
    finally {
      in.close();
    }
  }
  
  /**
   * SAX parsing from a VFS path.  The path's parent becomes the search
   * path unless one is already set.
   *
   * @param path the VFS path containing the XML
   */
  public void parse(Path path)
    throws IOException, SAXException
  {
    init();

    if (_searchPath == null) {
      _searchPath = path.getParent();
    }

    ReadStream in = path.openRead();

    try {
      parseInt(in);
    }
    finally {
      in.close();
    }
  }

  /**
   * SAX parsing of XML held in a string.
   *
   * @param string string containing the XML
   */
  public void parseString(String string)
    throws IOException, SAXException
  {
    init();

    ReadStream in = Vfs.openString(string);

    try {
      parseInt(in);
    }
    finally {
      in.close();
    }
  }
  
  /**
   * Parses a SAX InputSource into a new DOM document.  The DOM
   * builder is installed as the content handler for the parse.
   *
   * @param source SAX InputSource containing the XML data.
   *
   * @return the parsed document.
   */
  public Document parseDocument(InputSource source)
    throws IOException, SAXException
  {
    init();

    QDocument document = new QDocument();

    if (_builder == null) {
      _builder = new DOMBuilder();
    }

    _builder.init(document);
    setOwner(document);

    document.setSystemId(source.getSystemId());

    // hand the current options to the builder
    _builder.setSystemId(source.getSystemId());
    _builder.setStrictXML(_strictXml);
    _builder.setCoalescing(_isCoalescing);
    _builder.setSkipWhitespace(_skipWhitespace);
    _contentHandler = _builder;

    parse(source);

    return document;
  }

  /**
   * Parses the file at a system path into a DOM document.  The file
   * is opened as a top-level stream and closed when parsing ends.
   *
   * @param systemId path to the XML data.
   *
   * @return the parsed document.
   */
  public Document parseDocument(String systemId)
    throws IOException, SAXException
  {
    InputStream in = openTopStream(systemId, null);

    try {
      return parseDocument(in);
    }
    finally {
      in.close();
    }
  }

  /**
   * Parses the XML at a VFS path into a DOM document.  The path's
   * parent becomes the search path unless one is already set.
   *
   * @param path the VFS path containing the XML document.
   *
   * @return the parsed document.
   */
  public Document parseDocument(Path path)
    throws IOException, SAXException
  {
    if (_searchPath == null) {
      _searchPath = path.getParent();
    }

    ReadStream in = path.openRead();

    try {
      return parseDocument(in);
    }
    finally {
      in.close();
    }
  }

  /**
   * Parses an input stream into a DOM document, without a system id.
   *
   * @param is the input stream containing the XML
   *
   * @return the parsed document.
   */
  public Document parseDocument(InputStream is)
    throws IOException, SAXException
  {
    String systemId = null;

    return parseDocument(is, systemId);
  }

  /**
   * Parses an input stream into a new DOM document.
   *
   * @param is the input stream containing the XML
   * @param systemId the URL of the stream.
   *
   * @return the parsed document.
   */
  public Document parseDocument(InputStream is, String systemId)
    throws IOException, SAXException
  {
    init();

    QDocument document = new QDocument();

    parseDocument(document, is, systemId);

    return document;
  }

  /**
   * Parses an input stream into an existing document.  The document
   * becomes the owner and the DOM builder is installed as the content
   * handler.  This method does not call init().
   *
   * @param doc the document to fill
   * @param is the input stream containing the XML
   * @param systemId the URL of the stream.
   */
  public void parseDocument(QDocument doc, InputStream is, String systemId)
    throws IOException, SAXException
  {
    _owner = doc;

    if (_builder == null) {
      _builder = new DOMBuilder();
    }

    // hand the document and the current options to the builder
    _builder.init(doc);
    _builder.setSystemId(systemId);
    _builder.setCoalescing(_isCoalescing);
    _builder.setSkipWhitespace(_skipWhitespace);
    _contentHandler = _builder;

    parseImpl(is, systemId);
  }

  /**
   * Parses XML held in a string into a DOM document.  The encoding is
   * marked as static for the parse.
   *
   * @param string the string containing the XML
   *
   * @return the parsed document.
   */
  public Document parseDocumentString(String string)
    throws IOException, SAXException
  {
    ReadStream in = Vfs.openString(string);

    _isStaticEncoding = true;

    try {
      return parseDocument(in);
    }
    finally {
      in.close();
    }
  }

  /**
   * Looks up an input stream from the system id, using the parser's
   * own entity resolver.  The stream is not treated as top-level.
   *
   * @param systemId the system id to open
   * @param publicId the public id passed to the entity resolver
   */
  public InputStream openStream(String systemId, String publicId)
    throws IOException, SAXException
  {
    boolean isTop = false;

    return openStream(systemId, publicId, _entityResolver, isTop);
  }

  /**
   * Looks up a top-level input stream from the system id, using the
   * parser's own entity resolver.
   *
   * @param systemId the system id to open
   * @param publicId the public id passed to the entity resolver
   */
  public InputStream openTopStream(String systemId, String publicId)
    throws IOException, SAXException
  {
    boolean isTop = true;

    return openStream(systemId, publicId, _entityResolver, isTop);
  }

  /**
   * Looks up an input stream from the system id, using the given
   * entity resolver.  The stream is not treated as top-level.
   *
   * @param systemId the system id to open
   * @param publicId the public id passed to the entity resolver
   * @param entityResolver the resolver to consult, may be null
   */
  public InputStream openStream(String systemId, String publicId,
                                EntityResolver entityResolver)
    throws IOException, SAXException
  {
    boolean isTop = false;

    return openStream(systemId, publicId, entityResolver, isTop);
  }

  /**
   * Looks up an input stream from the system id.
   *
   * <p>A system id without a scheme, or one starting with '/', is
   * first normalized against the search path.  The entity resolver,
   * if any, is then asked for the entity; if it supplies a source,
   * that source is opened.  Otherwise the system id is opened through
   * the search path or the VFS.
   *
   * @param systemId the system id to open
   * @param publicId the public id passed to the entity resolver
   * @param entityResolver the resolver to consult, may be null
   * @param isTop true for the top-level document; only nested
   * streams are subject to the remote-URL check
   *
   * @throws RemoteURLException if a nested system id uses a scheme
   * other than file: or jar:
   */
  protected InputStream openStream(String systemId, String publicId,
                                   EntityResolver entityResolver,
                                   boolean isTop)
    throws IOException, SAXException
  {
    int colon = systemId.indexOf(':');
    int slash = systemId.indexOf('/');
    
    // a colon before any slash is taken as a URL scheme
    boolean isAbsolute = colon > 0 && (colon < slash || slash < 0);
    
    if (slash == 0 || ! isAbsolute) {
      Path pwd;

      if (_searchPath != null)
        pwd = _searchPath;
      else
        pwd = Vfs.lookup(systemId).getParent();
      
      String newId = pwd.lookup(systemId).getURL();
      if (! newId.startsWith("error:"))
        systemId = newId;
      else {
        // fall back to the directory of the current document; no
        // system id may be recorded yet when this is the first stream
        // opened
        int tail = _systemId != null ? _systemId.lastIndexOf('/') : -1;
        if (tail >= 0)
          systemId = _systemId.substring(0, tail + 1) + systemId;
      }
    }

    // xml/03c5 -- must be after the normalization
    if (entityResolver != null) {
      InputSource source = entityResolver.resolveEntity(publicId, systemId);

      if (source != null) {
        _filename = systemId;
        _systemId = systemId;

        return openSource(source);
      }
    }

    // on Windows, rewrite file:X:... as /X:... and treat it as local
    int ch;
    if (CauchoSystem.isWindows() && systemId.startsWith("file:") &&
        systemId.length() > 7 && systemId.charAt(6) == ':' &&
        (((ch = systemId.charAt(5)) >= 'a' && ch <= 'z') ||
         ch >= 'A' && ch <= 'Z')) {
      colon = 1;
      isAbsolute = false;
      systemId = "/" + systemId.substring(5);
    }

    if (! isTop &&
        isAbsolute && ! systemId.startsWith("file:") &&
        ! systemId.startsWith("jar:") &&
        ! (colon == 1 && CauchoSystem.isWindows())) {
      throw new RemoteURLException(L.l("URL `{0}' was not opened because it is a remote URL.  Any URL scheme other than file: must be handled by a custom entity resolver.",
                                       systemId));
    }
    else if (_searchPath != null) {
      return _searchPath.lookup(systemId).openRead();
    }
    else
      return Vfs.lookup(systemId).openRead();
  }

  /**
   * Opens the stream for an InputSource.  The byte stream is
   * preferred, then the character stream, then the system id.
   *
   * @param source the source to open
   *
   * @throws FileNotFoundException if the source has none of the three
   */
  protected InputStream openSource(InputSource source)
    throws IOException, SAXException
  {
    if (source.getByteStream() != null)
      return source.getByteStream();

    if (source.getCharacterStream() != null)
      return Vfs.openRead(source.getCharacterStream());

    if (source.getSystemId() != null)
      return Vfs.openRead(source.getSystemId());

    throw new FileNotFoundException(L.l("invalid InputSource {0}", source));
  }

  /**
   * Parse the document from a read stream.  The parse and
   * parseDocument entry points of this class all end up here;
   * subclasses supply the actual parsing.
   *
   * @param is read stream to parse from.
   *
   * @return The parsed document.  NOTE(review): the callers in this
   * class ignore the return value -- confirm what implementations
   * return for pure SAX parsing.
   */
  abstract Document parseInt(ReadStream is)
    throws IOException, SAXException;
  
  static {
    // each attribute type name is registered under itself
    String []typeNames = {
      "CDATA", "ID", "IDREF", "IDREFS",
      "ENTITY", "ENTITIES", "NMTOKEN", "NMTOKENS"
    };

    for (int i = 0; i < typeNames.length; i++)
      _attrTypes.put(typeNames[i], typeNames[i]);
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy