All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.caucho.relaxng.CompactParser Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 1998-2012 Caucho Technology -- all rights reserved
 *
 * This file is part of Resin(R) Open Source
 *
 * Each copy or derived work must preserve the copyright notice and this
 * notice unmodified.
 *
 * Resin Open Source is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * Resin Open Source is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE, or any warranty
 * of NON-INFRINGEMENT.  See the GNU General Public License for more
 * details.
 *
 * You should have received a copy of the GNU General Public License
 * along with Resin Open Source; if not, write to the
 *
 *   Free Software Foundation, Inc.
 *   59 Temple Place, Suite 330
 *   Boston, MA 02111-1307  USA
 *
 * @author Scott Ferguson
 */

package com.caucho.relaxng;

import com.caucho.relaxng.pattern.*;
import com.caucho.util.CharBuffer;
import com.caucho.util.IntMap;
import com.caucho.util.L10N;
import com.caucho.vfs.Path;
import com.caucho.vfs.ReadStream;
import com.caucho.vfs.Vfs;
import com.caucho.xml.QName;
import com.caucho.xml.XmlChar;

import org.xml.sax.InputSource;
import org.xml.sax.SAXException;

import java.io.IOException;
import java.io.InputStream;
import java.util.HashMap;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
 * Builder for the relax.
 */
public class CompactParser {
  private static final L10N L = new L10N(CompactParser.class);
  private static final Logger log
    = Logger.getLogger(CompactParser.class.getName());

  private static final boolean []NAME_CHAR;

  private static final int IDENTIFIER = 256;
  
  private static final int NAMESPACE = IDENTIFIER + 1;
  private static final int DEFAULT = NAMESPACE + 1;
  
  private static final int START = DEFAULT + 1;
  private static final int DIV = START + 1;
  private static final int INCLUDE = DIV + 1;
  
  private static final int ELEMENT = INCLUDE + 1;
  private static final int ATTRIBUTE = ELEMENT + 1;
  
  private static final int TEXT = ATTRIBUTE + 1;
  private static final int STRING = TEXT + 1;
  private static final int TOKEN = STRING + 1;
  private static final int LITERAL = TOKEN + 1;
  
  private static final int EMPTY = LITERAL + 1;
  
  private static final int COMMENT = EMPTY + 1;

  private static final IntMap _tokenMap = new IntMap();

  private GrammarPattern _grammar;
  private Pattern _pattern;

  private String _ns = "";
  private HashMap _nsMap;

  private Path _pwd;
  private ReadStream _is;
  private String _filename;
  private int _line;

  private int _peekToken = -1;

  private final byte []_buffer = new byte[256];
  private int _offset;
  private int _length;

  private CharBuffer _cb = new CharBuffer(256);
  private String _lexeme;

  private int _generatedId;

  CompactParser()
  {
  }

  /**
   * Gets the root pattern.
   */
  public GrammarPattern getGrammar()
  {
    return _grammar;
  }

  public void setGeneratedId(int id)
  {
    _generatedId = id;
  }

  public String generateId()
  {
    _cb.setLength(0);
    _cb.append("__caucho_");
    _cb.append(_generatedId++);
    
    return _cb.toString();
  }

  /**
   * Parses the relax file.
   */
  public void parse(InputSource source)
    throws SAXException, IOException, RelaxException
  {
    InputStream is = source.getByteStream();

    _pwd = null;

    if (is instanceof ReadStream) {
      _is = (ReadStream) is;
      _filename = _is.getUserPath();
      _pwd = _is.getPath().getParent();
    }
    if (is != null)
      _is = Vfs.openRead(is);
    else
      _is = Vfs.openRead(source.getSystemId());

    if (_filename == null)
      _filename = source.getSystemId();
    _line = 1;

    if (_pwd == null)
      _pwd = Vfs.lookup(_filename).getParent();

    try {
      parse();
    } catch (RelaxException e) {
      log.log(Level.FINER, e.toString(), e);
      
      // xml/1196
      //throw new SAXException(_filename + ":" + _line + ": " + e.getMessage(), e);
      throw new SAXException(_filename + ":" + _line + ": " + e.getMessage());
    } finally {
      _is.close();
    }
  }

  /**
   * Internal parser.
   */
  private void parse()
    throws SAXException, IOException, RelaxException
  {
    _grammar = new GrammarPattern();
    _nsMap = new HashMap();

    parseDeclarations();
    
    int token = parseToken();
    _peekToken = token;

    switch (token) {
    case START:
    case IDENTIFIER:
    case INCLUDE:
      parseGrammar(_grammar);
      break;

    case COMMENT:
      break;

    default:
      _grammar.setStart(parsePattern(_grammar));
      break;
    }
  }

  /**
   * Parses declarations.
   */
  private void parseDeclarations()
    throws SAXException, IOException, RelaxException
  {
    while (true) {
      int token = parseToken();

      _peekToken = token;
      
      switch (token) {
      case DEFAULT:
      case NAMESPACE:
        parseNamespace();
        break;
        
      case COMMENT:
        break;

      default:
        return;
      }
    }
  }
  
  /**
   * Parses the namespace declaration
   */
  private void parseNamespace()
    throws SAXException, IOException, RelaxException
  {
    boolean isDefault = false;
    int token = parseToken();

    if (token == DEFAULT) {
      isDefault = true;
      token = parseToken();
    }

    if (token != NAMESPACE)
      throw error(L.l("expected 'namespace' at {0}", errorToken(token)));
      
    token = parseToken();

    if (token != IDENTIFIER)
      throw error(L.l("expected identifier at {0}", errorToken(token)));

    String prefix = _lexeme;

    token = parseToken();
    
    if (token != '=')
      throw error(L.l("expected '=' at {0}", errorToken(token)));

    String value = parseLiteral();

    if (isDefault)
      _ns = value;

    _nsMap.put(prefix, value);
  }

  /**
   * Parses top-level grammar stuff.
   */
  private void parseGrammar(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException, RelaxException
  {
    while (true) {
      int token = parseToken();

      switch (token) {
      case -1:
        return;

      case COMMENT:
        break;

      case START:
        int next = parseToken();
        if (next == '=')
          grammar.setStart(parsePattern(grammar));
        else
          throw error(L.l("expected '=' at {0}", errorToken(next)));
        break;

      case IDENTIFIER:
        String name = _lexeme;
        // Pattern oldPattern = grammar.getDefinition(name);
        // pattern = new GroupPattern();
        next = parseToken();
        if (next == '=') {
          if (grammar.getDefinition(name) != null)
            throw error(L.l("duplicate definition of {0}", name));
            
          grammar.setDefinition(name, parsePattern(grammar));
        }
        else
          throw error(L.l("expected '=' at {0}", errorToken(next)));
        break;

      case INCLUDE:
        parseInclude(grammar);
        break;

      default:
        throw error(L.l("unexpected token {0}", errorToken(token)));
      }
    }
  }
        
  private void parseInclude(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    String uri = parseLiteral();

    Path sub = _pwd.lookup(uri);

    ReadStream is = null;
    
    try {
      is = sub.openRead();

      InputSource source = new InputSource(is);
      source.setSystemId(uri);
      
      CompactParser parser = new CompactParser();
      parser.setGeneratedId(_generatedId);
      parser.parse(source);

      GrammarPattern subGrammar = parser.getGrammar();

      _generatedId = parser._generatedId;

      grammar.mergeInclude(subGrammar);
    } finally {
      if (is != null)
        is.close();
    }
  }
        
  /**
   * Parses a pattern.
   */
  private Pattern parsePattern(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    Pattern pattern = parseTerm(grammar);

    int token = parseToken();

    switch (token) {
    case '|':
      return parseChoicePattern(grammar, pattern);
    case '&':
      return parseInterleavePattern(grammar, pattern);
    case ',':
      return parseGroupPattern(grammar, pattern);

    default:
      _peekToken = token;
      return pattern;
    }
  }

  /**
   * Parses a interleave pattern.
   */
  private Pattern parseInterleavePattern(GrammarPattern grammar,
                                         Pattern pattern)
    throws IOException, SAXException, RelaxException
  {
    int token;

    do {
      if (! (pattern instanceof InterleavePattern)) {
        Pattern child = pattern;
        pattern = new InterleavePattern();
        pattern.addChild(child);
      }
      
      pattern.addChild(parseTerm(grammar));
    } while ((token = parseToken()) == '&');

    _peekToken = token;

    return pattern;
  }

  /**
   * Parses a group pattern.
   */
  private Pattern parseGroupPattern(GrammarPattern grammar, Pattern pattern)
    throws IOException, SAXException, RelaxException
  {
    int token;

    do {
      if (! (pattern instanceof GroupPattern)) {
        Pattern child = pattern;
        pattern = new GroupPattern();
        pattern.addChild(child);
      }
      
      pattern.addChild(parseTerm(grammar));
    } while ((token = parseToken()) == ',');

    _peekToken = token;

    return pattern;
  }

  /**
   * Parses a choice pattern.
   */
  private Pattern parseChoicePattern(GrammarPattern grammar, Pattern pattern)
    throws IOException, SAXException, RelaxException
  {
    int token;

    do {
      if (! (pattern instanceof ChoicePattern)) {
        Pattern child = pattern;
        pattern = new ChoicePattern();
        pattern.addChild(child);
      }
      
      pattern.addChild(parseTerm(grammar));
    } while ((token = parseToken()) == '|');

    _peekToken = token;

    return pattern;
  }

  /**
   * Parses a term
   */
  private Pattern parseTerm(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    int token = parseToken();

    while (token == COMMENT) {
      token = parseToken();
    }

    Pattern pattern;
    switch (token) {
    case EMPTY:
      return new EmptyPattern();
      
    case TEXT:
      return new TextPattern();
      
    case STRING:
    case LITERAL:
      return new DataPattern("string");
      
    case TOKEN:
      return new DataPattern("token");
      
    case ELEMENT:
      pattern = parseElement(grammar);
      break;
      
    case ATTRIBUTE:
      pattern = parseAttribute(grammar);
      break;

    case '(':
      pattern = parsePattern(grammar);

      token = parseToken();
      if (token != ')')
        throw error(L.l("expected ')' at {0}", errorToken(token)));
      break;

    case IDENTIFIER:
      pattern = new RefPattern(_grammar, _lexeme);
      pattern.setFilename(_filename);
      pattern.setLine(_line);
      break;

    default:
      throw error(L.l("unknown token {0}", errorToken(token)));
    }

    token = parseToken();

    if (token == '*')
      pattern = new ZeroOrMorePattern(pattern);
    else if (token == '?') {
      ChoicePattern choice = new ChoicePattern();
      choice.addChild(new EmptyPattern());
      choice.addChild(pattern);
      return choice;
    }
    else if (token == '+') {
      GroupPattern group = new GroupPattern();
      group.addChild(pattern);
      group.addChild(new ZeroOrMorePattern(pattern));
      return group;
    }
    else {
      _peekToken = token;
    }

    return pattern;
  }

  /**
   * Parses an element.
   */
  private Pattern parseElement(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    String id = generateId();
    ElementPattern elt = new ElementPattern(id);
    grammar.setDefinition(id, elt);
    
    elt.addNameChild(parseNameClass(grammar, true));

    int token = parseToken();
    if (token == '{') {
      elt.addChild(parsePattern(grammar));

      token = parseToken();
      if (token != '}')
        throw error(L.l("expected '}' at {0}", errorToken(token)));
    }

    return elt;
  }

  /**
   * Parses an element.
   */
  private Pattern parseAttribute(GrammarPattern grammar)
    throws IOException, SAXException, RelaxException
  {
    AttributePattern elt = new AttributePattern();
    elt.addNameChild(parseNameClass(grammar, false));

    int token = parseToken();
    if (token == '{') {
      token = parseToken();

      if (token == '}')
        return elt;

      _peekToken = token;
      
      elt.addChild(parsePattern(grammar));

      token = parseToken();
      if (token != '}')
        throw error(L.l("expected '}' at {0}", errorToken(token)));
    }

    return elt;
  }

  /**
   * Parses a name class.
   */
  private NameClassPattern parseNameClass(GrammarPattern grammar,
                                          boolean isElement)
    throws IOException, SAXException, RelaxException
  {
    NameClassPattern left = parseName(grammar, isElement);
    ChoiceNamePattern choice = null;

    int ch;
    while ((ch = skipWhitespace()) == '|') {
      NameClassPattern right = parseName(grammar, isElement);

      if (choice == null) {
        choice = new ChoiceNamePattern();
        choice.addNameChild(left);
      }

      choice.addNameChild(right);
    }

    unread();

    if (choice != null)
      return choice;
    else
      return left;
  }

  /**
   * Parses a name class.
   */
  private NameClassPattern parseName(GrammarPattern grammar, boolean isElement)
    throws IOException, SAXException, RelaxException
  {
    int ch = skipWhitespace();
    
    if (ch == '(') {
      NameClassPattern name = parseNameClass(grammar, isElement);
      ch = skipWhitespace();
      if (ch != ')')
        throw error(L.l("expected ')' at '{0}'", String.valueOf((char) ch)));
      return name;
    }

    char []cbuf = _cb.getBuffer();
    byte []buffer = _buffer;
    int i = 0;
    
    int offset = _offset;
    int length = _length;
    
    while (ch > 0 && ch < 256 && NAME_CHAR[ch]) {
      cbuf[i++] = (char) ch;

      if (offset < length) {
        ch = buffer[offset++] & 0xff;
        
        if (ch == '\n') {
          _line++;
        }
      }
      else {
        _offset = offset;
        _length = length;
        
        ch = read();
        
        offset = _offset;
        length = _length;
      }
    }
    
    _offset = offset;
    _length = length;
    
    _cb.setLength(i);
    
    if (ch == '*')
      _cb.append('*');
    else
      unread();

    if (_cb.length() == 0)
      throw error(L.l("expected name at '{0}'", String.valueOf((char) ch)));

    NameClassPattern pattern;

    String lexeme = _cb.toString();

    int p = lexeme.lastIndexOf(':');
    String ns = _ns;
    String localName;
    
    if (p < 0) {
      localName = lexeme;

      if (! isElement)
        ns = null;
    }
    else {
      String prefix = lexeme.substring(0, p);
      localName = lexeme.substring(p + 1);
      ns = _nsMap.get(prefix);

      if (ns == null && localName.equals("*"))
        throw error(L.l("'{0}' does not match a defined namespace.", lexeme));
      
      if (ns == null) {// && isElement) {
        pattern = createNamePattern(lexeme, "");

        return pattern;
      }
    }

    if (lexeme.equals("*")) {
      AnyNamePattern namePattern = new AnyNamePattern();
      
      namePattern.setExcept(parseExcept(grammar, isElement));
      
      return namePattern;
    }
    else if (localName.equals("*")) {
      NsNamePattern namePattern = new NsNamePattern(lexeme, ns);
      
      namePattern.setExcept(parseExcept(grammar, isElement));
      
      return namePattern;
    }
    else if ("".equals(ns) || ns == null) {
      pattern = createNamePattern(localName, "");

      return pattern;
    }
    else {
      pattern = createNamePattern(lexeme, ns);

      return pattern;
    }
  }

  private NamePattern createNamePattern(String localName, String namespace)
  {
    return new NamePattern(new QName(localName, namespace));
  }

  /**
   * Parses a name class.
   */
  private NameClassPattern parseExcept(GrammarPattern grammar,
                                       boolean isElement)
    throws IOException, SAXException, RelaxException
  {
    int ch = skipWhitespace();

    if (ch != '-') {
      unread();
      return null;
    }

    return parseName(grammar, isElement);
  }

  /**
   * Parses a token.
   */
  private int parseToken()
    throws IOException, SAXException, RelaxException
  {
    int ch = _peekToken;

    if (ch >= 0) {
      _peekToken = -1;
      return ch;
    }

    CharBuffer cb = _cb;
    cb.clear();
    
    byte []buffer = _buffer;
      
    while (true)  {
      if (_offset < _length) {
        ch = buffer[_offset++];
        if (ch == '\n')
          _line++;
      }
      else {
        ch = read();
      }

      switch (ch) {
      case ' ':
      case '\t':
      case '\n':
      case '\r':
        break;
      
      case '?':
      case '*':
      case '+':
      case ',':
      case '|':
      case '&':
      case '{':
      case '}':
      case '(':
      case ')':
      case '=':
        return ch;

      case '\"':
      case '\'':
        unread();
        _lexeme = parseLiteral();
        return LITERAL;

      case '#':
        do {
          ch = read();
          if (ch != '#')
            throw error(L.l("expected '#' at '{0}'", String.valueOf((char) ch)));
        
          if (cb.length() > 0)
            cb.append('\n');

          for (ch = read(); ch > 0 && ch != '\n' && ch != '\r'; ch = read()) {
            cb.append((char) ch);
          }

          if (ch == '\r') {
            ch = read();
            if (ch != '\n')
              unread();
          }

          ch = read();
        } while (ch == '#');

        unread();
        return COMMENT;

      case -1:
        cb.append("end of file");
        return -1;

      default:
        if (XmlChar.isNameStart(ch)) {
          char []cbuf = cb.getBuffer();
          int i = 0;

          while (ch > 0 && ch < 256 && NAME_CHAR[ch]) {
            cbuf[i++] = (char) ch;

            if (_offset < _length) {
              ch = buffer[_offset++] & 0xff;
              if (ch == '\n')
                _line++;
            }
            else
              ch = read();
          }
          cb.setLength(i);
          unread();

          int token = _tokenMap.get(cb);

          if (token > 0) {
            _lexeme = null;
            return token;
          }
          else {
            _lexeme = _cb.toString().intern();
            return IDENTIFIER;
          }
        }
        else if (ch < 0) {
          cb.append("end of file");
          return -1;
        }
        else {
          throw error(L.l("Unknown character '{0}'", String.valueOf((char) ch)));
        }
      }
    }
  }

  private String parseLiteral()
    throws IOException, SAXException, RelaxException
  {
    int end = skipWhitespace();

    if (end != '"' && end != '\'')
      throw error(L.l("expected '\"' at '{0}'", String.valueOf((char) end)));

    _cb.clear();
    int ch = read();
    for (; ch >= 0 && ch != end; ch = read()) {
      _cb.append((char) ch);
    }

    if (ch != end)
      throw error(L.l("expected '\"' at '{0}'", String.valueOf((char) ch)));

    return _cb.toString();
  }


  private String parseIdentifier()
    throws IOException, SAXException, RelaxException
  {
    int ch = skipWhitespace();

    if (! XmlChar.isNameChar(ch))
      throw error(L.l("expected identifier character at '{0}'", String.valueOf((char) ch)));

    _cb.clear();
    for (; XmlChar.isNameChar(ch); ch = read()) {
      _cb.append((char) ch);
    }

    return _cb.toString();
  }

  /**
   * Parses whitespace.
   */
  private int skipWhitespace()
    throws IOException, SAXException
  {
    int ch;
      
    for (ch = read(); XmlChar.isWhitespace(ch); ch = read()) {
    }

    return ch;
  }

  private String errorToken(int ch)
  {
    switch (ch) {
    case -1:
      return "end of file";
      
    case '?':
    case '*':
    case '+':
    case ',':
    case '|':
    case '&':
    case '{':
    case '}':
    case '(':
    case ')':
    case '=':
      return String.valueOf((char) ch);
      
    default:
      return _cb.toString();
    }
  }

  /**
   * Creates an error.
   */
  private SAXException error(String msg)
  {
    return new SAXException(_filename + ":" + _line + ": " + msg);
  }

  /**
   * Returns the current location string.
   */
  /*
  public String getLocation()
  {
    return _filename + ":" + _line;
  }
  */
  
  /**
   * Reads a character.
   */
  private int read()
    throws IOException
  {
    int offset = _offset;

    if (_length <= offset) {
      fillBuffer();
      
      offset = _offset;

      if (_length < 0)
        return -1;
    }
    
    int ch = _buffer[offset++];
    
    _offset = offset;
    
    if (ch == '\n')
      _line++;
    else if (ch == '\r') {
      _line++;

      if (_length <= _offset)
        fillBuffer();
      
      ch = _buffer[_offset++];
      
      if (ch != '\n') {
        unread();
        ch = '\n';
      }
    }

    return ch;
  }

  private void fillBuffer()
    throws IOException
  {
    _length = _is.read(_buffer, 0, _buffer.length);
    _offset = 0;
  }

  private void unread()
  {
    if (_offset > 0) {
      _offset--;
      int ch = _buffer[_offset];
      if (ch == '\n') {
        _line--;
      }
    }
  }

  static {
    _tokenMap.put(new CharBuffer("namespace"), NAMESPACE);
    _tokenMap.put(new CharBuffer("default"), DEFAULT);
    
    _tokenMap.put(new CharBuffer("start"), START);
    _tokenMap.put(new CharBuffer("div"), DIV);
    
    _tokenMap.put(new CharBuffer("element"), ELEMENT);
    _tokenMap.put(new CharBuffer("attribute"), ATTRIBUTE);
    
    _tokenMap.put(new CharBuffer("text"), TEXT);
    _tokenMap.put(new CharBuffer("string"), STRING);
    _tokenMap.put(new CharBuffer("token"), TOKEN);
    
    _tokenMap.put(new CharBuffer("empty"), EMPTY);
    
    _tokenMap.put(new CharBuffer("include"), INCLUDE);

    NAME_CHAR = new boolean[256];
    for (int i = 0; i < NAME_CHAR.length; i++) {
      if (XmlChar.isNameChar((char) i))
        NAME_CHAR[i] = true;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy