All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.thaiopensource.relaxng.pattern.PatternMatcher Maven / Gradle / Ivy

Go to download

Jing is a validator for RELAX NG and other schema languages. This project was taken from http://code.google.com/p/jing-trang and mavenized for inclusion in the Wicket Stuff HTML Validator. The code was taken from the 20091111 release.

There is a newer version: 1.11
Show newest version
package com.thaiopensource.relaxng.pattern;

import com.thaiopensource.datatype.Datatype2;
import com.thaiopensource.relaxng.match.MatchContext;
import com.thaiopensource.relaxng.match.Matcher;
import com.thaiopensource.util.Equal;
import com.thaiopensource.util.Localizer;
import com.thaiopensource.xml.util.Name;
import org.relaxng.datatype.Datatype;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class PatternMatcher implements Cloneable, Matcher {

  static private class Shared {
    private final Pattern start;
    private final ValidatorPatternBuilder builder;
    private Map recoverPatternTable;
    Shared(Pattern start, ValidatorPatternBuilder builder) {
      this.start = start;
      this.builder = builder;
    }

    Pattern findElement(Name name) {
      if (recoverPatternTable == null)
        recoverPatternTable = new HashMap();
      Pattern p = recoverPatternTable.get(name);
      if (p == null) {
        p = FindElementFunction.findElement(builder, name, start);
        recoverPatternTable.put(name, p);
      }
      return p;
    }
  }

  private PatternMemo memo;
  private boolean textTyped;
  private boolean hadError;
  private boolean ignoreNextEndTagOrAttributeValue;
  private String errorMessage;
  private final Shared shared;
  private List dataDerivFailureList = new ArrayList();

  public PatternMatcher(Pattern start, ValidatorPatternBuilder builder) {
    shared = new Shared(start, builder);
    memo = builder.getPatternMemo(start);
  }

  private PatternMatcher(PatternMemo memo, Shared shared) {
    this.memo = memo;
    this.shared = shared;
  }

  public Matcher start() {
    return new PatternMatcher(shared.builder.getPatternMemo(shared.start), shared);
  }
  
  public boolean equals(Object obj) {
    if (!(obj instanceof PatternMatcher))
      return false;
    PatternMatcher other = (PatternMatcher)obj;
    // don't need to test equality of shared, because the memos can only be ==
    // if the shareds are ==.
    return (memo == other.memo
            && hadError == other.hadError
            && Equal.equal(errorMessage, other.errorMessage)
            && ignoreNextEndTagOrAttributeValue == other.ignoreNextEndTagOrAttributeValue
            && textTyped == other.textTyped);
  }

  public int hashCode() {
    return memo.hashCode();
  }

  public final Object clone() {
    try {
      PatternMatcher cloned = (PatternMatcher)super.clone();
      cloned.dataDerivFailureList = new ArrayList();
      return cloned;
    }
    catch (CloneNotSupportedException e) {
      throw new Error("unexpected CloneNotSupportedException");
    }
  }

  public Matcher copy() {
    return (Matcher)clone();
  }

  public boolean matchStartDocument() {
    if (memo.isNotAllowed())
      return error("schema_allows_nothing");
    return true;
  }

  public boolean matchEndDocument() {
    // XXX maybe check that memo.isNullable if !hadError
    return true;
  }

  public boolean matchStartTagOpen(Name name, String qName, MatchContext context) {
    if (setMemo(memo.startTagOpenDeriv(name)))
      return true;
    PatternMemo next = memo.startTagOpenRecoverDeriv(name);
    boolean ok = ignoreError();
    if (!next.isNotAllowed()) {
      if (!ok) {
        Set missing = requiredElementNames();
        if (!missing.isEmpty())
          error(missing.size() == 1
                ? "unexpected_element_required_element_missing"
                : "unexpected_element_required_elements_missing",
                errorArgQName(qName, name, context, false),
                formatNames(missing, FORMAT_NAMES_ELEMENT|FORMAT_NAMES_AND, context));
        else
          error("element_not_allowed_yet",
                errorArgQName(qName, name, context, false),
                expectedContent(context));
      }
    }
    else {
      final ValidatorPatternBuilder builder = shared.builder;
      next = builder.getPatternMemo(builder.makeAfter(shared.findElement(name), memo.getPattern()));
      if (!ok)
        error(next.isNotAllowed() ? "unknown_element" : "out_of_context_element",
              errorArgQName(qName, name, context, false),
              expectedContent(context));
    }
    memo = next;
    return ok;
  }

  public boolean matchAttributeName(Name name, String qName, MatchContext context) {
    if (setMemo(memo.startAttributeDeriv(name)))
      return true;
    ignoreNextEndTagOrAttributeValue = true;
    boolean ok = ignoreError();
    if (ok)
      return true;
    qName = errorArgQName(qName, name, context, true);
    NormalizedNameClass nnc = memo.possibleAttributeNames();
    if (nnc.isEmpty())
      error("no_attributes_allowed", qName);
    else
      error("invalid_attribute_name", qName, expectedAttributes(context));
    return false;
  }

  public boolean matchAttributeValue(String value, Name name, String qName, MatchContext context) {
    if (ignoreNextEndTagOrAttributeValue) {
      ignoreNextEndTagOrAttributeValue = false;
      return true;
    }
    dataDerivFailureList.clear();
    if (setMemo(memo.dataDeriv(value, context, dataDerivFailureList)))
      return true;
    boolean ok = error("invalid_attribute_value", errorArgQName(qName, name, context, true),
                       formatDataDerivFailures(value, context));
    memo = memo.recoverAfter();
    return ok;
  }

  public boolean matchStartTagClose(Name name, String qName, MatchContext context) {
    boolean ok;
    if (setMemo(memo.endAttributes()))
      ok = true;
    else {
      ok = ignoreError();
      if (!ok) {
        Set missing = requiredAttributeNames();
        if (missing.isEmpty())
          error("required_attributes_missing_expected",
                errorArgQName(qName, name, context, false),
                expectedAttributes(context));
        else
          error(missing.size() == 1 ? "required_attribute_missing" : "required_attributes_missing",
                errorArgQName(qName, name, context, false),
                formatNames(missing, FORMAT_NAMES_ATTRIBUTE|FORMAT_NAMES_AND, context));
      }
      memo = memo.ignoreMissingAttributes();
    }
    textTyped = memo.getPattern().getContentType() == Pattern.DATA_CONTENT_TYPE;
    return ok;
  }

  public boolean matchTextBeforeEndTag(String string, Name name, String qName, MatchContext context) {
    if (textTyped) {
      ignoreNextEndTagOrAttributeValue = true;
      return setDataDeriv(string, name, qName, context);
    }
    else
      return matchUntypedText(string, context);
  }

  public boolean matchTextBeforeStartTag(String string, MatchContext context) {
    return matchUntypedText(string, context);
  }

  private boolean matchUntypedText(String string, MatchContext context) {
    if (DataDerivFunction.isBlank(string))
      return true;
    return matchUntypedText(context);
  }

  public boolean matchUntypedText(MatchContext context) {
    if (setMemo(memo.mixedTextDeriv()))
      return true;
    return error("text_not_allowed", expectedContent(context));
  }

  public boolean isTextTyped() {
    return textTyped;
  }

  private boolean setDataDeriv(String string, Name name, String qName, MatchContext context) {
    textTyped = false;
    PatternMemo textOnlyMemo = memo.textOnly();
    dataDerivFailureList.clear();
    if (setMemo(textOnlyMemo.dataDeriv(string, context, dataDerivFailureList)))
      return true;
    PatternMemo next = memo.recoverAfter();
    boolean ok = ignoreError();
    if (!ok && (!next.isNotAllowed()
                || textOnlyMemo.emptyAfter().dataDeriv(string, context).isNotAllowed())) {
      NormalizedNameClass nnc = memo.possibleStartTagNames();
      if (!nnc.isEmpty() && DataDerivFunction.isBlank(string))
        error("blank_not_allowed",
              errorArgQName(qName, name, context, false),
              expectedContent(context));
      else
        error("invalid_element_value", errorArgQName(qName, name, context, false),
              formatDataDerivFailures(string, context));
    }
    memo = next;
    return ok;
  }

  public boolean matchEndTag(Name name, String qName, MatchContext context) {
    if (ignoreNextEndTagOrAttributeValue) {
      ignoreNextEndTagOrAttributeValue = false;
      return true;
    }
    if (textTyped)
      return setDataDeriv("", name, qName, context);
    if (setMemo(memo.endTagDeriv()))
      return true;
    boolean ok = ignoreError();
    PatternMemo next = memo.recoverAfter();
    // The tricky thing here is that the derivative that we compute may be notAllowed simply because the parent
    // is notAllowed; we don't want to give an error in this case.
    if (!ok && (!next.isNotAllowed()
                // Retry computing the deriv on a pattern where the after is OK (not notAllowed)
                || memo.emptyAfter().endTagDeriv().isNotAllowed())) {
      Set missing = requiredElementNames();
      if (!missing.isEmpty())
        error(missing.size() == 1
              ? "incomplete_element_required_element_missing"
              : "incomplete_element_required_elements_missing",
              errorArgQName(qName, name, context, false),
              formatNames(missing, FORMAT_NAMES_ELEMENT|FORMAT_NAMES_AND, context));
      else
        // XXX  Could do better here and describe what is required instead of what is possible
        error("incomplete_element_required_elements_missing_expected",
              errorArgQName(qName, name, context, false),
              expectedContent(context));
    }
    memo = next;
    return ok;
  }

  public String getErrorMessage() {
    return errorMessage;
  }

  public boolean isValidSoFar() {
    return !hadError;
  }

  public com.thaiopensource.relaxng.match.NameClass possibleStartTagNames() {
    return memo.possibleStartTagNames();
  }

  public com.thaiopensource.relaxng.match.NameClass possibleAttributeNames() {
    return memo.possibleAttributeNames();
  }

  public Set requiredElementNames() {
    return memo.getPattern().apply(shared.builder.getRequiredElementsFunction());
  }

  public Set requiredAttributeNames() {
    return memo.getPattern().apply(shared.builder.getRequiredAttributesFunction());
  }

  private boolean setMemo(PatternMemo m) {
    if (m.isNotAllowed())
      return false;
    else {
      memo = m;
      return true;
    }
  }

  private boolean ignoreError() {
    return hadError && memo.isNotAllowed();
  }

  /*
   * Return true if the error was ignored, false otherwise.
   */
  private boolean error(String key) {
    return error(key, new String[] { });
  }

  private boolean error(String key, String arg) {
    return error(key, new String[] { arg });
  }

  private boolean error(String key, String arg1, String arg2) {
    return error(key, new String[] { arg1, arg2 });
  }

  private boolean error(String key, String arg1, String arg2, String arg3) {
    return error(key, new String[] { arg1, arg2, arg3 });
  }

  private boolean error(String key, String[] args) {
    if (ignoreError())
      return true;
    hadError = true;
    errorMessage = localizer().message(key, args);
    return false;
  }
   
  private String errorArgQName(String qName, Name name, MatchContext context, boolean isAttribute) {
    if (ignoreError())
      return null;
    if (qName == null || qName.length() == 0) {
      final String ns = name.getNamespaceUri();
      final String localName = name.getLocalName();
      if (ns.length() == 0 || (!isAttribute && ns.equals(context.resolveNamespacePrefix(""))))
        qName = localName;
      else {
        String prefix = context.getPrefix(ns);
        if (prefix != null)
          qName = prefix + ":" + localName;
        // this shouldn't happen unless the parser isn't supplying prefixes properly
        else
          qName = "{" + ns + "}" + localName;
      }
    }
    return quoteQName(qName);
  }

  static private final int UNDEFINED_TOKEN_INDEX = -3;
  static private final int INCONSISTENT_TOKEN_INDEX = -2;
  
  private String formatDataDerivFailures(String str, MatchContext context) {
    if (ignoreError())
      return null;
    if (dataDerivFailureList.size() == 0)
      return "";
    if (dataDerivFailureList.size() > 1) {
      // remove duplicates
      Set failures = new HashSet();
      failures.addAll(dataDerivFailureList);
      dataDerivFailureList.clear();
      dataDerivFailureList.addAll(failures);
    }
    List stringValues = new ArrayList();
    Set names = new HashSet();
    List messages = new ArrayList();
    int tokenIndex = UNDEFINED_TOKEN_INDEX;
    int tokenStart = -1;
    int tokenEnd = -1;
    for (DataDerivFailure fail : dataDerivFailureList) {
      Datatype dt = fail.getDatatype();
      String s = fail.getStringValue();
      if (s != null) {
        Object value = fail.getValue();
        // we imply some special semantics for Datatype2
        if (value instanceof Name && dt instanceof Datatype2)
          names.add((Name)value);
        else if (value instanceof String && dt instanceof Datatype2)
          stringValues.add((String)value);
        else
          stringValues.add(s);
      }
      else {
        String message = fail.getMessage();
        // XXX this might produce strangely worded messages for 3rd party datatype libraries
        if (message != null)
          messages.add(message);
        else if (fail.getExcept() != null)
          return ""; // XXX do better for except
        else
          messages.add(localizer().message("require_datatype",
                                           fail.getDatatypeName().getLocalName()));
      }
      switch (tokenIndex) {
      case INCONSISTENT_TOKEN_INDEX:
        break;
      case UNDEFINED_TOKEN_INDEX:
        tokenIndex = fail.getTokenIndex();
        tokenStart = fail.getTokenStart();
        tokenEnd = fail.getTokenEnd();
        break;
      default:
        if (tokenIndex != fail.getTokenIndex())
          tokenIndex = INCONSISTENT_TOKEN_INDEX;
        break;
      }
    }
    if (stringValues.size() > 0) {
      Collections.sort(stringValues);
      for (int i = 0; i < stringValues.size(); i++)
        stringValues.set(i, quoteValue(stringValues.get(i)));
      messages.add(localizer().message("require_values",
                                       formatList(stringValues, "or")));
    }
    if (names.size() > 0)
      // XXX provide the strings as well so that a sensible prefix can be chosen if none is declared
      messages.add(localizer().message("require_qnames",
                                       formatNames(names,
                                                   FORMAT_NAMES_OR|FORMAT_NAMES_ELEMENT,
                                                   context)));
    if (messages.size() == 0)
      return "";
    String arg = formatList(messages, "or");
    // XXX should do something with inconsistent token index (e.g. list { integer+ } | "foo" )
    if (tokenIndex >= 0 && tokenStart >= 0 && tokenEnd <= str.length()) {
      if (tokenStart == str.length())
        return localizer().message("missing_token", arg);
      return localizer().message("token_failures",
                                 quoteValue(str.substring(tokenStart, tokenEnd)),
                                 arg);
    }
    return localizer().message("data_failures", arg);
  }

  private String quoteValue(String str) {
    StringBuilder buf = new StringBuilder();
    appendAttributeValue(buf, str);
    return buf.toString();
  }

  private String expectedAttributes(MatchContext context) {
    if (ignoreError())
      return null;
    NormalizedNameClass nnc = memo.possibleAttributeNames();
    if (nnc.isEmpty())
      return "";
    Set expectedNames = nnc.getIncludedNames();
    if (!expectedNames.isEmpty())
      return localizer().message(nnc.isAnyNameIncluded() || !nnc.getIncludedNamespaces().isEmpty()
                                 ? "expected_attribute_or_other_ns"
                                 : "expected_attribute",
                                 formatNames(expectedNames,
                                             FORMAT_NAMES_ATTRIBUTE|FORMAT_NAMES_OR, context));
    return "";
  }

  private String expectedContent(MatchContext context) {
    if (ignoreError())
      return null;
    List expected = new ArrayList();
    if (!memo.endTagDeriv().isNotAllowed())
      expected.add(localizer().message("element_end_tag"));
    // getContentType isn't so well-defined on after patterns
    switch (memo.emptyAfter().getPattern().getContentType()) {
    case Pattern.MIXED_CONTENT_TYPE:
      // A pattern such as (element foo { empty }, text) has a MIXED_CONTENT_TYPE
      // but text is not allowed everywhere.
      if (!memo.mixedTextDeriv().isNotAllowed())
        expected.add(localizer().message("text"));
      break;
    case Pattern.DATA_CONTENT_TYPE:
      expected.add(localizer().message("data"));
      break;
    }
    NormalizedNameClass nnc = memo.possibleStartTagNames();
    Set expectedNames = nnc.getIncludedNames();
    // XXX say something about wildcards
    if (!expectedNames.isEmpty()) {
      expected.add(localizer().message("element_list",
                                       formatNames(expectedNames,
                                                   FORMAT_NAMES_ELEMENT|FORMAT_NAMES_OR,
                                                   context)));
      if (nnc.isAnyNameIncluded() || !nnc.getIncludedNamespaces().isEmpty())
        expected.add(localizer().message("element_other_ns"));
    }
    if (expected.isEmpty())
      return "";
    return localizer().message("expected", formatList(expected, "or"));
  }

  static final String GENERATED_PREFIXES[] = { "ns", "ns-", "ns_", "NS", "NS-", "NS_"};

  // Values for flags parameter of formatNames
  static private final int FORMAT_NAMES_ELEMENT = 0x0;
  static private final int FORMAT_NAMES_ATTRIBUTE = 0x1;
  static private final int FORMAT_NAMES_AND = 0x0;
  static private final int FORMAT_NAMES_OR = 0x2;

  private static String formatNames(Set names, int flags, MatchContext context) {
    if (names.isEmpty())
      return "";
    Map nsDecls = new HashMap();
    List qNames = generateQNames(names, flags, context, nsDecls);
    Collections.sort(qNames);
    int len = qNames.size();
    for (int i = 0; i < len; i++)
      qNames.set(i, quoteQName(qNames.get(i)));
    String result = formatList(qNames, (flags & FORMAT_NAMES_OR) != 0 ? "or" : "and");
    if (nsDecls.size() != 0)
      result = localizer().message("qnames_nsdecls", result, formatNamespaceDecls(nsDecls));
    return result;
  }

  private static List generateQNames(Set names, int flags, MatchContext context, Map nsDecls) {
    String defaultNamespace;
    if ((flags & FORMAT_NAMES_ATTRIBUTE) != 0)
      defaultNamespace = "";
    else {
      defaultNamespace = context.resolveNamespacePrefix("");
      for (Name name : names) {
        if (name.getNamespaceUri().length() == 0) {
          if (defaultNamespace != null)
            nsDecls.put("", "");
          defaultNamespace = "";
          break;
        }
      }
    }
    List qNames = new ArrayList();
    Set undeclaredNamespaces = new HashSet();
    List namesWithUndeclaredNamespaces = new ArrayList();
    for (Name name : names) {
      String ns = name.getNamespaceUri();
      String prefix;
      if (ns.equals(defaultNamespace))
        prefix = "";
      else
        prefix = context.getPrefix(ns);
      if (prefix == null) {
        undeclaredNamespaces.add(ns);
        namesWithUndeclaredNamespaces.add(name);
      }
      else
        qNames.add(makeQName(prefix, name.getLocalName()));
    }
    if (namesWithUndeclaredNamespaces.isEmpty())
      return qNames;
    if (undeclaredNamespaces.size() == 1 && defaultNamespace == null)
      nsDecls.put(undeclaredNamespaces.iterator().next(), "");
    else
      choosePrefixes(undeclaredNamespaces, context, nsDecls);
    // now nsDecls has a prefix for each namespace
    for (Name name : namesWithUndeclaredNamespaces)
      qNames.add(makeQName(nsDecls.get(name.getNamespaceUri()), name.getLocalName()));
    return qNames;
  }

  private static void choosePrefixes(Set nsSet, MatchContext context, Map nsDecls) {
    List nsList = new ArrayList(nsSet);
    Collections.sort(nsList);
    int len = nsList.size();
    String prefix;
    int tryIndex = 0;
    do {
      if (tryIndex < GENERATED_PREFIXES.length)
        prefix = GENERATED_PREFIXES[tryIndex];
      else {
        // default is just to stick as many underscores as necessary at the beginning
        prefix = "_" + GENERATED_PREFIXES[0];
        for (int i = GENERATED_PREFIXES.length; i < tryIndex; i++)
          prefix += "_" + prefix;
      }
      for (int i = 0; i < len; i++) {
        if (context.resolveNamespacePrefix(len == 1 ? prefix : prefix + (i + 1)) != null) {
          prefix = null;
          break;
        }
      }
      ++tryIndex;
    } while (prefix == null);
    for (int i = 0; i < len; i++) {
      String ns = nsList.get(i);
      nsDecls.put(ns, len == 1 ? prefix : prefix + (i + 1));
    }
  }

  private static String formatList(List list, String conjunction) {
    int len = list.size();
    switch (len) {
    case 0:
      return "";
    case 1:
      return list.get(0);
    case 2:
      return localizer().message(conjunction + "_list_pair", list.get(0), list.get(1));
    }
    String s = localizer().message(conjunction + "_list_many_first", list.get(0));
    for (int i = 1; i < len - 1; i++)
      s = localizer().message(conjunction + "_list_many_middle", s, list.get(i));
    return localizer().message(conjunction + "_list_many_last", s, list.get(len - 1));
  }

  // nsDecls maps namespaces to prefixes
  private static String formatNamespaceDecls(Map nsDecls) {
    List list = new ArrayList();
    for (Map.Entry entry : nsDecls.entrySet()) {
      StringBuilder buf = new StringBuilder();
      String prefix = entry.getValue();
      if (prefix.length() == 0)
        buf.append("xmlns");
      else
        buf.append("xmlns:").append(prefix);
      buf.append('=');
      appendAttributeValue(buf, entry.getKey());
      list.add(buf.toString());
    }
    Collections.sort(list);
    StringBuilder buf = new StringBuilder();
    for (String aList : list) {
      if (buf.length() != 0)
        buf.append(" ");
      buf.append(aList);
    }
    return buf.toString();
  }

  private static String quoteForAttributeValue(char c) {
    switch (c) {
    case '<':
      return "<";
    case '"':
      return """;
    case '&':
      return "&";
    case 0xA:
      return "
";
    case 0xD:
      return "
";
    case 0x9:
      return "	";
    }
    return null;
  }

  private static StringBuilder appendAttributeValue(StringBuilder buf, String value) {
    buf.append('"');
    for (int i = 0; i < value.length(); i++) {
      char c = value.charAt(i);
      String quoted = quoteForAttributeValue(c);
      if (quoted != null)
        buf.append(quoted);
      else
        buf.append(c);
    }
    buf.append('"');
    return buf;
  }

  private static String makeQName(String prefix, String localName) {
    if (prefix.length() == 0)
      return localName;
    return prefix + ":" + localName;
  }

  static private String quoteQName(String qName) {
    return localizer().message("qname", qName);
  }

  static private Localizer localizer() {
    return SchemaBuilderImpl.localizer;
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy