All downloads are free. Search and download functionality uses the official Maven repository.

com.thaiopensource.relaxng.input.xml.Inferrer Maven / Gradle / Ivy

The newest version!
package com.thaiopensource.relaxng.input.xml;

import com.thaiopensource.datatype.DatatypeLibraryLoader;
import com.thaiopensource.relaxng.edit.AttributePattern;
import com.thaiopensource.relaxng.edit.ChoicePattern;
import com.thaiopensource.relaxng.edit.CompositePattern;
import com.thaiopensource.relaxng.edit.DataPattern;
import com.thaiopensource.relaxng.edit.DefineComponent;
import com.thaiopensource.relaxng.edit.ElementPattern;
import com.thaiopensource.relaxng.edit.EmptyPattern;
import com.thaiopensource.relaxng.edit.GrammarPattern;
import com.thaiopensource.relaxng.edit.GroupPattern;
import com.thaiopensource.relaxng.edit.NameNameClass;
import com.thaiopensource.relaxng.edit.OneOrMorePattern;
import com.thaiopensource.relaxng.edit.OptionalPattern;
import com.thaiopensource.relaxng.edit.Pattern;
import com.thaiopensource.relaxng.edit.RefPattern;
import com.thaiopensource.relaxng.edit.SchemaCollection;
import com.thaiopensource.relaxng.edit.SchemaDocument;
import com.thaiopensource.relaxng.edit.TextPattern;
import com.thaiopensource.relaxng.edit.ZeroOrMorePattern;
import com.thaiopensource.resolver.Resolver;
import com.thaiopensource.resolver.xml.sax.SAXResolver;
import com.thaiopensource.xml.infer.AttributeDecl;
import com.thaiopensource.xml.infer.ChoiceParticle;
import com.thaiopensource.xml.infer.ElementDecl;
import com.thaiopensource.xml.infer.ElementParticle;
import com.thaiopensource.xml.infer.EmptyParticle;
import com.thaiopensource.xml.infer.InferHandler;
import com.thaiopensource.xml.infer.OneOrMoreParticle;
import com.thaiopensource.xml.infer.Particle;
import com.thaiopensource.xml.infer.ParticleVisitor;
import com.thaiopensource.xml.infer.Schema;
import com.thaiopensource.xml.infer.SequenceParticle;
import com.thaiopensource.xml.infer.TextParticle;
import com.thaiopensource.xml.util.Name;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.Vector;

/**
 * Builds a RELAX NG grammar from a {@link Schema} that was inferred from one or
 * more XML instance documents.
 *
 * <p>Elements that are referenced from more than one place in the inferred schema
 * are emitted as separate named definitions and referenced through
 * {@link RefPattern}s; all other elements are expanded inline at their single
 * point of use.
 */
class Inferrer {
  private final Schema schema;
  /** Names of elements that are referenced more than once; each gets its own define. */
  private final Set<Name> multiplyReferencedElementNames = new HashSet<Name>();
  private final GrammarPattern grammar;
  private final ParticleConverter particleConverter = new ParticleConverter();
  /** Element names whose defines still have to be emitted, in order of first reference. */
  private final List<Name> outputQueue = new Vector<Name>();
  /** Element names that have already been placed on {@link #outputQueue}. */
  private final Set<Name> queued = new HashSet<Name>();
  /**
   * Text placed between the namespace prefix and the local name in define names;
   * {@code null} when define names consist of the local name only.
   */
  private String prefixSeparator;

  /** Candidate characters from which the prefix separator is built. */
  private static final String SEPARATORS = ".-_";

  /** Options controlling how the input documents are read. */
  static class Options {
    /** Encoding applied to every input source; not applied when {@code null}. */
    String encoding;
    /** Resolver handed to the {@link SAXResolver} that creates the XML reader. */
    Resolver resolver;
  }

  /**
   * Orders the alternatives of a choice: text first, then references, then
   * elements, then everything else. References are ordered by define name and
   * elements by element name.
   */
  private static class PatternComparator implements Comparator<Pattern> {
    private static final Class<?>[] classOrder = {
      TextPattern.class, RefPattern.class, ElementPattern.class
    };

    public int compare(Pattern p1, Pattern p2) {
      if (p1.getClass() != p2.getClass())
        // Both indexes lie in [0, classOrder.length], so the subtraction cannot overflow.
        return classIndex(p1.getClass()) - classIndex(p2.getClass());
      if (p1 instanceof RefPattern)
        return ((RefPattern)p1).getName().compareTo(((RefPattern)p2).getName());
      if (p1 instanceof ElementPattern)
        return Name.compare(extractElementName(p1), extractElementName(p2));
      return 0;
    }

    /** Returns the name of an element pattern whose name class is a {@link NameNameClass}. */
    private static Name extractElementName(Pattern p) {
      NameNameClass nnc = (NameNameClass)((ElementPattern)p).getNameClass();
      return new Name(nnc.getNamespaceUri(), nnc.getLocalName());
    }

    /** Returns the rank of a pattern class; classes not listed rank after all listed ones. */
    private static int classIndex(Class<?> aClass) {
      for (int i = 0; i < classOrder.length; i++)
        if (aClass == classOrder[i])
          return i;
      return classOrder.length;
    }
  }

  /**
   * Converts inferred {@link Particle}s into RELAX NG {@link Pattern}s. It extends
   * {@link PatternComparator} so that it can pass itself to
   * {@link Collections#sort(List, Comparator)} when ordering choice alternatives.
   */
  private class ParticleConverter extends PatternComparator implements ParticleVisitor {
    /**
     * A multiply referenced element becomes a reference to its define (and is
     * queued for output the first time it is seen); any other element is expanded inline.
     */
    public Object visitElement(ElementParticle p) {
      Name name = p.getName();
      if (multiplyReferencedElementNames.contains(name)) {
        // add() returns true only the first time, so each name is queued exactly once
        if (queued.add(name))
          outputQueue.add(name);
        return new RefPattern(getDefineName(name));
      }
      return createElementPattern(name);
    }

    /**
     * Flattens nested choices into a single sorted choice. If one of the
     * alternatives is empty, the first such alternative is removed and the rest
     * of the choice is made optional instead.
     */
    public Object visitChoice(ChoiceParticle p) {
      ChoicePattern cp = new ChoicePattern();
      List<Pattern> children = cp.getChildren();
      addChoices(children, p.getChild1());
      addChoices(children, p.getChild2());
      Collections.sort(children, this);
      for (Iterator<Pattern> iter = children.iterator(); iter.hasNext();)
        if (iter.next() instanceof EmptyPattern) {
          iter.remove();
          return makeOptional(cp);
        }
      return cp;
    }

    /**
     * Makes the choice optional. When any alternative is a one-or-more, every such
     * alternative is turned into a zero-or-more, which already permits the empty
     * case; otherwise the whole choice is wrapped in an {@link OptionalPattern}.
     */
    private Object makeOptional(ChoicePattern cp) {
      List<Pattern> children = cp.getChildren();
      boolean done = false;
      for (int i = 0, len = children.size(); i < len; i++) {
        Pattern child = children.get(i);
        if (child instanceof OneOrMorePattern) {
          children.set(i, new ZeroOrMorePattern(((OneOrMorePattern)child).getChild()));
          done = true;
        }
      }
      if (done)
        return normalize(cp);
      return new OptionalPattern(normalize(cp));
    }

    /** Converts {@code child} and appends it to {@code children}, splicing in nested choices. */
    private void addChoices(List<Pattern> children, Particle child) {
      Pattern pattern = convert(child);
      if (pattern instanceof ChoicePattern)
        children.addAll(((ChoicePattern)pattern).getChildren());
      else
        children.add(pattern);
    }

    /** Flattens nested sequences into a single group, preserving order. */
    public Object visitSequence(SequenceParticle p) {
      GroupPattern gp = new GroupPattern();
      addGroup(gp.getChildren(), p.getChild1());
      addGroup(gp.getChildren(), p.getChild2());
      return gp;
    }

    /** Converts {@code child} and appends it to {@code children}, splicing in nested groups. */
    private void addGroup(List<Pattern> children, Particle child) {
      Pattern pattern = convert(child);
      if (pattern instanceof GroupPattern)
        children.addAll(((GroupPattern)pattern).getChildren());
      else
        children.add(pattern);
    }

    public Object visitEmpty(EmptyParticle p) {
      return new EmptyPattern();
    }

    public Object visitText(TextParticle p) {
      return new TextPattern();
    }

    public Object visitOneOrMore(OneOrMoreParticle p) {
      return new OneOrMorePattern(convert(p.getChild()));
    }

    /** Converts a particle to the corresponding pattern by dispatching to the visit methods. */
    public Pattern convert(Particle particle) {
      return (Pattern)particle.accept(this);
    }
  }

  /**
   * Walks particles and records in {@link #multiplyReferencedElementNames} every
   * element name that this finder encounters more than once. All visit methods
   * return {@code null}.
   */
  private class ReferenceFinder implements ParticleVisitor {
    private final Set<Name> referencedElementNames = new HashSet<Name>();

    public Object visitElement(ElementParticle p) {
      Name name = p.getName();
      // add() returns false when the name was already seen, i.e. on a repeated reference
      if (!referencedElementNames.add(name))
        multiplyReferencedElementNames.add(name);
      return null;
    }

    public Object visitChoice(ChoiceParticle p) {
      p.getChild1().accept(this);
      p.getChild2().accept(this);
      return null;
    }

    public Object visitSequence(SequenceParticle p) {
      p.getChild1().accept(this);
      p.getChild2().accept(this);
      return null;
    }

    public Object visitEmpty(EmptyParticle p) {
      return null;
    }

    public Object visitText(TextParticle p) {
      return null;
    }

    public Object visitOneOrMore(OneOrMoreParticle p) {
      return p.getChild().accept(this);
    }
  }

  /**
   * Parses every document named in {@code args}, infers a schema from them and
   * returns it as a schema collection containing a single schema document.
   *
   * @param args system identifiers of the input documents; the first one is used
   *             as the main URI of the result, so the array must not be empty
   * @param options encoding and resolver to use when reading the documents
   * @param eh error handler installed on the XML reader
   * @return a collection whose main URI is {@code args[0]} and which maps that URI
   *         to the inferred grammar
   * @throws SAXException propagated from creating the reader or parsing
   * @throws IOException propagated from parsing
   */
  static SchemaCollection infer(String[] args, Options options, ErrorHandler eh) throws SAXException, IOException {
    InferHandler handler = new InferHandler(new DatatypeLibraryLoader());
    XMLReader xr = new SAXResolver(options.resolver).createXMLReader();
    xr.setErrorHandler(eh);
    xr.setContentHandler(handler);
    for (int i = 0; i < args.length; i++) {
      InputSource in = new InputSource(args[i]);
      if (options.encoding != null)
        in.setEncoding(options.encoding);
      xr.parse(in);
    }
    SchemaCollection sc = new SchemaCollection();
    sc.setMainUri(args[0]);
    SchemaDocument sd = new SchemaDocument(new Inferrer(handler.getSchema()).grammar);
    sc.getSchemaDocumentMap().put(sc.getMainUri(), sd);
    return sc;
  }

  /**
   * Builds {@link #grammar} from {@code schema}: first the start definition, then
   * one definition per multiply referenced element reached during conversion.
   */
  private Inferrer(Schema schema) {
    this.schema = schema;
    this.grammar = new GrammarPattern();
    findMultiplyReferencedElements();
    choosePrefixSeparator();
    grammar.getComponents().add(new DefineComponent(DefineComponent.START,
                                                    particleConverter.convert(schema.getStart())));
    // Names can get added to outputQueue during this loop,
    // so don't change this to a for each.
    for (int i = 0; i < outputQueue.size(); i++) {
      Name elementName = outputQueue.get(i);
      grammar.getComponents().add(new DefineComponent(getDefineName(elementName),
                                                      createElementPattern(elementName)));
    }
  }

  /**
   * Populates {@link #multiplyReferencedElementNames} by running a single
   * {@link ReferenceFinder} over the start particle and every element content model.
   */
  private void findMultiplyReferencedElements() {
    ReferenceFinder finder = new ReferenceFinder();
    schema.getStart().accept(finder);
    for (ElementDecl decl : schema.getElementDecls().values()) {
      Particle particle = decl.getContentModel();
      if (particle != null)
        particle.accept(finder);
    }
  }


  /**
   * Decides whether define names need a namespace prefix and, if so, picks a
   * separator that keeps all define names distinct. Leaves
   * {@link #prefixSeparator} as {@code null} when the multiply referenced elements
   * use at most one namespace.
   */
  private void choosePrefixSeparator() {
    // The prefix map is keyed by namespace URI and maps to the prefix.
    Map<String, String> prefixMap = schema.getPrefixMap();
    Set<String> namespacesInDefines = new HashSet<String>();
    for (Name name : multiplyReferencedElementNames)
      namespacesInDefines.add(name.getNamespaceUri());
    if (namespacesInDefines.size() <= 1)
      return; // don't need to use prefixes in defines
    // define additional prefixes if necessary
    namespacesInDefines.removeAll(prefixMap.keySet());
    if (namespacesInDefines.size() > 1) {
      namespacesInDefines.remove("");
      int n = 1;
      for (String ns : namespacesInDefines) {
        for (; ;) {
          String prefix = "ns" + Integer.toString(n++);
          // Prefixes are the map's values, so uniqueness must be checked against
          // the values; checking the keys would compare against namespace URIs.
          if (!prefixMap.containsValue(prefix)) {
            prefixMap.put(ns, prefix);
            break;
          }
        }
      }
    }
    // choose a prefixSeparator that avoids all collisions
    // Tries ".", "-", "_", then "..", "--", "__", and so on.
    // NOTE(review): this loop only exits once prefixSeparatorOk() succeeds; it
    // presumably relies on distinct namespaces never sharing a prefix - confirm.
    StringBuilder buf = new StringBuilder();
    for (int len = 1;; len++)
      for (int i = 0; i < SEPARATORS.length(); i++) {
        char c = SEPARATORS.charAt(i);
        for (int j = 0; j < len; j++)
          buf.append(c);
        prefixSeparator = buf.toString();
        if (prefixSeparatorOk())
          return;
        buf.setLength(0);
      }
  }

  /**
   * Returns whether the current {@link #prefixSeparator} yields a distinct define
   * name for every multiply referenced element.
   */
  private boolean prefixSeparatorOk() {
    Set<String> names = new HashSet<String>();
    for (Name elementName : multiplyReferencedElementNames) {
      // add() returns false on a duplicate define name
      if (!names.add(getDefineName(elementName)))
        return false;
    }
    return true;
  }

  /**
   * Creates the element pattern for {@code elementName}: its attributes, sorted by
   * name, followed by its content. The content is the converted content model, or
   * a data pattern when the declaration has no content model.
   */
  private Pattern createElementPattern(Name elementName) {
    ElementDecl elementDecl = schema.getElementDecl(elementName);
    Pattern contentPattern;
    Particle particle = elementDecl.getContentModel();
    if (particle != null)
      contentPattern = particleConverter.convert(particle);
    else
      contentPattern = makeDatatype(elementDecl.getDatatype());
    Map<Name, AttributeDecl> attributeDecls = elementDecl.getAttributeDecls();
    if (!attributeDecls.isEmpty()) {
      GroupPattern group = new GroupPattern();
      List<Name> attributeNames = new Vector<Name>();
      attributeNames.addAll(attributeDecls.keySet());
      Collections.sort(attributeNames, new Comparator<Name>() {
        public int compare(Name n1, Name n2) {
          return Name.compare(n1, n2);
        }
      });
      for (Name attName : attributeNames) {
        AttributeDecl att = attributeDecls.get(attName);
        Pattern tem;
        if (att.getDatatype() == null)
          tem = new TextPattern();
        else
          tem = makeDatatype(att.getDatatype());
        tem = new AttributePattern(makeNameClass(attName), tem);
        if (att.isOptional())
          tem = new OptionalPattern(tem);
        group.getChildren().add(tem);
      }
      // Merge the content into the attribute group rather than nesting groups;
      // empty content is dropped because the attributes alone form the group.
      if (contentPattern instanceof GroupPattern)
        group.getChildren().addAll(((GroupPattern)contentPattern).getChildren());
      else if (!(contentPattern instanceof EmptyPattern))
        group.getChildren().add(contentPattern);
      contentPattern = group;
    }
    return new ElementPattern(makeNameClass(elementName), contentPattern);
  }

  /**
   * Creates a name class for {@code name}, setting its prefix when the name is in
   * a non-empty namespace for which the schema's prefix map has a prefix.
   */
  private NameNameClass makeNameClass(Name name) {
    String ns = name.getNamespaceUri();
    NameNameClass nnc = new NameNameClass(ns, name.getLocalName());
    if (!ns.equals("")) {
      String prefix = schema.getPrefixMap().get(ns);
      if (prefix != null)
        nnc.setPrefix(prefix);
    }
    return nnc;
  }

  /** Creates a data pattern from a datatype name (library URI plus type name). */
  private static DataPattern makeDatatype(Name datatypeName) {
    return new DataPattern(datatypeName.getNamespaceUri(), datatypeName.getLocalName());
  }

  /**
   * Returns the define name for an element: prefix, separator and local name when
   * a separator is in use and the element's namespace has a prefix; otherwise
   * just the local name.
   */
  private String getDefineName(Name elementName) {
    if (prefixSeparator != null) {
      String prefix = schema.getPrefixMap().get(elementName.getNamespaceUri());
      if (prefix != null)
        return prefix + prefixSeparator + elementName.getLocalName();
    }
    return elementName.getLocalName();
  }

  /** Returns the only child of a single-child composite, otherwise the composite itself. */
  private static Pattern normalize(CompositePattern cp) {
    if (cp.getChildren().size() == 1)
      return cp.getChildren().get(0);
    return cp;
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy