All downloads are free. The search and download functionality uses the official Maven repository.

com.sun.tools.xjc.reader.internalizer.DOMForest Maven / Gradle / Ivy

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 1997-2011 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License").  You
 * may not use this file except in compliance with the License.  You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt.  See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license."  If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above.  However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */

package com.sun.tools.xjc.reader.internalizer;

import java.io.IOException;
import java.io.OutputStream;
import java.io.OutputStreamWriter;
import java.net.URI;
import java.net.URISyntaxException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import static javax.xml.XMLConstants.W3C_XML_SCHEMA_NS_URI;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Source;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerException;
import javax.xml.transform.TransformerFactory;
import javax.xml.transform.dom.DOMSource;
import javax.xml.transform.sax.SAXResult;
import javax.xml.transform.sax.SAXSource;
import javax.xml.validation.SchemaFactory;

import com.sun.istack.NotNull;
import com.sun.istack.XMLStreamReaderToContentHandler;
import com.sun.tools.xjc.ErrorReceiver;
import com.sun.tools.xjc.reader.Const;
import com.sun.tools.xjc.reader.xmlschema.parser.SchemaConstraintChecker;
import com.sun.tools.xjc.util.ErrorReceiverFilter;
import com.sun.xml.bind.marshaller.DataWriter;
import com.sun.xml.xsom.parser.JAXPParser;
import com.sun.xml.xsom.parser.XMLParser;

import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.xml.sax.ContentHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.XMLFilterImpl;


/**
 * Builds a DOM forest and maintains association from
 * system IDs to DOM trees.
 * 
 * 

* A forest is a transitive reflexive closure of referenced documents. * IOW, if a document is in a forest, all the documents referenced from * it is in a forest, too. To support this semantics, {@link DOMForest} * uses {@link InternalizationLogic} to find referenced documents. * *

* Some documents are marked as "root"s, meaning those documents were * put into a forest explicitly, not because it is referenced from another * document. (However, a root document can be referenced from other * documents, too.) * * @author * Kohsuke Kawaguchi ([email protected]) */ public final class DOMForest { /** actual data storage map<SystemId,Document>. */ private final Map core = new HashMap(); /** * To correctly feed documents to a schema parser, we need to remember * which documents (of the forest) were given as the root * documents, and which of them are read as included/imported * documents. * *

* Set of system ids as strings. */ private final Set rootDocuments = new HashSet(); /** Stores location information for all the trees in this forest. */ public final LocatorTable locatorTable = new LocatorTable(); /** Stores all the outer-most <jaxb:bindings> customizations. */ public final Set outerMostBindings = new HashSet(); /** Used to resolve references to other schema documents. */ private EntityResolver entityResolver = null; /** Errors encountered during the parsing will be sent to this object. */ private ErrorReceiver errorReceiver = null; /** Schema language dependent part of the processing. */ protected final InternalizationLogic logic; private final SAXParserFactory parserFactory; private final DocumentBuilder documentBuilder; public DOMForest( SAXParserFactory parserFactory, DocumentBuilder documentBuilder, InternalizationLogic logic ) { this.parserFactory = parserFactory; this.documentBuilder = documentBuilder; this.logic = logic; } public DOMForest( InternalizationLogic logic ) { try { DocumentBuilderFactory dbf = DocumentBuilderFactory.newInstance(); dbf.setNamespaceAware(true); this.documentBuilder = dbf.newDocumentBuilder(); this.parserFactory = SAXParserFactory.newInstance(); this.parserFactory.setNamespaceAware(true); } catch( ParserConfigurationException e ) { throw new AssertionError(e); } this.logic = logic; } /** * Gets the DOM tree associated with the specified system ID, * or null if none is found. */ public Document get( String systemId ) { Document doc = core.get(systemId); if( doc==null && systemId.startsWith("file:/") && !systemId.startsWith("file://") ) { // As of JDK1.4, java.net.URL.toExternal method returns URLs like // "file:/abc/def/ghi" which is an incorrect file protocol URL according to RFC1738. // Some other correctly functioning parts return the correct URLs ("file:///abc/def/ghi"), // and this descripancy breaks DOM look up by system ID. // this extra check solves this problem. 
doc = core.get( "file://"+systemId.substring(5) ); } if( doc==null && systemId.startsWith("file:") ) { // on Windows, filenames are case insensitive. // perform case-insensitive search for improved user experience String systemPath = getPath(systemId); for (String key : core.keySet()) { if(key.startsWith("file:") && getPath(key).equalsIgnoreCase(systemPath)) { doc = core.get(key); break; } } } return doc; } /** * Strips off the leading 'file:///' portion from an URL. */ private String getPath(String key) { key = key.substring(5); // skip 'file:' while(key.length()>0 && key.charAt(0)=='/') key = key.substring(1); return key; } /** * Returns a read-only set of root document system IDs. */ public Set getRootDocuments() { return Collections.unmodifiableSet(rootDocuments); } /** * Picks one document at random and returns it. */ public Document getOneDocument() { for (Document dom : core.values()) { if (!dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) return dom; } // we should have caught this error very early on throw new AssertionError(); } /** * Checks the correctness of the XML Schema documents and return true * if it's OK. * *

* This method performs a weaker version of the tests where error messages * are provided without line number information. So whenever possible * use {@link SchemaConstraintChecker}. * * @see SchemaConstraintChecker */ public boolean checkSchemaCorrectness(ErrorReceiver errorHandler) { try { SchemaFactory sf = SchemaFactory.newInstance(W3C_XML_SCHEMA_NS_URI); ErrorReceiverFilter filter = new ErrorReceiverFilter(errorHandler); sf.setErrorHandler(filter); Set roots = getRootDocuments(); Source[] sources = new Source[roots.size()]; int i=0; for (String root : roots) { sources[i++] = new DOMSource(get(root),root); } sf.newSchema(sources); return !filter.hadError(); } catch (SAXException e) { // the errors should have been reported return false; } } /** * Gets the system ID from which the given DOM is parsed. *

* Poor-man's base URI. */ public String getSystemId( Document dom ) { for (Map.Entry e : core.entrySet()) { if (e.getValue() == dom) return e.getKey(); } return null; } public Document parse( InputSource source, boolean root ) throws SAXException { if( source.getSystemId()==null ) throw new IllegalArgumentException(); return parse( source.getSystemId(), source, root ); } /** * Parses an XML at the given location ( * and XMLs referenced by it) into DOM trees * and stores them to this forest. * * @return the parsed DOM document object. */ public Document parse( String systemId, boolean root ) throws SAXException, IOException { systemId = normalizeSystemId(systemId); if( core.containsKey(systemId) ) // this document has already been parsed. Just ignore. return core.get(systemId); InputSource is=null; // allow entity resolver to find the actual byte stream. if( entityResolver!=null ) is = entityResolver.resolveEntity(null,systemId); if( is==null ) is = new InputSource(systemId); // but we still use the original system Id as the key. return parse( systemId, is, root ); } /** * Returns a {@link ContentHandler} to feed SAX events into. * *

* The client of this class can feed SAX events into the handler * to parse a document into this DOM forest. * * This version requires that the DOM object to be created and registered * to the map beforehand. */ private ContentHandler getParserHandler( Document dom ) { ContentHandler handler = new DOMBuilder(dom,locatorTable,outerMostBindings); handler = new WhitespaceStripper(handler,errorReceiver,entityResolver); handler = new VersionChecker(handler,errorReceiver,entityResolver); // insert the reference finder so that // included/imported schemas will be also parsed XMLFilterImpl f = logic.createExternalReferenceFinder(this); f.setContentHandler(handler); if(errorReceiver!=null) f.setErrorHandler(errorReceiver); if(entityResolver!=null) f.setEntityResolver(entityResolver); return f; } public interface Handler extends ContentHandler { /** * Gets the DOM that was built. */ public Document getDocument(); } private static abstract class HandlerImpl extends XMLFilterImpl implements Handler { } /** * Returns a {@link ContentHandler} to feed SAX events into. * *

* The client of this class can feed SAX events into the handler * to parse a document into this DOM forest. */ public Handler getParserHandler( String systemId, boolean root ) { final Document dom = documentBuilder.newDocument(); core.put( systemId, dom ); if(root) rootDocuments.add(systemId); ContentHandler handler = getParserHandler(dom); // we will register the DOM to the map once the system ID becomes available. // but the SAX allows the event source to not to provide that information, // so be prepared for such case. HandlerImpl x = new HandlerImpl() { public Document getDocument() { return dom; } }; x.setContentHandler(handler); return x; } /** * Parses the given document and add it to the DOM forest. * * @return * null if there was a parse error. otherwise non-null. */ public Document parse( String systemId, InputSource inputSource, boolean root ) throws SAXException { Document dom = documentBuilder.newDocument(); systemId = normalizeSystemId(systemId); // put into the map before growing a tree, to // prevent recursive reference from causing infinite loop. core.put( systemId, dom ); if(root) rootDocuments.add(systemId); try { XMLReader reader = parserFactory.newSAXParser().getXMLReader(); reader.setContentHandler(getParserHandler(dom)); if(errorReceiver!=null) reader.setErrorHandler(errorReceiver); if(entityResolver!=null) reader.setEntityResolver(entityResolver); reader.parse(inputSource); } catch( ParserConfigurationException e ) { // in practice, this exception won't happen. 
errorReceiver.error(e.getMessage(),e); core.remove(systemId); rootDocuments.remove(systemId); return null; } catch( IOException e ) { errorReceiver.error(Messages.format(Messages.DOMFOREST_INPUTSOURCE_IOEXCEPTION, systemId, e.toString()),e); core.remove(systemId); rootDocuments.remove(systemId); return null; } return dom; } private String normalizeSystemId(String systemId) { try { systemId = new URI(systemId).normalize().toString(); } catch (URISyntaxException e) { // leave the system ID untouched. In my experience URI is often too strict } return systemId; } public Document parse( String systemId, XMLStreamReader parser, boolean root ) throws XMLStreamException { Document dom = documentBuilder.newDocument(); systemId = normalizeSystemId(systemId); if(root) rootDocuments.add(systemId); if(systemId==null) throw new IllegalArgumentException("system id cannot be null"); core.put( systemId, dom ); new XMLStreamReaderToContentHandler(parser,getParserHandler(dom),false,false).bridge(); return dom; } /** * Performs internalization. * * This method should be called only once, only after all the * schemas are parsed. * * @return * the returned bindings need to be applied after schema * components are built. */ public SCDBasedBindingSet transform(boolean enableSCD) { return Internalizer.transform(this,enableSCD); } /** * Performs the schema correctness check by using JAXP 1.3. * *

* This is "weak", because {@link SchemaFactory#newSchema(Source[])} * doesn't handle inclusions very correctly (it ends up parsing it * from its original source, not in this tree), and because * it doesn't handle two documents for the same namespace very * well. * *

* We should eventually fix JAXP (and Xerces), but meanwhile * this weaker and potentially wrong correctness check is still * better than nothing when used inside JAX-WS (JAXB CLI and Ant * does a better job of checking this.) * *

* To receive errors, use {@link SchemaFactory#setErrorHandler(ErrorHandler)}. */ public void weakSchemaCorrectnessCheck(SchemaFactory sf) { List sources = new ArrayList(); for( String systemId : getRootDocuments() ) { Document dom = get(systemId); if (dom.getDocumentElement().getNamespaceURI().equals(Const.JAXB_NSURI)) continue; // this isn't a schema. we have to do a negative check because if we see completely unrelated ns, we want to report that as an error SAXSource ss = createSAXSource(systemId); try { ss.getXMLReader().setFeature("http://xml.org/sax/features/namespace-prefixes",true); } catch (SAXException e) { throw new AssertionError(e); // Xerces wants this. See 6395322. } sources.add(ss); } try { sf.newSchema(sources.toArray(new SAXSource[0])); } catch (SAXException e) { // error should have been reported. } catch (RuntimeException e) { // JAXP RI isn't very trustworthy when it comes to schema error check, // and we know some cases where it just dies with NPE. So handle it gracefully. // this masks a bug in the JAXP RI, but we need a release that we have to make. try { sf.getErrorHandler().warning( new SAXParseException(Messages.format( Messages.ERR_GENERAL_SCHEMA_CORRECTNESS_ERROR,e.getMessage()), null,null,-1,-1,e)); } catch (SAXException _) { // ignore } } } /** * Creates a {@link SAXSource} that, when parsed, reads from this {@link DOMForest} * (instead of parsing the original source identified by the system ID.) */ public @NotNull SAXSource createSAXSource(String systemId) { ContentHandlerNamespacePrefixAdapter reader = new ContentHandlerNamespacePrefixAdapter(new XMLFilterImpl() { // XMLReader that uses XMLParser to parse. We need to use XMLFilter to indrect // handlers, since SAX allows handlers to be changed while parsing. 
public void parse(InputSource input) throws SAXException, IOException { createParser().parse(input, this, this, this); } public void parse(String systemId) throws SAXException, IOException { parse(new InputSource(systemId)); } }); return new SAXSource(reader,new InputSource(systemId)); } /** * Creates {@link XMLParser} for XSOM which reads documents from * this DOMForest rather than doing a fresh parse. * * The net effect is that XSOM will read transformed XML Schemas * instead of the original documents. */ public XMLParser createParser() { return new DOMForestParser(this,new JAXPParser()); } public EntityResolver getEntityResolver() { return entityResolver; } public void setEntityResolver(EntityResolver entityResolver) { this.entityResolver = entityResolver; } public ErrorReceiver getErrorHandler() { return errorReceiver; } public void setErrorHandler(ErrorReceiver errorHandler) { this.errorReceiver = errorHandler; } /** * Gets all the parsed documents. */ public Document[] listDocuments() { return core.values().toArray(new Document[core.size()]); } /** * Gets all the system IDs of the documents. */ public String[] listSystemIDs() { return core.keySet().toArray(new String[core.keySet().size()]); } /** * Dumps the contents of the forest to the specified stream. * * This is a debug method. As such, error handling is sloppy. */ public void dump( OutputStream out ) throws IOException { try { // create identity transformer Transformer it = TransformerFactory.newInstance().newTransformer(); for (Map.Entry e : core.entrySet()) { out.write( ("---<< "+e.getKey()+'\n').getBytes() ); DataWriter dw = new DataWriter(new OutputStreamWriter(out),null); dw.setIndentStep(" "); it.transform( new DOMSource(e.getValue()), new SAXResult(dw)); out.write( "\n\n\n".getBytes() ); } } catch( TransformerException e ) { e.printStackTrace(); } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy