All Downloads are FREE. Search and download functionalities are using the official Maven repository.

jdk8u.jaxp.org.apache.xerces.external.impl.XML11DocumentScannerImpl Maven / Gradle / Ivy

The newest version!
/*
 * Copyright (c) 2016, Oracle and/or its affiliates. All rights reserved.
 */
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package jdk8u.jaxp.org.apache.xerces.external.impl;

import jdk8u.jaxp.org.apache.xerces.external.impl.msg.XMLMessageFormatter;
import jdk8u.jaxp.org.apache.xerces.external.util.XML11Char;
import jdk8u.jaxp.org.apache.xerces.external.util.XMLChar;
import jdk8u.jaxp.org.apache.xerces.external.util.XMLStringBuffer;
import jdk8u.jaxp.org.apache.xerces.external.xni.XMLString;
import jdk8u.jaxp.org.apache.xerces.external.xni.XNIException;
import java.io.IOException;

/**
 * This class is responsible for scanning XML document structure
 * and content. The scanner acts as the source for the document
 * information which is communicated to the document handler.
 * 

* This component requires the following features and properties from the * component manager that uses it: *

    *
  • http://xml.org/sax/features/namespaces
  • *
  • http://xml.org/sax/features/validation
  • *
  • http://apache.org/xml/features/nonvalidating/load-external-dtd
  • *
  • http://apache.org/xml/features/scanner/notify-char-refs
  • *
  • http://apache.org/xml/features/scanner/notify-builtin-refs
  • *
  • http://apache.org/xml/properties/external/symbol-table
  • *
  • http://apache.org/xml/properties/external/error-reporter
  • *
  • http://apache.org/xml/properties/external/entity-manager
  • *
  • http://apache.org/xml/properties/external/dtd-scanner
  • *
* * @xerces.internal * * @author Glenn Marcy, IBM * @author Andy Clark, IBM * @author Arnaud Le Hors, IBM * @author Eric Ye, IBM * * @version $Id: XML11DocumentScannerImpl.java,v 1.5 2010/08/04 20:59:09 joehw Exp $ */ public class XML11DocumentScannerImpl extends XMLDocumentScannerImpl { /** String buffer. */ private final XMLStringBuffer fStringBuffer = new XMLStringBuffer(); private final XMLStringBuffer fStringBuffer2 = new XMLStringBuffer(); private final XMLStringBuffer fStringBuffer3 = new XMLStringBuffer(); // // Constructors // /** Default constructor. */ public XML11DocumentScannerImpl() {super();} // () // // overridden methods // // XMLDocumentFragmentImpl methods /** * Scans element content. * * @return Returns the next character on the stream. */ protected int scanContent(XMLStringBuffer content) throws IOException, XNIException { fTempString.length = 0; int c = fEntityScanner.scanContent(fTempString); content.append(fTempString); if (c == '\r' || c == 0x85 || c == 0x2028) { // happens when there is the character reference // but scanContent doesn't do entity expansions... // is this *really* necessary??? - NG fEntityScanner.scanChar(null); content.append((char)c); c = -1; } /*if (fDocumentHandler != null && content.length > 0) { fDocumentHandler.characters(content, null); } */ if (c == ']') { content.append((char)fEntityScanner.scanChar(null)); // remember where we are in case we get an endEntity before we // could flush the buffer out - this happens when we're parsing an // entity which ends with a ] fInScanContent = true; // // We work on a single character basis to handle cases such as: // ']]]>' which we might otherwise miss. // if (fEntityScanner.skipChar(']', null)) { content.append(']'); while (fEntityScanner.skipChar(']', null)) { content.append(']'); } if (fEntityScanner.skipChar('>', null)) { reportFatalError("CDEndInContent", null); } } /*if (fDocumentHandler != null && fStringBuffer.length != 0) { fDocumentHandler.characters(fStringBuffer, null); }*/ fInScanContent = false; c = -1; } return c; } // scanContent():int /** * Scans an attribute value and normalizes whitespace converting all * whitespace characters to space characters. * * [10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" * * @param value The XMLString to fill in with the value. * @param nonNormalizedValue The XMLString to fill in with the * non-normalized value. * @param atName The name of the attribute being parsed (for error msgs). * @param checkEntities true if undeclared entities should be reported as VC violation, * false if undeclared entities should be reported as WFC violation. * @param eleName The name of element to which this attribute belongs. * @param isNSURI The flag indicating whether the content is a namespace URI * * @return true if the non-normalized and normalized value are the same * * Note: This method uses fStringBuffer2, anything in it * at the time of calling is lost. **/ protected boolean scanAttributeValue(XMLString value, XMLString nonNormalizedValue, String atName, boolean checkEntities,String eleName, boolean isNSURI) throws IOException, XNIException { // quote int quote = fEntityScanner.peekChar(); if (quote != '\'' && quote != '"') { reportFatalError("OpenQuoteExpected", new Object[]{eleName,atName}); } fEntityScanner.scanChar(NameType.ATTRIBUTE); int entityDepth = fEntityDepth; int c = fEntityScanner.scanLiteral(quote, value, isNSURI); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** scanLiteral -> \"" + value.toString() + "\""); } int fromIndex = 0; if (c == quote && (fromIndex = isUnchangedByNormalization(value)) == -1) { /** Both the non-normalized and normalized attribute values are equal. **/ nonNormalizedValue.setValues(value); int cquote = fEntityScanner.scanChar(NameType.ATTRIBUTE); if (cquote != quote) { reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); } return true; } fStringBuffer2.clear(); fStringBuffer2.append(value); normalizeWhitespace(value, fromIndex); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** normalizeWhitespace -> \"" + value.toString() + "\""); } if (c != quote) { fScanningAttribute = true; fStringBuffer.clear(); do { fStringBuffer.append(value); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** value2: \"" + fStringBuffer.toString() + "\""); } if (c == '&') { fEntityScanner.skipChar('&', NameType.REFERENCE); if (entityDepth == fEntityDepth) { fStringBuffer2.append('&'); } if (fEntityScanner.skipChar('#', NameType.REFERENCE)) { if (entityDepth == fEntityDepth) { fStringBuffer2.append('#'); } int ch = scanCharReferenceValue(fStringBuffer, fStringBuffer2); if (ch != -1) { if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** value3: \"" + fStringBuffer.toString() + "\""); } } } else { String entityName = fEntityScanner.scanName(NameType.REFERENCE); if (entityName == null) { reportFatalError("NameRequiredInReference", null); } else if (entityDepth == fEntityDepth) { fStringBuffer2.append(entityName); } if (!fEntityScanner.skipChar(';', NameType.REFERENCE)) { reportFatalError("SemicolonRequiredInReference", new Object []{entityName}); } else if (entityDepth == fEntityDepth) { fStringBuffer2.append(';'); } if (resolveCharacter(entityName, fStringBuffer)) { checkEntityLimit(false, fEntityScanner.fCurrentEntity.name, 1); } else { if (fEntityManager.isExternalEntity(entityName)) { reportFatalError("ReferenceToExternalEntity", new Object[] { entityName }); } else { if (!fEntityManager.isDeclaredEntity(entityName)) { //WFC & VC: Entity Declared if (checkEntities) { if (fValidation) { fErrorReporter.reportError(XMLMessageFormatter.XML_DOMAIN, "EntityNotDeclared", new Object[]{entityName}, XMLErrorReporter.SEVERITY_ERROR); } } else { reportFatalError("EntityNotDeclared", new Object[]{entityName}); } } fEntityManager.startEntity(true, entityName, true); } } } } else if (c == '<') { reportFatalError("LessthanInAttValue", new Object[] { eleName, atName }); fEntityScanner.scanChar(null); if (entityDepth == fEntityDepth) { fStringBuffer2.append((char)c); } } else if (c == '%' || c == ']') { fEntityScanner.scanChar(null); fStringBuffer.append((char)c); if (entityDepth == fEntityDepth) { fStringBuffer2.append((char)c); } if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueF: \"" + fStringBuffer.toString() + "\""); } } // note that none of these characters should ever get through // XML11EntityScanner. Not sure why // this check was originally necessary. - NG else if (c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) { fEntityScanner.scanChar(null); fStringBuffer.append(' '); if (entityDepth == fEntityDepth) { fStringBuffer2.append('\n'); } } else if (c != -1 && XMLChar.isHighSurrogate(c)) { fStringBuffer3.clear(); if (scanSurrogates(fStringBuffer3)) { fStringBuffer.append(fStringBuffer3); if (entityDepth == fEntityDepth) { fStringBuffer2.append(fStringBuffer3); } if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueI: \"" + fStringBuffer.toString() + "\""); } } } else if (c != -1 && isInvalidLiteral(c)) { reportFatalError("InvalidCharInAttValue", new Object[] {eleName, atName, Integer.toString(c, 16)}); fEntityScanner.scanChar(null); if (entityDepth == fEntityDepth) { fStringBuffer2.append((char)c); } } c = fEntityScanner.scanLiteral(quote, value, isNSURI); if (entityDepth == fEntityDepth) { fStringBuffer2.append(value); } normalizeWhitespace(value); } while (c != quote || entityDepth != fEntityDepth); fStringBuffer.append(value); if (DEBUG_ATTR_NORMALIZATION) { System.out.println("** valueN: \"" + fStringBuffer.toString() + "\""); } value.setValues(fStringBuffer); fScanningAttribute = false; } nonNormalizedValue.setValues(fStringBuffer2); // quote int cquote = fEntityScanner.scanChar(null); if (cquote != quote) { reportFatalError("CloseQuoteExpected", new Object[]{eleName,atName}); } return nonNormalizedValue.equals(value.ch, value.offset, value.length); } // scanAttributeValue() // // XMLScanner methods // // NOTE: this is a carbon copy of the code in XML11DTDScannerImpl; // we need to override these methods in both places. // this needs to be refactored!!! - NG /** * Scans public ID literal. * * [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'" * [13] PubidChar::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] * * The returned string is normalized according to the following rule, * from http://www.w3.org/TR/REC-xml#dt-pubid: * * Before a match is attempted, all strings of white space in the public * identifier must be normalized to single space characters (#x20), and * leading and trailing white space must be removed. * * @param literal The string to fill in with the public ID literal. * @return True on success. * * Note: This method uses fStringBuffer, anything in it at * the time of calling is lost. */ protected boolean scanPubidLiteral(XMLString literal) throws IOException, XNIException { int quote = fEntityScanner.scanChar(null); if (quote != '\'' && quote != '"') { reportFatalError("QuoteRequiredInPublicID", null); return false; } fStringBuffer.clear(); // skip leading whitespace boolean skipSpace = true; boolean dataok = true; while (true) { int c = fEntityScanner.scanChar(null); // REVISIT: none of these except \n and 0x20 should make it past the entity scanner if (c == ' ' || c == '\n' || c == '\r' || c == 0x85 || c == 0x2028) { if (!skipSpace) { // take the first whitespace as a space and skip the others fStringBuffer.append(' '); skipSpace = true; } } else if (c == quote) { if (skipSpace) { // if we finished on a space let's trim it fStringBuffer.length--; } literal.setValues(fStringBuffer); break; } else if (XMLChar.isPubid(c)) { fStringBuffer.append((char)c); skipSpace = false; } else if (c == -1) { reportFatalError("PublicIDUnterminated", null); return false; } else { dataok = false; reportFatalError("InvalidCharInPublicID", new Object[]{Integer.toHexString(c)}); } } return dataok; } /** * Normalize whitespace in an XMLString converting all whitespace * characters to space characters. */ protected void normalizeWhitespace(XMLString value) { int end = value.offset + value.length; for (int i = value.offset; i < end; ++i) { int c = value.ch[i]; if (XMLChar.isSpace(c)) { value.ch[i] = ' '; } } } /** * Normalize whitespace in an XMLString converting all whitespace * characters to space characters. */ protected void normalizeWhitespace(XMLString value, int fromIndex) { int end = value.offset + value.length; for (int i = value.offset + fromIndex; i < end; ++i) { int c = value.ch[i]; if (XMLChar.isSpace(c)) { value.ch[i] = ' '; } } } /** * Checks whether this string would be unchanged by normalization. * * @return -1 if the value would be unchanged by normalization, * otherwise the index of the first whitespace character which * would be transformed. */ protected int isUnchangedByNormalization(XMLString value) { int end = value.offset + value.length; for (int i = value.offset; i < end; ++i) { int c = value.ch[i]; if (XMLChar.isSpace(c)) { return i - value.offset; } } return -1; } // returns true if the given character is not // valid with respect to the version of // XML understood by this scanner. protected boolean isInvalid(int value) { return (XML11Char.isXML11Invalid(value)); } // isInvalid(int): boolean // returns true if the given character is not // valid or may not be used outside a character reference // with respect to the version of XML understood by this scanner. protected boolean isInvalidLiteral(int value) { return (!XML11Char.isXML11ValidLiteral(value)); } // isInvalidLiteral(int): boolean // returns true if the given character is // a valid nameChar with respect to the version of // XML understood by this scanner. protected boolean isValidNameChar(int value) { return (XML11Char.isXML11Name(value)); } // isValidNameChar(int): boolean // returns true if the given character is // a valid nameStartChar with respect to the version of // XML understood by this scanner. protected boolean isValidNameStartChar(int value) { return (XML11Char.isXML11NameStart(value)); } // isValidNameStartChar(int): boolean // returns true if the given character is // a valid NCName character with respect to the version of // XML understood by this scanner. protected boolean isValidNCName(int value) { return (XML11Char.isXML11NCName(value)); } // isValidNCName(int): boolean // returns true if the given character is // a valid high surrogate for a nameStartChar // with respect to the version of XML understood // by this scanner. protected boolean isValidNameStartHighSurrogate(int value) { return XML11Char.isXML11NameHighSurrogate(value); } // isValidNameStartHighSurrogate(int): boolean protected boolean versionSupported(String version) { return (version.equals("1.1") || version.equals("1.0")); } // versionSupported(String): boolean // returns the error message key for unsupported // versions of XML with respect to the version of // XML understood by this scanner. protected String getVersionNotSupportedKey () { return "VersionNotSupported11"; } // getVersionNotSupportedKey: String } // class XML11DocumentScannerImpl




© 2015 - 2024 Weber Informatics LLC | Privacy Policy