org.netbeans.modules.html.validation.NbValidationTransaction Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.netbeans.modules.html.validation;
import com.thaiopensource.util.PropertyMap;
import com.thaiopensource.util.PropertyMapBuilder;
import com.thaiopensource.validate.*;
import com.thaiopensource.validate.prop.rng.RngProperty;
import com.thaiopensource.xml.sax.XMLReaderCreator;
import java.io.*;
import java.util.*;
import java.util.logging.Handler;
import java.util.logging.Level;
import java.util.logging.LogRecord;
import java.util.logging.Logger;
import java.util.regex.Pattern;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import nu.validator.checker.jing.CheckerSchema;
import nu.validator.htmlparser.common.*;
import nu.validator.htmlparser.sax.HtmlParser;
import nu.validator.io.DataUri;
import nu.validator.messages.MessageEmitterAdapter;
import nu.validator.messages.TooManyErrorsException;
import nu.validator.servlet.ParserMode;
import nu.validator.source.SourceCode;
import nu.validator.messages.ValidationTransaction;
import nu.validator.messages.BufferingRootNamespaceSniffer;
import nu.validator.messages.RootNamespaceSniffer;
import nu.validator.localentities.LocalCacheEntityResolver;
import nu.validator.spec.html5.Html5SpecBuilder;
import nu.validator.xml.*;
import nu.validator.xml.dataattributes.DataAttributeDroppingSchemaWrapper;
import nu.validator.xml.langattributes.XmlLangAttributeDroppingSchemaWrapper;
import org.netbeans.api.progress.ProgressHandle;
import org.netbeans.api.progress.ProgressHandleFactory;
import org.netbeans.modules.html.editor.lib.api.HtmlVersion;
import org.netbeans.modules.html.editor.lib.api.ProblemDescription;
import org.openide.util.NbBundle;
import org.xml.sax.*;
import org.xml.sax.ext.LexicalHandler;
public class NbValidationTransaction extends ValidationTransaction {
private static final Logger LOGGER = Logger.getLogger(NbValidationTransaction.class.getCanonicalName());
public static void enableDebug() {
LOGGER.setLevel(Level.FINE);
LOGGER.addHandler(new Handler() {
@Override
public void publish(LogRecord record) {
System.out.println(record.getMessage());
}
@Override
public void flush() {
}
@Override
public void close() throws SecurityException {
}
});
}
private static final Pattern SPACE = Pattern.compile("\\s+");
private static boolean INITIALIZED = false;
private static String INTERNAL_ERROR_MSG_SEE_LOG = NbBundle.getMessage(NbValidationTransaction.class, "MSG_Unexpected_Validator_Error_See_IDE_Log"); //NOI18N
private static String INTERNAL_ERROR_MSG = NbBundle.getMessage(NbValidationTransaction.class, "MSG_Unexpected_Validator_Error"); //NOI18N
protected String document = null;
ParserMode parser = ParserMode.AUTO;
private boolean laxType = false;
protected final AttributesImpl attrs = new AttributesImpl();
private String schemaUrls = null;
protected SAXParser xmlParser = null;
private CharacterHandlerReader sourceReader;
protected TypedInputSource documentInput;
protected DataUriEntityResolver dataRes;
protected ContentTypeParser contentTypeParser;
private boolean checkNormalization = false;
private SourceCode sourceCode = new SourceCode();
private boolean showSource;
private BaseUriTracker baseUriTracker = null;
private String charsetOverride = null;
private Set filteredNamespaces = new LinkedHashSet(); // linked
private Reader codeToValidate;
private long validationTime;
private ProblemsHandler problemsHandler = new ProblemsHandler();
private LinesMapper linesMapper = new LinesMapper();
private HtmlVersion version;
private String encoding;
public static synchronized NbValidationTransaction create(HtmlVersion version) {
return new NbValidationTransaction(version);
}
private static void initializeLocalEntities_HACK() {
//some of the validator's resources are read directly by URLConnection-s
//using no entity resolver. The URLs are first checked in System properties
//and if there's no property value defined the default network URL (http://...)
//is used. This causes the support not working offline and if online
//makes the initialization really slow.
//hacked by loading the resources from the internall files cache via
//returned internall URLs.
//IMO should be fixed in validator.nu by using the local cache entity resolver.
//MessageEmitterAdapter:
// URL url = LocalCacheEntityResolver.getResource("http://wiki.whatwg.org/wiki/MicrosyntaxDescriptions");
// System.setProperty("nu.validator.spec.microsyntax-descriptions", url.toExternalForm());
//
// url = LocalCacheEntityResolver.getResource("http://wiki.whatwg.org/wiki/Validator.nu_alt_advice");
// System.setProperty("nu.validator.spec.alt-advice", url.toExternalForm());
// //CharsetData:
// url = LocalCacheEntityResolver.getResource("http://www.iana.org/assignments/character-sets");
// System.setProperty("org.whattf.datatype.charset-registry", url.toExternalForm());
//
// //LanguageData:
// url = LocalCacheEntityResolver.getResource("http://www.iana.org/assignments/language-subtag-registry");
// System.setProperty("org.whattf.datatype.lang-registry", url.toExternalForm());
}
private static synchronized void initialize() {
if (INITIALIZED) {
return;
}
ProgressHandle progress = ProgressHandleFactory.createHandle(NbBundle.getMessage(NbValidationTransaction.class, "MSG_InitHTMLValidation")); //NOI18N
progress.start();
progress.switchToIndeterminate();
initializeLocalEntities_HACK();
try {
LOGGER.fine("Starting initialization.");
BufferedReader r = new BufferedReader(new InputStreamReader(LocalCacheEntityResolver.getPresetsAsStream(), "UTF-8"));
String line;
List doctypes = new LinkedList();
List namespaces = new LinkedList();
List labels = new LinkedList();
List urls = new LinkedList();
LOGGER.fine("Starting to loop over config file lines.");
while ((line = r.readLine()) != null) {
if ("".equals(line.trim())) {
break;
}
String s[] = line.split("\t");
doctypes.add(s[0]);
namespaces.add(s[1]);
labels.add(s[2]);
urls.add(s[3]);
}
// progress.start(10 * (urls.size() + 50) /* reading the html spec */);
// progress.progress(NbBundle.getMessage(ValidationTransaction.class, "MSG_LoadingSchemaFiles"));
LOGGER.fine("Finished reading config.");
String[] presetDoctypesAsStrings = doctypes.toArray(new String[0]);
presetNamespaces = namespaces.toArray(new String[0]);
presetLabels = labels.toArray(new String[0]);
presetUrls = urls.toArray(new String[0]);
LOGGER.fine("Converted config to arrays.");
for (int i = 0; i < presetNamespaces.length; i++) {
String str = presetNamespaces[i];
if ("-".equals(str)) {
presetNamespaces[i] = null;
} else {
presetNamespaces[i] = presetNamespaces[i].intern();
}
}
LOGGER.fine("Prepared namespace array.");
presetDoctypes = new int[presetDoctypesAsStrings.length];
for (int i = 0; i < presetDoctypesAsStrings.length; i++) {
presetDoctypes[i] = Integer.parseInt(presetDoctypesAsStrings[i]);
}
LOGGER.fine("Parsed doctype numbers into ints.");
// String prefix = System.getProperty("nu.validator.servlet.cachepathprefix");
// log4j.fine("The cache path prefix is: " + prefix);
ErrorHandler eh = new SystemErrErrorHandler();
LocalCacheEntityResolver er = new LocalCacheEntityResolver(new NullEntityResolver());
er.setAllowRnc(true);
PropertyMapBuilder pmb = new PropertyMapBuilder();
pmb.put(ValidateProperty.ERROR_HANDLER, eh);
pmb.put(ValidateProperty.ENTITY_RESOLVER, er);
pmb.put(ValidateProperty.XML_READER_CREATOR,
new XMLReaderCreatorImpl(eh, er));
RngProperty.CHECK_ID_IDREF.add(pmb);
PropertyMap pMap = pmb.toPropertyMap();
LOGGER.fine("Parsing set up. Starting to read schemas.");
SortedMap schemaMap = new TreeMap();
schemaMap.put("http://c.validator.nu/table/",
CheckerSchema.TABLE_CHECKER);
schemaMap.put("http://hsivonen.iki.fi/checkers/table/",
CheckerSchema.TABLE_CHECKER);
schemaMap.put("http://c.validator.nu/nfc/",
CheckerSchema.NORMALIZATION_CHECKER);
schemaMap.put("http://hsivonen.iki.fi/checkers/nfc/",
CheckerSchema.NORMALIZATION_CHECKER);
schemaMap.put("http://c.validator.nu/debug/",
CheckerSchema.DEBUG_CHECKER);
schemaMap.put("http://hsivonen.iki.fi/checkers/debug/",
CheckerSchema.DEBUG_CHECKER);
schemaMap.put("http://c.validator.nu/text-content/",
CheckerSchema.TEXT_CONTENT_CHECKER);
schemaMap.put("http://hsivonen.iki.fi/checkers/text-content/",
CheckerSchema.TEXT_CONTENT_CHECKER);
schemaMap.put("http://c.validator.nu/usemap/",
CheckerSchema.USEMAP_CHECKER);
schemaMap.put("http://n.validator.nu/checkers/usemap/",
CheckerSchema.USEMAP_CHECKER);
schemaMap.put("http://c.validator.nu/unchecked/",
CheckerSchema.UNCHECKED_SUBTREE_WARNER);
schemaMap.put("http://s.validator.nu/html5/assertions.sch",
CheckerSchema.ASSERTION_SCH);
schemaMap.put("http://c.validator.nu/obsolete/",
CheckerSchema.CONFORMING_BUT_OBSOLETE_WARNER);
schemaMap.put("http://c.validator.nu/xml-pi/",
CheckerSchema.XML_PI_CHECKER);
for (int i = 0; i < presetUrls.length; i++) {
String[] urls1 = SPACE.split(presetUrls[i]);
for (int j = 0; j < urls1.length; j++) {
String url = urls1[j];
if (schemaMap.get(url) == null && !isCheckerUrl(url)) {
Schema sch = proxySchemaByUrl(url, er, pMap);
schemaMap.put(url, sch);
// progress.progress(10);
}
}
}
LOGGER.fine("Schemas read.");
preloadedSchemaUrls = new String[schemaMap.size()];
preloadedSchemas = new Schema[schemaMap.size()];
int i = 0;
for (Map.Entry entry : schemaMap.entrySet()) {
preloadedSchemaUrls[i] = entry.getKey().intern();
Schema s = entry.getValue();
String u = entry.getKey();
if (isDataAttributeDroppingSchema(u)) {
s = new DataAttributeDroppingSchemaWrapper(
s);
}
if (isXmlLangAllowingSchema(u)) {
s = new XmlLangAttributeDroppingSchemaWrapper(s);
}
preloadedSchemas[i] = s;
i++;
}
// progress.progress(NbBundle.getMessage(ValidationTransaction.class, "MSG_LoadingHtmlSpecification"));
LOGGER.fine("Reading spec.");
html5spec = Html5SpecBuilder.parseSpec(LocalCacheEntityResolver.getHtml5SpecAsStream());
// progress.progress(50);
LOGGER.fine("Spec read.");
LOGGER.fine("Initialization complete.");
INITIALIZED = true;
} catch (Exception e) {
throw new RuntimeException(e);
} finally {
progress.finish();
}
}
private static boolean isDataAttributeDroppingSchema(String key) {
return ("http://s.validator.nu/xhtml5.rnc".equals(key)
|| "http://s.validator.nu/html5.rnc".equals(key)
|| "http://s.validator.nu/html5-its.rnc".equals(key)
|| "http://s.validator.nu/xhtml5-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/html5-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/w3c-xhtml5.rnc".equals(key)
|| "http://s.validator.nu/w3c-html5.rnc".equals(key)
|| "http://s.validator.nu/w3c-xhtml5-microdata-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/w3c-xhtml5-microdata-rdfa.rnc".equals(key)
|| "http://s.validator.nu/w3c-html5-microdata-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/w3c-html5-microdata-rdfa.rnc".equals(key));
}
private static boolean isXmlLangAllowingSchema(String key) {
return ("http://s.validator.nu/xhtml5.rnc".equals(key)
|| "http://s.validator.nu/html5.rnc".equals(key)
|| "http://s.validator.nu/html5-its.rnc".equals(key)
|| "http://s.validator.nu/xhtml5-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/html5-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/w3c-xhtml5.rnc".equals(key)
|| "http://s.validator.nu/w3c-html5.rnc".equals(key)
|| "http://s.validator.nu/w3c-xhtml5-microdata-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/w3c-xhtml5-microdata-rdfa.rnc".equals(key)
|| "http://s.validator.nu/w3c-html5-microdata-rdfalite.rnc".equals(key)
|| "http://s.validator.nu/w3c-html5-microdata-rdfa.rnc".equals(key));
}
private static boolean isCheckerUrl(String url) {
if ("http://c.validator.nu/all/".equals(url)
|| "http://hsivonen.iki.fi/checkers/all/".equals(url)) {
return true;
} else if ("http://c.validator.nu/all-html4/".equals(url)
|| "http://hsivonen.iki.fi/checkers/all-html4/".equals(url)) {
return true;
} else if ("http://c.validator.nu/base/".equals(url)) {
return true;
} else if ("http://c.validator.nu/rdfalite/".equals(url)) {
return true;
}
for (int i = 0; i < ALL_CHECKERS.length; i++) {
if (ALL_CHECKERS[i].equals(url)) {
return true;
}
}
return false;
}
public NbValidationTransaction(HtmlVersion version) {
this.version = version;
initialize();
}
public List getFoundProblems() {
return problemsHandler.getProblems();
}
/** return a list of problems with the given severity and higher (more severe issues) */
public List getFoundProblems(int ofThisTypeAndMoreSevere) {
return getFoundProblems(new ProblemDescriptionFilter.SeverityFilter(ofThisTypeAndMoreSevere));
}
public List getFoundProblems(ProblemDescriptionFilter filter) {
List filtered = new ArrayList();
for (ProblemDescription pd : getFoundProblems()) {
if (filter.accepts(pd)) {
filtered.add(pd);
}
}
return filtered;
}
public long getValidationTime() {
return validationTime;
}
public void validateCode(Reader code, String sourceURI, Set filteredNamespaces, String encoding) throws SAXException {
long from = System.currentTimeMillis();
codeToValidate = code;
document = sourceURI; //represents an URI where the document can be loaded
parser = htmlVersion2ParserMode(version);
LOGGER.fine(String.format("Using %s parser.", parser.name()));
// charsetOverride = "UTF-8";
this.encoding = encoding;
this.filteredNamespaces = filteredNamespaces;
if (!filteredNamespaces.isEmpty()) {
StringBuilder fns = new StringBuilder();
for (String ns : filteredNamespaces) {
fns.append(ns).append(", ");
}
LOGGER.fine(String.format("Filtering following namespaces: %s", fns));
}
int lineOffset = 0;
errorHandler = new MessageEmitterAdapter(sourceCode,
showSource, null, lineOffset, false,
new NbMessageEmitter(problemsHandler, linesMapper, true));
errorHandler.setLoggingOk(true);
errorHandler.setErrorsOnly(false);
validate();
validationTime = System.currentTimeMillis() - from;
}
public boolean isSuccess() {
return getFoundProblems(ProblemDescription.WARNING).isEmpty();
}
private ParserMode htmlVersion2ParserMode(HtmlVersion version) {
if (version.isXhtml()) {
return ParserMode.XML_NO_EXTERNAL_ENTITIES; //we do not use the parser for validation, no need to load external entities
} else {
switch (version) {
case HTML41_STRICT:
return ParserMode.HTML401_STRICT;
case HTML41_TRANSATIONAL:
return ParserMode.HTML401_TRANSITIONAL;
case HTML41_FRAMESET:
return ParserMode.AUTO; //???
case HTML5:
return ParserMode.HTML;
default:
return ParserMode.AUTO;
}
}
}
private boolean isHtmlUnsafePreset() {
if ("".equals(schemaUrls)) {
return false;
}
boolean preset = false;
for (int i = 0; i < presetUrls.length; i++) {
if (presetUrls[i].equals(schemaUrls)) {
preset = true;
break;
}
}
if (!preset) {
return false;
}
return !(schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-basic.rnc")
|| schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-strict.rnc")
|| schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-transitional.rnc")
|| schemaUrls.startsWith("http://s.validator.nu/xhtml10/xhtml-frameset.rnc")
|| schemaUrls.startsWith("http://s.validator.nu/html5/html5full.rnc")
|| schemaUrls.startsWith("http://s.validator.nu/html5/html5full-aria.rnc") || schemaUrls.startsWith("http://s.validator.nu/html5-aria-svg-mathml.rnc"));
}
@SuppressWarnings("deprecation")
void validate() throws SAXException {
// httpRes = new PrudentHttpEntityResolver(SIZE_LIMIT, laxType,
// errorHandler);
// dataRes = new DataUriEntityResolver(httpRes, laxType, errorHandler);
// contentTypeParser = new ContentTypeParser(errorHandler, laxType);
// entityResolver = new LocalCacheEntityResolver(dataRes);
entityResolver = new LocalCacheEntityResolver(new NullEntityResolver());
setAllowRnc(true);
try {
this.errorHandler.start(document);
PropertyMapBuilder pmb = new PropertyMapBuilder();
pmb.put(ValidateProperty.ERROR_HANDLER, errorHandler);
pmb.put(ValidateProperty.ENTITY_RESOLVER, entityResolver);
pmb.put(ValidateProperty.XML_READER_CREATOR,
new XMLReaderCreatorImpl(errorHandler,
entityResolver));
pmb.put(ValidateProperty.SCHEMA_RESOLVER, this);
RngProperty.CHECK_ID_IDREF.add(pmb);
jingPropertyMap = pmb.toPropertyMap();
// tryToSetupValidator();
setAllowRnc(false);
loadDocAndSetupParser();
if (htmlParser != null) {
setErrorProfile();
}
reader.setErrorHandler(errorHandler);
contentType = documentInput.getType();
sourceCode.initialize(documentInput);
WiretapXMLReaderWrapper wiretap = new WiretapXMLReaderWrapper(
reader);
boolean isXhtml = parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION
|| parser == ParserMode.XML_NO_EXTERNAL_ENTITIES;
ContentHandler recorder = isXhtml
? new XercesInaccurateLocatorWorkaround(sourceCode.getLocationRecorder(), linesMapper)
: sourceCode.getLocationRecorder();
if (baseUriTracker == null) {
wiretap.setWiretapContentHander(recorder);
} else {
wiretap.setWiretapContentHander(new CombineContentHandler(
recorder, baseUriTracker));
}
wiretap.setWiretapLexicalHandler((LexicalHandler) recorder);
reader = wiretap;
if (htmlParser != null) {
htmlParser.addCharacterHandler(linesMapper);
htmlParser.addCharacterHandler(sourceCode);
htmlParser.setMappingLangToXmlLang(true);
htmlParser.setErrorHandler(errorHandler.getExactErrorHandler());
htmlParser.setTreeBuilderErrorHandlerOverride(errorHandler);
errorHandler.setHtml(true);
} else if (xmlParser != null) {
// this must be after wiretap!
if (!filteredNamespaces.isEmpty()) {
reader = new NamespaceDroppingXMLReaderWrapper(reader,
filteredNamespaces);
}
xmlParser.getXMLReader().setErrorHandler(errorHandler.getExactErrorHandler());
sourceReader.addCharacterHandler(linesMapper);
} else {
throw new RuntimeException("Bug. Unreachable.");
}
reader = new AttributesPermutingXMLReaderWrapper(reader); // make
// RNG
// validation
// better
if (charsetOverride != null) {
String charset = documentInput.getEncoding();
if (charset == null) {
errorHandler.warning(new SAXParseException(
"Overriding document character encoding from none to \u201C"
+ charsetOverride + "\u201D.", null));
} else {
errorHandler.warning(new SAXParseException(
"Overriding document character encoding from \u201C"
+ charset + "\u201D to \u201C"
+ charsetOverride + "\u201D.", null));
}
documentInput.setEncoding(charsetOverride);
}
reader.parse(documentInput);
} catch (ParserConfigurationException e) {
LOGGER.log(Level.INFO, getDocumentErrorMsg(), e);
errorHandler.internalError(
e,
INTERNAL_ERROR_MSG_SEE_LOG);
} catch (TooManyErrorsException e) {
LOGGER.log(Level.FINE, getDocumentErrorMsg(), e);
errorHandler.fatalError(e);
} catch (SAXException e) {
LOGGER.log(Level.FINE, getDocumentErrorMsg(), e);
} catch (IOException e) {
LOGGER.log(Level.INFO, getDocumentErrorMsg(), e);
errorHandler.ioError(e);
} catch (IncorrectSchemaException e) {
LOGGER.log(Level.INFO, getDocumentErrorMsg(), e);
errorHandler.schemaError(e);
} catch (RuntimeException e) {
String message = reportRuntimeExceptionOnce(e)
? INTERNAL_ERROR_MSG_SEE_LOG
: INTERNAL_ERROR_MSG;
errorHandler.internalError(e, message);
} catch (Error e) {
LOGGER.log(Level.INFO, getDocumentInternalErrorMsg(), e);
errorHandler.internalError(
e,
INTERNAL_ERROR_MSG_SEE_LOG);
} finally {
errorHandler.end(successMessage(), failureMessage());
}
}
private static final Set REPORTED_RUNTIME_EXCEPTIONS = new HashSet();
/**
* Report REs only once per ide session and use lower log levels for known issues
*
* @return true if the exception has been logged and is visible in the IDE log
*/
private boolean reportRuntimeExceptionOnce(RuntimeException e) {
int hash = document.hashCode();
hash = 21 * hash + e.getClass().hashCode();
if(e.getMessage() != null) {
hash = 21 * hash + e.getMessage().hashCode();
} else {
//no message provided, so use the whole stacktrace hashcode
StringWriter sw = new StringWriter();
PrintWriter pw = new PrintWriter(sw);
e.printStackTrace(pw);
pw.flush();
sw.flush();
hash = 21 * hash + sw.toString().hashCode();
}
Level level = isKnownProblem(e) ? Level.FINE : Level.INFO;
Marker marker = new Marker(hash);
if(REPORTED_RUNTIME_EXCEPTIONS.add(marker)) {
LOGGER.log(level, getDocumentInternalErrorMsg(), e);
}
return LOGGER.isLoggable(level);
}
private static boolean isKnownProblem(RuntimeException e) {
//issue #194939
Class eClass = e.getClass();
if(eClass.equals(StringIndexOutOfBoundsException.class)) {
StackTraceElement[] stelements = e.getStackTrace();
if(stelements.length >= 1) {
if(stelements[1].getClassName().equals("com.thaiopensource.validate.schematron.OutputHandler") //NOI18N
&& stelements[1].getMethodName().equals("startElement")) { //NOI18N
return true;
}
}
} else if(eClass.equals(IllegalStateException.class)) {
//Bug 199647 - Failed validation and IllegalStateException during pojects scanning
String msg = "Two cells in effect cannot start on the same column, so this should never happen!"; //NOI18N
return e.getMessage() != null && e.getMessage().indexOf(msg) != -1;
}
return false;
}
private String getDocumentErrorMsg() {
return new StringBuilder().append("An error occurred during validation of ").append(document).toString(); //NOI18N
}
private String getDocumentInternalErrorMsg() {
return new StringBuilder().append("An internal error occurred during validation of ").append(document).toString(); //NOI18N
}
/**
* @return
* @throws SAXException
*/
protected String successMessage() throws SAXException {
return "The document validates according to the specified schema(s).";
}
protected String failureMessage() throws SAXException {
return "There were errors.";
}
/**
* @throws SAXException
* @throws IOException
* @throws IncorrectSchemaException
*/
protected void tryToSetupValidator() throws SAXException, IOException,
IncorrectSchemaException {
validator = validatorByUrls(schemaUrls);
}
protected void setErrorProfile() {
// profile = request.getParameter("profile");
HashMap profileMap = new HashMap();
// if ("pedagogical".equals(profile)) {
// profileMap.put("xhtml1", "warn");
// } else if ("polyglot".equals(profile)) {
// profileMap.put("xhtml1", "warn");
// profileMap.put("xhtml2", "warn");
// } else {
// return; // presumed to be permissive
// }
htmlParser.setErrorProfile(profileMap);
}
/**
* @throws SAXException
* @throws IOException
* @throws IncorrectSchemaException
* @throws SAXNotRecognizedException
* @throws SAXNotSupportedException
*/
protected void loadDocAndSetupParser() throws SAXException, IOException,
IncorrectSchemaException, SAXNotRecognizedException,
SAXNotSupportedException, ParserConfigurationException {
switch (parser) {
case HTML_AUTO:
case HTML:
case HTML401_STRICT:
case HTML401_TRANSITIONAL:
if (isHtmlUnsafePreset()) {
String message = "The chosen preset schema is not appropriate for HTML.";
SAXException se = new SAXException(message);
errorHandler.schemaError(se);
throw se;
}
setAllowGenericXml(false);
setAllowHtml(true);
setAcceptAllKnownXmlTypes(false);
setAllowXhtml(false);
loadDocumentInput(false);
newHtmlParser();
DoctypeExpectation doctypeExpectation;
int schemaId;
switch (parser) {
case HTML:
doctypeExpectation = DoctypeExpectation.HTML;
schemaId = HTML5_SCHEMA;
break;
case HTML401_STRICT:
doctypeExpectation = DoctypeExpectation.HTML401_STRICT;
schemaId = XHTML1STRICT_SCHEMA;
break;
case HTML401_TRANSITIONAL:
doctypeExpectation = DoctypeExpectation.HTML401_TRANSITIONAL;
schemaId = XHTML1TRANSITIONAL_SCHEMA;
break;
default:
doctypeExpectation = DoctypeExpectation.AUTO;
schemaId = 0;
break;
}
htmlParser.setDoctypeExpectation(doctypeExpectation);
htmlParser.setDocumentModeHandler(this);
// htmlParser.setProperty("http://validator.nu/properties/body-fragment-context-mode", bodyFragmentContextMode);
reader = htmlParser;
if (validator == null) {
LOGGER.fine(String.format("Using following schemas: %s", getSchemasForDoctypeId(schemaId)));
validator = validatorByDoctype(schemaId);
}
if (validator != null) {
reader.setContentHandler(validator.getContentHandler());
}
break;
case XML_NO_EXTERNAL_ENTITIES:
case XML_EXTERNAL_ENTITIES_NO_VALIDATION:
setAllowGenericXml(true);
setAllowHtml(false);
setAcceptAllKnownXmlTypes(true);
setAllowXhtml(true);
loadDocumentInput(true);
if (version != null) {
switch (version) {
case XHTML10_TRANSATIONAL:
schemaId = XHTML1TRANSITIONAL_SCHEMA;
break;
case XHTML10_STICT:
schemaId = XHTML1STRICT_SCHEMA;
break;
case XHTML10_FRAMESET:
schemaId = XHTML1FRAMESET_SCHEMA;
break;
default:
schemaId = 0;
}
if (schemaId != 0) {
validator = validatorByDoctype(schemaId);
LOGGER.fine(String.format("Using following schemas: %s", getSchemasForDoctypeId(schemaId)));
}
}
setupXmlParser();
break;
default:
setAllowGenericXml(true);
setAllowHtml(true);
setAcceptAllKnownXmlTypes(true);
setAllowXhtml(true);
loadDocumentInput(false);
if ("text/html".equals(documentInput.getType())) {
if (isHtmlUnsafePreset()) {
String message = "The Content-Type was \u201Ctext/html\u201D, but the chosen preset schema is not appropriate for HTML.";
SAXException se = new SAXException(message);
errorHandler.schemaError(se);
throw se;
}
errorHandler.info("The Content-Type was \u201Ctext/html\u201D. Using the HTML parser.");
newHtmlParser();
htmlParser.setDoctypeExpectation(DoctypeExpectation.AUTO);
htmlParser.setDocumentModeHandler(this);
reader = htmlParser;
if (validator != null) {
reader.setContentHandler(validator.getContentHandler());
}
} else {
errorHandler.info("The Content-Type was \u201C"
+ documentInput.getType()
+ "\u201D. Using the XML parser (not resolving external entities).");
setupXmlParser();
}
break;
}
}
/**
*
*/
protected void newHtmlParser() {
htmlParser = new HtmlParser();
htmlParser.setCommentPolicy(XmlViolationPolicy.ALLOW);
htmlParser.setContentNonXmlCharPolicy(XmlViolationPolicy.ALLOW);
htmlParser.setContentSpacePolicy(XmlViolationPolicy.ALTER_INFOSET);
htmlParser.setNamePolicy(XmlViolationPolicy.ALLOW);
htmlParser.setStreamabilityViolationPolicy(XmlViolationPolicy.FATAL);
htmlParser.setXmlnsPolicy(XmlViolationPolicy.ALTER_INFOSET);
htmlParser.setMappingLangToXmlLang(true);
htmlParser.setHtml4ModeCompatibleWithXhtml1Schemata(true);
htmlParser.setHeuristics(Heuristics.ALL);
htmlParser.setEntityResolver(entityResolver);
}
/**
* @param entityResolver2
* @return
* @throws SAXNotRecognizedException
* @throws SAXNotSupportedException
*/
protected void setupXmlParser() throws SAXNotRecognizedException,
SAXNotSupportedException,
ParserConfigurationException,
SAXException {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(false);
xmlParser = factory.newSAXParser();
// xmlParser.getXMLReader().setFeature(
// "http://apache.org/xml/features/continue-after-fatal-error",
// true);
sourceReader.addCharacterHandler(sourceCode);
reader = new IdFilter(xmlParser.getXMLReader());
if (lexicalHandler != null) {
xmlParser.setProperty("http://xml.org/sax/properties/lexical-handler", lexicalHandler);
}
reader.setFeature("http://xml.org/sax/features/string-interning", true);
reader.setFeature(
"http://xml.org/sax/features/external-general-entities",
parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION);
reader.setFeature(
"http://xml.org/sax/features/external-parameter-entities",
parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION);
if (parser == ParserMode.XML_EXTERNAL_ENTITIES_NO_VALIDATION) {
reader.setEntityResolver(entityResolver);
} else {
reader.setEntityResolver(new NullEntityResolver());
}
if (validator == null) {
bufferingRootNamespaceSniffer = new BufferingRootNamespaceSniffer(
this);
reader.setContentHandler(bufferingRootNamespaceSniffer);
} else {
reader.setContentHandler(new RootNamespaceSniffer(this,
validator.getContentHandler()));
reader.setDTDHandler(validator.getDTDHandler());
}
}
protected String shortenDataUri(String uri) {
if (DataUri.startsWithData(uri)) {
return "data:\u2026";
} else {
return uri;
}
}
/**
* @param acceptAllKnownXmlTypes
* @see nu.validator.xml.ContentTypeParser#setAcceptAllKnownXmlTypes(boolean)
*/
protected void setAcceptAllKnownXmlTypes(boolean acceptAllKnownXmlTypes) {
// contentTypeParser.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
// dataRes.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
// httpRes.setAcceptAllKnownXmlTypes(acceptAllKnownXmlTypes);
}
/**
* @param allowGenericXml
* @see nu.validator.xml.ContentTypeParser#setAllowGenericXml(boolean)
*/
protected void setAllowGenericXml(boolean allowGenericXml) {
// contentTypeParser.setAllowGenericXml(allowGenericXml);
// httpRes.setAllowGenericXml(allowGenericXml);
// dataRes.setAllowGenericXml(allowGenericXml);
}
/**
* @param allowHtml
* @see nu.validator.xml.ContentTypeParser#setAllowHtml(boolean)
*/
protected void setAllowHtml(boolean allowHtml) {
// contentTypeParser.setAllowHtml(allowHtml);
// httpRes.setAllowHtml(allowHtml);
// dataRes.setAllowHtml(allowHtml);
}
/**
* @param allowRnc
* @see nu.validator.xml.ContentTypeParser#setAllowRnc(boolean)
*/
protected void setAllowRnc(boolean allowRnc) {
// contentTypeParser.setAllowRnc(allowRnc);
// httpRes.setAllowRnc(allowRnc);
// dataRes.setAllowRnc(allowRnc);
entityResolver.setAllowRnc(allowRnc);
}
/**
* @param allowXhtml
* @see nu.validator.xml.ContentTypeParser#setAllowXhtml(boolean)
*/
protected void setAllowXhtml(boolean allowXhtml) {
// contentTypeParser.setAllowXhtml(allowXhtml);
// httpRes.setAllowXhtml(allowXhtml);
// dataRes.setAllowXhtml(allowXhtml);
}
public void loadDocumentInput(boolean xhtmlContent) {
assert codeToValidate != null;
//Aelfred removal workaround - we need to somehow preserve the
//functionality added by hsivonen - CharacterHandler-s.
//So for xml we use a patched reader which does more or less the same.
//for html content the flow remains.
Reader readerImpl = xhtmlContent
? sourceReader = new CharacterHandlerReader(codeToValidate)
: codeToValidate;
documentInput = new TypedInputSource(readerImpl);
documentInput.setType("text/html"); //NOI18N
// documentInput.setLength(codeToValidate.length());
documentInput.setEncoding(encoding);
}
private String getSchemasForDoctypeId(int schemaId) {
for (int i = 0; i < presetDoctypes.length; i++) {
if (presetDoctypes[i] == schemaId) {
return presetUrls[i];
}
}
return null;
}
private static class XMLReaderCreatorImpl implements XMLReaderCreator {
private ErrorHandler errorHandler;
private EntityResolver entityResolver;
public XMLReaderCreatorImpl(ErrorHandler errorHandler, EntityResolver entityResolver) {
this.errorHandler = errorHandler;
this.entityResolver = entityResolver;
}
public XMLReader createXMLReader() throws SAXException {
try {
SAXParserFactory factory = SAXParserFactory.newInstance();
factory.setNamespaceAware(true);
factory.setValidating(false);
XMLReader r = factory.newSAXParser().getXMLReader();
r.setFeature("http://xml.org/sax/features/external-general-entities", true); //NOI18N
r.setFeature("http://xml.org/sax/features/external-parameter-entities", true); //NOI18N
r.setEntityResolver(this.entityResolver);
r.setErrorHandler(this.errorHandler);
return r;
} catch (ParserConfigurationException ex) {
throw new SAXException("Cannot create XMLReader instance", ex); //NOI18N
}
}
}
//xerces's default locator returns slightly shifted positions for character content
//this affects the LocationRecorder and hence the error positions quite nastily
private static class XercesInaccurateLocatorWorkaround implements ContentHandler, LexicalHandler {
//nu.validator.source.LocationRecorder is not accessible
private ContentHandler contentHandler;
private LexicalHandler lexicalHandler;
private LinesMapper mapper;
private ColumnAdjustingLocator locator;
private Locator originalLocator;
public XercesInaccurateLocatorWorkaround(Object source, LinesMapper mapper) {
this.contentHandler = (ContentHandler) source;
this.lexicalHandler = (LexicalHandler) source;
this.mapper = mapper;
}
public void setDocumentLocator(Locator locator) {
this.originalLocator = locator;
this.locator = new ColumnAdjustingLocator(locator);
contentHandler.setDocumentLocator(this.locator);
}
public void startDocument() throws SAXException {
contentHandler.startDocument();
}
public void endDocument() throws SAXException {
contentHandler.endDocument();
}
public void startPrefixMapping(String prefix, String uri) throws SAXException {
contentHandler.startPrefixMapping(prefix, uri);
}
public void endPrefixMapping(String prefix) throws SAXException {
contentHandler.endPrefixMapping(prefix);
}
public void startElement(String uri, String localName, String qName, Attributes atts) throws SAXException {
contentHandler.startElement(uri, localName, qName, atts);
}
public void endElement(String uri, String localName, String qName) throws SAXException {
contentHandler.endElement(uri, localName, qName);
}
public void characters(char[] ch, int start, int length) throws SAXException {
assert locator != null;
int line = originalLocator.getLineNumber();
int column = originalLocator.getColumnNumber();
int offset = mapper.getSourceOffsetForLocation(line - 1, column);
int diff = findBackwardDiff(mapper.getSourceText(), offset, ch, start, length);
locator.setColumnNumberDiff(-diff);
contentHandler.characters(ch, start, length);
locator.setColumnNumberDiff(0);
}
public void ignorableWhitespace(char[] ch, int start, int length) throws SAXException {
contentHandler.ignorableWhitespace(ch, start, length);
}
public void processingInstruction(String target, String data) throws SAXException {
contentHandler.processingInstruction(target, data);
}
public void skippedEntity(String name) throws SAXException {
contentHandler.skippedEntity(name);
}
public void startDTD(String name, String publicId, String systemId) throws SAXException {
lexicalHandler.startDTD(name, publicId, systemId);
}
public void endDTD() throws SAXException {
lexicalHandler.endDTD();
}
public void startEntity(String name) throws SAXException {
lexicalHandler.startEntity(name);
}
public void endEntity(String name) throws SAXException {
lexicalHandler.endEntity(name);
}
public void startCDATA() throws SAXException {
lexicalHandler.startCDATA();
}
public void endCDATA() throws SAXException {
lexicalHandler.endCDATA();
}
public void comment(char[] ch, int start, int length) throws SAXException {
lexicalHandler.comment(ch, start, length);
}
private static class ColumnAdjustingLocator implements Locator {
private Locator delegate;
private int diff;
public ColumnAdjustingLocator(Locator delegate) {
this.delegate = delegate;
}
public void setColumnNumberDiff(int diff) {
this.diff = diff;
}
public String getPublicId() {
return delegate.getPublicId();
}
public String getSystemId() {
return delegate.getSystemId();
}
public int getLineNumber() {
return delegate.getLineNumber();
}
public int getColumnNumber() {
return delegate.getColumnNumber() + diff;
}
}
}
static int PATTERN_LEN_LIMIT = 10; //consider backward match PATTER_LEN_LIMIT long as OK
static int findBackwardDiff(CharSequence text, int tlen, char[] pattern, int pstart, int plen) {
assert text.length() >= tlen;
assert plen > 0;
int pend = pstart + plen - 1;
int limitedpstart = plen - PATTERN_LEN_LIMIT > 0 ? pstart + (plen - PATTERN_LEN_LIMIT) : pstart;
int pidx = pend;
int point = tlen;
boolean inp = false;
for (int i = tlen - 1; i >= 0; i--) {
char textChar = text.charAt(i);
char patternChar = pattern[pidx--];
if (textChar != patternChar) {
pidx = pend;
if (inp) {
i = point - 1;
inp = false;
}
point = i;
} else {
if (limitedpstart == pidx + 1) {
break; //match, reached start of prefix
}
if (pidx == 0) {
break;
}
inp = true;
}
}
return tlen - point;
}
private static final class Marker {
private final int hashCode;
public Marker(int hashCode) {
this.hashCode = hashCode;
}
@Override
public boolean equals(Object o) {
return o.hashCode() == hashCode();
}
@Override
public int hashCode() {
return hashCode;
}
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy