All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sf.okapi.steps.xmlvalidation.XMLValidationStep Maven / Gradle / Ivy

/*===========================================================================
  Copyright (C) 2010-2011 by the Okapi Framework contributors
-----------------------------------------------------------------------------
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

  http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License.
===========================================================================*/

package net.sf.okapi.steps.xmlvalidation;

import java.io.File;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

import javax.xml.XMLConstants;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.stream.XMLInputFactory;
import javax.xml.stream.XMLStreamException;
import javax.xml.stream.XMLStreamReader;
import javax.xml.transform.Source;
import javax.xml.validation.Schema;
import javax.xml.validation.SchemaFactory;
import javax.xml.validation.Validator;

import net.sf.okapi.common.Event;
import net.sf.okapi.common.IParameters;
import net.sf.okapi.common.UsingParameters;
import net.sf.okapi.common.Util;
import net.sf.okapi.common.exceptions.OkapiBadStepInputException;
import net.sf.okapi.common.pipeline.BasePipelineStep;
import net.sf.okapi.common.resource.RawDocument;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;

@UsingParameters(Parameters.class)
public class XMLValidationStep extends BasePipelineStep {

	private final Logger logger = LoggerFactory.getLogger(getClass());

	private XMLInputFactory xmlInputFact;
	private String currentFileDir;
	private Parameters params;
	
	public XMLValidationStep () {
		params = new Parameters();
	}
	
	@Override
	public void destroy () {
		// Make available to GC
		xmlInputFact = null;
	}
	
	@Override
	public String getDescription () {
		return "Validate XML documents."
			+ " Expects: raw XML document. Sends back: raw XML document.";
	}

	@Override
	public String getName () {
		return "XML Validation";
	}

	@Override
	public Parameters getParameters () {
		return params;
	}

	@Override
	public void setParameters (IParameters params) {
		this.params = (Parameters)params;
	}
 
	@Override
	protected Event handleStartBatch (Event event) {
		
		xmlInputFact = XMLInputFactory.newInstance();
		xmlInputFact.setProperty(XMLInputFactory.SUPPORT_DTD, params.getUseFoundDTD());
		
		if ( params.isValidate() ) {
			logger.info("Validating using XML Schema: {}", params.getSchemaPath());
		}
		
		return event;
	}
	
	
	@Override
	protected Event handleRawDocument (Event event) {
		
		RawDocument rawDoc = (RawDocument)event.getResource();
		
		//--this is used to locate relative dtds--
		currentFileDir = Util.getDirectoryName(rawDoc.getInputURI().getPath());
		
		// Create the input source
		// Use the stream, so the encoding auto-detection can be done
		// Use the file path as systemId if possible
		Source xmlInput = null;
		if ( rawDoc.getInputURI() == null ) {
			xmlInput = new javax.xml.transform.stream.StreamSource(rawDoc.getStream());
		}
		else {
			xmlInput = new javax.xml.transform.stream.StreamSource(rawDoc.getStream(),
				rawDoc.getInputURI().getPath());
		}
		
		//--Checking for well-formedness--
		try {
			XMLStreamReader reader  = xmlInputFact.createXMLStreamReader(xmlInput);
			
			while(reader.hasNext()) {
				reader.next();
			}
			reader.close();

		}
		catch ( XMLStreamException e ) {
			logger.error(e.getMessage());
			logger.error("XML error line: {}, Column: {}, Offset: {}",
				e.getLocation().getLineNumber(), e.getLocation().getColumnNumber(),
				e.getLocation().getCharacterOffset());
			return event;
		}
		
		if(params.isValidate()){			
			xmlInput = new javax.xml.transform.stream.StreamSource(rawDoc.getStream());
			
			//--validating against schema--
			try {
				SchemaFactory factory = null;	
			    if(params.getValidationType() == Parameters.VALIDATIONTYPE_DTD){
			    	// JAXB doesn't seem to handle DTD very well, resort to SAX parser
			    	SAXParserFactory saxFactory = SAXParserFactory.newInstance();
			    	saxFactory.setNamespaceAware(true);
			    	saxFactory.setValidating(true);

					SAXParser parser = saxFactory.newSAXParser();
					XMLReader reader = parser.getXMLReader();
					reader.setErrorHandler(new ValidatingErrorHandler(logger));
					reader.setEntityResolver(new DTDResolver(currentFileDir));
					reader.parse(new InputSource(rawDoc.getStream()));
			    } else {
			    	// normal JAXB validation
					if(params.getValidationType() == Parameters.VALIDATIONTYPE_SCHEMA) {
						factory = SchemaFactory.newInstance(XMLConstants.W3C_XML_SCHEMA_NS_URI);    
					} else if(params.getValidationType() == Parameters.VALIDATIONTYPE_RELAXNG) {
						System.setProperty(SchemaFactory.class.getName() + ":" + 
				    			XMLConstants.RELAXNG_NS_URI, "com.thaiopensource.relaxng.jaxp.XMLSyntaxSchemaFactory");
						factory = SchemaFactory.newInstance(XMLConstants.RELAXNG_NS_URI);
						if (params.getSchemaPath().length() <= 0) {
							throw new OkapiBadStepInputException("Please specify a valid RelaxNG schema path");
						}
					}			    
					URL schemaLocation = null;
					if (params.getSchemaPath().length() > 0) {
						try {
							// try true URL syntax first
							schemaLocation = new URL(params.getSchemaPath());
						} catch(MalformedURLException e) {
							// URL parse failed must be a local file path
							schemaLocation = new File(params.getSchemaPath()).toURI().toURL();
						}
					}
					         
				    Schema schema;
				    if (schemaLocation == null) {
				    	// the xml document specifies the schema internally
				    	// only works for W3C schemas
				    	schema = factory.newSchema();
				    } else {
				    	// user specified schema
				    	schema = factory.newSchema(schemaLocation);
				    }
				      
			        Validator validator = schema.newValidator();
			        validator.setErrorHandler(new ValidatingErrorHandler(logger));
				    validator.validate(xmlInput);
				}
			} catch (SAXException | ParserConfigurationException | IOException e) {
				logger.error(e.getMessage());
				return event;
			}
		}
		
		return event;
	}
}

class ValidatingErrorHandler implements ErrorHandler {
	Logger logger;
	
	public ValidatingErrorHandler(Logger logger) {
		this.logger = logger;
	}
	
	 @Override
	public void error(SAXParseException e) throws SAXException {
		logger.error("Validation Error Line: {}, Column: {}\n{}\n",
				e.getLineNumber(), e.getColumnNumber(), e.getMessage());
		throw new SAXException("Error encountered");
	}

	@Override
	public void fatalError(SAXParseException e) throws SAXException {
		logger.error("Validation Fatal Error Line: {}, Column: {}\n{}\n",
				e.getLineNumber(), e.getColumnNumber(), e.getMessage());
		throw new SAXException("Fatal Error encountered");
	}

	@Override
	public void warning(SAXParseException e) throws SAXException {
		logger.error("Validation Warning Line: {}, Column: {}\n{}\n",
				e.getLineNumber(), e.getColumnNumber(), e.getMessage());
		throw new SAXException("Warning encountered");
	}
}
	 
class DTDResolver implements EntityResolver {
	String currentFileDir;
	
    public DTDResolver(String currentFileDir) {
    	super();
    	this.currentFileDir = currentFileDir;
	}

	@Override
	public InputSource resolveEntity(String publicID, String systemID) throws SAXException {

		//--check if the default dtd-location file is resolved--
    	File file = new File(systemID);
    	if (file.exists()){
	    	return null;
	    }
	    
    	//--check for dtd relative to the xml file--
	    file = new File(currentFileDir+systemID.substring(systemID.lastIndexOf("/")));
	    if (file.exists()){
	    	return new InputSource(currentFileDir+systemID.substring(systemID.lastIndexOf("/")));
	    }
        
        return null;
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy