All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.semanticweb.yars.rdfxml.RdfXmlParser Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
package org.semanticweb.yars.rdfxml;

import java.io.IOException;
import java.io.InputStream;
import java.util.Iterator;
import java.util.NoSuchElementException;
import java.util.concurrent.ArrayBlockingQueue;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.TimeUnit;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;

import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.Resource;
import org.semanticweb.yars.nx.parser.Callback;
import org.semanticweb.yars.nx.parser.ParseException;
import org.xml.sax.SAXException;

/**
 * RdfXmlParser... for... you guessed it... parsing RDF/XML
 * Based on SAXParser. Default behaviour creates a parsing thread
 * which fills a BlockingQueue and is consumed externally through the 
 * iterator model.
 * 
 * Can use custom CallBack which will not use a seperate thread for 
 * parsing and which does not use iterator methods.
 * 
 * @author aidhog
 *
 */
public class RdfXmlParser implements Iterator, Iterable {
	private BlockingQueue _q = null;
	private boolean _done = false;
	private Exception _e = null;
	private ParserThread _pt = null;
	private Node[] _current = null;
	private Resource _con = null;
	
	public static final int DEFAULT_BUFFER = 1000;
	public static final int TIME_OUT = 1000; //1 sec
	
//	private static final Header[] _headers = {
//		new Header("Accept", "application/rdf+xml"),
//		new Header("User-Agent", "nxparser/java"),	
//	};
	
	private SAXParser _parser;
	{
		SAXParserFactory factory = SAXParserFactory.newInstance();
		factory.setNamespaceAware(true);
		factory.setValidating(false);
		try {
			_parser = factory.newSAXParser();
		} catch (ParserConfigurationException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		} catch (SAXException e) {
			// TODO Auto-generated catch block
			e.printStackTrace();
		}
	}
	
	/**
	 * Short default constructor.
	 * 
	 */
	public Iterator parse(InputStream in, String baseURI) throws ParseException, IOException {
		return parse(in, false, true, baseURI, DEFAULT_BUFFER);
	}
	
	/**
	 * Short default constructor.
	 * 
	 */
	public Iterator parse(InputStream in, boolean strict, boolean skolemise, String baseURI) throws ParseException, IOException {
		return parse(in, strict, skolemise, baseURI, DEFAULT_BUFFER);
	}
	
	/**
	 * Default constructor. Creates a BlockingCallBack instance, whose buffer is filled
	 * by a parser thread and consumed by this instance using the iterator model.
	 */
	public Iterator parse(InputStream in, boolean strict, boolean skolemise, String baseURI, int buffer) throws ParseException, IOException {
//		SAXParserFactory factory = SAXParserFactory.newInstance();
//		factory.setNamespaceAware(true);
//		factory.setValidating(strict);
		try {
//			SAXParser saxParser = factory.newSAXParser();
			_q = new ArrayBlockingQueue(buffer);
			CallbackBlockingQueue bcb = new CallbackBlockingQueue(_q);
			_pt = new ParserThread(_parser, in, new RdfXmlParserBase(baseURI, bcb, skolemise),_q);
			_pt.start();
		} catch (Exception err) {
			throw new ParseException(err);
//			err.printStackTrace ();
		}
		
		return this;
	}
	
	/**
	 * Constructor given Callback. 
	 * Interaction outside of iterator model but analogous to NxParser :(.
	 */
	@Deprecated
	// @@@@ need always context for relative URIs?
	public void parse(InputStream in, boolean strict, boolean skolemise, String baseURI, Callback c) throws ParseException, IOException {
		SAXParserFactory factory = SAXParserFactory.newInstance();
		factory.setNamespaceAware(true);
		factory.setValidating(strict);
		try {
			SAXParser saxParser = factory.newSAXParser();
			saxParser.parse(in, new RdfXmlParserBase(baseURI, c, skolemise));
		} catch (Exception err) {
			throw new ParseException(err);
//			err.printStackTrace ();
		}
	}
	
	/**
	 * Constructor given Callback. 
	 * Interaction outside of iterator model but analagous to NxParser :(.
	 * Produces quadruples using the provided context.
	 * 
	 */
	public void parse(InputStream in, boolean strict, boolean skolemise, String baseURI, Callback c, Resource con) throws ParseException, IOException {
		_con = con;
		SAXParserFactory factory = SAXParserFactory.newInstance();
		factory.setNamespaceAware(true);
		factory.setValidating(strict);
		try {
			SAXParser saxParser = factory.newSAXParser();
			saxParser.parse(in, new RdfXmlParserBase(baseURI, c, skolemise, con));
		} catch (Exception err) {
			throw new ParseException(err);
//			err.printStackTrace ();
		}
	}
	
	public Resource getContext(){
		return _con;
	}
	
	public boolean hasNext() {
		if(_q==null){
			return false;
		} else if(_done){
			return false;
		} else if(_current!=null){
			//if(NodeComparator.NC.equals(_current,BlockingCallBack.POISON_TOKEN)){
			//faster hack :( :)
			if(_current.length==0){
				_done = true;
				_e = _pt.getException();
				return false;
			}
			else return true;
		} else if(_q.size()>0){
			_current = _q.poll();
			return hasNext();
		} else{
			try {
				_current = _q.poll(1000, TimeUnit.MILLISECONDS);
			} catch (InterruptedException e) {
				e.printStackTrace();
				_done = true;
				return false;
			}
			return hasNext();
		}
	}

	public Node[] next() {
		if(_current==null){
			if(!hasNext()){
				throw new NoSuchElementException();
			}
		}
		
		Node[] result = new Node[_current.length];
		System.arraycopy(_current, 0, result, 0, _current.length);
		_current = null;
		return result;
	}

	public void remove() {
		throw new UnsupportedOperationException();
	}
	
	public boolean isSuccess(){
		return _e == null;
	}
	
	public Exception getException(){
		return _e;
	}

	@Override
	public Iterator iterator() {
		return this;
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy