All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.semanticweb.yars.nx.parser.NxParser Maven / Gradle / Ivy

There is a newer version: 4.0.0
Show newest version
package org.semanticweb.yars.nx.parser;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.Reader;
import java.nio.charset.Charset;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;
import java.util.logging.Logger;

import org.semanticweb.yars.nx.BNode;
import org.semanticweb.yars.nx.Literal;
import org.semanticweb.yars.nx.Node;
import org.semanticweb.yars.nx.Resource;
import org.semanticweb.yars.nx.Unbound;
import org.semanticweb.yars.nx.Variable;

/**
 * NxParser is a non-validating N1, N2, N3, N4, Nx parser.
 * 
 * Assumes the data to be formatted according to NTRIPLES spec, including it to
 * be encoded in ASCII with Unicode characters properly escaped as in the
 * NTRIPLES spec.
 * 
 * Deviates from NTRIPLES spec in two points known so far:
 * 
    *
  • Only space delimits parts of a statement, not space or tab.
  • *
  • Previous to the full stop at the end of a statement, there must be a * space.
  • *
* * * @link http://www.w3.org/TR/rdf-testcases/#ntriples * @version 1.2 * * @author Aidan Hogan * @author Andreas Harth * @author Tobias Kaefer */ public class NxParser implements Iterator, Iterable { private static Logger _log = Logger.getLogger(NxParser.class.getName()); private int _lineNo = 0; private String _line = null; private Iterator _stringIt = null; private Node[] next = null; public Iterator parse(Reader r) { return parse(new BufferedReader(r)); } public Iterator parse(InputStream is, Charset cs) { return parse(new BufferedReader(new InputStreamReader(is, cs))); } public Iterator parse(InputStream is) { return parse(new BufferedReader(new InputStreamReader(is))); } public Iterator parse(BufferedReader br) { return parse(stringItFromBufferedReader(br)); } public Iterator parse(Iterable iterable) { return parse(iterable.iterator()); } public Iterator parse(Iterator iterator) { _stringIt = iterator; loadNext(); return this; } public boolean hasNext() { return next != null; } public Node[] next(){ if(next==null) throw new NoSuchElementException(); Node[] now = next; loadNext(); return now; } private void loadNext() { next = null; do{ if (_stringIt.hasNext()){ _line = _stringIt.next(); } else { next = null; return; } ++_lineNo; } while(_line==null || isEntirelyWhitespaceOrEmpty(_line)); //iterate until we get a non-blank line try { next = parseNodesInternal(_line); } catch (Exception e) { _log.warning("Moving on to the next line, as I couldn't parse line " + _lineNo + ": " + _line); e.printStackTrace(); //invalid: skip and try again loadNext(); } if (next == null) return; if(next.length==0)//valid but empty: skip and try again loadNext(); } private static boolean isEntirelyWhitespaceOrEmpty(String s){ for(char c:s.toCharArray()){ if (!Character.isWhitespace(c)) return false; } return true; } /** * Calls remove from underlying string iterator. */ @Override public void remove() { _stringIt.remove(); } @Override public Iterator iterator() { return this; } public static Node[] parseNodes(final String line) throws ParseException { try { return parseNodesInternal(line); } catch (Exception e) { throw new ParseException(e); } } /** * Parses line and returns a Node[] contained within that line. May return * an empty Node[] if the line is valid N-Triples but contains no nodes * (e.g., a blank or comment line). * * @param line * @return A {@link Node} array with the RDF terms found in the line. Can be * of zero length. * @throws ParseException */ protected static Node[] parseNodesInternal(final String line) throws ParseException { int startIndex = 0; int endIndex = 0; List nx = new LinkedList(); if(line.isEmpty()) return new Node[0]; //instead of checking for individual IndexOutOfBoundExceptions, //they are allowed to be thrown and caught in parseNodes() while (true) { while (Character.isWhitespace(line.charAt(startIndex))) { // skipping spaces ++startIndex; ++endIndex; if(startIndex==line.length()){ if(nx.isEmpty()){ return new Node[0]; } else{ throw new ParseException("Could not find closing '.' bracket for line "+line); } } } if (line.charAt(startIndex) == '<') { // resource. endIndex = startIndex; while (line.charAt(endIndex) != '>' && (line.charAt(endIndex + 1) != '.' || !Character.isWhitespace(line.charAt(endIndex + 1)))) ++endIndex; ++endIndex; if(endIndex==0) throw new ParseException("Could not find closing '>' bracket for resource starting at char "+startIndex+" while parsing line "+line); nx.add(new Resource(line.substring(startIndex, endIndex), true)); } else if (line.charAt(startIndex) == '_') { // bnode. endIndex = startIndex; while (!((line.charAt(endIndex) == '.' && (endIndex + 2 >= line .length() || Character.isWhitespace(endIndex + 1))) || Character .isWhitespace(line.charAt(endIndex)))) { // (fullstop at endIndex and (at endIndex+1, whitespace or // line end )) OR whitespace at endIndex ends the thing. ++endIndex; } nx.add(new BNode(line.substring(startIndex, endIndex), true)); } else if (line.charAt(startIndex) == '.') { // statement's end. if(nx.isEmpty()){ throw new ParseException("Exception at position " + startIndex+ " while parsing: '" + line +"'"); } break; } else if (line.charAt(startIndex) == '"') { // literal. // telling escaped quotes within the literal from // literal-delimiting ones: endIndex = startIndex; do { endIndex = line.indexOf('\"', endIndex + 1); } while (line.charAt(endIndex - 1) == '\\' && (((endIndex - 1 - onlyCharUntil(line, '\\', endIndex - 1)) % 2) == 0)); // ^^ if the number of backslashes in front of a quote is even, // the found quote is the literal-delimiting one. while (!((line.charAt(endIndex) == '.' && (endIndex + 2 >= line .length() || Character.isWhitespace(endIndex + 1))) || Character .isWhitespace(line.charAt(endIndex)))) { // (fullstop at endIndex and (at endIndex+1, whitespace or // line end )) OR whitespace at endIndex ends the thing. ++endIndex; } nx.add(new Literal(line.substring(startIndex, endIndex), true)); } else if(line.charAt(startIndex) == '#' && nx.isEmpty()){ // comment line. return new Node[0]; } else if (line.charAt(startIndex) == '?') { // variable. endIndex = startIndex; while (!((line.charAt(endIndex) == '.' && (endIndex + 2 >= line .length() || Character.isWhitespace(endIndex + 1))) || Character .isWhitespace(line.charAt(endIndex)))) { // (fullstop at endIndex and (at endIndex+1, whitespace or // line end )) OR whitespace at endIndex ends the thing. ++endIndex; } nx.add(new Variable(line.substring(startIndex, endIndex), true)); } else if (line.charAt(startIndex) == Unbound.TO_STRING.charAt(0)) { // unbound. if (line.substring(startIndex, startIndex + Unbound.TO_STRING.length()).equals( Unbound.TO_STRING)) { nx.add(new Unbound()); endIndex = startIndex + Unbound.TO_STRING.length(); if (endIndex >= line.length() || line.charAt(endIndex) != ' ') { throw new ParseException("Exception at position " + startIndex + " while parsing: '" + line + "'"); } } else { throw new ParseException("Exception at position " + endIndex+ " while parsing: '" + line +"'"); } } else{ throw new ParseException("Exception at position " + endIndex+ " while parsing: '" + line +"'"); } if (line.charAt(endIndex) == '.') break; else startIndex = endIndex + 1; } return nx.toArray(new Node[nx.size()]); } /** * Looks from a given position i in a string line backwards and returns the * index of the last occurence of parameter c in a row. * * @param line * the string * @param c * the character * @param i * the starting index * @return the index to be returned */ private static int onlyCharUntil(String line, char c, int i) { while (line.charAt(i) == c) { --i; } return i + 1; } /** * Creates an iterator of strings from a given buffered reader. * * @param br * the buffered reader * @return the iterator */ private static Iterator stringItFromBufferedReader( final BufferedReader br) { return new Iterator() { boolean nextIsFetched = true; String next = null; @Override public boolean hasNext() { if (!nextIsFetched) { return next == null ? false : true; } else { try { next = br.readLine(); } catch (IOException e) { e.printStackTrace(); next = null; } nextIsFetched = false; return next == null ? false : true; } } @Override public String next() { if (nextIsFetched) try { next = br.readLine(); } catch (IOException e) { e.printStackTrace(); } nextIsFetched = true; if (next == null) throw new NoSuchElementException(); return next; } @Override public void remove() { throw new UnsupportedOperationException(); } }; } /* * * FROM HERE DOWNWARDS: LEGACY STUFF */ /** * Returns the line number that has currently been processed. * * @return the line number */ public int lineNumber() { return _lineNo; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy