org.openrdf.rio.ntriples.NTriplesUtil Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of sesame-rio-ntriples Show documentation
Show all versions of sesame-rio-ntriples Show documentation
Rio parser and writer implementation for the N-Triples file format.
/*
* Licensed to Aduna under one or more contributor license agreements.
* See the NOTICE.txt file distributed with this work for additional
* information regarding copyright ownership.
*
* Aduna licenses this file to you under the terms of the Aduna BSD
* License (the "License"); you may not use this file except in compliance
* with the License. See the LICENSE.txt file distributed with this work
* for the full License.
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
* implied. See the License for the specific language governing permissions
* and limitations under the License.
*/
package org.openrdf.rio.ntriples;
import java.io.IOException;
import org.openrdf.model.BNode;
import org.openrdf.model.Literal;
import org.openrdf.model.Resource;
import org.openrdf.model.URI;
import org.openrdf.model.Value;
import org.openrdf.model.ValueFactory;
/**
* Utility methods for N-Triples encoding/decoding.
*/
public class NTriplesUtil {
/**
* Parses an N-Triples value, creates an object for it using the
* supplied ValueFactory and returns this object.
*
* @param nTriplesValue The N-Triples value to parse.
* @param valueFactory The ValueFactory to use for creating the
* object.
* @return An object representing the parsed value.
* @throws IllegalArgumentException If the supplied value could not be
* parsed correctly.
*/
public static Value parseValue(String nTriplesValue, ValueFactory valueFactory)
throws IllegalArgumentException
{
if (nTriplesValue.startsWith("<")) {
return parseURI(nTriplesValue, valueFactory);
}
else if (nTriplesValue.startsWith("_:")) {
return parseBNode(nTriplesValue, valueFactory);
}
else if (nTriplesValue.startsWith("\"")) {
return parseLiteral(nTriplesValue, valueFactory);
}
else {
throw new IllegalArgumentException("Not a legal N-Triples value: " + nTriplesValue);
}
}
/**
* Parses an N-Triples resource, creates an object for it using
* the supplied ValueFactory and returns this object.
*
* @param nTriplesResource The N-Triples resource to parse.
* @param valueFactory The ValueFactory to use for creating the
* object.
* @return An object representing the parsed resource.
* @throws IllegalArgumentException If the supplied resource could not be
* parsed correctly.
*/
public static Resource parseResource(String nTriplesResource, ValueFactory valueFactory)
throws IllegalArgumentException
{
if (nTriplesResource.startsWith("<")) {
return parseURI(nTriplesResource, valueFactory);
}
else if (nTriplesResource.startsWith("_:")) {
return parseBNode(nTriplesResource, valueFactory);
}
else {
throw new IllegalArgumentException(
"Not a legal N-Triples resource: " + nTriplesResource);
}
}
/**
* Parses an N-Triples URI, creates an object for it using the
* supplied ValueFactory and returns this object.
*
* @param nTriplesURI The N-Triples URI to parse.
* @param valueFactory The ValueFactory to use for creating the
* object.
* @return An object representing the parsed URI.
* @throws IllegalArgumentException If the supplied URI could not be
* parsed correctly.
*/
public static URI parseURI(String nTriplesURI, ValueFactory valueFactory)
throws IllegalArgumentException
{
if (nTriplesURI.startsWith("<") && nTriplesURI.endsWith(">")) {
String uri = nTriplesURI.substring(1, nTriplesURI.length() - 1);
uri = unescapeString(uri);
return valueFactory.createURI(uri);
}
else {
throw new IllegalArgumentException("Not a legal N-Triples URI: " + nTriplesURI);
}
}
/**
* Parses an N-Triples bNode, creates an object for it using the
* supplied ValueFactory and returns this object.
*
* @param nTriplesBNode The N-Triples bNode to parse.
* @param valueFactory The ValueFactory to use for creating the
* object.
* @return An object representing the parsed bNode.
* @throws IllegalArgumentException If the supplied bNode could not be
* parsed correctly.
*/
public static BNode parseBNode(String nTriplesBNode, ValueFactory valueFactory)
throws IllegalArgumentException
{
if (nTriplesBNode.startsWith("_:")) {
return valueFactory.createBNode(nTriplesBNode.substring(2));
}
else {
throw new IllegalArgumentException("Not a legal N-Triples URI: " + nTriplesBNode);
}
}
/**
* Parses an N-Triples literal, creates an object for it using the
* supplied ValueFactory and returns this object.
*
* @param nTriplesLiteral The N-Triples literal to parse.
* @param valueFactory The ValueFactory to use for creating the
* object.
* @return An object representing the parsed literal.
* @throws IllegalArgumentException If the supplied literal could not be
* parsed correctly.
*/
public static Literal parseLiteral(String nTriplesLiteral, ValueFactory valueFactory)
throws IllegalArgumentException
{
if (nTriplesLiteral.startsWith("\"")) {
// Find string separation points
int endLabelIdx = findEndOfLabel(nTriplesLiteral);
if (endLabelIdx != -1) {
int startLangIdx = nTriplesLiteral.indexOf("@", endLabelIdx);
int startDtIdx = nTriplesLiteral.indexOf("^^", endLabelIdx);
if (startLangIdx != -1 && startDtIdx != -1) {
throw new IllegalArgumentException(
"Literals can not have both a language and a datatype");
}
// Get label
String label = nTriplesLiteral.substring(1, endLabelIdx);
label = unescapeString(label);
if (startLangIdx != -1) {
// Get language
String language = nTriplesLiteral.substring(startLangIdx + 1);
return valueFactory.createLiteral(label, language);
}
else if (startDtIdx != -1) {
// Get datatype
String datatype = nTriplesLiteral.substring(startDtIdx + 2);
URI dtURI = parseURI(datatype, valueFactory);
return valueFactory.createLiteral(label, dtURI);
}
else {
return valueFactory.createLiteral(label);
}
}
}
throw new IllegalArgumentException("Not a legal N-Triples literal: " + nTriplesLiteral);
}
/**
* Finds the end of the label in a literal string. This method
* takes into account that characters can be escaped using
* backslashes.
*
* @return The index of the double quote ending the label, or
* -1 if it could not be found.
*/
private static int findEndOfLabel(String nTriplesLiteral) {
// First character of literal is guaranteed to be a double
// quote, start search at second character.
boolean previousWasBackslash = false;
for (int i = 1; i < nTriplesLiteral.length(); i++) {
char c = nTriplesLiteral.charAt(i);
if (c == '"' && !previousWasBackslash) {
return i;
}
else if (c == '\\' && !previousWasBackslash) {
// start of escape
previousWasBackslash = true;
}
else if (previousWasBackslash) {
// c was escaped
previousWasBackslash = false;
}
}
return -1;
}
/**
* Creates an N-Triples string for the supplied value.
*/
public static String toNTriplesString(Value value) {
if (value instanceof Resource) {
return toNTriplesString((Resource)value);
}
else if (value instanceof Literal) {
return toNTriplesString((Literal)value);
}
else {
throw new IllegalArgumentException("Unknown value type: " + value.getClass());
}
}
public static void append(Value value, Appendable appendable)
throws IOException
{
if (value instanceof Resource) {
append((Resource)value, appendable);
}
else if (value instanceof Literal) {
append((Literal)value, appendable);
}
else {
throw new IllegalArgumentException("Unknown value type: " + value.getClass());
}
}
/**
* Creates an N-Triples string for the supplied resource.
*/
public static String toNTriplesString(Resource resource) {
if (resource instanceof URI) {
return toNTriplesString((URI)resource);
}
else if (resource instanceof BNode) {
return toNTriplesString((BNode)resource);
}
else {
throw new IllegalArgumentException("Unknown resource type: " + resource.getClass());
}
}
public static void append(Resource resource, Appendable appendable)
throws IOException
{
if (resource instanceof URI) {
append((URI)resource, appendable);
}
else if (resource instanceof BNode) {
append((BNode)resource, appendable);
}
else {
throw new IllegalArgumentException("Unknown resource type: " + resource.getClass());
}
}
/**
* Creates an N-Triples string for the supplied URI.
*/
public static String toNTriplesString(URI uri) {
return "<" + escapeString(uri.toString()) + ">";
}
public static void append(URI uri, Appendable appendable)
throws IOException
{
appendable.append("<");
escapeString(uri.toString(), appendable);
appendable.append(">");
}
/**
* Creates an N-Triples string for the supplied bNode.
*/
public static String toNTriplesString(BNode bNode) {
return "_:" + bNode.getID();
}
public static void append(BNode bNode, Appendable appendable)
throws IOException
{
appendable.append("_:");
appendable.append(bNode.getID());
}
/**
* Creates an N-Triples string for the supplied literal.
*/
public static String toNTriplesString(Literal lit) {
try {
StringBuilder sb = new StringBuilder();
append(lit, sb);
return sb.toString();
}
catch (IOException e) {
throw new AssertionError();
}
}
public static void append(Literal lit, Appendable appendable)
throws IOException
{
// Do some character escaping on the label:
appendable.append("\"");
escapeString(lit.getLabel(), appendable);
appendable.append("\"");
if (lit.getDatatype() != null) {
// Append the literal's datatype
appendable.append("^^");
append(lit.getDatatype(), appendable);
}
else if (lit.getLanguage() != null) {
// Append the literal's language
appendable.append("@");
appendable.append(lit.getLanguage());
}
}
/**
* Checks whether the supplied character is a letter or number
* according to the N-Triples specification.
* @see #isLetter
* @see #isNumber
*/
public static boolean isLetterOrNumber(int c) {
return isLetter(c) || isNumber(c);
}
/**
* Checks whether the supplied character is a letter according to
* the N-Triples specification. N-Triples letters are A - Z and a - z.
*/
public static boolean isLetter(int c) {
return (c >= 65 && c <= 90) || // A - Z
(c >= 97 && c <= 122); // a - z
}
/**
* Checks whether the supplied character is a number according to
* the N-Triples specification. N-Triples numbers are 0 - 9.
*/
public static boolean isNumber(int c) {
return (c >= 48 && c <= 57); // 0 - 9
}
/**
* Escapes a Unicode string to an all-ASCII character sequence. Any special
* characters are escaped using backslashes (" becomes \",
* etc.), and non-ascii/non-printable characters are escaped using Unicode
* escapes (\uxxxx and \Uxxxxxxxx).
*/
public static String escapeString(String label) {
try {
StringBuilder sb = new StringBuilder(2 * label.length());
escapeString(label, sb);
return sb.toString();
}
catch (IOException e) {
throw new AssertionError();
}
}
/**
* Escapes a Unicode string to an all-ASCII character sequence. Any special
* characters are escaped using backslashes (" becomes \",
* etc.), and non-ascii/non-printable characters are escaped using Unicode
* escapes (\uxxxx and \Uxxxxxxxx).
*
* @throws IOException
*/
public static void escapeString(String label, Appendable appendable)
throws IOException
{
int labelLength = label.length();
for (int i = 0; i < labelLength; i++) {
char c = label.charAt(i);
int cInt = c;
if (c == '\\') {
appendable.append("\\\\");
}
else if (c == '"') {
appendable.append("\\\"");
}
else if (c == '\n') {
appendable.append("\\n");
}
else if (c == '\r') {
appendable.append("\\r");
}
else if (c == '\t') {
appendable.append("\\t");
}
else if (
cInt >= 0x0 && cInt <= 0x8 ||
cInt == 0xB || cInt == 0xC ||
cInt >= 0xE && cInt <= 0x1F ||
cInt >= 0x7F && cInt <= 0xFFFF)
{
appendable.append("\\u");
appendable.append(toHexString(cInt, 4));
}
else if (cInt >= 0x10000 && cInt <= 0x10FFFF) {
appendable.append("\\U");
appendable.append(toHexString(cInt, 8));
}
else {
appendable.append(c);
}
}
}
/**
* Unescapes an escaped Unicode string. Any Unicode sequences
* (\uxxxx and \Uxxxxxxxx) are restored to the
* value indicated by the hexadecimal argument and any backslash-escapes
* (\", \\, etc.) are decoded to their original form.
*
* @param s An escaped Unicode string.
* @return The unescaped string.
* @throws IllegalArgumentException If the supplied string is not a
* correctly escaped N-Triples string.
*/
public static String unescapeString(String s) {
int backSlashIdx = s.indexOf('\\');
if (backSlashIdx == -1) {
// No escaped characters found
return s;
}
int startIdx = 0;
int sLength = s.length();
StringBuilder sb = new StringBuilder(sLength);
while (backSlashIdx != -1) {
sb.append(s.substring(startIdx, backSlashIdx));
if (backSlashIdx + 1 >= sLength) {
throw new IllegalArgumentException("Unescaped backslash in: " + s);
}
char c = s.charAt(backSlashIdx + 1);
if (c == 't') {
sb.append('\t');
startIdx = backSlashIdx + 2;
}
else if (c == 'r') {
sb.append('\r');
startIdx = backSlashIdx + 2;
}
else if (c == 'n') {
sb.append('\n');
startIdx = backSlashIdx + 2;
}
else if (c == '"') {
sb.append('"');
startIdx = backSlashIdx + 2;
}
else if (c == '\\') {
sb.append('\\');
startIdx = backSlashIdx + 2;
}
else if (c == 'u') {
// \\uxxxx
if (backSlashIdx + 5 >= sLength) {
throw new IllegalArgumentException(
"Incomplete Unicode escape sequence in: " + s);
}
String xx = s.substring(backSlashIdx + 2, backSlashIdx + 6);
try {
c = (char)Integer.parseInt(xx, 16);
sb.append(c);
startIdx = backSlashIdx + 6;
}
catch (NumberFormatException e) {
throw new IllegalArgumentException(
"Illegal Unicode escape sequence '\\u" + xx + "' in: " + s);
}
}
else if (c == 'U') {
// \\Uxxxxxxxx
if (backSlashIdx + 9 >= sLength) {
throw new IllegalArgumentException(
"Incomplete Unicode escape sequence in: " + s);
}
String xx = s.substring(backSlashIdx + 2, backSlashIdx + 10);
try {
c = (char)Integer.parseInt(xx, 16);
sb.append(c);
startIdx = backSlashIdx + 10;
}
catch (NumberFormatException e) {
throw new IllegalArgumentException(
"Illegal Unicode escape sequence '\\U" + xx + "' in: " + s);
}
}
else {
throw new IllegalArgumentException("Unescaped backslash in: " + s);
}
backSlashIdx = s.indexOf('\\', startIdx);
}
sb.append(s.substring(startIdx));
return sb.toString();
}
/**
* Converts a decimal value to a hexadecimal string represention
* of the specified length.
*
* @param decimal A decimal value.
* @param stringLength The length of the resulting string.
*/
public static String toHexString(int decimal, int stringLength) {
StringBuilder sb = new StringBuilder(stringLength);
String hexVal = Integer.toHexString(decimal).toUpperCase();
// insert zeros if hexVal has less than stringLength characters:
int nofZeros = stringLength - hexVal.length();
for (int i = 0; i < nofZeros; i++) {
sb.append('0');
}
sb.append(hexVal);
return sb.toString();
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy