All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.github.jsonldjava.core.RDFDatasetUtils Maven / Gradle / Ivy

package com.github.jsonldjava.core;

import static com.github.jsonldjava.core.JsonLdConsts.RDF_FIRST;
import static com.github.jsonldjava.core.JsonLdConsts.RDF_LANGSTRING;
import static com.github.jsonldjava.core.JsonLdConsts.RDF_NIL;
import static com.github.jsonldjava.core.JsonLdConsts.RDF_REST;
import static com.github.jsonldjava.core.JsonLdConsts.RDF_TYPE;
import static com.github.jsonldjava.core.JsonLdConsts.XSD_BOOLEAN;
import static com.github.jsonldjava.core.JsonLdConsts.XSD_DOUBLE;
import static com.github.jsonldjava.core.JsonLdConsts.XSD_INTEGER;
import static com.github.jsonldjava.core.JsonLdConsts.XSD_STRING;
import static com.github.jsonldjava.core.JsonLdUtils.isKeyword;
import static com.github.jsonldjava.core.JsonLdUtils.isList;
import static com.github.jsonldjava.core.JsonLdUtils.isObject;
import static com.github.jsonldjava.core.JsonLdUtils.isValue;
import static com.github.jsonldjava.core.Regex.HEX;

import java.text.DecimalFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

public class RDFDatasetUtils {

    /**
     * Creates an array of RDF triples for the given graph.
     * 
     * @param graph
     *            the graph to create RDF triples for.
     * @param namer
     *            a UniqueNamer for assigning blank node names.
     * 
     * @return the array of RDF triples for the given graph.
     * @deprecated Use {@link RDFDataset#graphToRDF(String, Map)} instead
     */
    @Deprecated
    static List graphToRDF(Map graph, UniqueNamer namer) {
        final List rval = new ArrayList();
        for (final String id : graph.keySet()) {
            final Map node = (Map) graph.get(id);
            final List properties = new ArrayList(node.keySet());
            Collections.sort(properties);
            for (String property : properties) {
                final Object items = node.get(property);
                if ("@type".equals(property)) {
                    property = RDF_TYPE;
                } else if (isKeyword(property)) {
                    continue;
                }

                for (final Object item : (List) items) {
                    // RDF subjects
                    final Map subject = new LinkedHashMap();
                    if (id.indexOf("_:") == 0) {
                        subject.put("type", "blank node");
                        subject.put("value", namer.getName(id));
                    } else {
                        subject.put("type", "IRI");
                        subject.put("value", id);
                    }

                    // RDF predicates
                    final Map predicate = new LinkedHashMap();
                    predicate.put("type", "IRI");
                    predicate.put("value", property);

                    // convert @list to triples
                    if (isList(item)) {
                        listToRDF((List) ((Map) item).get("@list"), namer,
                                subject, predicate, rval);
                    }
                    // convert value or node object to triple
                    else {
                        final Object object = objectToRDF(item, namer);
                        final Map tmp = new LinkedHashMap();
                        tmp.put("subject", subject);
                        tmp.put("predicate", predicate);
                        tmp.put("object", object);
                        rval.add(tmp);
                    }
                }
            }
        }

        return rval;
    }

    /**
     * Converts a @list value into linked list of blank node RDF triples (an RDF
     * collection).
     * 
     * @param list
     *            the @list value.
     * @param namer
     *            a UniqueNamer for assigning blank node names.
     * @param subject
     *            the subject for the head of the list.
     * @param predicate
     *            the predicate for the head of the list.
     * @param triples
     *            the array of triples to append to.
     */
    private static void listToRDF(List list, UniqueNamer namer,
            Map subject, Map predicate, List triples) {
        final Map first = new LinkedHashMap();
        first.put("type", "IRI");
        first.put("value", RDF_FIRST);
        final Map rest = new LinkedHashMap();
        rest.put("type", "IRI");
        rest.put("value", RDF_REST);
        final Map nil = new LinkedHashMap();
        nil.put("type", "IRI");
        nil.put("value", RDF_NIL);

        for (final Object item : list) {
            final Map blankNode = new LinkedHashMap();
            blankNode.put("type", "blank node");
            blankNode.put("value", namer.getName());

            {
                final Map tmp = new LinkedHashMap();
                tmp.put("subject", subject);
                tmp.put("predicate", predicate);
                tmp.put("object", blankNode);
                triples.add(tmp);
            }

            subject = blankNode;
            predicate = first;
            final Object object = objectToRDF(item, namer);

            {
                final Map tmp = new LinkedHashMap();
                tmp.put("subject", subject);
                tmp.put("predicate", predicate);
                tmp.put("object", object);
                triples.add(tmp);
            }

            predicate = rest;
        }
        final Map tmp = new LinkedHashMap();
        tmp.put("subject", subject);
        tmp.put("predicate", predicate);
        tmp.put("object", nil);
        triples.add(tmp);
    }

    /**
     * Converts a JSON-LD value object to an RDF literal or a JSON-LD string or
     * node object to an RDF resource.
     * 
     * @param item
     *            the JSON-LD value or node object.
     * @param namer
     *            the UniqueNamer to use to assign blank node names.
     * 
     * @return the RDF literal or RDF resource.
     */
    private static Object objectToRDF(Object item, UniqueNamer namer) {
        final Map object = new LinkedHashMap();

        // convert value object to RDF
        if (isValue(item)) {
            object.put("type", "literal");
            final Object value = ((Map) item).get("@value");
            final Object datatype = ((Map) item).get("@type");

            // convert to XSD datatypes as appropriate
            if (value instanceof Boolean || value instanceof Number) {
                // convert to XSD datatype
                if (value instanceof Boolean) {
                    object.put("value", value.toString());
                    object.put("datatype", datatype == null ? XSD_BOOLEAN : datatype);
                } else if (value instanceof Double || value instanceof Float) {
                    // canonical double representation
                    final DecimalFormat df = new DecimalFormat("0.0###############E0");
                    object.put("value", df.format(value));
                    object.put("datatype", datatype == null ? XSD_DOUBLE : datatype);
                } else {
                    final DecimalFormat df = new DecimalFormat("0");
                    object.put("value", df.format(value));
                    object.put("datatype", datatype == null ? XSD_INTEGER : datatype);
                }
            } else if (((Map) item).containsKey("@language")) {
                object.put("value", value);
                object.put("datatype", datatype == null ? RDF_LANGSTRING : datatype);
                object.put("language", ((Map) item).get("@language"));
            } else {
                object.put("value", value);
                object.put("datatype", datatype == null ? XSD_STRING : datatype);
            }
        }
        // convert string/node object to RDF
        else {
            final String id = isObject(item) ? (String) ((Map) item).get("@id")
                    : (String) item;
            if (id.indexOf("_:") == 0) {
                object.put("type", "blank node");
                object.put("value", namer.getName(id));
            } else {
                object.put("type", "IRI");
                object.put("value", id);
            }
        }

        return object;
    }

    public static String toNQuads(RDFDataset dataset) {
        final List quads = new ArrayList();
        for (String graphName : dataset.graphNames()) {
            final List triples = dataset.getQuads(graphName);
            if ("@default".equals(graphName)) {
                graphName = null;
            }
            for (final RDFDataset.Quad triple : triples) {
                quads.add(toNQuad(triple, graphName));
            }
        }
        Collections.sort(quads);
        String rval = "";
        for (final String quad : quads) {
            rval += quad;
        }
        return rval;
    }

    static String toNQuad(RDFDataset.Quad triple, String graphName, String bnode) {
        final RDFDataset.Node s = triple.getSubject();
        final RDFDataset.Node p = triple.getPredicate();
        final RDFDataset.Node o = triple.getObject();

        String quad = "";

        // subject is an IRI or bnode
        if (s.isIRI()) {
            quad += "<" + escape(s.getValue()) + ">";
        }
        // normalization mode
        else if (bnode != null) {
            quad += bnode.equals(s.getValue()) ? "_:a" : "_:z";
        }
        // normal mode
        else {
            quad += s.getValue();
        }

        if (p.isIRI()) {
            quad += " <" + escape(p.getValue()) + "> ";
        }
        // otherwise it must be a bnode (TODO: can we only allow this if the
        // flag is set in options?)
        else {
            quad += " " + escape(p.getValue()) + " ";
        }

        // object is IRI, bnode or literal
        if (o.isIRI()) {
            quad += "<" + escape(o.getValue()) + ">";
        } else if (o.isBlankNode()) {
            // normalization mode
            if (bnode != null) {
                quad += bnode.equals(o.getValue()) ? "_:a" : "_:z";
            }
            // normal mode
            else {
                quad += o.getValue();
            }
        } else {
            final String escaped = escape(o.getValue());
            quad += "\"" + escaped + "\"";
            if (RDF_LANGSTRING.equals(o.getDatatype())) {
                quad += "@" + o.getLanguage();
            } else if (!XSD_STRING.equals(o.getDatatype())) {
                quad += "^^<" + escape(o.getDatatype()) + ">";
            }
        }

        // graph
        if (graphName != null) {
            if (graphName.indexOf("_:") != 0) {
                quad += " <" + escape(graphName) + ">";
            } else if (bnode != null) {
                quad += " _:g";
            } else {
                quad += " " + graphName;
            }
        }

        quad += " .\n";
        return quad;
    }

    static String toNQuad(RDFDataset.Quad triple, String graphName) {
        return toNQuad(triple, graphName, null);
    }

    final private static Pattern UCHAR_MATCHED = Pattern.compile("\\u005C(?:([tbnrf\\\"'])|(?:u("
            + HEX + "{4}))|(?:U(" + HEX + "{8})))");

    public static String unescape(String str) {
        String rval = str;
        if (str != null) {
            final Matcher m = UCHAR_MATCHED.matcher(str);
            while (m.find()) {
                String uni = m.group(0);
                if (m.group(1) == null) {
                    final String hex = m.group(2) != null ? m.group(2) : m.group(3);
                    final int v = Integer.parseInt(hex, 16);// hex =
                                                            // hex.replaceAll("^(?:00)+",
                                                            // "");
                    if (v > 0xFFFF) {
                        // deal with UTF-32
                        // Integer v = Integer.parseInt(hex, 16);
                        final int vt = v - 0x10000;
                        final int vh = vt >> 10;
                        final int v1 = vt & 0x3FF;
                        final int w1 = 0xD800 + vh;
                        final int w2 = 0xDC00 + v1;

                        final StringBuffer b = new StringBuffer();
                        b.appendCodePoint(w1);
                        b.appendCodePoint(w2);
                        uni = b.toString();
                    } else {
                        uni = Character.toString((char) v);
                    }
                } else {
                    final char c = m.group(1).charAt(0);
                    switch (c) {
                    case 'b':
                        uni = "\b";
                        break;
                    case 'n':
                        uni = "\n";
                        break;
                    case 't':
                        uni = "\t";
                        break;
                    case 'f':
                        uni = "\f";
                        break;
                    case 'r':
                        uni = "\r";
                        break;
                    case '\'':
                        uni = "'";
                        break;
                    case '\"':
                        uni = "\"";
                        break;
                    case '\\':
                        uni = "\\";
                        break;
                    default:
                        // do nothing
                        continue;
                    }
                }
                final String pat = Pattern.quote(m.group(0));
                final String x = Integer.toHexString(uni.charAt(0));
                rval = rval.replaceAll(pat, uni);
            }
        }
        return rval;
    }

    public static String escape(String str) {
        String rval = "";
        for (int i = 0; i < str.length(); i++) {
            final char hi = str.charAt(i);
            if (hi <= 0x8 || hi == 0xB || hi == 0xC || (hi >= 0xE && hi <= 0x1F)
                    || (hi >= 0x7F && hi <= 0xA0) || // 0xA0 is end of
                                                     // non-printable latin-1
                                                     // supplement
                                                     // characters
                    ((hi >= 0x24F // 0x24F is the end of latin extensions
                    && !Character.isHighSurrogate(hi))
                    // TODO: there's probably a lot of other characters that
                    // shouldn't be escaped that
                    // fall outside these ranges, this is one example from the
                    // json-ld tests
                    )) {
                rval += String.format("\\u%04x", (int) hi);
            } else if (Character.isHighSurrogate(hi)) {
                final char lo = str.charAt(++i);
                final int c = (hi << 10) + lo + (0x10000 - (0xD800 << 10) - 0xDC00);
                rval += String.format("\\U%08x", c);
            } else {
                switch (hi) {
                case '\b':
                    rval += "\\b";
                    break;
                case '\n':
                    rval += "\\n";
                    break;
                case '\t':
                    rval += "\\t";
                    break;
                case '\f':
                    rval += "\\f";
                    break;
                case '\r':
                    rval += "\\r";
                    break;
                // case '\'':
                // rval += "\\'";
                // break;
                case '\"':
                    rval += "\\\"";
                    // rval += "\\u0022";
                    break;
                case '\\':
                    rval += "\\\\";
                    break;
                default:
                    // just put the char as is
                    rval += hi;
                    break;
                }
            }
        }
        return rval;
    }

    private static class Regex {
        // define partial regexes
        // final public static Pattern IRI =
        // Pattern.compile("(?:<([^:]+:[^>]*)>)");
        final public static Pattern IRI = Pattern.compile("(?:<([^>]*)>)");
        final public static Pattern BNODE = Pattern.compile("(_:(?:[A-Za-z][A-Za-z0-9]*))");
        final public static Pattern PLAIN = Pattern.compile("\"([^\"\\\\]*(?:\\\\.[^\"\\\\]*)*)\"");
        final public static Pattern DATATYPE = Pattern.compile("(?:\\^\\^" + IRI + ")");
        final public static Pattern LANGUAGE = Pattern.compile("(?:@([a-z]+(?:-[a-zA-Z0-9]+)*))");
        final public static Pattern LITERAL = Pattern.compile("(?:" + PLAIN + "(?:" + DATATYPE
                + "|" + LANGUAGE + ")?)");
        final public static Pattern WS = Pattern.compile("[ \\t]+");
        final public static Pattern WSO = Pattern.compile("[ \\t]*");
        final public static Pattern EOLN = Pattern.compile("(?:\r\n)|(?:\n)|(?:\r)");
        final public static Pattern EMPTY = Pattern.compile("^" + WSO + "$");

        // define quad part regexes
        final public static Pattern SUBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + ")" + WS);
        final public static Pattern PROPERTY = Pattern.compile(IRI.pattern() + WS.pattern());
        final public static Pattern OBJECT = Pattern.compile("(?:" + IRI + "|" + BNODE + "|"
                + LITERAL + ")" + WSO);
        final public static Pattern GRAPH = Pattern.compile("(?:\\.|(?:(?:" + IRI + "|" + BNODE
                + ")" + WSO + "\\.))");

        // full quad regex
        final public static Pattern QUAD = Pattern.compile("^" + WSO + SUBJECT + PROPERTY + OBJECT
                + GRAPH + WSO + "$");
    }

    /**
     * Parses RDF in the form of N-Quads.
     * 
     * @param input
     *            the N-Quads input to parse.
     * 
     * @return an RDF dataset.
     * @throws JsonLdError
     *             If there was an error parsing the N-Quads document.
     */
    public static RDFDataset parseNQuads(String input) throws JsonLdError {
        // build RDF dataset
        final RDFDataset dataset = new RDFDataset();

        // split N-Quad input into lines
        final String[] lines = Regex.EOLN.split(input);
        int lineNumber = 0;
        for (final String line : lines) {
            lineNumber++;

            // skip empty lines
            if (Regex.EMPTY.matcher(line).matches()) {
                continue;
            }

            // parse quad
            final Matcher match = Regex.QUAD.matcher(line);
            if (!match.matches()) {
                throw new JsonLdError(JsonLdError.Error.SYNTAX_ERROR,
                        "Error while parsing N-Quads; invalid quad. line:" + lineNumber);
            }

            // get subject
            RDFDataset.Node subject;
            if (match.group(1) != null) {
                subject = new RDFDataset.IRI(unescape(match.group(1)));
            } else {
                subject = new RDFDataset.BlankNode(unescape(match.group(2)));
            }

            // get predicate
            final RDFDataset.Node predicate = new RDFDataset.IRI(unescape(match.group(3)));

            // get object
            RDFDataset.Node object;
            if (match.group(4) != null) {
                object = new RDFDataset.IRI(unescape(match.group(4)));
            } else if (match.group(5) != null) {
                object = new RDFDataset.BlankNode(unescape(match.group(5)));
            } else {
                final String language = unescape(match.group(8));
                final String datatype = match.group(7) != null ? unescape(match.group(7)) : match
                        .group(8) != null ? RDF_LANGSTRING : XSD_STRING;
                final String unescaped = unescape(match.group(6));
                object = new RDFDataset.Literal(unescaped, datatype, language);
            }

            // get graph name ('@default' is used for the default graph)
            String name = "@default";
            if (match.group(9) != null) {
                name = unescape(match.group(9));
            } else if (match.group(10) != null) {
                name = unescape(match.group(10));
            }

            final RDFDataset.Quad triple = new RDFDataset.Quad(subject, predicate, object, name);

            // initialise graph in dataset
            if (!dataset.containsKey(name)) {
                final List tmp = new ArrayList();
                tmp.add(triple);
                dataset.put(name, tmp);
            }
            // add triple if unique to its graph
            else {
                final List triples = (List) dataset.get(name);
                if (!triples.contains(triple)) {
                    triples.add(triple);
                }
            }
        }

        return dataset;
    }
}