All downloads are free. Search and download functionality uses the official Maven repository.

com.github.jsonldjava.impl.TurtleTripleCallback Maven / Gradle / Ivy

package com.github.jsonldjava.impl;

import static com.github.jsonldjava.core.JSONLDConsts.RDF_FIRST;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_NIL;
import static com.github.jsonldjava.core.JSONLDConsts.RDF_REST;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_BOOLEAN;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_DOUBLE;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_FLOAT;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_INTEGER;
import static com.github.jsonldjava.core.JSONLDConsts.XSD_STRING;

import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;

import com.github.jsonldjava.core.JSONLDTripleCallback;
import com.github.jsonldjava.core.RDFDataset;

public class TurtleTripleCallback implements JSONLDTripleCallback {

    private static final int MAX_LINE_LENGTH = 160;
    private static final int TAB_SPACES = 4;
    private static final String COLS_KEY = "..cols.."; // this shouldn't be a
                                                       // valid iri/bnode i
                                                       // hope!
    final Map availableNamespaces = new LinkedHashMap() {
        {
            // TODO: fill with default namespaces
        }
    };
    Set usedNamespaces;

    public TurtleTripleCallback() {
    }

    @Override
    public Object call(RDFDataset dataset) {
        for (final Entry e : dataset.getNamespaces().entrySet()) {
            availableNamespaces.put(e.getValue(), e.getKey());
        }
        usedNamespaces = new LinkedHashSet();

        final int tabs = 0;

        final Map> refs = new LinkedHashMap>();
        final Map>> ttl = new LinkedHashMap>>();

        for (String graphName : dataset.keySet()) {
            final List triples = dataset.getQuads(graphName);
            if ("@default".equals(graphName)) {
                graphName = null;
            }

            // http://www.w3.org/TR/turtle/#unlabeled-bnodes
            // TODO: implement nesting for unlabled nodes

            // map of what the output should look like
            // subj (or [ if bnode) > pred > obj
            // > obj (set ref if IRI)
            // > pred > obj (set ref if bnode)
            // subj > etc etc etc

            // subjid -> [ ref, ref, ref ]

            String prevSubject = "";
            String prevPredicate = "";

            Map> thisSubject = null;
            List thisPredicate = null;

            for (final RDFDataset.Quad triple : triples) {
                final String subject = triple.getSubject().getValue();
                final String predicate = triple.getPredicate().getValue();

                if (prevSubject.equals(subject)) {
                    if (prevPredicate.equals(predicate)) {
                        // nothing to do
                    } else {
                        // new predicate
                        if (thisSubject.containsKey(predicate)) {
                            thisPredicate = thisSubject.get(predicate);
                        } else {
                            thisPredicate = new ArrayList();
                            thisSubject.put(predicate, thisPredicate);
                        }
                        prevPredicate = predicate;
                    }
                } else {
                    // new subject
                    if (ttl.containsKey(subject)) {
                        thisSubject = ttl.get(subject);
                    } else {
                        thisSubject = new LinkedHashMap>();
                        ttl.put(subject, thisSubject);
                    }
                    if (thisSubject.containsKey(predicate)) {
                        thisPredicate = thisSubject.get(predicate);
                    } else {
                        thisPredicate = new ArrayList();
                        thisSubject.put(predicate, thisPredicate);
                    }

                    prevSubject = subject;
                    prevPredicate = predicate;
                }

                if (triple.getObject().isLiteral()) {
                    thisPredicate.add(triple.getObject());
                } else {
                    final String o = triple.getObject().getValue();
                    if (o.startsWith("_:")) {
                        // add ref to o
                        if (!refs.containsKey(o)) {
                            refs.put(o, new ArrayList());
                        }
                        refs.get(o).add(thisPredicate);
                    }
                    thisPredicate.add(o);
                }
            }
        }

        final Map> collections = new LinkedHashMap>();

        final List subjects = new ArrayList(ttl.keySet());
        // find collections
        for (final String subj : subjects) {
            Map> preds = ttl.get(subj);
            if (preds != null && preds.containsKey(RDF_FIRST)) {
                final List col = new ArrayList();
                collections.put(subj, col);
                while (true) {
                    final List first = preds.remove(RDF_FIRST);
                    final Object o = first.get(0);
                    col.add(o);
                    // refs
                    if (refs.containsKey(o)) {
                        refs.get(o).remove(first);
                        refs.get(o).add(col);
                    }
                    final String next = (String) preds.remove(RDF_REST).get(0);
                    if (RDF_NIL.equals(next)) {
                        // end of this list
                        break;
                    }
                    // if collections already contains a value for "next", add
                    // it to this col and break out
                    if (collections.containsKey(next)) {
                        col.addAll(collections.remove(next));
                        break;
                    }
                    preds = ttl.remove(next);
                    refs.remove(next);
                }
            }
        }

        // process refs (nesting referenced bnodes if only one reference to them
        // in the whole graph)
        for (final String id : refs.keySet()) {
            // skip items if there is more than one reference to them in the
            // graph
            if (refs.get(id).size() > 1) {
                continue;
            }

            // otherwise embed them into the referenced location
            Object object = ttl.remove(id);
            if (collections.containsKey(id)) {
                object = new LinkedHashMap>();
                final List tmp = new ArrayList();
                tmp.add(collections.remove(id));
                ((HashMap) object).put(COLS_KEY, tmp);
            }
            final List predicate = (List) refs.get(id).get(0);
            // replace the one bnode ref with the object
            predicate.set(predicate.lastIndexOf(id), object);
        }

        // replace the rest of the collections
        for (final String id : collections.keySet()) {
            final Map> subj = ttl.get(id);
            if (!subj.containsKey(COLS_KEY)) {
                subj.put(COLS_KEY, new ArrayList());
            }
            subj.get(COLS_KEY).add(collections.get(id));
        }

        // build turtle output
        final String output = generateTurtle(ttl, 0, 0, false);

        String prefixes = "";
        for (final String prefix : usedNamespaces) {
            final String name = availableNamespaces.get(prefix);
            prefixes += "@prefix " + name + ": <" + prefix + "> .\n";
        }

        return ("".equals(prefixes) ? "" : prefixes + "\n") + output;
    }

    private String generateObject(Object object, String sep, boolean hasNext, int indentation,
            int lineLength) {
        String rval = "";
        String obj;
        if (object instanceof String) {
            obj = getURI((String) object);
        } else if (object instanceof RDFDataset.Literal) {
            obj = ((RDFDataset.Literal) object).getValue();
            final String lang = ((RDFDataset.Literal) object).getLanguage();
            final String dt = ((RDFDataset.Literal) object).getDatatype();
            if (lang != null) {
                obj = "\"" + obj + "\"";
                obj += "@" + lang;
            } else if (dt != null) {
                // TODO: this probably isn't an exclusive list of all the
                // datatype literals that can be represented as native types
                if (!(XSD_DOUBLE.equals(dt) || XSD_INTEGER.equals(dt) || XSD_FLOAT.equals(dt) || XSD_BOOLEAN
                        .equals(dt))) {
                    obj = "\"" + obj + "\"";
                    if (!XSD_STRING.equals(dt)) {
                        obj += "^^" + getURI(dt);
                    }
                }
            } else {
                obj = "\"" + obj + "\"";
            }
        } else {
            // must be an object
            final Map>> tmp = new LinkedHashMap>>();
            tmp.put("_:x", (Map>) object);
            obj = generateTurtle(tmp, indentation + 1, lineLength, true);
        }

        final int idxofcr = obj.indexOf("\n");
        // check if output will fix in the max line length (factor in comma if
        // not the last item, current line length and length to the next CR)
        if ((hasNext ? 1 : 0) + lineLength + (idxofcr != -1 ? idxofcr : obj.length()) > MAX_LINE_LENGTH) {
            rval += "\n" + tabs(indentation + 1);
            lineLength = (indentation + 1) * TAB_SPACES;
        }
        rval += obj;
        if (idxofcr != -1) {
            lineLength += (obj.length() - obj.lastIndexOf("\n"));
        } else {
            lineLength += obj.length();
        }
        if (hasNext) {
            rval += sep;
            lineLength += sep.length();
            if (lineLength < MAX_LINE_LENGTH) {
                rval += " ";
                lineLength++;
            } else {
                rval += "\n";
            }
        }
        return rval;
    }

    private String generateTurtle(Map>> ttl, int indentation,
            int lineLength, boolean isObject) {
        String rval = "";
        final Iterator subjIter = ttl.keySet().iterator();
        while (subjIter.hasNext()) {
            final String subject = subjIter.next();
            final Map> subjval = ttl.get(subject);
            // boolean isBlankNode = subject.startsWith("_:");
            boolean hasOpenBnodeBracket = false;
            if (subject.startsWith("_:")) {
                // only open blank node bracket the node doesn't contain any
                // collections
                if (!subjval.containsKey(COLS_KEY)) {
                    rval += "[ ";
                    lineLength += 2;
                    hasOpenBnodeBracket = true;
                }

                // TODO: according to http://www.rdfabout.com/demo/validator/
                // 1) collections as objects cannot contain any predicates other
                // than rdf:first and rdf:rest
                // 2) collections cannot be surrounded with [ ]

                // check for collection
                if (subjval.containsKey(COLS_KEY)) {
                    final List collections = subjval.remove(COLS_KEY);
                    for (final Object collection : collections) {
                        rval += "( ";
                        lineLength += 2;
                        final Iterator objIter = ((List) collection).iterator();
                        while (objIter.hasNext()) {
                            final Object object = objIter.next();
                            rval += generateObject(object, "", objIter.hasNext(), indentation,
                                    lineLength);
                            lineLength = rval.length() - rval.lastIndexOf("\n");
                        }
                        rval += " ) ";
                        lineLength += 3;
                    }
                }
                // check for blank node
            } else {
                rval += getURI(subject) + " ";
                lineLength += subject.length() + 1;
            }
            final Iterator predIter = ttl.get(subject).keySet().iterator();
            while (predIter.hasNext()) {
                final String predicate = predIter.next();
                rval += getURI(predicate) + " ";
                lineLength += predicate.length() + 1;
                final Iterator objIter = ttl.get(subject).get(predicate).iterator();
                while (objIter.hasNext()) {
                    final Object object = objIter.next();
                    rval += generateObject(object, ",", objIter.hasNext(), indentation, lineLength);
                    lineLength = rval.length() - rval.lastIndexOf("\n");
                }
                if (predIter.hasNext()) {
                    rval += " ;\n" + tabs(indentation + 1);
                    lineLength = (indentation + 1) * TAB_SPACES;
                }
            }
            if (hasOpenBnodeBracket) {
                rval += " ]";
            }
            if (!isObject) {
                rval += " .\n";
                if (subjIter.hasNext()) { // add blank space if we have another
                                          // object below this
                    rval += "\n";
                }
            }
        }
        return rval;
    }

    // TODO: Assert (TAB_SPACES == 4) otherwise this needs to be edited, and
    // should fail to compile
    private String tabs(int tabs) {
        String rval = "";
        for (int i = 0; i < tabs; i++) {
            rval += "    "; // using spaces for tabs
        }
        return rval;
    }

    /**
     * checks the URI for a prefix, and if one is found, set used prefixes to
     * true
     * 
     * @param predicate
     * @return
     */
    private String getURI(String uri) {
        // check for bnode
        if (uri.startsWith("_:")) {
            // return the bnode id
            return uri;
        }
        for (final String prefix : availableNamespaces.keySet()) {
            if (uri.startsWith(prefix)) {
                usedNamespaces.add(prefix);
                // return the prefixed URI
                return availableNamespaces.get(prefix) + ":" + uri.substring(prefix.length());
            }
        }
        // return the full URI
        return "<" + uri + ">";
    }

}