All downloads are free. Search and download functionality uses the official Maven repository.

org.semarglproject.rdf.TurtleSerializer Maven / Gradle / Ivy

There is a newer version: 0.7
Show newest version
/**
 * Copyright 2012-2013 Lev Khomich
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.semarglproject.rdf;

import org.semarglproject.sink.CharSink;
import org.semarglproject.sink.Pipe;
import org.semarglproject.sink.TripleSink;
import org.semarglproject.vocab.RDF;

import java.util.HashSet;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;

/**
 * Implementation of {@link TripleSink} which serializes triples to {@link CharSink} using
 * Turtle syntax.
 * <p>
 * Output is accumulated in an internal buffer and handed to the sink in batches
 * (see {@link #endTriple()}) and once more when the stream ends. Consecutive triples sharing
 * a subject (and predicate) are abbreviated with {@code ;} and {@code ,}; blank nodes whose
 * label ends with {@link RDF#SHORTENABLE_BNODE_SUFFIX} are written in {@code [ ... ]} form.
 * <p>
 * NOTE(review): {@code sink.process(String)} is invoked on the inherited {@code sink} field,
 * which suggests the superclass is parameterised by the sink type (e.g. {@code Pipe<CharSink>})
 * - confirm against the declaration of {@link Pipe}.
 */
public final class TurtleSerializer extends Pipe implements TripleSink {

    private static final String DOT_EOL = " .\n";
    private static final String COMMA_EOL = " ,\n";
    private static final String SEMICOLON_EOL = " ;\n";
    private static final String EOL = "\n";

    private static final String MULTILINE_QUOTE = "\"\"\"";
    private static final char SINGLE_LINE_QUOTE = '"';
    private static final char BNODE_START = '[';
    private static final char BNODE_END = ']';
    private static final char URI_START = '<';
    private static final char URI_END = '>';

    private static final char SPACE = ' ';
    private static final char RDF_TYPE_ABBR = 'a';
    private static final String INDENT = "    ";

    /** Value of {@link #step} at which the buffer is flushed to the sink. */
    private static final short BATCH_SIZE = 10;

    /** Pending output; {@code null} right after a flush and lazily re-created. */
    private StringBuilder builder;

    /** Subject of the statement currently being written, or {@code null} if none is open. */
    private String prevSubj;
    /** Predicate of the last written triple, or {@code null} when a new predicate must be emitted. */
    private String prevPred;
    /** Number of triples counted since the last flush. */
    private short step;
    /**
     * Blank nodes that are currently open in {@code [ ... ]} form.
     * NOTE(review): elements are added with {@code offer} and removed with {@code poll}, i.e. in
     * FIFO order, although the name suggests a stack; with more than one level of nesting
     * {@link #closeBnode()} therefore removes the outermost entry first - confirm this is intended.
     */
    private final Queue<String> bnodeStack = new LinkedList<String>();
    /** Blank node subjects that were written by label and must not be re-opened with {@code [}. */
    private final Set<String> namedBnodes = new HashSet<String>();
    /** Base URI with its last character removed (see {@link #setBaseUri(String)}), or {@code null}. */
    private String baseUri;

    private TurtleSerializer(CharSink sink) {
        super(sink);
    }

    /**
     * Creates instance of TurtleSerializer connected to specified sink.
     * @param sink sink to be connected to
     * @return instance of TurtleSerializer
     */
    public static TripleSink connect(CharSink sink) {
        return new TurtleSerializer(sink);
    }

    /**
     * Writes a triple whose object is a URI or a blank node.
     * A blank node object is opened in {@code [ ... ]} form when its label is shortenable and it
     * has not been written by label before; otherwise its label is written verbatim.
     */
    @Override
    public void addNonLiteral(String subj, String pred, String obj) {
        startTriple(subj, pred);
        if (obj.startsWith(RDF.BNODE_PREFIX)) {
            if (!namedBnodes.contains(obj) && obj.endsWith(RDF.SHORTENABLE_BNODE_SUFFIX)) {
                openBnode(obj);
            } else {
                builder.append(obj);
            }
        } else {
            serializeUri(obj);
        }
        endTriple();
    }

    /**
     * Writes a triple whose object is a plain literal, followed by {@code @lang} when
     * {@code lang} is not {@code null}.
     */
    @Override
    public void addPlainLiteral(String subj, String pred, String content, String lang) {
        startTriple(subj, pred);
        addContent(content);
        if (lang != null) {
            builder.append('@').append(lang);
        }
        endTriple();
    }

    /**
     * Writes a triple whose object is a typed literal: the quoted content, {@code ^^} and the
     * datatype URI.
     */
    @Override
    public void addTypedLiteral(String subj, String pred, String content, String type) {
        startTriple(subj, pred);
        addContent(content);
        builder.append("^^");
        serializeUri(type);
        endTriple();
    }

    /**
     * Resets the serializer state, buffers the document header ({@code @base} when a base URI
     * is set, then the {@code rdf:} prefix declaration) and delegates to the superclass.
     */
    @Override
    public void startStream() throws ParseException {
        prevSubj = null;
        prevPred = null;
        builder = new StringBuilder();
        if (baseUri != null) {
            builder.append("@base ").append(URI_START).append(baseUri).append(URI_END).append(DOT_EOL);
        }
        builder.append("@prefix rdf: ").append(URI_START).append(RDF.NS).append(URI_END).append(DOT_EOL);
        step = 0;
        bnodeStack.clear();
        namedBnodes.clear();
        super.startStream();
    }

    /**
     * Closes all open blank nodes, terminates the last statement, sends the remaining buffered
     * output to the sink and only then delegates to the superclass.
     *
     * @throws ParseException if the sink or the superclass reports a failure
     */
    @Override
    public void endStream() throws ParseException {
        if (builder == null) {
            builder = new StringBuilder();
        }
        while (!bnodeStack.isEmpty()) {
            closeBnode();
        }
        // closeBnode() already wrote the final " ." when it emptied the stack (prevPred is null then)
        if (prevPred != null) {
            builder.append(DOT_EOL);
        } else {
            builder.append(EOL);
        }
        sink.process(builder.toString());
        builder = null;
        baseUri = null;
        // Called last so the sink receives the tail of the document before it is told that the
        // stream is over (presumably super.endStream() forwards the event to the sink - confirm).
        super.endStream();
    }

    /** This serializer has no configurable properties; always returns {@code false}. */
    @Override
    protected boolean setPropertyInternal(String key, Object value) {
        return false;
    }

    /**
     * Stores the base URI without its last character; URIs starting with the stored value are
     * later written relative to it. A {@code null} or empty argument clears the base URI.
     * NOTE(review): the dropped character is presumably a trailing separator such as
     * {@code '#'} or {@code '/'} - confirm with callers.
     */
    @Override
    public void setBaseUri(String baseUri) {
        if (baseUri == null || baseUri.isEmpty()) {
            this.baseUri = null;
            return;
        }
        this.baseUri = baseUri.substring(0, baseUri.length() - 1);
    }

    /**
     * Writes everything that precedes the object of a triple: a separator that continues or
     * terminates the previous statement, then the subject and predicate as far as they differ
     * from the previous triple.
     */
    private void startTriple(String subj, String pred) {
        if (builder == null) {
            builder = new StringBuilder();
        }
        if (subj.equals(prevSubj)) {
            if (pred.equals(prevPred)) {
                // same subject and predicate: object list
                builder.append(COMMA_EOL);
                indent(2);
            } else if (prevPred != null) {
                // same subject, new predicate: predicate list
                builder.append(SEMICOLON_EOL);
                indent(1);
                serializePredicate(pred);
            } else {
                // first predicate after a blank node was opened or closed
                indent(0);
                serializePredicate(pred);
            }
        } else {
            if (!bnodeStack.isEmpty()) {
                // subject changed inside "[ ... ]": close one level and re-evaluate
                closeBnode();
                startTriple(subj, pred);
                return;
            } else if (prevSubj != null) {
                builder.append(DOT_EOL);
            }
            if (subj.startsWith(RDF.BNODE_PREFIX)) {
                if (subj.endsWith(RDF.SHORTENABLE_BNODE_SUFFIX)) {
                    openBnode(subj);
                } else {
                    builder.append(subj).append(SPACE);
                    // remember the label so later references are not opened with '['
                    namedBnodes.add(subj);
                }
            } else {
                serializeUri(subj);
            }
            serializePredicate(pred);
        }
        prevSubj = subj;
        prevPred = pred;
    }

    /** Writes the predicate, abbreviating {@link RDF#TYPE} to {@code a}. */
    private void serializePredicate(String pred) {
        if (RDF.TYPE.equals(pred)) {
            builder.append(RDF_TYPE_ABBR).append(SPACE);
        } else {
            serializeUri(pred);
        }
    }

    /**
     * Writes a URI followed by a space: as {@code rdf:local} when it starts with {@link RDF#NS},
     * relative to the base URI when it starts with it, and as a full {@code <...>} reference
     * otherwise. Backslashes and {@code '>'} are escaped first.
     */
    private void serializeUri(String uri) {
        String escapedUri = uri.replace("\\", "\\\\").replace(">", "\\u003E");
        if (escapedUri.startsWith(RDF.NS)) {
            builder.append("rdf:").append(escapedUri.substring(RDF.NS.length()));
        } else if (baseUri != null && escapedUri.startsWith(baseUri)) {
            builder.append(URI_START).append(escapedUri.substring(baseUri.length())).append(URI_END);
        } else {
            builder.append(URI_START).append(escapedUri).append(URI_END);
        }
        builder.append(SPACE);
    }

    /** Appends one {@link #INDENT} per open blank node plus {@code additionalIndent} more. */
    private void indent(int additionalIndent) {
        int levels = bnodeStack.size() + additionalIndent;
        for (int i = 0; i < levels; i++) {
            builder.append(INDENT);
        }
    }

    /**
     * Counts the finished triple and, once {@link #step} has reached {@link #BATCH_SIZE},
     * flushes the buffer to the sink and restarts the count.
     */
    private void endTriple() {
        if (step == BATCH_SIZE) {
            try {
                sink.process(builder.toString());
            } catch (ParseException ignored) {
                // Best effort: the add* callbacks declare no checked exceptions, so a sink
                // failure cannot be propagated from here; the batch is dropped.
            }
            builder = null;
            step = 0;
        }
        step++;
    }

    /**
     * Appends literal content with backslashes and double quotes escaped. Content containing a
     * line feed or carriage return is written as a long ({@code """}) string, because Turtle
     * does not allow raw line breaks inside a single-quoted-line literal.
     */
    private void addContent(String content) {
        String escapedContent = content.replace("\\", "\\\\").replace("\"", "\\\"");
        if (escapedContent.contains(EOL) || escapedContent.indexOf('\r') >= 0) {
            builder.append(MULTILINE_QUOTE).append(escapedContent).append(MULTILINE_QUOTE);
        } else {
            builder.append(SINGLE_LINE_QUOTE).append(escapedContent).append(SINGLE_LINE_QUOTE);
        }
    }

    /** Writes {@code [} and makes the given blank node the current subject with no predicate yet. */
    private void openBnode(String obj) {
        builder.append(BNODE_START);
        bnodeStack.offer(obj);
        prevSubj = obj;
        prevPred = null;
    }

    /**
     * Writes {@code ]}, removes one entry from {@link #bnodeStack} and makes the next remaining
     * entry the current subject; when none remains the statement is terminated with {@code " ."}.
     */
    private void closeBnode() {
        builder.append(BNODE_END);
        bnodeStack.poll();
        prevSubj = bnodeStack.peek();
        prevPred = null;
        if (prevSubj == null) {
            builder.append(DOT_EOL);
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy