All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.clerezza.rdf.stable.serializer.NTriplesSerializer Maven / Gradle / Ivy

Go to download

A SerializingProvider that produces a stable output. This means that if the graph to be serialized changes a little, the output also changes only a little.

The newest version!
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.clerezza.rdf.stable.serializer;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;
import java.util.HashMap;
import java.util.Map;

import org.apache.clerezza.commons.rdf.BlankNode;
import org.apache.clerezza.commons.rdf.Graph;
import org.apache.clerezza.commons.rdf.IRI;
import org.apache.clerezza.commons.rdf.Literal;
import org.apache.clerezza.commons.rdf.Triple;
import org.apache.clerezza.rdf.ontologies.RDF;
import org.apache.clerezza.rdf.ontologies.XSD;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A serializer that can serialze RDF Graphs into N-Triples format.
 *
 * NOTE: This is a special purpose serializer to be used with
 * {@link org.apache.clerezza.rdf.stable.serializer.StableSerializerProvider}.
 * Instances are assumed to be short-lived and not used concurrently.
 *
 * @author Daniel Spicar ([email protected])
 */
class NTriplesSerializer {

    private final Logger logger = LoggerFactory.getLogger(getClass());

    private long genSymCounter = 0;
    private HashMap bNodeLabels;

    /**
     * Resets the counter to zero that constitutes
     * the numerical part of blank node labels.
     */
    void resetGenSymCounter() {
        genSymCounter = 0;
    }

    /**
     * Serializes a given Graph using the N-Triples format.
     *
     * @param os
     *                An outputstream.
     * @param tc
     *                the triples of the graph to be serialized.
     */
    void serialize(OutputStream os, Graph tc) {
        try {
            bNodeLabels = new HashMap(tc.size() / 2);

            for (Triple t : tc) {
                if (t.getSubject() instanceof BlankNode) {
                    os.write(serializeBlankNode((BlankNode) t.getSubject()).getBytes());
                } else {
                    os.write(serializeIRI(
                            (IRI) t.getSubject()).getBytes());
                }

                os.write((t.getPredicate().toString() + " ").getBytes());

                if (t.getObject() instanceof BlankNode) {
                    os.write(serializeBlankNode((BlankNode) t.getObject()).getBytes());
                    os.write(".\n".getBytes());
                } else {
                    if (t.getObject() instanceof Literal) {
                        os.write((serializeLiteral((Literal) t.getObject()) +
                                ".\n").getBytes());
                    } else {
                        os.write((serializeIRI((IRI) t.getObject()) +
                                ".\n").getBytes());
                    }
                }
            }
        } catch (IOException ex) {
            logger.error("Exception while serializing graph: {}", ex);
        }
    }

    private String serializeIRI(IRI uriRef) {
        StringBuffer sb = new StringBuffer("<");
        escapeUtf8ToUsAscii(uriRef.getUnicodeString(), sb, true);
        sb.append("> ");

        return sb.toString() ;
    }

    private String serializeBlankNode(BlankNode bNode) {
        if (bNodeLabels.containsKey(bNode)) {
            return bNodeLabels.get(bNode) + " ";
        } else {
            String label = "_:b" + genSymCounter++;
            bNodeLabels.put(bNode, label);
            return label + " ";
        }
    }
    
    private String serializeLiteral(Literal literal) {
        StringBuffer sb = new StringBuffer("\"");
        escapeUtf8ToUsAscii(literal.getLexicalForm(), sb, false);
        sb = sb.append("\"");

        if(literal.getLanguage() != null &&
                !literal.getLanguage().toString().equals("")) {

            sb.append("@");
            sb.append(literal.getLanguage().toString());
        } else {
            if(!literal.getDataType().equals(XSD.string)) {
                sb.append("^^<");
                escapeUtf8ToUsAscii(
                        literal.getDataType().getUnicodeString(), sb, false);
                sb.append(">");
            }
        } 

        sb.append(" ");

        return sb.toString() ;
    }

    private void escapeUtf8ToUsAscii(String input, StringBuffer sb, boolean uri) {

        for (int i = 0; i < input.length(); ++i) {
            char c = input.charAt(i);
            int val = (int) c;
            if (c == '\t') {
                sb.append("\\t");
            } else if (c == '\n') {
                sb.append("\\n");
            } else if (c == '\r') {
                sb.append("\\r");
            } else if (c == '"') {
                sb.append("\\\"");
            } else if (c == '\\') {
                sb.append("\\\\");
            } else if ((val >= 0x0 && val <= 0x8) || (val >= 0xB && val <= 0xC) 
                    || (val >= 0xE && val <= 0x1F)
                    || (val >= 0x7F && val <= 0xFFFF)) {
                sb.append("\\u");
                sb.append(getIntegerHashString(val, 4));
            } else if (val >= 0x10000 && val <= 0x10FFFF) {
                sb.append("\\U");
                sb.append(getIntegerHashString(val, 8));
            } else {
                if (uri && (c == '>' || c == '<')) {
                    sb.append("\\u");
                    sb.append(getIntegerHashString(val, 4));
                } else {
                    sb.append(c);
                }
            }
        }
    }

    private String getIntegerHashString(int val, int length) {
        StringBuffer sb = new StringBuffer();
        String hex = Integer.toHexString(val);
        for(int i = 0; i < length - hex.length(); ++i) {
            sb.append("0");
        }
        sb.append(hex);

        return sb.toString().toUpperCase();
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy