All downloads are free. The search and download functionality uses the official Maven repository.

org.eclipse.rdf4j.rio.binary.BinaryRDFWriter Maven / Gradle / Ivy

/*******************************************************************************
 * Copyright (c) 2015 Eclipse RDF4J contributors, Aduna, and others.
 *
 * All rights reserved. This program and the accompanying materials
 * are made available under the terms of the Eclipse Distribution License v1.0
 * which accompanies this distribution, and is available at
 * http://www.eclipse.org/org/documents/edl-v10.php.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 *******************************************************************************/

package org.eclipse.rdf4j.rio.binary;

import static org.eclipse.rdf4j.common.io.IOUtil.writeVarInt;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.BNODE_VALUE;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.COMMENT;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.DATATYPE_LITERAL_VALUE;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.END_OF_DATA;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.FORMAT_V1;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.FORMAT_V2;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.LANG_LITERAL_VALUE;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.MAGIC_NUMBER;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.NAMESPACE_DECL;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.NULL_VALUE;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.PLAIN_LITERAL_VALUE;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.STATEMENT;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.TRIPLE_VALUE;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.URI_VALUE;
import static org.eclipse.rdf4j.rio.binary.BinaryRDFConstants.VALUE_REF;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.util.ArrayDeque;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Map;
import java.util.Optional;
import java.util.Queue;
import java.util.Set;

import org.eclipse.rdf4j.common.io.ByteSink;
import org.eclipse.rdf4j.model.BNode;
import org.eclipse.rdf4j.model.IRI;
import org.eclipse.rdf4j.model.Literal;
import org.eclipse.rdf4j.model.Statement;
import org.eclipse.rdf4j.model.Triple;
import org.eclipse.rdf4j.model.Value;
import org.eclipse.rdf4j.model.vocabulary.XSD;
import org.eclipse.rdf4j.rio.RDFFormat;
import org.eclipse.rdf4j.rio.RDFHandlerException;
import org.eclipse.rdf4j.rio.RDFWriter;
import org.eclipse.rdf4j.rio.RioSetting;
import org.eclipse.rdf4j.rio.WriterConfig;
import org.eclipse.rdf4j.rio.helpers.AbstractRDFWriter;
import org.eclipse.rdf4j.rio.helpers.BinaryRDFWriterSettings;

/**
 * A {@link RDFWriter} for the binary RDF format.
 *
 * @author Arjohn Kampman
 * @author Frens Jan Rumph
 */
public class BinaryRDFWriter extends AbstractRDFWriter implements ByteSink {

	private final Queue statementQueue;

	private int bufferSize;

	private final Map valueMeta;

	private int nextId = 0;

	private final Queue idPool;

	private final DataOutputStream out;

	private int formatVersion;
	private Charset charset;
	private boolean recycleIds;

	public BinaryRDFWriter(OutputStream out) {
		this(out, 8192);
	}

	public BinaryRDFWriter(OutputStream out, int bufferSize) {
		this.out = new DataOutputStream(new BufferedOutputStream(out));
		this.statementQueue = new ArrayDeque<>(bufferSize);
		this.valueMeta = new HashMap<>(bufferSize * 3);
		this.idPool = new ArrayDeque<>(bufferSize);
		this.bufferSize = bufferSize;
	}

	@Override
	public RDFFormat getRDFFormat() {
		return RDFFormat.BINARY;
	}

	@Override
	public Collection> getSupportedSettings() {
		Set> result = new HashSet<>(super.getSupportedSettings());
		result.add(BinaryRDFWriterSettings.VERSION);
		result.add(BinaryRDFWriterSettings.BUFFER_SIZE);
		result.add(BinaryRDFWriterSettings.CHARSET);
		result.add(BinaryRDFWriterSettings.RECYCLE_IDS);
		return result;
	}

	@Override
	public OutputStream getOutputStream() {
		return out;
	}

	@Override
	public void startRDF() throws RDFHandlerException {
		super.startRDF();

		handleWriterConfig();

		try {
			out.write(MAGIC_NUMBER);
			out.writeInt(formatVersion);

			if (formatVersion != FORMAT_V1) {
				byte[] charsetBytes = charset.toString().getBytes(charset);
				writeInt(charsetBytes.length);
				out.write(charsetBytes);
			}
		} catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	private void handleWriterConfig() {
		WriterConfig config = getWriterConfig();

		formatVersion = Math.toIntExact(config.get(BinaryRDFWriterSettings.VERSION));
		if (formatVersion == FORMAT_V1) {
			charset = StandardCharsets.UTF_16BE;
		} else if (formatVersion == FORMAT_V2) {
			charset = Charset.forName(config.get(BinaryRDFWriterSettings.CHARSET));
		} else {
			throw new IllegalArgumentException("Unsupported binary RDF version: " + formatVersion);
		}

		if (config.isSet(BinaryRDFWriterSettings.BUFFER_SIZE)) {
			bufferSize = Math.toIntExact(config.get(BinaryRDFWriterSettings.BUFFER_SIZE));
		}

		recycleIds = config.get(BinaryRDFWriterSettings.RECYCLE_IDS);
	}

	@Override
	public void endRDF() throws RDFHandlerException {
		checkWritingStarted();
		try {
			while (!statementQueue.isEmpty()) {
				writeStatement();
			}
			out.writeByte(END_OF_DATA);
			out.flush();
		} catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	@Override
	public void handleNamespace(String prefix, String uri) throws RDFHandlerException {
		checkWritingStarted();
		try {
			out.writeByte(NAMESPACE_DECL);
			writeString(prefix);
			writeString(uri);
		} catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	@Override
	public void handleComment(String comment) throws RDFHandlerException {
		checkWritingStarted();
		try {
			out.writeByte(COMMENT);
			writeString(comment);
		} catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	@Override
	protected void consumeStatement(Statement st) {
		statementQueue.add(st);
		incValueFreq(st.getSubject());
		incValueFreq(st.getPredicate());
		incValueFreq(st.getObject());
		incValueFreq(st.getContext());

		if (statementQueue.size() < bufferSize) {
			// postpone statement writing until queue is filled
			return;
		}

		// Process the first statement from the queue
		try {
			writeStatement();
		} catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	/** Writes the first statement from the statement queue */
	private void writeStatement() throws RDFHandlerException, IOException {
		Statement st = statementQueue.remove();

		out.writeByte(STATEMENT);
		writeValueOrId(st.getSubject());
		writeValueOrId(st.getPredicate());
		writeValueOrId(st.getObject());
		writeValueOrId(st.getContext());
	}

	private void incValueFreq(Value v) {
		if (v == null) {
			return;
		}

		ValueMeta meta = valueMeta.get(v);
		if (meta == null) {
			valueMeta.put(v, new ValueMeta(1));
		} else {
			meta.frequency++;
			if (meta.frequency == 2 && !meta.hasId()) {
				assignId(v, meta);
			}
		}
	}

	private void assignId(Value v, ValueMeta meta) {
		Integer id = idPool.poll();
		if (id == null) {
			id = nextId++; // get then increment
		}

		meta.id = id;

		try {
			out.writeByte(BinaryRDFConstants.VALUE_DECL);
			writeInt(id);
			writeValue(v);
		} catch (IOException e) {
			throw new RDFHandlerException(e);
		}
	}

	private void writeValueOrId(Value value) throws RDFHandlerException, IOException {
		if (value == null) {
			out.writeByte(NULL_VALUE);
		} else {
			ValueMeta meta = valueMeta.get(value);

			if (meta.hasId()) {
				out.writeByte(VALUE_REF);
				writeInt(meta.id);
			} else {
				writeValue(value);
			}

			meta.frequency--;

			if (meta.frequency == 0) {
				if (!meta.hasId()) {
					valueMeta.remove(value);
				} else if (recycleIds) {
					valueMeta.remove(value);
					idPool.add(meta.id);
				}
				// else keep value and id
			}
		}
	}

	private void writeValue(Value value) throws RDFHandlerException, IOException {
		if (value instanceof IRI) {
			writeURI((IRI) value);
		} else if (value instanceof BNode) {
			writeBNode((BNode) value);
		} else if (value instanceof Literal) {
			writeLiteral((Literal) value);
		} else if (value instanceof Triple) {
			writeTriple((Triple) value);
		} else {
			throw new RDFHandlerException("Unknown Value object type: " + value.getClass());
		}
	}

	private void writeURI(IRI uri) throws IOException {
		out.writeByte(URI_VALUE);
		writeString(uri.toString());
	}

	private void writeBNode(BNode bnode) throws IOException {
		out.writeByte(BNODE_VALUE);
		writeString(bnode.getID());
	}

	private void writeLiteral(Literal literal) throws IOException {
		String label = literal.getLabel();
		IRI datatype = literal.getDatatype();
		Optional language = literal.getLanguage();

		if (language.isPresent()) {
			out.writeByte(LANG_LITERAL_VALUE);
			writeString(label);
			writeString(language.get());
		} else if (datatype.equals(XSD.STRING)) {
			out.writeByte(PLAIN_LITERAL_VALUE);
			writeString(label);
		} else {
			out.writeByte(DATATYPE_LITERAL_VALUE);
			writeString(label);
			writeString(datatype.toString());
		}
	}

	private void writeTriple(Triple triple) throws IOException {
		out.writeByte(TRIPLE_VALUE);
		writeValue(triple.getSubject());
		writeValue(triple.getPredicate());
		writeValue(triple.getObject());
	}

	private void writeString(String s) throws IOException {
		byte[] bytes = s.getBytes(charset);

		if (formatVersion == FORMAT_V1) {
			writeInt(s.length());
		} else {
			writeInt(bytes.length);
		}

		out.write(bytes);
	}

	private void writeInt(int i) throws IOException {
		if (formatVersion == FORMAT_V1) {
			out.writeInt(i);
		} else {
			writeVarInt(out, i);
		}
	}

	/**
	 * Holds the frequency of a value within the current {@link #statementQueue} as well as an identifier if any has
	 * been assigned.
	 */
	private static class ValueMeta {

		private long frequency;
		private int id = -1;

		public ValueMeta(long frequency) {
			this.frequency = frequency;
		}

		private boolean hasId() {
			return id != -1;
		}
	}

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy