All downloads are free. The search and download functionality uses the official Maven repository.

de.prob.prolog.output.FastReadWriter Maven / Gradle / Ivy

The newest version!
package de.prob.prolog.output;

import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.ByteOrder;
import java.nio.CharBuffer;
import java.nio.charset.Charset;
import java.nio.charset.CharsetEncoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.nio.charset.UnmappableCharacterException;
import java.util.ArrayDeque;
import java.util.Deque;
import java.util.HashMap;
import java.util.Locale;
import java.util.Map;
import java.util.Objects;

import de.prob.prolog.term.AIntegerPrologTerm;
import de.prob.prolog.term.FloatPrologTerm;
import de.prob.prolog.term.ListPrologTerm;
import de.prob.prolog.term.PrologTerm;

/**
 * Writes Prolog terms in SICStus or SWI (undocumented) fastrw format.
 * Generates same output as fast_write(Stream,Term) after use_module(library(fastrw)).
 * And can be read using fast_read(Stream,Term).
 * <p>
 * The SWI encoding depends on properties of the reading system (word size, endianness,
 * operating system). They default to the values detected for the running JVM and can be
 * overridden through the {@code with...} methods. This class performs no synchronization.
 */
public final class FastReadWriter {

	/** Prolog systems whose fastrw encoding can be produced. */
	public enum PrologSystem {
		SICSTUS, SWI
	}

	private final PrologSystem flavor;
	private final OutputStream out;

	// Target properties; they are only consulted by the SWI encoding.
	private int wordBytes;
	private ByteOrder endianness;
	private boolean windows;
	private boolean allowWAtom;
	// Lazily resolved by writeAtomSWI; null means "derive from windows/endianness on next use".
	private Charset cachedWAtomCharset;

	/**
	 * Creates a writer for the given Prolog system.
	 * Word size, endianness and OS of the target default to those detected for the running JVM;
	 * wide atom support is enabled.
	 *
	 * @param flavor the Prolog system whose format should be produced
	 * @param out the stream the encoded terms are written to
	 * @throws NullPointerException if {@code flavor} or {@code out} is null
	 */
	public FastReadWriter(PrologSystem flavor, OutputStream out) {
		this.flavor = Objects.requireNonNull(flavor, "flavor");
		this.out = Objects.requireNonNull(out, "out");
		this.wordBytes = is64Bit() ? 8 : 4;
		this.endianness = ByteOrder.nativeOrder();
		this.windows = System.getProperty("os.name", "").toLowerCase(Locale.ROOT).contains("windows");
		this.allowWAtom = true;
		this.cachedWAtomCharset = null;
	}

	/**
	 * Creates a writer that produces the SICStus format.
	 *
	 * @param out the stream the encoded terms are written to
	 * @throws NullPointerException if {@code out} is null
	 */
	public FastReadWriter(OutputStream out) {
		this(PrologSystem.SICSTUS, out);
	}

	/**
	 * Enables wide (non latin characters) atom support on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withWAtomSupport() {
		this.allowWAtom = true;
		this.cachedWAtomCharset = null;
		return this;
	}

	/**
	 * Disables wide (non latin characters) atom support on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withoutWAtomSupport() {
		this.allowWAtom = false;
		this.cachedWAtomCharset = null;
		return this;
	}

	/**
	 * Sets the wide (non latin characters) atom charset on SWI.
	 * This needs to be set last, as other with-ers might reset the charset.
	 *
	 * @param charset the charset used to encode wide atoms; {@code null} selects the charset
	 *                derived from the configured target OS and endianness
	 * @return this writer
	 */
	public FastReadWriter withWAtomCharset(Charset charset) {
		this.cachedWAtomCharset = charset;
		return this;
	}

	/**
	 * Set the target word size to 64bit on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withTarget64bit() {
		this.wordBytes = 8;
		this.cachedWAtomCharset = null;
		return this;
	}

	/**
	 * Set the target word size to 32bit on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withTarget32bit() {
		this.wordBytes = 4;
		this.cachedWAtomCharset = null;
		return this;
	}

	/**
	 * Set the target endianness to big on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withTargetBigEndian() {
		this.endianness = ByteOrder.BIG_ENDIAN;
		this.cachedWAtomCharset = null;
		return this;
	}

	/**
	 * Set the target endianness to little on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withTargetLittleEndian() {
		this.endianness = ByteOrder.LITTLE_ENDIAN;
		this.cachedWAtomCharset = null;
		return this;
	}

	/**
	 * Set the target OS to windows on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withTargetWindows() {
		this.windows = true;
		return this;
	}

	/**
	 * Set the target OS to mac/linux on SWI.
	 *
	 * @return this writer
	 */
	public FastReadWriter withTargetNoWindows() {
		this.windows = false;
		return this;
	}

	/**
	 * Encodes a single term in the configured format and writes it to the output stream.
	 * The stream is not flushed.
	 *
	 * @param term the term to write
	 * @throws IOException if writing to the underlying stream fails
	 * @throws NullPointerException if {@code term} is null
	 * @throws IllegalArgumentException if the term (or one of its sub-terms) cannot be represented,
	 *                                  e.g. an unsupported term type, a compound with arity above 255
	 *                                  or a string containing NUL (SICStus), an integer outside the
	 *                                  tagged range or a non-latin atom with wide atoms disabled (SWI)
	 */
	public void fastwrite(PrologTerm term) throws IOException {
		Objects.requireNonNull(term, "term");
		switch (this.flavor) {
			case SICSTUS:
				this.writeTermSicstus(term);
				break;
			case SWI:
				this.writeTermSWI(term);
				break;
			default:
				throw new AssertionError("unknown prolog system: " + this.flavor);
		}
	}

	/**
	 * Flushes the underlying output stream.
	 *
	 * @throws IOException if flushing the underlying stream fails
	 */
	public void flush() throws IOException {
		this.out.flush();
	}

	/**
	 * Writes one term in SICStus format: a version byte followed by the term in prefix order.
	 * The traversal is iterative (explicit stack), so deeply nested terms do not recurse.
	 */
	private void writeTermSicstus(PrologTerm term) throws IOException {
		this.out.write('D'); // version

		// local variable name -> index table, it is impossible to share variables between different sentences
		Map<String, Integer> varCache = new HashMap<>();

		// used as a stack: the next term to emit is always at the front
		Deque<PrologTerm> q = new ArrayDeque<>();
		q.addFirst(term);
		while (!q.isEmpty()) {
			PrologTerm t = q.removeFirst();
			if (t.isList()) {
				// strings/lists of bytes can be written using "
				// but we always use the standard way
				ListPrologTerm l = (ListPrologTerm) t;
				if (l.isEmpty()) {
					this.out.write(']');
				} else {
					this.out.write('[');
					// push tail first so that head is emitted before it
					q.addFirst(l.tail());
					q.addFirst(l.head());
				}
			} else if (t.isCompound()) {
				// the arity is stored in a single byte; validate before emitting anything for this term
				int arity = t.getArity();
				if (arity > 0xff) {
					throw new IllegalArgumentException("can only write terms with a max arity of 255, but got arity " + arity);
				}

				this.out.write('S');
				this.writeStringSicstus(t.getFunctor());
				this.out.write(arity);
				for (int i = arity; i >= 1; i--) { // need reverse order because q is a stack
					q.addFirst(t.getArgument(i));
				}
			} else {
				byte b;
				String text;
				if (t instanceof AIntegerPrologTerm) {
					b = 'I';
					text = t.getFunctor(); // '-'-prefix is supported
				} else if (t instanceof FloatPrologTerm) {
					b = 'F';
					text = t.getFunctor(); // this even works with numbers like 1.337E101
				} else if (t.isAtom()) {
					b = 'A';
					text = t.getFunctor(); // this should work with non-ascii chars as well
				} else if (t.isVariable()) {
					b = '_';
					// variables are numbered in order of first occurrence, starting at 0
					text = String.valueOf(varCache.computeIfAbsent(t.getFunctor(), k -> varCache.size()));
				} else {
					throw new IllegalArgumentException("unsupported prolog term " + t.getClass().getSimpleName());
				}

				this.out.write(b);
				this.writeStringSicstus(text);
			}
		}
	}

	/**
	 * Writes a string as UTF-8 bytes followed by a terminating NUL byte.
	 *
	 * @throws IllegalArgumentException if the string itself contains a NUL character, because it
	 *                                  would be indistinguishable from the terminator
	 */
	private void writeStringSicstus(String s) throws IOException {
		if (s.indexOf('\0') >= 0) {
			throw new IllegalArgumentException("cannot write text containing a NUL character in SICStus fastrw format");
		}
		this.out.write(s.getBytes(StandardCharsets.UTF_8));
		this.out.write(0);
	}

	/**
	 * Writes one term in SWI record format.
	 * Integers that fit into a long and atoms at the top level use a short form (header + value).
	 * All other terms are first encoded into a buffer, because the header carries the size of the
	 * encoded data, the required global stack size and the number of variables.
	 */
	private void writeTermSWI(PrologTerm term) throws IOException {
		final int PL_REC_VERSION = 3;
		final int REC_VSHIFT = 5;

		final int REC_32 = 0x01;
		final int REC_64 = 0x02;
		final int REC_SZ;
		if (this.wordBytes == 8) {
			REC_SZ = REC_64;
		} else if (this.wordBytes == 4) {
			REC_SZ = REC_32;
		} else {
			// wordBytes is only ever assigned 4 or 8 inside this class
			throw new AssertionError();
		}
		final int REC_INT = 0x04;
		final int REC_ATOM = 0x08;
		final int REC_GROUND = 0x10;

		final int REC_HDR = REC_SZ | (PL_REC_VERSION << REC_VSHIFT);

		final int PL_TYPE_VARIABLE = 1;      /* variable */
		final int PL_TYPE_CONS = 8;          /* list-cell */
		final int PL_TYPE_EXT_COMPOUND = 13; /* External (inlined) functor */
		final int PL_TYPE_EXT_FLOAT = 14;    /* float in standard-byte order */

		// number of target words needed to hold one double (rounded up)
		final int WORDS_PER_DOUBLE = (Double.BYTES + wordBytes - 1) / wordBytes;

		// fast path for primitives
		if (term instanceof AIntegerPrologTerm) {
			AIntegerPrologTerm intTerm = (AIntegerPrologTerm) term;
			try {
				long value = intTerm.longValueExact();
				// this can also deal with numbers that are larger than the max tagged int but still fit into a long
				this.out.write(REC_HDR | REC_INT | REC_GROUND);
				writeInt64SWI(this.out, value);
				return;
			} catch (ArithmeticException ignored) {
				// value does not fit into a long: nothing has been written yet,
				// fall through to the generic encoder below
			}
		} else if (term.isAtom()) { // also includes the empty list
			this.out.write(REC_HDR | REC_ATOM | REC_GROUND);
			this.writeAtomSWI(this.out, term);
			return;
		}

		ByteArrayOutputStream data = new ByteArrayOutputStream();
		// variable name -> index, numbered in order of first occurrence
		Map<String, Integer> varCache = new HashMap<>();
		int size = 0; // global stack size in words

		// write term data to data
		// q is used as a stack: the next term to emit is always at the front
		Deque<PrologTerm> q = new ArrayDeque<>();
		q.addFirst(term);
		while (!q.isEmpty()) {
			PrologTerm t = q.removeFirst();
			if (t.isVariable()) {
				data.write(PL_TYPE_VARIABLE);
				int varIndex = varCache.computeIfAbsent(t.getFunctor(), k -> varCache.size());
				writeSizeSWI(data, varIndex);
			} else if (t.isAtom()) { // also includes the empty list
				this.writeAtomSWI(data, t);
			} else if (t instanceof AIntegerPrologTerm) {
				this.writeIntSWI(data, (AIntegerPrologTerm) t);
			} else if (t instanceof FloatPrologTerm) {
				data.write(PL_TYPE_EXT_FLOAT);
				double value = ((FloatPrologTerm) t).getValue();
				// the double is always emitted as 8 little-endian bytes, independent of the target endianness
				ByteBuffer ieee754LE = ByteBuffer.allocate(8);
				ieee754LE.order(ByteOrder.LITTLE_ENDIAN);
				ieee754LE.putDouble(value);
				ieee754LE.flip();
				int len = ieee754LE.remaining();
				assert len == 8;
				data.write(ieee754LE.array(), ieee754LE.arrayOffset(), len);
				size += WORDS_PER_DOUBLE + 2;
			} else if (t.isList()) {
				ListPrologTerm l = (ListPrologTerm) t;
				data.write(PL_TYPE_CONS);
				// push tail first so that head is emitted before it
				q.addFirst(l.tail());
				q.addFirst(l.head());
				size += 3; // cons functor + head + tail
			} else if (t.isCompound()) {
				data.write(PL_TYPE_EXT_COMPOUND);
				int arity = t.getArity();
				writeSizeSWI(data, arity);
				this.writeAtomSWI(data, t);
				size += 1 + arity; // functor + arguments
				for (int i = arity; i >= 1; i--) { // reverse order because q is a stack
					q.addFirst(t.getArgument(i));
				}
			} else {
				throw new IllegalArgumentException("unsupported prolog term " + t.getClass().getSimpleName());
			}
		}

		// magic code: REC_HDR (| REC_GROUND)
		int tag = REC_HDR;
		if (varCache.isEmpty()) {
			tag |= REC_GROUND;
		}
		this.out.write(tag);

		// code size
		writeSizeSWI(this.out, data.size());

		// (global) stack size
		writeSizeSWI(this.out, size);

		// if not ground: numvars
		if (!varCache.isEmpty()) {
			writeSizeSWI(this.out, varCache.size());
		}

		// data (code)
		this.out.write(data.toByteArray());
	}

	/**
	 * Writes the functor of {@code t} as an SWI atom, or the nil marker if {@code t} is the empty list.
	 * Atoms that can be encoded as ISO-8859-1 are written as plain atoms. All others are written
	 * as wide atoms using the wide atom charset, provided wide atom support is enabled.
	 *
	 * @throws IllegalArgumentException if the atom is not ISO-8859-1 encodable and wide atoms are disabled
	 */
	private void writeAtomSWI(OutputStream os, PrologTerm t) throws IOException {
		final int PL_TYPE_NIL = 9;        /* [] */
		final int PL_TYPE_EXT_ATOM = 11;  /* External (inlined) atom */
		final int PL_TYPE_EXT_WATOM = 12; /* External (inlined) wide atom */

		if (t.isList() && ((ListPrologTerm) t).isEmpty()) {
			os.write(PL_TYPE_NIL);
			return;
		}

		String atom = t.getFunctor();
		// REPORT makes the encoder throw instead of silently substituting '?'
		CharsetEncoder extendedAsciiEncoder = StandardCharsets.ISO_8859_1.newEncoder()
				.onMalformedInput(CodingErrorAction.REPORT)
				.onUnmappableCharacter(CodingErrorAction.REPORT);
		try {
			// encode before writing the type byte, so nothing is emitted if the atom turns out to be wide
			ByteBuffer result = extendedAsciiEncoder.encode(CharBuffer.wrap(atom));
			os.write(PL_TYPE_EXT_ATOM);

			int len = result.remaining();
			writeSizeSWI(os, len);
			os.write(result.array(), result.arrayOffset(), len);
		} catch (UnmappableCharacterException e) {
			if (this.allowWAtom) {
				os.write(PL_TYPE_EXT_WATOM);
				if (this.cachedWAtomCharset == null) {
					this.cachedWAtomCharset = this.wcharCharset();
				}
				byte[] bytes = atom.getBytes(this.cachedWAtomCharset);
				// the length is given in bytes of the encoded form
				writeSizeSWI(os, bytes.length);
				os.write(bytes);
			} else {
				throw new IllegalArgumentException("atom contains non-latin characters", e);
			}
		}
	}

	/**
	 * Writes an integer that occurs inside a larger term as an SWI tagged integer.
	 *
	 * @throws IllegalArgumentException if the value does not fit into a long or lies outside the
	 *                                  tagged integer range for the configured word size
	 */
	private void writeIntSWI(OutputStream os, AIntegerPrologTerm t) throws IOException {
		final int TAG_BITS = 7;
		final int SIGN_BITS = 1;
		final long MAX_TAGGED_INT = (1L << (this.wordBytes * 8 - TAG_BITS - SIGN_BITS)) - 1;
		final long MIN_TAGGED_INT = -(1L << (this.wordBytes * 8 - TAG_BITS - SIGN_BITS));

		final int PL_TYPE_TAGGED_INTEGER = 4; /* tagged integer */

		long value;
		try {
			value = t.longValueExact();
			if (value <= MAX_TAGGED_INT && value >= MIN_TAGGED_INT) {
				os.write(PL_TYPE_TAGGED_INTEGER);
				writeInt64SWI(os, value);
				return;
			}
		} catch (ArithmeticException ignored) {
			// does not even fit into a long; reported as out of range below
		}

		// TODO: support bigger integers
		throw new IllegalArgumentException("int out of range (" + t.getFunctor() + ")");
	}

	/**
	 * Writes a size as a variable-length sequence of 7-bit groups, most significant group first.
	 * Every byte except the last has its high bit (0x80) set; leading zero groups are omitted.
	 * The value is treated as unsigned.
	 */
	private static void writeSizeSWI(OutputStream os, int val) throws IOException {
		// this routine takes size_t in C and thus is dependent on the word size
		// here it takes int, so we can hardcode the integer size in the definition of "zips"
		if ((val & ~0x7f) == 0) { // fast path and 0: just a single byte
			os.write(val);
		} else {
			boolean leading = true;
			// start at the highest 7-bit group an int can occupy
			for (int zips = (Integer.SIZE + 7 - 1) / 7 - 1; zips >= 0; zips--) {
				int d = (val >>> (zips * 7)) & 0x7f;
				if (d != 0 || !leading) {
					if (zips != 0) {
						d |= 0x80; // continuation marker: more groups follow
					}
					os.write(d);
					leading = false;
				}
			}
		}
	}

	/**
	 * Writes a signed integer as a length byte followed by that many bytes of the two's complement
	 * representation, most significant byte first. The length is the smallest number of bytes
	 * such that the highest bit of the magnitude still leaves room for a sign bit.
	 */
	private static void writeInt64SWI(OutputStream os, long value) throws IOException {
		int bytes;
		if (value == 0) {
			bytes = 1;
		} else if (value == Long.MIN_VALUE) {
			// Math.abs(Long.MIN_VALUE) overflows, so this case is handled separately
			bytes = Long.BYTES;
		} else {
			int msb = Long.SIZE - 1 - Long.numberOfLeadingZeros(Math.abs(value));
			bytes = (msb + 9) / 8;
		}
		os.write(bytes);

		while (--bytes >= 0) {
			int b = (int) (value >> (bytes * 8)) & 0xff;
			os.write(b);
		}
	}

	/**
	 * Determines the charset used for wide atoms from the configured target OS and endianness.
	 */
	private Charset wcharCharset() {
		if (this.windows) {
			// https://learn.microsoft.com/en-us/cpp/cpp/char-wchar-t-char16-t-char32-t?view=msvc-170
			// Windows always uses UTF-16LE
			return StandardCharsets.UTF_16LE;
		} else if (this.endianness == ByteOrder.BIG_ENDIAN) {
			// While on Linux UCS-4 (which is UTF-32) is used, but it depends on the system's endianness
			return Charset.forName("UTF-32BE");
		} else {
			// Ditto
			return Charset.forName("UTF-32LE");
		}
	}

	/**
	 * Guesses whether the running JVM is 64-bit by looking for "64" in the first available of the
	 * system properties sun.arch.data.model, com.ibm.vm.bitmode and os.arch.
	 */
	private static boolean is64Bit() {
		String bits = System.getProperty("sun.arch.data.model", System.getProperty("com.ibm.vm.bitmode", System.getProperty("os.arch", "")));
		return bits.contains("64");
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy