All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.sandius.rembulan.ByteString Maven / Gradle / Ivy

There is a newer version: 1.0.3
Show newest version
/*
 * Copyright 2016 Miroslav Janíček
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package net.sandius.rembulan;

import net.sandius.rembulan.util.ByteIterator;

import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.Objects;

/**
 * Byte string, an immutable sequence of bytes.
 *
 * 

The purpose of this class is to serve as a bridge between Java strings (with their * characters corresponding to 16-bit code units in the Basic Multilingual Plane (BMP)) * and Lua strings (raw 8-bit sequences).

* *

Byte strings come in two flavours:

*
    *
  • one is a wrapper of {@link String java.lang.String} with a given {@link Charset} * — constructed using {@link #of(String)};
  • *
  • the other is a wrapper of a {@code byte} arrays * — constructed using {@link #wrap(byte[])}.
  • *
* *

The {@code ByteString} class provides the functionality for treating both cases * as sequences of bytes when they take part in Lua operations, and as Java strings when * used by an outer Java application. However, these perspectives are as lazy * as possible, in order to avoid doing unnecessary work.

* *

This class provides a natural lexicographical ordering that is consistent with equals.

*/ public abstract class ByteString implements Comparable { ByteString() { // no-op: package-private to restrict access } /** * Returns a new byte string corresponding to the bytes in {@code s} as encoded * by the specified {@code charset}. * * @param s the string to take the byte view of, must not be {@code null} * @param charset the charset to use for decoding {@code s} into bytes, must not be {@code null} * @return a byte string perspective of {@code s} using {@code charset} * * @throws NullPointerException if {@code s} or {@code charset} is {@code null} * @throws IllegalArgumentException if {@code charset} does not provide encoding * capability (see {@link Charset#canEncode()}) */ public static ByteString of(String s, Charset charset) { return new StringByteString(s, charset); } /** * Returns a new byte string corresponding to the bytes in {@code s} as encoded * by the default charset ({@link Charset#defaultCharset()}). * * @param s the string to take the perspective of, must not be {@code null} * @return a byte string perspective of {@code s} * * @throws NullPointerException if {@code s} is {@code null} */ public static ByteString of(String s) { return of(s, Charset.defaultCharset()); } /** * Returns a new byte string corresponding to bytes in {@code s} by taking the * least significant byte of each character. * * @param s the string to get bytes from, must not be {@code null} * @return a byte string based on {@code s} by taking the least significant * byte of each char * * @throws NullPointerException if {@code s} is {@code null} */ public static ByteString fromRaw(String s) { byte[] bytes = new byte[s.length()]; for (int i = 0; i < bytes.length; i++) { bytes[i] = (byte) (s.charAt(i) & 0xff); } return wrap(bytes); } /** * Returns a byte string corresponding to the bytes in {@code s} as encoded by the default * charset in a form suitable for use as a string constant. * *

This method differs from {@link #of(String)} in that it may force the computation * of lazily-evaluated properties of the resulting string at instantiation time and * cache them for use at runtime.

* * @param s the string to get bytes from, must not be {@code null} * @return a byte string based on a byte perspective of {@code s} */ public static ByteString constOf(String s) { return of(s); } static ByteString wrap(byte[] bytes) { return new ArrayByteString(bytes); } /** * Returns a byte string containing a copy of the byte array {@code bytes}. * * @param bytes the byte array to use as the byte string, must not be {@code null} * @return a byte string containing a copy of {@code bytes} * * @throws NullPointerException if {@code bytes} is {@code null} */ public static ByteString copyOf(byte[] bytes) { return copyOf(bytes, 0, bytes.length); } /** * Returns a byte string containing a copy of a slice of the byte array {@code bytes} * starting at the offset {@code offset} and consisting of {@code length} bytes. * * @param bytes the byte array to use as the byte string, must not be {@code null} * @param offset offset in {@code bytes} to start reading from * @param length the number of bytes to copy from {@code bytes} * @return a byte string containing a copy of {@code bytes} * * @throws NullPointerException if {@code bytes} is {@code null} * @throws IndexOutOfBoundsException if {@code offset} or {@code length} are negative, * or if {@code (offset + length)} is greater than * {@code bytes.length} */ public static ByteString copyOf(byte[] bytes, int offset, int length) { if (offset < 0 || length < 0 || (offset + length) > bytes.length) { throw new IndexOutOfBoundsException("offset=" + offset + ", length=" + length); } return wrap(Arrays.copyOfRange(bytes, offset, length)); } /** * Returns an empty byte string. * * @return an empty byte string */ public static ByteString empty() { return ArrayByteString.EMPTY_INSTANCE; } /** * Returns {@code true} if {@code o} is a byte string containing the same bytes as * this byte string. * *

Note: this method uses the strict interpretation of byte strings as byte * sequences. It is therefore not necessarily true that for two byte strings {@code a} * and {@code b}, the result of their comparison is the same as the result of comparing * their images provided by {@link #toString()}:

*
	 *     boolean byteEq = a.equals(b);
	 *     boolean stringEq = a.toString().equals(b.toString());
	 *
	 *     // may fail!
	 *     assert (byteEq == stringEq);
	 * 
* * @param o object to evaluate for equality with, may be {@code null} * @return {@code true} iff {@code o} is a byte string with equal contents to {@code this} */ @Override public final boolean equals(Object o) { return this == o || o instanceof ByteString && this.equals((ByteString) o); } /** * Returns the hash code of this byte string. The hash code is computed using the same * function as used by {@link String#hashCode()}, interpreting the byte string's bytes * as unsigned integers. * * @return the hash code of this byte string */ @Override public abstract int hashCode(); /** * Returns an integer i that corresponds to the hash code of this byte string * if i is non-zero. When i is zero, it may or may not be the hash code * of this string. * * @return the hash code of this byte string if non-zero */ abstract int maybeHashCode(); abstract boolean equals(ByteString that); /** * Returns a new byte array containing the bytes of this byte string. * * @return a new byte array */ public abstract byte[] getBytes(); /** * Returns the byte at position {@code index}. * * @param index the position in the string * @return the byte at position {@code index} * * @throws IndexOutOfBoundsException if {@code index < 0} or {@code index >= length()} */ public abstract byte byteAt(int index); /** * Returns an iterator over the bytes in this byte string. * * @return an iterator over the bytes in this byte string */ public abstract ByteIterator byteIterator(); /** * Returns an input stream that reads the contents of this string. * * @return an input stream that reads the contents of this string */ public InputStream asInputStream() { return new ByteStringInputStream(byteIterator()); } /** * Returns the length of this byte string, i.e., the number of bytes it contains. * * @return the length of this byte string */ public abstract int length(); /** * Returns an integer i that is equal to the length of this byte string if * i is non-negative. When i is negative, the length of this byte string * is not yet known. * * @return the length of this byte string if non-negative */ abstract int maybeLength(); /** * Returns {@code true} iff this byte string is empty, i.e., if the number of bytes it * contains is 0. * * @return {@code true} iff this byte string is empty */ public abstract boolean isEmpty(); /** * Returns a substring of this byte string starting at position {@code start} (inclusive), * ending at position {@code end} (exclusive). * *

The indices refer to the byte position in the byte string.

* * @param start the first index to include in the new substring (inclusive) * @param end the smallest index immediately following the new substring in this byte string * @return a substring of this byte string ranging from {@code start} (inclusive) * to {@code end} (exclusive) * * @throws IndexOutOfBoundsException if {@code start < 0}, {@code end > length()} * or {@code start > end} */ public abstract ByteString substring(int start, int end); /** * Puts the contents of this byte string to the specified {@code buffer}. * * @param buffer the buffer to use, must not be {@code null} * * @throws NullPointerException if {@code buffer} is {@code null} */ public abstract void putTo(ByteBuffer buffer); /** * Writes the contents of this byte string to the specified {@code stream}. * * @param stream the stream to use, must not be {@code null} * * @throws IOException when I/O error happens during the write * @throws NullPointerException if {@code stream} is {@code null} */ public abstract void writeTo(OutputStream stream) throws IOException; /** * Returns the interpretation of this byte string as a Java string. * * @return the string represented by this byte string */ @Override public abstract String toString(); /** * Returns a string representation of this byte string that uses the specified * charset {@code charset} to decode characters from bytes. * * @param charset the charset to use, must not be {@code null} * @return this byte string decoded into a string using {@code charset} * * @throws NullPointerException if {@code charset} is {@code null} */ public String decode(Charset charset) { if (isEmpty()) return ""; ByteBuffer byteBuffer = ByteBuffer.allocate(length()); putTo(byteBuffer); byteBuffer.flip(); return charset.decode(byteBuffer).toString(); } /** * Returns a string represented by this byte string decoded using the default charset * of the virtual machine. * *

This is effectively equivalent to {@link #decode(Charset)} * called with {@link Charset#defaultCharset()}.

* * @return a string decoded from this byte string using the platform's default * charset */ public String decode() { return decode(Charset.defaultCharset()); } /** * Returns a string in which all characters are directly mapped to the bytes in this * byte string by treating them as unsigned integers. * *

This method is the complement of {@link #fromRaw(String)}.

* * @return a raw string based on this byte string */ public abstract String toRawString(); /** * Compares this byte string lexicographically with {@code that}. Returns a negative * integer, zero, or a positive integer if {@code this} is lesser than, equal to or greater * than {@code that} in this ordering. * *

For the purposes of this ordering, bytes are interpreted as unsigned * integers.

* *

Note: this method uses the strict interpretation of byte strings as byte * sequences. It is therefore not necessarily true that for two byte strings {@code a} * and {@code b}, the result of their comparison is the same as the result of comparing * their images provided by {@link #toString()}:

*
	 *     int byteCmp = a.compareTo(b);
	 *     int stringCmp = a.toString().compareTo(b.toString());
	 *
	 *     // may fail!
	 *     assert(Integer.signum(byteCmp) == Integer.signum(stringCmp));
	 * 
* *

This is done in order to ensure that the natural ordering provided by this * {@code compareTo()} is consistent with equals.

* * @param that byte string to compare to, must not be {@code null} * @return a negative, zero, or positive integer if {@code this} is lexicographically * lesser than, equal to or greater than {@code that} * * @throws NullPointerException if {@code that} is {@code null} */ @Override public int compareTo(ByteString that) { Objects.requireNonNull(that); ByteIterator thisIterator = this.byteIterator(); ByteIterator thatIterator = that.byteIterator(); while (thisIterator.hasNext() && thatIterator.hasNext()) { int thisByte = thisIterator.nextByte() & 0xff; int thatByte = thatIterator.nextByte() & 0xff; int diff = thisByte - thatByte; if (diff != 0) return diff; } return thisIterator.hasNext() ? 1 // !thatIterator.hasNext() => that is shorter : thatIterator.hasNext() ? -1 // this is shorter : 0; // equal length } /** * Returns a byte string formed by a concatenating this byte string with the byte string * {@code other}. * *

Note: this method uses the non-strict interpretation and therefore * may (but might not necessarily) preserve unmappable and malformed characters * occurring in the two strings.

* * @param other the byte string to concatenate this byte string with, must not be {@code null} * @return this byte string concatenated with {@code other} * * @throws NullPointerException if {@code other} is {@code null} */ public ByteString concat(ByteString other) { if (other.isEmpty()) return this; else if (this.isEmpty()) return other; byte[] thisBytes = this.getBytes(); byte[] otherBytes = other.getBytes(); byte[] result = new byte[thisBytes.length + otherBytes.length]; System.arraycopy(thisBytes, 0, result, 0, thisBytes.length); System.arraycopy(otherBytes, 0, result, thisBytes.length, otherBytes.length); return ByteString.wrap(result); } /** * Returns a byte string formed by concatenating this byte string with the string * {@code other}. * *

This is a convenience method equivalent to

*
	 *     concat(ByteString.of(other))
	 * 
* * @param other the string to concatenate with, must not be {@code null} * @return this byte string concatenated with {@code other} * * @throws NullPointerException if {@code other} is {@code null} */ public ByteString concat(String other) { return this.concat(ByteString.of(other)); } // TODO: add startsWith(ByteString) /** * Returns {@code true} if the first byte of this byte string is {@code b}. * * @param b the byte to compare the first byte of this byte string to * @return {@code true} if this byte string starts with {@code b} */ public abstract boolean startsWith(byte b); // TODO: add contains(ByteString) /** * Returns {@code true} if the byte string contains the byte {@code b}. * * @param b the byte to search for in the byte string * @return {@code true} if this byte string contains {@code b} */ public boolean contains(byte b) { ByteIterator it = byteIterator(); while (it.hasNext()) { if (b == it.nextByte()) return true; } return false; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy