
org.metafacture.biblio.iso2709.Iso646ByteBuffer Maven / Gradle / Ivy
Show all versions of metafacture-biblio Show documentation
/*
* Copyright 2016 Christoph Böhme
*
* Licensed under the Apache License, Version 2.0 the "License";
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.metafacture.biblio.iso2709;
import org.metafacture.framework.FormatException;
import java.nio.charset.Charset;
/**
* Provides methods for reading and writing strings and integers in a byte
* array.
*
* All methods except for the {#stringAt(int, int, Charset)} method assume
* that the bytes in the buffer represent characters encoded with the
* encoding defined by ISO 646 (which is mostly equivalent with 7-bit ASCII).
*
* @author Christoph Böhme
*/
final class Iso646ByteBuffer {
private static final int RADIX = 10;
private final byte[] byteArray;
private int writePosition;
Iso646ByteBuffer(final int size) {
this(new byte[size]);
}
Iso646ByteBuffer(final byte[] byteArray) {
assert byteArray != null;
this.byteArray = byteArray;
}
byte[] getByteArray() {
return byteArray;
}
int getLength() {
return byteArray.length;
}
int getFreeSpace() {
return byteArray.length - writePosition;
}
void setWritePosition(final int writePosition) {
assert 0 <= writePosition && writePosition <= byteArray.length;
this.writePosition = writePosition;
}
int getWritePosition() {
return writePosition;
}
/**
* Returns the distance from {@code fromIndex} to the next occurrence of
* {@code byteValue}. If the byte at {@code fromIndex} is equal to {@code
* byteValue} zero is returned. If there are no matching bytes between
* {@code fromIndex} and the end of the buffer then the distance to the end
* of the buffer is returned.
*
* @param byteValue byte to search for.
* @param fromIndex the position in the buffer from which to start searching.
* @return the distance in bytes to the next byte with the given value or if
* none is found to the end of the buffer.
*/
int distanceTo(final byte byteValue, final int fromIndex) {
assert 0 <= fromIndex && fromIndex < byteArray.length;
int index = fromIndex;
for (; index < byteArray.length; ++index) {
if (byteValue == byteArray[index]) {
break;
}
}
return index - fromIndex;
}
/**
* Returns the distance from {@code fromIndex} to the next occurrence of one
* of the bytes in {@code bytes}. If the byte at {@code fromIndex} is in
* {@code bytes} zero is returned. If there are no matching bytes between
* {@code fromIndex} and the end of the buffer then the distance to the
* end of the buffer is returned.
*
* @param bytes bytes to search for.
* @param fromIndex the position in the buffer from which to start searching.
* @return the distance in bytes to the next byte with the given value or if
* none is found to the end of the buffer.
*/
int distanceTo(final byte[] bytes, final int fromIndex) {
assert 0 <= fromIndex && fromIndex < byteArray.length;
int index = fromIndex;
for (; index < byteArray.length; ++index) {
if (containsByte(bytes, byteArray[index])) {
break;
}
}
return index - fromIndex;
}
private boolean containsByte(final byte[] haystack, final byte needle) {
for (final byte byteValue : haystack) {
if (byteValue == needle) {
return true;
}
}
return false;
}
/**
* Returns a string containing the characters in the specified part of the
* record.
*
* @param fromIndex index of the first byte of the string.
* @param length number of bytes to include in the string. If zero an empty
* string is returned.
* @param charset used for decoding the byte sequence into characters. It is
* callers responsibility to make sure that the selected byte
* range contains a valid byte sequence when working with
* multi-byte encodings such as UTF-8.
* @return the string represented by the bytes in the given range
*/
String stringAt(final int fromIndex, final int length,
final Charset charset) {
return new String(byteArray, fromIndex, length, charset);
}
/**
* Returns the character value at {@code index}.
*
* @param index position of the byte in the buffer
* @return the character value of the byte at the given index
*/
char charAt(final int index) {
return byteToChar(index);
}
char[] charsAt(final int fromIndex, final int length) {
assert length >= 0;
assert 0 <= fromIndex && (fromIndex + length) <= byteArray.length;
final char[] chars = new char[length];
for (int i = 0; i < length; ++i) {
chars[i] = byteToChar(fromIndex + i);
}
return chars;
}
private char byteToChar(final int index) {
final byte value = byteArray[index];
if (value < 0) {
throw new FormatException("Invalid character code found at index " +
index);
}
return (char) value;
}
byte byteAt(final int index) {
return byteArray[index];
}
/**
* Parses the character at {@code index} into an integer.
*
* @param index position of the byte to convert into an integer
* @return the integer value represented by the character at the given
* position.
* @throws NumberFormatException if a non-digit character is encountered.
*/
int parseIntAt(final int index) {
return byteToDigit(index);
}
/**
* Parses characters in the specified part of the byte buffer into an integer
* This is a simplified version of {@link Integer#parseInt(String)}. It
* operates directly on the byte data and works only for positive numbers and
* a radix of 10.
*
* @param fromIndex position fo the byte range to convert into an integer
* @param length number of bytes to include in the range
* @return the integer value represented by the characters at the given
* range in the buffer.
* @throws NumberFormatException if a non-digit character was encountered or
* an overflow occurred.
*/
int parseIntAt(final int fromIndex, final int length) {
assert length >= 0;
assert 0 <= fromIndex && (fromIndex + length) <= byteArray.length;
final int multiplyMax = Integer.MAX_VALUE / RADIX;
int result = 0;
for (int i = 0; i < length; ++i) {
if (result > multiplyMax) {
throwNumberIsToLargeException(fromIndex);
}
result *= RADIX;
final int digit = byteToDigit(fromIndex + i);
if (result > Integer.MAX_VALUE - digit) {
throwNumberIsToLargeException(fromIndex);
}
result += digit;
}
return result;
}
private int byteToDigit(final int index) {
final byte digit = byteArray[index];
if (digit < Iso646Constants.ZERO || Iso646Constants.NINE < digit) {
throw new NumberFormatException("digit expected at index " + index +
" but got 0x" + Integer.toHexString(digit));
}
return digit - Iso646Constants.ZERO;
}
private void throwNumberIsToLargeException(final int index) {
throw new NumberFormatException("number starting at index " + index +
" is too large");
}
void writeChar(final char charValue) {
assert charValue <= Iso646Constants.MAX_CHAR_CODE;
byteArray[writePosition] = (byte) charValue;
writePosition += 1;
}
void writeChars(final char[] chars) {
assert (writePosition + chars.length) <= byteArray.length;
for (final char charValue : chars) {
writeChar(charValue);
}
}
void writeByte(final byte value) {
byteArray[writePosition] = value;
writePosition += 1;
}
void writeBytes(final byte[] array) {
System.arraycopy(array, 0, byteArray, writePosition, array.length);
writePosition += array.length;
}
void writeInt(final int value) {
assert 0 <= value && value < RADIX;
byteArray[writePosition] = (byte) (Iso646Constants.ZERO + value);
writePosition += 1;
}
void writeInt(final int value, final int digits) {
assert value >= 0;
assert digits >= 0;
assert (writePosition + digits) <= byteArray.length;
int head = value;
for (int i = writePosition + digits - 1; i >= writePosition; --i) {
byteArray[i] = (byte) (Iso646Constants.ZERO + head % RADIX);
head /= RADIX;
}
assert head == 0;
writePosition += digits;
}
@Override
public String toString() {
return stringAt(0, byteArray.length, Iso646Constants.CHARSET);
}
}