org.exist.util.CompressedWhitespace Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of exist-core Show documentation
Show all versions of exist-core Show documentation
eXist-db NoSQL Database Core
/*
* eXist-db Open Source Native XML Database
* Copyright (C) 2001 The eXist-db Authors
*
* [email protected]
* http://www.exist-db.org
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this library; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
package org.exist.util;
import java.io.Writer;
/**
* This class provides a compressed representation of a sequence of whitespace characters. The representation
* is a sequence of bytes: in each byte the top two bits indicate which whitespace character is used
* (x9, xA, xD, or x20) and the bottom six bits indicate the number of such characters. A zero byte is a filler.
* We don't compress the sequence if it would occupy more than 8 bytes, because that's the space we've got available
* in the TinyTree arrays.
*/
public class CompressedWhitespace implements CharSequence {
// Whitespace characters selectable by the top two bits of a packed byte, in code order:
// tab (0x09), line feed (0x0A), carriage return (0x0D) and space (0x20).
// Declared final so the lookup table reference cannot be reassigned.
private static final char[] WHITE_CHARS = {0x09, 0x0A, 0x0D, 0x20};
// The packed representation: one byte per run of identical whitespace characters,
// read from the most significant byte downwards; a zero byte is filler.
private long value;
/**
 * Wraps an already packed whitespace value.
 *
 * @param compressedValue the packed representation, one run per byte
 */
public CompressedWhitespace(long compressedValue) {
    this.value = compressedValue;
}
/**
* Attempt to compress a CharSequence
* @param in the CharSequence to be compressed
* @return the compressed sequence if it can be compressed; or the original CharSequence otherwise
*/
public static CharSequence compress(CharSequence in) {
final int inlen = in.length();
if (inlen == 0) {
return in;
}
int runlength = 1;
int outlength = 0;
for (int i=0; i= 0) {
if (i == inlen-1 || c != in.charAt(i+1) || runlength == 63) {
runlength = 1;
outlength++;
if (outlength > 8) {
return in;
}
} else {
runlength++;
}
} else {
return in;
}
}
int ix = 0;
runlength = 1;
final int[] out = new int[outlength];
for (int i=0; i=0; s-=8) {
final byte b = (byte)((val>>>s) & 0xff);
if (b == 0) {
break;
}
final char c = WHITE_CHARS[b>>>6 & 0x3];
final int len = (b & 0x3f);
for (int j=0; j=0; s-=8) {
int c = (int)((val>>>s) & 0x3f);
if (c == 0) {
break;
}
count += c;
}
return count;
}
/**
* Returns the {@code char} value at the specified index. An index ranges from zero
* to {@code length() - 1}. The first {@code char} value of the sequence is at
* index zero, the next at index one, and so on, as for array
* indexing.
*
* If the {@code char} value specified by the index is a
* surrogate, the surrogate
* value is returned.
*
* @param index the index of the {@code char} value to be returned
* @return the specified {@code char} value
* @throws IndexOutOfBoundsException if the {@code index} argument is negative or not less than
* {@code length()}
*/
public char charAt(int index) {
    // Reject negative indexes up front: otherwise the first non-empty run would
    // satisfy "count > index" below and a character would be returned instead of
    // the exception required for an out-of-range index.
    if (index < 0) {
        throw new IndexOutOfBoundsException(index+"");
    }
    int count = 0;
    final long val = value;
    // Walk the eight packed bytes from the most significant to the least significant.
    for (int s=56; s>=0; s-=8) {
        final byte b = (byte)((val>>>s) & 0xff);
        if (b == 0) {
            // A zero byte is filler; no further runs follow.
            break;
        }
        // The bottom six bits hold the length of this run.
        count += (b & 0x3f);
        if (count > index) {
            // The requested index falls inside this run; the top two bits
            // select which whitespace character the run consists of.
            return WHITE_CHARS[b>>>6 & 0x3];
        }
    }
    // The index is at or beyond the total number of characters.
    throw new IndexOutOfBoundsException(index+"");
}
/**
* Returns a new {@code CharSequence} that is a subsequence of this sequence.
* The subsequence starts with the {@code char} value at the specified index and
* ends with the {@code char} value at index {@code end - 1}. The length
* (in {@code char}s) of the
* returned sequence is {@code end - start}, so if {@code start == end}
* then an empty sequence is returned.
*
* @param start the start index, inclusive
* @param end the end index, exclusive
* @return the specified subsequence
* @throws IndexOutOfBoundsException if {@code start} or {@code end} are negative,
* if {@code end} is greater than {@code length()},
* or if {@code start} is greater than {@code end}
*/
public CharSequence subSequence(int start, int end) {
    // Expand the packed value first, then let the expanded sequence do the slicing.
    final CharSequence expanded = uncompress(null);
    return expanded.subSequence(start, end);
}
/**
* Indicates whether some other object is "equal to" this one.
*/
public boolean equals(Object obj) {
    if (!(obj instanceof CompressedWhitespace)) {
        // Not a compressed value: hand the comparison to the expanded form.
        return uncompress(null).equals(obj);
    }
    // Both sides are compressed, so compare the packed representations directly.
    final CompressedWhitespace other = (CompressedWhitespace) obj;
    return value == other.value;
}
/**
* Returns a hash code value for the object.
*/
public int hashCode() {
    // The hash code is whatever the expanded form reports.
    final Object expanded = uncompress(null);
    return expanded.hashCode();
}
/**
* Returns a string representation of the object.
*/
public String toString() {
    // The string form is produced by the expanded representation.
    final Object expanded = uncompress(null);
    return expanded.toString();
}
/**
* Write the value to a Writer
*
* @param writer the writer
*
* @throws java.io.IOException if an error occurs whilst writing
*/
public void write(Writer writer) throws java.io.IOException {
final long val = value;
for (int s=56; s>=0; s-=8) {
final byte b = (byte)((val>>>s) & 0xff);
if (b == 0) {
break;
}
final char c = WHITE_CHARS[b>>>6 & 0x3];
final int len = (b & 0x3f);
for (int j=0; j=0; s-=8) {
final byte b = (byte)((val>>>s) & 0xff);
if (b == 0) {
break;
}
final char c = WHITE_CHARS[b>>>6 & 0x3];
final int len = (b & 0x3f);
if (specialChars[c]) {
String e = "";
if (c=='\n') {
e = "
";
} else if (c=='\r') {
e = "
";
} else if (c=='\t') {
e = " ";
}
for (int j=0; j
© 2015 - 2025 Weber Informatics LLC | Privacy Policy