convex.core.data.AString Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of convex-core Show documentation
Show all versions of convex-core Show documentation
Convex core libraries and common utilities
The newest version!
package convex.core.data;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import convex.core.data.prim.CVMChar;
import convex.core.data.type.AType;
import convex.core.data.type.Types;
import convex.core.data.util.BlobBuilder;
import convex.core.util.Utils;
/**
* Abstract base Class representing a CVM String.
*
* CVM Strings are UTF-8 byte strings with an immutable, fixed count in bytes.
*
* CVM Strings are NOT enforced to be valid UTF-8, for reasons of performance, simplicity and
* consistent behaviour (e.g. in conversions to and from Blobs). It is up to clients to decide
* how to represent invalid UTF-8 if necessary.
*/
public abstract class AString extends ABlobLike {
/**
* Length of this String, as returned by count(). The class documentation
* describes CVM String lengths as a count in bytes.
*/
protected long length;
/**
* Creates a String recording the given length.
* @param length Value stored in the length field; no validation is performed here
*/
protected AString(long length) {
this.length=length;
}
/**
* Gets the type of this value.
* @return Always Types.STRING
*/
@Override
public AType getType() {
return Types.STRING;
}
/**
 * Prints this String to the given builder as a double-quoted literal.
 *
 * If the builder's check against the limit (less the content length and the two
 * quote characters) succeeds, the escaped content is written between quotes.
 * Otherwise a best-effort prefix is written: an opening quote followed by a raw
 * (unescaped) slice of this String, with no closing quote.
 *
 * @param sb Builder to append output to
 * @param limit Limit passed to the builder's check
 * @return false if the full String was not printed, otherwise the result of
 *         checking the builder against the limit after printing
 */
@Override
public boolean print(BlobBuilder sb, long limit) {
    final long byteCount = count();

    // Reserve room for the content plus the opening and closing quotes
    if (sb.check(limit - (byteCount + 2))) {
        sb.append('"');
        printEscaped(sb, 0, byteCount);
        sb.append('"');
        return sb.check(limit);
    }

    // Can't print full string, but attempt up to limit
    final long remaining = limit - sb.count();
    if (remaining > 0) {
        sb.append('"');
        // One unit of the remaining budget has been spent on the opening quote
        sb.append(slice(0, remaining - 1));
    }
    return false;
}
/**
* Gets the length of this String.
* @return The value of the length field (described by the class documentation as a count in bytes)
*/
@Override
public long count() {
return length;
}
/**
* Prints this string as escaped UTF-8
* @param sb Builder to append the escaped output to
* @param start Start position of the range to print; print() passes 0
* @param end End position of the range to print; print() passes count()
* (presumably byte positions with an exclusive end - confirm against implementations)
*/
protected abstract void printEscaped(BlobBuilder sb, long start, long end);
/**
* Returns the singleton empty String
* @return StringShort.EMPTY
*/
@Override
public final StringShort empty() {
return StringShort.EMPTY;
}
/**
 * Gets the Unicode character at position i, or -1 if not valid.
 *
 * Reads four bytes starting at the position via intAt and converts them with
 * CVMChar.codepointFromUTFInt.
 *
 * @param i Index into String (byte position)
 * @return Unicode code point, or -1 if not a valid code point at this position
 */
public final int charAt(long i) {
    return CVMChar.codepointFromUTFInt(intAt(i));
}
/**
 * Gets a 32-bit integer built from the four bytes starting at the given position,
 * with the first byte in the most significant position. Bytes read past the end
 * of the String are whatever byteAt reports there (documented as 0xff, which is
 * invalid UTF-8). The main purpose of this function is to enable fast peeking at
 * UTF-8 characters.
 *
 * @param index Index into String (byte position)
 * @return Raw integer value
 */
public int intAt(long index) {
    int packed = 0;
    for (int offset = 0; offset < 4; offset++) {
        // Shift previous bytes up and place the next unsigned byte in the low 8 bits
        packed = (packed << 8) | (byteAt(index + offset) & 0xff);
    }
    return packed;
}
/**
* Gets a byte at the specified index. Returns -1 (0xff) if outside String.
* intAt relies on this out-of-range value when it reads past the end of the String.
* @param i Index into String (byte position)
* @return Raw byte value, or -1 (0xff) if the index is outside the String
*/
@Override
public abstract byte byteAt(long i);
/**
 * Gets the Character at the specified point in the String, or null
 * if there is no valid Character at this position.
 *
 * @param i Index into String (byte position)
 * @return CVMChar instance, or null for invalid UTF-8 or any character out of the string bounds
 */
@Override
public CVMChar get(long i) {
    // Pass the long index through unchanged: charAt accepts a long, and narrowing
    // to int would wrap positions beyond Integer.MAX_VALUE onto a different index.
    return CVMChar.create(charAt(i));
}
/**
 * Gets a Ref to the Character at the given position, obtained from get(i).
 *
 * NOTE(review): get(i) is documented to return null when there is no valid
 * Character at the position, in which case this method throws a
 * NullPointerException - confirm that this is the intended contract.
 *
 * @param i Index into String (byte position)
 * @return Ref of the Character at the position
 */
@Override
public Ref getElementRef(long i) {
    CVMChar element = get(i);
    return element.getRef();
}
/**
* Delegates to getBytes on the Blob returned by toBlob().
* @param dest Destination byte array, passed through to the Blob
* @param destOffset Offset into the destination array, passed through to the Blob
* @return The value returned by the Blob's getBytes (presumably the offset after the written bytes - confirm)
*/
@Override
public int getBytes(byte[] dest, int destOffset) {
return toBlob().getBytes(dest, destOffset);
}
/**
 * Converts this CVM String to a Java String by decoding its bytes in chunks
 * with the decoder supplied by Strings.getDecoder().
 *
 * The byte count is first converted with Utils.checkedInt (presumably rejecting
 * counts that do not fit in an int - confirm). How invalid byte sequences are
 * rendered depends on how that decoder is configured, which is not visible here.
 *
 * @return Decoded Java String
 */
@Override
public String toString() {
    final int byteCount = Utils.checkedInt(count());
    final ByteBuffer input = toBlob().toByteBuffer();
    final int chunkSize = Math.min(4096, byteCount); // Guess sensible size for CharBuffer
    final CharBuffer chunk = CharBuffer.allocate(chunkSize);
    final CharsetDecoder decoder = Strings.getDecoder();
    final StringBuilder out = new StringBuilder(chunkSize);

    // Decode chunk by chunk; stop once the decoder has consumed all it can
    // (underflow) or there is no input left
    boolean underflow = false;
    while (!underflow && input.hasRemaining()) {
        final CoderResult result = decoder.decode(input, chunk, false);
        chunk.flip();
        out.append(chunk);
        chunk.clear();
        underflow = (result == CoderResult.UNDERFLOW);
    }

    // Mark end of input so any trailing incomplete sequence is handled
    decoder.decode(input, chunk, true);
    chunk.flip();
    out.append(chunk);
    chunk.clear();
    return out.toString();
}
@Override
public long hexMatch(ABlobLike> b, long start, long length) {
return toBlob().hexMatch(b, start, length);
}
@Override
public AString toCVMString(long limit) {
if (limit split(CVMChar c) {
long start=0;
AVector acc=Vectors.empty();
final long n=count();
int cp=c.getCodePoint();
int utfLength=CVMChar.utfLength(cp);
for (int pos=0; pos