All downloads are FREE. The search and download functionality uses the official Maven repository.

convex.core.data.AString Maven / Gradle / Ivy

The newest version!
package convex.core.data;

import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;

import convex.core.data.prim.CVMChar;
import convex.core.data.type.AType;
import convex.core.data.type.Types;
import convex.core.data.util.BlobBuilder;
import convex.core.util.Utils;

/**
 * Abstract base Class representing a CVM String. 
 * 
 * CVM Strings are UTF-8 byte strings with an immutable, fixed count in bytes. 
 * 
 * CVM Strings are NOT enforced to be valid UTF-8, for reasons of performance, simplicity and
 * consistent behaviour (e.g. in conversions to and from Blobs). It is up to clients to decide 
 * how to represent invalid UTF-8 if necessary.
 */
public abstract class AString extends ABlobLike {

	/** Length of this String in bytes, as returned by {@link #count()}. */
	protected long length;
	
	/**
	 * Stores the given length for this String.
	 * @param length Length of the String in bytes
	 */
	protected AString(long length) {
		this.length=length;
	}
	
	/**
	 * Gets the CVM type of this value.
	 * @return Always {@link Types#STRING}
	 */
	@Override
	public AType getType() {
		return Types.STRING;
	}

	/**
	 * Appends this String to the given builder as a double-quoted literal, with the
	 * content written by {@link #printEscaped(BlobBuilder, long, long)}.
	 * 
	 * If the up-front {@code sb.check(...)} test (byte count plus the two quote characters)
	 * fails, the String is not printed in full: when there is still room below the limit,
	 * an opening quote and a raw, unescaped leading slice are appended as a best effort,
	 * and the method returns false.
	 * 
	 * @param sb BlobBuilder to append to
	 * @param limit Limit handed to {@code sb.check(...)} (presumably the maximum builder size in bytes - confirm against BlobBuilder)
	 * @return Result of {@code sb.check(limit)} after a full print, false if the output was truncated
	 */
	@Override
	public boolean print(BlobBuilder sb, long limit) {
		final long byteCount=count();
		final boolean fits=sb.check(limit-(byteCount+2));
		if (fits) {
			// Full print: opening quote, escaped content, closing quote
			sb.append('"');
			printEscaped(sb,0,byteCount);
			sb.append('"');
			return sb.check(limit);
		}
		
		// Can't print the full string, so emit as much as the remaining space allows
		final long remaining=limit-sb.count();
		if (remaining>0) {
			sb.append('"');
			// One position of the remaining space is taken by the opening quote
			sb.append(slice(0,remaining-1));
		}
		return false;
	}
	
	/**
	 * Gets the length of this String.
	 * @return The stored {@code length} field (length in bytes)
	 */
	@Override
	public long count() {
		return this.length;
	}
	
	/**
	 * Prints this string as escaped UTF-8.
	 * 
	 * {@link #print(BlobBuilder, long)} calls this with {@code start} 0 and {@code end}
	 * equal to {@link #count()}, after appending the opening quote itself.
	 * 
	 * @param sb BlobBuilder to append the escaped content to
	 * @param start Start position of the range to print (presumably a byte position, inclusive - confirm in implementations)
	 * @param end End position of the range to print (presumably a byte position, exclusive - confirm in implementations)
	 */
	protected abstract void printEscaped(BlobBuilder sb, long start, long end);
	
	/**
	 * Returns the singleton empty String
	 * @return {@code StringShort.EMPTY}
	 */
	@Override
	public final StringShort empty() {
		return StringShort.EMPTY;
	}

	/**
	 * Gets the Unicode character at position i, or -1 if not valid.
	 * 
	 * Reads the four bytes at the position with {@link #intAt(long)} and converts them
	 * with {@code CVMChar.codepointFromUTFInt}.
	 * 
	 * @param i Index into String (byte position)
	 * @return Unicode code point, or -1 if not a valid code point at this position
	 */
	public final int charAt(long i) {
		return CVMChar.codepointFromUTFInt(intAt(i));
	}
	
	/**
	 * Gets the 32-bit integer formed by the four bytes starting at the given position,
	 * with the first byte as the most significant. Bytes are read through
	 * {@link #byteAt(long)} without bounds checks, so positions outside the String are
	 * filled with the value byteAt returns there (documented as 0xff, which is invalid UTF-8).
	 * The main purpose of this function is to enable fast peeking at UTF-8 characters.
	 * 
	 * @param index Index into String (byte position)
	 * @return Raw integer value
	 */
	public int intAt(long index) {
		int packed=0;
		for (int offset=0; offset<4; offset++) {
			// Shift earlier bytes up and place the next byte in the low 8 bits
			packed=(packed<<8)|(byteAt(index+offset)&0xff);
		}
		return packed;
	}
	
	/**
	 * Gets a byte at the specified index. Returns -1 (0xff) if outside String.
	 * 
	 * {@link #intAt(long)} calls this for positions that may lie beyond the end of the
	 * String and relies on that padding value rather than on an exception.
	 * 
	 * @param i Index into String (byte position)
	 * @return Raw byte value, or -1 (0xff) if the index is outside the String
	 */
	@Override
	public abstract byte byteAt(long i);
	
	/**
	 * Gets the Character at the specified point in the String, or null 
	 * if there is no valid Character at this position.
	 * 
	 * @param i Index into String (byte position)
	 * @return CVMChar instance, or null for invalid UTF-8 or any character out of the string bounds
	 */
	@Override
	public CVMChar get(long i) {
		// Pass the long index straight through: charAt accepts a long, and narrowing it
		// to int would wrap positions beyond Integer.MAX_VALUE onto a different position
		return CVMChar.create(charAt(i));
	}
	
	/**
	 * Gets a Ref to the Character at the specified position, obtained from {@link #get(long)}.
	 * 
	 * Note that get is documented to return null when there is no valid Character at the
	 * position; in that case this method fails with a NullPointerException.
	 * 
	 * @param i Index into String (byte position)
	 * @return Ref of the Character at the given position
	 */
	@Override
	public Ref getElementRef(long i) {
		final CVMChar ch=get(i);
		return ch.getRef();
	}
	
	/**
	 * Copies the bytes of this String into a destination array by delegating to the
	 * Blob returned from {@code toBlob()}.
	 * 
	 * @param dest Destination array
	 * @param destOffset Offset into the destination array at which to start writing
	 * @return Value returned by the Blob's getBytes (presumably the offset after the written bytes - confirm against the Blob implementation)
	 */
	@Override
	public int getBytes(byte[] dest, int destOffset) {
		final int result=toBlob().getBytes(dest, destOffset);
		return result;
	}
	
	/**
	 * Converts this CVM String to a Java String by decoding the bytes of {@code toBlob()}
	 * with the decoder supplied by {@code Strings.getDecoder()}.
	 * 
	 * Decoding runs through a working CharBuffer of at most 4096 chars, so longer Strings
	 * are decoded in several passes. How invalid UTF-8 is rendered depends on how that
	 * decoder is configured, which is not visible here.
	 * 
	 * The byte count is narrowed with {@code Utils.checkedInt}, so Strings longer than an
	 * int can index are presumably rejected there (confirm against Utils).
	 * 
	 * @return Java String with the decoded content
	 */
	@Override 
	public String toString() {
		final int byteCount=Utils.checkedInt(count());
		final ByteBuffer input=toBlob().toByteBuffer();
		
		// Guess a sensible working size: never more than 4096 chars, never more than the byte count
		final int capacity=Math.min(4096, byteCount);
		final CharBuffer chunk=CharBuffer.allocate(capacity);
		final CharsetDecoder decoder=Strings.getDecoder();
		final StringBuilder out=new StringBuilder(capacity);
		
		boolean more=input.hasRemaining();
		while (more) {
			final CoderResult result=decoder.decode(input, chunk, false);
			// Move whatever was decoded in this pass into the output, then reuse the buffer
			chunk.flip();
			out.append(chunk.toString());
			chunk.clear();
			// Underflow means the decoder cannot make further progress without end-of-input
			more=!result.isUnderflow() && input.hasRemaining();
		}
		
		// Final pass marks end of input so any leftover bytes are dealt with by the decoder
		decoder.decode(input, chunk, true);
		chunk.flip();
		out.append(chunk.toString());
		chunk.clear();
		
		return out.toString();
	}
	
	/**
	 * Computes a hex match against another value by delegating to the Blob returned
	 * from {@code toBlob()}.
	 * 
	 * @param b Value to match against
	 * @param start Start position passed through to the Blob's hexMatch (presumably in hex digits - confirm against the Blob implementation)
	 * @param length Length passed through to the Blob's hexMatch (presumably in hex digits - confirm against the Blob implementation)
	 * @return Value returned by the Blob's hexMatch
	 */
	@Override
	public long hexMatch(ABlobLike b, long start, long length) {
		final long matched=toBlob().hexMatch(b, start, length);
		return matched;
	}
	
	@Override
	public AString toCVMString(long limit) {
		if (limit split(CVMChar c) {
		long start=0;
		AVector acc=Vectors.empty();
		final long n=count();
		int cp=c.getCodePoint();
		int utfLength=CVMChar.utfLength(cp);
		for (int pos=0; pos




© 2015 - 2024 Weber Informatics LLC | Privacy Policy