All Downloads are FREE. Search and download functionalities are using the official Maven repository.

convex.core.data.ACell Maven / Gradle / Ivy

The newest version!
package convex.core.data;

import convex.core.Constants;
import convex.core.data.type.AType;
import convex.core.data.type.Types;
import convex.core.data.util.BlobBuilder;
import convex.core.exceptions.InvalidDataException;
import convex.core.util.Utils;

/**
 * Abstract base class for Cells.
 * 
 * Cells may contain Refs to other Cells, which can be tested with getRefCount()
 * 
 * All data objects intended for on-chain usage / serialisation should extend this. 
 * 
 * "It is better to have 100 functions operate on one data structure than 
 * to have 10 functions operate on 10 data structures." - Alan Perlis
 */
public abstract class ACell extends AObject implements IWriteable, IValidated {
	/**
	 * We cache the computed memorySize. May be 0 for embedded objects
	 * -1 is initial value for when size is not calculated
	 */
	protected long memorySize=-1;
	
	/**
	 * Cached Ref. This is useful to manage persistence. Also cached Ref MUST refer to canonical value
	 */
	protected Ref cachedRef=null;

	@Override
	public void validate() throws InvalidDataException {
		validateCell();
	}
	
	/**
	 * Validates the local structure and invariants of this cell. Called by validate() super implementation.
	 * 
	 * Should validate directly contained data, but should not validate all other structure of this cell. 
	 * 
	 * In particular, should not traverse potentially missing child Refs.
	 * 
	 * @throws InvalidDataException  If the Cell is invalid
	 */
	public abstract void validateCell() throws InvalidDataException;
	
	/**
	 * Hash of data Encoding of this cell, equivalent to the Value ID. Calling this method
	 * may force hash computation if needed.
	 * 
	 * @return The Hash of this cell's encoding.
	 */
	public final Hash getHash() {
		// final method to avoid any mistakes.
		return getEncoding().getContentHash();
	}
	
	/**
	 * Gets the tag byte for this cell. The tag byte is always equal to the 
	 * first byte of the Cell's canonical Encoding, and is sufficient to distinguish 
	 * how to read the rest of the encoding.
	 * 
	 * @return Tag byte for this Cell
	 */
	public abstract byte getTag();
	
	/**
	 * Gets the Hash if already computed, or null if not yet available
	 * @return Cached Hash value, or null if not available
	 */
	protected final Hash cachedHash() {
		Ref ref=cachedRef;
		if (ref!=null) {
			Hash h=ref.cachedHash();
			if (h!=null) return h;
		}
		Blob enc=encoding;
		if (enc==null) return null;
		return enc.contentHash;
	}

	/**
	 * Gets the Java hashCode for this cell. Must be consistent with equals. 
	 * 
	 * Default is the hashCode of the Encoding blob, since this is consistent with
	 * encoding-based equality. However, different Types may provide more efficient hashcodes provided that
	 * the usual invariants are preserved
	 * 
	 * @return integer hash code.
	 */
	@Override
	public int hashCode() {
		return getEncoding().hashCode();
	}
	
	@Override
	public boolean equals(Object a) {
		if (a==this) return true; // Fast path, avoids cast
		if (!(a instanceof ACell)) return false; // Handles null
		return equals((ACell)a);
	}
	
	/**
	 * Gets the canonical encoded byte representation of this cell.
	 * 
	 * @return A Blob representing this cell in encoded form
	 */
	public final Blob getEncoding() {
		if (encoding!=null) return encoding;
		encoding=createEncoding();
		return encoding;
	}
	
	/**
	 * Gets the canonical representation of this Cell. 
	 * 
	 * O(1) if canonical representation is already generated, may be O(n) otherwise.
	 * 
	 * @return Canonical version of this cell in encoded form
	 */
	@SuppressWarnings("unchecked")
	public final  T getCanonical() {
		if (isCanonical()) return (T) this;
		Ref ref=getRef().ensureCanonical();
		if (cachedRef!=ref) cachedRef=ref;
		T c= (T) ref.getValue();
		return c;
	}
	
	/**
	 * Checks for equality with another Cell. In general, Cells are considered equal
	 * if they have the same canonical representation, i.e. an identical encoding with the same hash value.
	 * 
	 * Subclasses SHOULD override this if they have a more efficient equals implementation. 
	 * 
	 * MUST NOT require reads from Store.
	 * 
	 * @param a Cell to compare with. May be null.
	 * @return True if this cell is equal to the other object
	 */
	public abstract boolean equals(ACell a);
	
	/**
	 * Generic Cell equality, use only if better implementation not available.
	 * @param a First cell to compare
	 * @param b Second cell to compare
	 * @return True if cells are equal, false otherwise
	 */
	protected static boolean genericEquals(ACell a, ACell b) {
		if (a==b) return true; // important optimisation for e.g. hashmap equality
		if ((b==null)||(a==null)) return false; // no non-null Cell is equal to null
		if (!(a.getTag()==b.getTag())) return false; // Different tags never equal
		
		// Check hashes for equality if they exist
		Hash ha=a.cachedHash();
		if (ha!=null) {
			Hash hb=b.cachedHash();
			if (hb!=null) return ha.equals(hb);
		}

		// Else default to checking encodings
		// We would need to get encodings anyway to compute a Hash....
		return a.getEncoding().equals(b.getEncoding());
	}
	
	/**
	 * Writes this Cell's encoding to a byte array, including a tag byte which will be written first.
	 * 
	 * Cell must be canonical, or else an error may occur.
	 *
	 * @param bs A byte array to which to write the encoding
	 * @param pos The offset into the byte array
	 * 
	 * @return New position after writing
	 */
	public abstract int encode(byte[] bs, int pos);
	
	/**
	 * Writes this Cell's encoding to a byte array, excluding the tag byte.
	 *
	 * @param bs A byte array to which to write the encoding
	 * @param pos The offset into the byte array
	 * @return New position after writing
	 */
	protected abstract int encodeRaw(byte[] bs, int pos);
	
	/**
	 * Creates the encoding for this cell. Cell must be canonical, or else an error may occur.
	 * 
	 * The encoding itself is a raw Blob, which may be non-canonical. 
	 */
	@Override
	protected final Blob createEncoding() {
		int capacity=estimatedEncodingSize();
		byte[] bs;
		int pos=0;
		while (true) {
			try {
				bs=new byte[capacity];
				pos=encode(bs,pos);
				break;
			} catch (IndexOutOfBoundsException be) {
				if (capacity>Format.LIMIT_ENCODING_LENGTH) throw new IllegalStateException("Encoding size limit exceeded in cell: "+this);
				
				// We really want to eliminate these, because exception handling is expensive
				// However don't want to be too conservative or we waste memory
				// System.out.println("Insufficient encoding size: "+capacity+ " for "+this.getClass());
				capacity=capacity*2+10;
			}
		}
		return Blob.wrap(bs,0,pos);
	}
	
	/**
	 * Returns the Java String representation of this Cell.
	 * 
	 * The String representation is intended to be a easy-to-read textual representation of the Cell's data content.
	 *
	 */
	@Override
	public String toString() {
		return print().toString();
	}
	
	/**
	 * Returns the CVM String representation of this Cell. Normally, this is as printed, but may be different for some types.
	 * 
	 * SHOULD return null in O(1) time if the length of the CVM String can be proved to exceed the limit.
	 * MUST complete in O(limit) time and space otherwise
	 * 
	 * The String representation is intended to be a easy-to-read textual representation of the Cell's data content.
	 * @param limit Limit of CVM String length in UTF-8 bytes
	 * @return CVM String, or null if limit exceeded
	 *
	 */
	public AString toCVMString(long limit) {
		BlobBuilder bb=new BlobBuilder();
		if (!print(bb, limit)) return null;
		return bb.getCVMString();
	}

	/**
	 * Gets the cached blob representing this Cell's Encoding in binary format, if it exists.
	 * 
	 * @return The cached blob for this cell, or null if not yet available. 
	 */
	public Blob cachedEncoding() {
		return encoding;
	}

	/**
	 * Calculates the Memory Size for this Cell. Assumes not already calculated
	 * 
	 * Requires any child Refs to be either direct or of persisted in store at minimum, 
	 * or you might get a MissingDataException
	 * 
	 * @return Memory Size of this Cell
	 */
	protected long calcMemorySize() {	
		// add extra size for each child Ref (might be zero if embedded)
		long result=0;
		int n=getRefCount();
		for (int i=0; i childRef=getRef(i);
			long childSize=childRef.getMemorySize();
			result=Utils.memoryAdd(result,childSize);
		}
		
		if (!isEmbedded()) {
			// We need to count this cell's own encoding length
			// Plus overhead for storage of non-embedded cell
			long encodingLength=getEncodingLength();
			result=Utils.memoryAdd(result,encodingLength+Constants.MEMORY_OVERHEAD);
		} 
		return result;
	}
	
	/**
	 * Method to calculate the encoding length of a Cell. May be overridden to avoid
	 * creating encodings during memory size calculations. This reduces hashing!
	 * 
	 * @return Exact encoding length of this Cell
	 */
	public int getEncodingLength() {
		return getEncoding().size();
	}

	/**
	 * Gets the Memory Size of this Cell, computing it if required.
	 * 
	 * The memory size is the total storage requirement for this cell. Embedded cells do not require storage for
	 * their own encoding, but may require storage for nested branches.
	 * 
	 * @return Memory Size of this Cell
	 */
	public final long getMemorySize() {
		long ms=memorySize;
		if (ms>=0) return ms;
		ms=calcMemorySize();
		this.memorySize=ms;
		return ms;
	}
	
	/**
	 * Gets the Memory Size of a Cell, computing it if required.
	 * 
	 * The memory size is the total storage requirement for this cell. Embedded cells do not require storage for
	 * their own encoding, but may require storage for nested branches.
	 * 
	 * @return Memory Size of this Cell
	 */
	public static long getMemorySize(ACell a) {
		return (a==null)?0:a.getMemorySize(); 
	}

	/**
	 * Determines if this Cell Represents an embedded object. Embedded objects are encoded directly into
	 * the encoding of the containing Cell (avoiding the need for a hashed reference). 
	 * 
	 * Subclasses should override this if they have a cheap O(1) 
	 * way to determine if they are embedded or otherwise. 
	 * 
	 * @return true if Cell is embedded, false otherwise
	 */
	public boolean isEmbedded() {
		if (memorySize==Format.FULL_EMBEDDED_MEMORY_SIZE) return true;
		if (cachedRef!=null) {
			int flags=cachedRef.flags;
			if ((flags&Ref.KNOWN_EMBEDDED_MASK)!=0) return true;
			if ((flags&Ref.NON_EMBEDDED_MASK)!=0) return false;
		} else {
			cachedRef=createRef();
		}
		boolean embedded= getEncodingLength()<=Format.MAX_EMBEDDED_LENGTH;
		cachedRef.flags|=(embedded)?Ref.KNOWN_EMBEDDED_MASK:Ref.NON_EMBEDDED_MASK;
		return embedded;
	}
	
	/**
	 * Returns true if this Cell is in a canonical representation.
	 * 
	 * Non-canonical objects may be used on a temporary internal basis, they should
	 * be converted to canonical representations for general purpose use.
	 * 
	 * @return true if the object is in canonical format, false otherwise
	 */
	public abstract boolean isCanonical();
	
	/**
	 * Converts this Cell to a canonical version. Must return this Cell if already canonical, may be O(n) in size of value otherwise.
	 * 
	 * Callers should usually use getCanonical(), which caches canonical instances once created 
	 * 
	 * @return Canonical version of Cell
	 */
	protected abstract ACell toCanonical();
	
	/**
	 * Returns true if this cell is a first class CVM Value allowable in the CVM state
	 * 
	 * Sub-structural cells that are not themselves first class values
	 * should return false
	 * 
	 * Records and types that are not permissible on the CVM should return false.
	 * 
	 * Pretty much everything else should return true.
	 * 
	 * Note: CVM values might not be in a canonical format, e.g. temporary data structures
	 * 
	 * @return true if the object is a CVM Value, false otherwise
	 */
	public abstract boolean isCVMValue();
	
	/**
	 * Returns true if this cell instance is a first class value, i.e. not a component of a larger data structure
	 * 
	 * Sub-structural cells that are not themselves first class values should return false
	 * 
	 * Everything else should return true.
	 * 
	 * @return true if the object is a Value, false otherwise
	 */
	public abstract boolean isDataValue();

	/**
	 * Gets the Ref for this Cell, creating a new direct reference if necessary
	 * 
	 * @param  Type of Cell
	 * @return Ref for this Cell
	 */
	@SuppressWarnings("unchecked")
	public final  Ref getRef() {
		if (cachedRef!=null) return (Ref) cachedRef;
		Ref newRef=createRef();
		cachedRef=(Ref) newRef;
		return newRef;
	}
	
	/**
	 * Creates a new Ref for this Cell
	 * @param  Type of Cell
	 * @return New Ref instance
	 */
	@SuppressWarnings("unchecked")
	protected  Ref createRef() {
		Ref newRef= RefDirect.create(this);
		cachedRef=newRef;
		return (Ref) newRef;
	}
	
	/**
	 * Gets the number of Refs contained within this Cell. This number is
	 * final / immutable for any given instance and is defined by the Cell encoding rules.
	 * 
	 * Contained Refs may be either external or embedded.
	 * 
	 * @return The number of Refs in this Cell
	 */
	public int getRefCount() {
		ACell canonical=getCanonical();
		assert (canonical!=this) : "Canonical getRefCount not implemented: " + this.getClass();
		return canonical.getRefCount();
	}
	
	/**
	 * Gets a numbered child Ref from within this Cell.
	 * WARNING: May need to convert to a canonical instance
	 * 
	 * @param  Type of referenced Cell
	 * @param i Index of ref to get
	 * @return The Ref at the specified index
	 * @throws IndexOutOfBoundsException if the Ref index is invalid
	 */
	public  Ref getRef(int i) {
		ACell canonical=getCanonical();
		assert (canonical!=this) : "Canonical getRef not implemented: " + this.getClass();
		return getCanonical().getRef(i);
	}
	
	/**
	 * Updates all child Refs in this object using the given function.
	 * 
	 * This clears the currently cached Ref if an update occurred. This is because, presumably, 
	 * a new Ref for this cell needs to be created.
	 * 
	 * The function *must not* change the hash value of Refs, in order to ensure
	 * structural integrity of modified data structures.
	 * 
	 * The implementation *should* re-attach any original encoding in order to
	 * prevent re-encoding or surplus hashing
	 * 
	 * This is a building block for a very sneaky trick that enables use to do a lot
	 * of efficient operations on large trees of smart references.
	 * 
	 * Must return the same object if no Refs are altered.
	 * @param func Ref update function
	 * @return Cell with updated Refs
	 */
	public ACell updateRefs(IRefFunction func) {
		ACell canonical=getCanonical();
		assert (canonical!=this) : "Canonical updateRefs not implemented: " + this.getClass();
		return canonical.updateRefs(func);
	}

	/**
	 * Gets an array of child Refs for this Cell, in the same order as order accessible by
	 * getRef. 
	 * 
	 * Concrete implementations may override this to optimise performance.
	 * 
	 * @param  Type of referenced Cell
	 * @return Array of Refs
	 */
	@SuppressWarnings("unchecked")
	public  Ref[] getChildRefs() {
		int n = getRefCount();
		Ref[] refs = new Ref[n];
		for (int i = 0; i < n; i++) {
			refs[i] = getRef(i);
		}
		return refs;
	}
	
	/**
	 * Gets the most specific known runtime Type for this Cell.
	 * @return The Type of this Call
	 */
	public AType getType() {
		return Types.ANY;
	}

	/**
	 * Updates the memorySize of this Cell
	 * 
	 * Not valid for embedded Cells, may throw IllegalOperationException()
	 * 
	 * @param memorySize Memory size to assign
	 */
	public void attachMemorySize(long memorySize) {
		if (this.memorySize<0) {
			this.memorySize=memorySize;
			assert (this.memorySize>0) : "Attempting to attach memory size "+memorySize+" to object of class "+Utils.getClassName(this);
		} else {
			assert (this.memorySize==memorySize) : "Trying to change memory size to "+memorySize+" with object of class "+Utils.getClassName(this)+" which already has memorySize "+this.memorySize;
		}
	}
	
	/**
	 * Sets the cached ref of this Cell if it is not already set. USe with caution.
	 * 
	 * @param ref Ref to assign
	 */
	@SuppressWarnings("unchecked")
	public void attachRef(Ref ref) {
		Ref current=this.cachedRef;
		if (current!=null) {
			// This solves problem of trashing internal cached refs
			if (ref.getStatus()<=current.getStatus()) return;
		//	return;
		//	// throw new IllegalStateException("Cell of type "+Utils.getClassName(this)+" already has cached Ref");
		}
		this.cachedRef=(Ref) ref;
	}

	/**
	 * Tests if this Cell is completely encoded, i.e. has no external Refs. This implies that the 
	 * complete Cell can be represented in a single encoding.
	 * @return true if completely encoded, false otherwise
	 */
	boolean isCompletelyEncoded() {
		if (memorySize==Format.FULL_EMBEDDED_MEMORY_SIZE) return true; // fast path for fully embedded
		int n=getRefCount();
		for (int i=0; i r=getRef(i);
			if (!r.isEmbedded()) return false;
			ACell child=r.getValue();
			if (child!=null) {
				if (!child.isCompletelyEncoded()) return false; // Should be safe from missing?
			}
		}
		return true;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy