All Downloads are FREE. Search and download functionalities are using the official Maven repository.

convex.core.data.SetTree Maven / Gradle / Ivy

The newest version!
package convex.core.data;

import convex.core.exceptions.BadFormatException;
import convex.core.exceptions.InvalidDataException;
import convex.core.exceptions.Panic;
import convex.core.util.Bits;
import convex.core.util.Utils;

/**
 * Persistent Set for large hash sets requiring tree structure.
 * 
 * Internally implemented as a radix tree, indexed by key hash. Uses an array of
 * child Maps, with a bitmap mask indicating which hex digits are present, i.e.
 * have non-empty children.
 *
 * @param <T> Type of Set elements
 */
public class SetTree extends AHashSet {

	/**
	 * Child maps, one for each present bit in the mask, max 16
	 */
	private final Ref>[] children;

	/**
	 * Shift position of this @link SetTree node in number of hex digits. 0 at top level, 1 at next level up etc
	 */
	final int shift;

	/**
	 * Mask indicating which hex digits are present in the child array e.g. 0x0001
	 * indicates all children are in the '0' digit. e.g. 0xFFFF indicates there are
	 * children for every digit.
	 */
	final short mask;

	/**
	 * Builds a node directly from its parts. The child array is stored as given
	 * (not copied) and no consistency checks are performed here.
	 *
	 * @param children Refs to child sets, one per bit set in the mask
	 * @param shift    Hex digit position of this node
	 * @param mask     Bitmap of hex digits that have a child
	 * @param count    Total number of elements, passed to the superclass
	 */
	private SetTree(Ref<AHashSet<T>>[] children, int shift, short mask, long count) {
		super(count);
		this.children = children;
		this.shift = shift;
		this.mask = mask;
	}
	
	public static  SetTree unsafeCreate(Ref>[] children,int shift, short mask, long count) {
		return new SetTree(children,shift,mask,count);
	}

	/**
	 * Computes the total count from an array of Refs to sets. Ignores null Refs in
	 * child array
	 * 
	 * @param children
	 * @return The total count of all child maps
	 */
	private static  long computeCount(Ref>[] children) {
		long n = 0;
		for (Ref> cref : children) {
			if (cref == null) continue;
			ASet m = cref.getValue();
			n += m.count();
		}
		return n;
	}

	/**
	 * Create a SetTree given a number of element Refs to distribute among children.
	 * O(n) in number of elements.
	 * 
	 * @param  Type of elements
	 * @param elementRefs Array of Refs to elements
	 * @param shift Hex digit position at which to split children.
	 * @return New SetTree node
	 */
	@SuppressWarnings("unchecked")
	public static  SetTree create(Ref[] elementRefs, int shift) {
		int n = elementRefs.length;
		if (n <= SetLeaf.MAX_ELEMENTS) {
			throw new IllegalArgumentException(
					"Insufficient distinct entries for TreeMap construction: " + elementRefs.length);
		}

		// construct full child array
		Ref>[] children = new Ref[16];
		for (int i = 0; i < n; i++) {
			Ref e = elementRefs[i];
			int ix = e.getHash().getHexDigit(shift);
			Ref> ref = children[ix];
			if (ref == null) {
				children[ix] = SetLeaf.create(e).getRef();
			} else {
				AHashSet newChild=ref.getValue().includeRef(e,shift+1);
				children[ix] = newChild.getRef();
			}
		}
		return (SetTree) createFull(children, shift);
	}

	/**
	 * Creates a SetTree given child Refs for each digit
	 * 
	 * @param children An array of children, may be null or refer to empty Sets which
	 *                 will be filtered out
	 * @return
	 */
	private static  AHashSet createFull(Ref>[] children, int shift, long count) {
		if (children.length != 16) throw new IllegalArgumentException("16 children required!");
		Ref>[] newChildren = Utils.filterArray(children, a -> {
			if (a == null) return false;
			AHashSet m = a.getValue();
			return ((m != null) && !m.isEmpty());
		});

		if (children != newChildren) {
			return create(newChildren, shift, Utils.computeMask(children, newChildren), count);
		} else {
			return create(children, shift, (short) 0xFFFF, count);
		}
	}

	/**
	 * Create a SetTree with a full compliment of 16 children.
	 * @param  Type of Set elements
	 * @param newChildren
	 * @param shift Shift for child node
	 * @return
	 */
	private static  AHashSet createFull(Ref>[] newChildren, int shift) {
		long count=computeCount(newChildren);
		return createFull(newChildren, shift, count);
	}

	/**
	 * Creates a Set using specified child Set Refs. Removes empty Sets passed as
	 * children.
	 * 
	 * Returns a SetLeaf for small Sets.
	 * 
	 * @param children Array of Refs to child sets for each bit in mask
	 * @param shift    Shift position (hex digit in hashes for this node)
	 * @param mask     Mask specifying the hex digits included in the child array at
	 *                 this shift position
	 * @return A new set as required
	 */
	@SuppressWarnings("unchecked")
	private static  AHashSet create(Ref>[] children, int shift, short mask, long count) {
		int cLen = children.length;
		if (Integer.bitCount(mask & 0xFFFF) != cLen) {
			throw new IllegalArgumentException(
					"Invalid child array length " + cLen + " for bit mask " + Utils.toHexString(mask));
		}

		// compress small counts to SetLeaf
		if (count <= SetLeaf.MAX_ELEMENTS) {
			Ref[] entries = new Ref[Utils.checkedInt(count)];
			int ix = 0;
			for (Ref> childRef : children) {
				AHashSet child = childRef.getValue();
				long cc = child.count();
				for (long i = 0; i < cc; i++) {
					entries[ix++] = child.getElementRef(i);
				}
			}
			assert (ix == count);
			return SetLeaf.create(entries);
		}
		int sel = (1 << cLen) - 1;
		short newMask = mask;
		for (int i = 0; i < cLen; i++) {
			AHashSet child = children[i].getValue();
			if (child.isEmpty()) {
				newMask = (short) (newMask & ~(1 << digitForIndex(i, mask))); // remove from mask
				sel = sel & ~(1 << i); // remove from selection
			}
		}
		if (mask != newMask) {
			return new SetTree(Utils.filterSmallArray(children, sel), shift, newMask, count);
		}
		return new SetTree(children, shift, mask, count);
	}

	@Override
	public Ref getElementRef(long i) {
		long pos = i;
		for (Ref> c : children) {
			AHashSet child = c.getValue();
			long cc = child.count();
			if (pos < cc) return child.getElementRef(pos);
			pos -= cc;
		}
		throw new IndexOutOfBoundsException("Entry index: " + i);
	}

	/**
	 * Looks up an element Ref by hash, descending into the child selected by the
	 * hash's hex digit at this node's shift.
	 *
	 * @param hash Hash of the element to find
	 * @return Whatever the selected child returns, or null if there is no child for
	 *         the relevant digit
	 */
	@Override
	protected Ref<T> getRefByHash(Hash hash) {
		int digit = hash.getHexDigit(shift);
		int i = Bits.indexForDigit(digit, mask);
		if (i < 0) return null; // not present
		return children[i].getValue().getRefByHash(hash);
	}

	/**
	 * Removes a value from this set, by obtaining its Ref and delegating to
	 * {@link #excludeRef(Ref)}.
	 *
	 * @param key Value to remove
	 * @return Set without the value (this instance if nothing changed)
	 */
	@SuppressWarnings("unchecked")
	@Override
	public AHashSet<T> exclude(ACell key) {
		return excludeRef((Ref<T>) Ref.get(key));
	}

	/**
	 * Removes the element identified by the given Ref, if present.
	 *
	 * @param keyRef Ref to the element to remove
	 * @return This instance if the element was not present, otherwise the
	 *         canonical form of the updated set
	 */
	@Override
	public AHashSet<T> excludeRef(Ref<T> keyRef) {
		int digit = keyRef.getHash().getHexDigit(shift);
		int i = Bits.indexForDigit(digit, mask);
		if (i < 0) return this; // not present

		// remove the element from the relevant child
		AHashSet<T> child = children[i].getValue();
		AHashSet<T> newChild = child.excludeRef(keyRef);
		if (child == newChild) return this; // no removal, no change

		// an emptied child is dropped from the array, otherwise swapped in place
		AHashSet<T> result = (newChild.isEmpty()) ? dissocChild(i) : replaceChild(i, newChild.getRef());
		return result.toCanonical();
	}
	
	/**
	 * Reports whether this node is in canonical form.
	 *
	 * @return true exactly when the element count exceeds
	 *         {@code SetLeaf.MAX_ELEMENTS}, i.e. the elements could not be held
	 *         in a single SetLeaf
	 */
	@Override
	public boolean isCanonical() {
		boolean tooBigForLeaf = count > SetLeaf.MAX_ELEMENTS;
		return tooBigForLeaf;
	}
	
	/**
	 * Returns the canonical form of this set: this instance when
	 * {@link #isCanonical()} holds, otherwise a replacement built from an array
	 * of {@code count} element Refs.
	 */
	@Override
	public AHashSet toCanonical() {
		if (isCanonical()) return this;
		int n=(int)count; // safe since we know n is in range 0..16
		@SuppressWarnings("unchecked")
		Ref[] newEntries=new Ref[n];
		// NOTE(review): the next line is damaged - text between "i" and
		// "(newEntries)" was lost (loop condition, loop body and the start of the
		// return statement). Presumably it copied getElementRef(i) into newEntries
		// and returned a SetLeaf; restore from the upstream source.
		for (int i=0; i(newEntries);
	}

	@SuppressWarnings("unchecked")
	private AHashSet dissocChild(int i) {
		int bsize = children.length;
		AHashSet child = children[i].getValue();
		Ref>[] newBlocks = (Ref>[]) new Ref[bsize - 1];
		System.arraycopy(children, 0, newBlocks, 0, i);
		System.arraycopy(children, i + 1, newBlocks, i, bsize - i - 1);
		short newMask = (short) (mask & (~(1 << digitForIndex(i, mask))));
		long newCount = count - child.count();
		return create(newBlocks, shift, newMask, newCount);
	}

	@SuppressWarnings("unchecked")
	private SetTree insertChild(int digit, Ref> newChild) {
		int bsize = children.length;
		int i = Bits.positionForDigit(digit, mask);
		short newMask = (short) (mask | (1 << digit));
		if (mask == newMask) throw new Panic("Digit already present!");

		Ref>[] newChildren = (Ref>[]) new Ref[bsize + 1];
		System.arraycopy(children, 0, newChildren, 0, i);
		System.arraycopy(children, i, newChildren, i + 1, bsize - i);
		newChildren[i] = newChild;
		long newCount = count + newChild.getValue().count();
		return (SetTree) create(newChildren, shift, newMask, newCount);
	}

	/**
	 * Replaces the child ref at a given index position. Will return this if no change
	 * 
	 * @param i
	 * @param newChild
	 * @return Updated SetTree
	 */
	protected AHashSet replaceChild(int i, Ref> newChild) {
		if (children[i] == newChild) return this;
		AHashSet oldChild = children[i].getValue();
		Ref>[] newChildren = children.clone();
		newChildren[i] = newChild;
		long newCount = count + newChild.getValue().count() - oldChild.count();
		return create(newChildren, shift, mask, newCount);
	}

	/**
	 * Finds the hex digit (bit position 0..15) of the index-th set bit in a mask,
	 * counting set bits from the least significant end starting at zero.
	 *
	 * @param index Zero-based position among the set bits of the mask
	 * @param mask  Bitmap of present hex digits
	 * @return Bit position of the selected set bit
	 * @throws IllegalArgumentException if the mask has no set bit at that position
	 */
	public static int digitForIndex(int index, short mask) {
		// count down through the set bits until the requested one is reached
		int remaining = index;
		for (int digit = 0; digit < 16; digit++) {
			boolean present = (mask & (1 << digit)) != 0;
			if (!present) continue;
			if (remaining == 0) return digit;
			remaining--;
		}
		throw new IllegalArgumentException("Index " + index + " not available in mask map: " + Utils.toHexString(mask));
	}

	/**
	 * Adds a value to this set, by obtaining its Ref and delegating to
	 * {@code includeRef} at this node's own shift.
	 *
	 * @param value Value to include
	 * @return SetTree containing the value (this instance if nothing changed)
	 */
	@SuppressWarnings("unchecked")
	@Override
	public SetTree<T> include(ACell value) {
		Ref<T> keyRef = (Ref<T>) Ref.get(value);
		return includeRef(keyRef, shift);
	}

	/**
	 * Adds the element with the given Ref to this node.
	 *
	 * @param e     Ref to the element to include
	 * @param shift Shift the caller expects this node to have; must equal
	 *              {@link #shift}
	 * @return This instance if the relevant child was unchanged, otherwise a new
	 *         SetTree
	 * @throws Panic if the supplied shift does not match this node's shift
	 */
	@Override
	protected SetTree<T> includeRef(Ref<T> e, int shift) {
		if (this.shift != shift) {
			// use Panic like the other invariant failures in this class, not a raw Error
			throw new Panic("Invalid shift!");
		}
		int digit = e.getHash().getHexDigit(shift);
		int i = Bits.indexForDigit(digit, mask);
		if (i < 0) {
			// location not present: start a new leaf child for this digit
			AHashSet<T> newChild = SetLeaf.create(e);
			return insertChild(digit, newChild.getRef());
		} else {
			// location needs update: push the element down one level
			AHashSet<T> child = children[i].getValue();
			AHashSet<T> newChild = child.includeRef(e, shift + 1);
			if (child == newChild) return this;
			return (SetTree<T>) replaceChild(i, newChild.getRef());
		}
	}
	
	/**
	 * Adds the element with the given Ref, using this node's own shift.
	 *
	 * @param ref Ref to the element to include
	 * @return Set containing the element
	 */
	@Override
	public AHashSet<T> includeRef(Ref<T> ref) {
		return includeRef(ref, shift);
	}

	/**
	 * Writes the full encoding of this set: the {@code Tag.SET} byte followed by
	 * the raw encoding.
	 *
	 * @param bs  Destination byte array
	 * @param pos Position at which to start writing
	 * @return Position just past the written data
	 */
	@Override
	public int encode(byte[] bs, int pos) {
		bs[pos] = Tag.SET;
		return encodeRaw(bs, pos + 1);
	}
	
	/**
	 * Writes the encoding of this node without the leading tag: the VLC-encoded
	 * count, one byte for the shift, the mask as a short, then each child Ref in
	 * array order.
	 *
	 * @param bs  Destination byte array
	 * @param pos Position at which to start writing
	 * @return Position just past the written data
	 */
	@Override
	public int encodeRaw(byte[] bs, int pos) {
		int p = Format.writeVLCLong(bs, pos, count);

		bs[p] = (byte) shift;
		p = Utils.writeShort(bs, p + 1, mask);

		for (int i = 0, n = children.length; i < n; i++) {
			p = children[i].encode(bs, p);
		}
		return p;
	}

	/**
	 * Estimates the encoded size of this node.
	 *
	 * @return Space for the tag, the VLC-encoded count, the shift byte, the 2 byte
	 *         mask, and one maximum-size embedded Ref per child
	 */
	@Override
	public int estimatedEncodingSize() {
		// 4 = tag + shift byte + 2 byte mask; the count written by encodeRaw was
		// previously not budgeted for, so add its VLC length explicitly
		return 4 + Format.getVLCLength(count) + Format.MAX_EMBEDDED_LENGTH * children.length;
	}
	
	/**
	 * Upper bound used for the encoding length of a SetTree node with all 16
	 * children present. Declared final so that it cannot be reassigned.
	 *
	 * NOTE(review): the fixed overhead of 4 covers the tag, shift byte and 2 byte
	 * mask but not the VLC-encoded count that encodeRaw also writes - confirm
	 * whether the bound should be enlarged.
	 */
	public static final int MAX_ENCODING_LENGTH = 4 + Format.MAX_EMBEDDED_LENGTH * 16;

	/**
	 * Reads a SetTree from the provided Blob encoding
	 * 
	 * @param b Blob to read from
	 * @param pos Start position in Blob (location of tag byte)
	 * @param count Number of elements	 
	 * @return New decoded instance
	 * @throws BadFormatException In the event of any encoding error
	 */
	public static  SetTree read(Blob b, int pos, long count) throws BadFormatException {
		int headerLen=1+Format.getVLCLength(count);
		int epos=pos+headerLen;
		
		int shift=b.byteAt(epos++);
		short mask=b.shortAt(epos);
		epos+=2;
		
		int ilength = Integer.bitCount(mask & 0xFFFF);
		
		@SuppressWarnings("unchecked")
		Ref>[] blocks = (Ref>[]) new Ref[ilength];
		for (int i = 0; i < ilength; i++) {
			// need to read as a Ref
			Ref> ref = Format.readRef(b,epos);
			epos+=ref.getEncodingLength();
			blocks[i] = ref;
		}
		
		SetTree result = new SetTree(blocks, shift, mask, count);
		if (!result.isValidStructure()) throw new BadFormatException("Problem with TreeMap invariants");
		Blob enc=b.slice(pos,epos);
		result.attachEncoding(enc);
		return result;
	}


	
	/**
	 * Reports whether this node counts as a CVM value.
	 *
	 * @return true only for a node whose shift is zero
	 */
	@Override
	public final boolean isCVMValue() {
		return 0 == shift;
	}

	/**
	 * Gets the number of Refs held directly by this node.
	 *
	 * @return Length of the child array
	 */
	@Override
	public int getRefCount() {
		int childSlots = children.length;
		return childSlots;
	}
	
	/**
	 * Gets the bitmap describing which hex digits have a child set under this
	 * node: bit {@code d} is set when a child exists for hex digit {@code d}.
	 *
	 * @return Mask value
	 */
	public short getMask() {
		return this.mask;
	}

	/**
	 * Gets the child Ref at the given array index.
	 *
	 * @param <R> Type the caller expects the Ref to point to (unchecked)
	 * @param i   Index into the child array
	 * @return The child Ref at that index
	 */
	@SuppressWarnings("unchecked")
	@Override
	public <R extends ACell> Ref<R> getRef(int i) {
		return (Ref<R>) children[i];
	}

	@SuppressWarnings("unchecked")
	@Override
	public SetTree updateRefs(IRefFunction func) {
		int n = children.length;
		if (n == 0) return this;
		Ref>[] newChildren = children;
		for (int i = 0; i < n; i++) {
			Ref> child = children[i];
			Ref> newChild = (Ref>) func.apply(child);
			if (child != newChild) {
				if (children == newChildren) {
					newChildren = children.clone();
				}
				newChildren[i] = newChild;
			}
		}
		if (newChildren == children) return this;
		// Note: we assume no key hashes have changed, so structure is the same
		return new SetTree<>(newChildren, shift, mask, count);
	}

	/**
	 * Merges this set with another, using this node's own shift.
	 *
	 * @param b     Set to merge with
	 * @param setOp Operation code, passed through unchanged
	 * @return Result of the merge
	 */
	@Override
	public AHashSet<T> mergeWith(AHashSet<T> b, int setOp) {
		return mergeWith(b, setOp, this.shift);
	}

	/**
	 * Merges this set with another, dispatching on the concrete type of the
	 * other set.
	 *
	 * @param b     Set to merge with; must be a SetTree or a SetLeaf
	 * @param setOp Operation code, passed through unchanged
	 * @param shift Shift at which the merge takes place
	 * @return Result of the merge
	 * @throws Panic if the other set is a SetTree with a different shift, or is
	 *               of an unrecognised type
	 */
	@SuppressWarnings("unchecked")
	@Override
	protected AHashSet<T> mergeWith(AHashSet<T> b, int setOp, int shift) {
		if ((b instanceof SetTree)) {
			SetTree<T> bt = (SetTree<T>) b;
			if (this.shift != bt.shift) throw new Panic("Misaligned shifts!");
			return mergeWith(bt, setOp, shift);
		}
		if ((b instanceof SetLeaf)) return mergeWith((SetLeaf<T>) b, setOp, shift);
		// message previously said "map type", a leftover from the map implementation
		throw new Panic("Unrecognised set type: " + b.getClass());
	}

	@SuppressWarnings("unchecked")
	private AHashSet mergeWith(SetTree b, int setOp, int shift) {
		// assume two TreeMaps with identical prefix and shift
		assert (b.shift == shift);
		int fullMask = mask | b.mask;
		// We are going to build full child list only if needed
		Ref>[] newChildren = null;
		for (int digit = 0; digit < 16; digit++) {
			int bitMask = 1 << digit;
			if ((fullMask & bitMask) == 0) continue; // nothing to merge at this index
			AHashSet ac = childForDigit(digit).getValue();
			AHashSet bc = b.childForDigit(digit).getValue();
			AHashSet rc = ac.mergeWith(bc, setOp, shift + 1);
			if (ac != rc) {
				if (newChildren == null) {
					newChildren = (Ref>[]) new Ref[16];
					for (int ii = 0; ii < digit; ii++) { // copy existing children up to this point
						int chi = Bits.indexForDigit(ii, mask);
						if (chi >= 0) newChildren[ii] = children[chi];
					}
				}
			}
			if (newChildren != null) newChildren[digit] = rc.getRef();
		}
		if (newChildren == null) return this;
		return createFull(newChildren, shift);
	}

	@SuppressWarnings("unchecked")
	private AHashSet mergeWith(SetLeaf b, int setOp, int shift) {
		Ref>[] newChildren = null;
		int ix = 0;
		for (int i = 0; i < 16; i++) {
			int imask = (1 << i); // mask for this digit
			if ((mask & imask) == 0) continue;
			Ref> cref = children[ix++];
			AHashSet child = cref.getValue();
			SetLeaf bSubset = b.filterHexDigits(shift, imask); // filter only relevant elements in b
			AHashSet newChild = child.mergeWith(bSubset, setOp, shift + 1);
			if (child != newChild) {
				if (newChildren == null) {
					newChildren = (Ref>[]) new Ref[16];
					for (int ii = 0; ii < children.length; ii++) { // copy existing children
						int chi = digitForIndex(ii, mask);
						newChildren[chi] = children[ii];
					}
				}
			}
			if (newChildren != null) {
				newChildren[i] = newChild.getRef();
			}
		}
		assert (ix == children.length);
		// if any new children created, create a new Map, else use this
		AHashSet result = (newChildren == null) ? this : createFull(newChildren, shift);

		SetLeaf extras = b.filterHexDigits(shift, ~mask);
		int en = extras.size();
		for (int i = 0; i < en; i++) {
			Ref e = extras.getRef(i);
			Ref newE = applyOp(setOp,null,e);
			if (newE != null) {
				// include only new keys where function result is not null. Re-use existing
				// entry if possible.
				result = result.includeRef(newE, shift);
			}
		}
		return result;
	}



	/**
	 * Gets the Ref for the child at the given digit, or an empty map if not found
	 * 
	 * @param digit The hex digit to query at this TreeMap's shift position
	 * @return The child map for this digit, or an empty map if the child does not
	 *         exist
	 */
	@SuppressWarnings({ "unchecked", "rawtypes" })
	private Ref> childForDigit(int digit) {
		int ix = Bits.indexForDigit(digit, mask);
		if (ix < 0) return (Ref)Sets.emptyRef();
		return children[ix];
	}

	/**
	 * Tests equality with another cell. Only another SetTree can be equal.
	 *
	 * @param a Cell to compare with
	 * @return true if the cell is a SetTree equal to this one
	 */
	@SuppressWarnings("unchecked")
	@Override
	public boolean equals(ACell a) {
		if (!(a instanceof SetTree)) return false;
		return equals((SetTree<T>) a);
	}

	/**
	 * Tests equality with another SetTree. Cheap structural fields (count, mask,
	 * shift) are compared first; if they all match, the decision falls to a
	 * comparison of the two hashes.
	 *
	 * @param b Tree to compare with
	 * @return true if the trees are the same object or have equal hashes
	 */
	boolean equals(SetTree<T> b) {
		if (this == b) return true;
		if (count != b.count) return false;
		if (mask != b.mask) return false;
		if (shift != b.shift) return false;

		// Fall back to comparing hashes. Probably most efficient in general.
		return getHash().equals(b.getHash());
	}

	/**
	 * Validates this node: runs the superclass validation, then the prefix-based
	 * structural validation starting from {@code Hash.EMPTY_HASH}.
	 *
	 * @throws InvalidDataException if either validation step fails
	 */
	@Override
	public void validate() throws InvalidDataException {
		super.validate();
		final int startDigit = 0;
		final int startPosition = -1;
		validateWithPrefix(Hash.EMPTY_HASH, startDigit, startPosition);
	}
	
	/**
	 * Structural validation of this node. Checks that the mask is non-zero, the
	 * shift is within 0..MAX_SHIFT, the count is large enough for a tree node,
	 * and that every child is a non-null, non-empty AHashSet whose shift (when it
	 * is itself a SetTree) is exactly one more than this node's.
	 *
	 * NOTE(review): this span is damaged. Text was lost part-way through the
	 * child loop, so the end of this method and the header of the following
	 * containsAll method (plus anything between them) are missing; a second
	 * loop header further down is also truncated. Restore from upstream source.
	 */
	@Override
	protected void validateWithPrefix(Hash base, int digit, int position) throws InvalidDataException {
		if (mask == 0) throw new InvalidDataException("TreeMap must have children!", this);
		if ((shift <0)||(shift>MAX_SHIFT)) {
			throw new InvalidDataException("Invalid shift for SetTree", this);
		}
		
		if (count<=SetLeaf.MAX_ELEMENTS) {
			throw new InvalidDataException("Count too small [" + count + "] for SetTree", this);
		}

		// hash of the first element, compared against each child's first element below
		Hash firstHash;
		try {
			firstHash=getElementRef(0).getHash();
		} catch (ClassCastException e) {
			throw new InvalidDataException("Bad child type:" +e.getMessage(), this);
		}
		
		int bsize = children.length;

		long childCount=0;;
		for (int i = 0; i < bsize; i++) {
			if (children[i] == null) {
				throw new InvalidDataException("Null child ref at index " + i,this);
			}
			
			ACell o = children[i].getValue();
			if (!(o instanceof AHashSet)) {
				throw new InvalidDataException(
						"Expected AHashSet child at index " + i +" but got "+Utils.getClassName(o), this);
			}
			@SuppressWarnings("unchecked")
			AHashSet child = (AHashSet) o;
			if (child.isEmpty())
				throw new InvalidDataException("Empty child at index " + i,this);
			
			// a child tree must sit exactly one hex digit deeper than this node
			if (child instanceof SetTree) {
				SetTree childTree=(SetTree) child;
				int expectedShift=shift+1;
				if (childTree.shift!=expectedShift) {
					throw new InvalidDataException("Wrong child shift ["+childTree.shift+"], expected ["+expectedShift+"]",this);
				}
			}
			
			Hash childHash=child.getElementRef(0).getHash();
			long pmatch=firstHash.hexMatch(childHash);
			// NOTE(review): text lost on the next line; what follows is the body of a
			// containsAll method taking a set "b"
			if (pmatch b) {
		if (b instanceof SetTree) {
			return containsAll((SetTree)b);
		}
		// must be a SetLeaf
		long n=b.count;
		// NOTE(review): loop condition and the declared type of "me" were lost on the next line
		for (long i=0; i me=b.getElementRef(i);
			if (!this.containsHash(me.getHash())) return false;
		}
		return true;
	}
	
	/**
	 * Tests whether this tree contains every element of another SetTree, by
	 * comparing masks and then the children digit by digit.
	 *
	 * @param other Tree whose elements must all be present in this one
	 * @return true if every child of the other tree is contained in the child of
	 *         this tree at the same hex digit
	 */
	protected boolean containsAll(SetTree<T> other) {
		// first check this mask contains all of the target mask
		if ((this.mask | other.mask) != this.mask) return false;

		for (int digit = 0; digit < 16; digit++) {
			// childForDigit never returns null, so test the mask instead: a digit the
			// other tree does not use imposes no requirement
			if ((other.mask & (1 << digit)) == 0) continue;

			AHashSet<T> mine = this.childForDigit(digit).getValue();
			AHashSet<T> theirs = other.childForDigit(digit).getValue();
			if (!mine.containsAll(theirs)) return false;
		}
		return true;
	}

	/**
	 * Looks up the Ref stored for a value, via the value's hash.
	 *
	 * @param k Value to look up
	 * @return Result of {@code getRefByHash} for the value's hash (null when
	 *         there is no child for the relevant digit)
	 */
	@Override
	public Ref<T> getValueRef(ACell k) {
		Hash h = Hash.get(k);
		return getRefByHash(h);
	}

	/**
	 * Copies this set's contents into an array by delegating to each child in
	 * turn, advancing the offset by the child's count after each one.
	 */
	@Override
	protected  void copyToArray(R[] arr, int offset) {
		// NOTE(review): the loop condition, increment and the declared type of
		// "child" were lost on the next line (presumably a loop over all
		// children) - restore from upstream source
		for (int i=0; i child=children[i].getValue();
			child.copyToArray(arr,offset);
			offset=Utils.checkedInt(offset+child.count());
		}
	}

	/**
	 * Tests whether an element with the given hash is present.
	 *
	 * @param hash Hash to look for
	 * @return true if the lookup by hash yields a non-null Ref
	 */
	@Override
	public boolean containsHash(Hash hash) {
		return null != getRefByHash(hash);
	}

	/**
	 * Returns the sub-range [start, end) of this set's elements as a set.
	 * Returns null for an invalid range, this instance when the range covers
	 * everything, and the empty set for a zero-length range.
	 *
	 * NOTE(review): this method is damaged in two places (marked below); the
	 * small-slice branch and two loop headers lost text. Restore from upstream
	 * source.
	 */
	@SuppressWarnings("unchecked")
	@Override
	public ASet slice(long start, long end) {
		if (start<0) return null;
		if (end>count) return null;
		long n=end-start;
		if (n<0) return null;
		if (n==count) return this;
		if (n==0) return empty();

		if (n<=SetLeaf.MAX_ELEMENTS) {
			int nc=(int)n;
			Ref[] elems=new Ref[nc];
			// NOTE(review): text lost on the next line - the loop filling elems, the
			// return for this branch, its closing brace and the declared type of "result"
			for (int i=0; i result=this;
		int nc=children.length;
		long cstart=0; // position of the current child's first element within this set
		// NOTE(review): loop condition, increment and the declared type of "c" lost on the next line
		for (int i=0; i c=result.children[i].getValue();
			long cc=c.count();
			long cend=cstart+cc;
			if ((cend<=start)||(cstart>=end)) {
				// Remove entire child
				result=(SetTree) result.dissocChild(i);
				// the array shrank by one, so revisit the same index and lower the bound
				i--;
				nc--;
			} else {
				// child overlaps the range: clamp the range to the child's own indices
				long istart=Math.max(0, start-cstart);
				long iend=Math.min(cc, end-cstart);
				AHashSet nchild=(AHashSet) c.slice(istart,iend);
				if (nchild!=c) {
					result=(SetTree) result.replaceChild(i, nchild.getRef());
				}
			}
			cstart=cend;
		}
		return result;
	}

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy