All Downloads are FREE. Search and download functionalities are using the official Maven repository.

convex.core.data.Index Maven / Gradle / Ivy

The newest version!
package convex.core.data;

import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Predicate;

import convex.core.exceptions.BadFormatException;
import convex.core.exceptions.InvalidDataException;
import convex.core.lang.RT;
import convex.core.util.Bits;
import convex.core.util.Utils;

/**
 * Index node implementation, providing an efficient radix tree based immutable data structure for indexed access and sorting.
 * 
 * Supporting: 
 * 
 * 
    *
  • An optional prefix string
  • *
  • An optional entry with this exact prefix
  • *
  • Up to 16 child entries at the next level of depth
  • *
* @param Type of Keys * @param Type of values */ public final class Index, V extends ACell> extends AIndex { @SuppressWarnings({ "unchecked", "rawtypes" }) public static final Ref[] EMPTY_CHILDREN = new Ref[0]; /** * Maximum depth of index, in hex digits */ private static final int MAX_DEPTH=64; /** * Maximum usable size of keys, in bytes */ private static final int MAX_KEY_BYTES=MAX_DEPTH/2; /** * Empty Index singleton */ public static final Index EMPTY = Cells.intern(new Index(0, null, EMPTY_CHILDREN,(short) 0, 0L)); /** * Child entries, i.e. nodes with keys where this node is a common prefix. Only contains children where mask is set. * Child entries must have at least one entry. */ private final Ref>[] children; /** * Entry for this node of the radix tree. Invariant assumption that the prefix * is correct. May be null if there is no entry at this node. */ private final MapEntry entry; /** * Mask of child entries, 16 bits for each hex digit that may be present. */ private final short mask; /** * Depth of radix tree entry in number of hex digits. */ private final long depth; @SuppressWarnings({ "rawtypes", "unchecked" }) protected Index(long depth, MapEntry entry, Ref[] entries, short mask, long count) { super(count); this.depth = depth; this.entry = entry; this.children = (Ref[]) entries; this.mask = mask; } @SuppressWarnings("rawtypes") public static , V extends ACell> Index unsafeCreate(long depth, MapEntry entry, Ref[] entries, int mask, long count) { return new Index(depth,entry,entries,(short)mask,count); } @SuppressWarnings("unchecked") public static , V extends ACell> Index create(MapEntry me) { ACell k=me.getKey(); if (!(k instanceof ABlobLike)) return null; // check in case invalid key type long depth = effectiveLength((K)k); return new Index(depth, me, EMPTY_CHILDREN, (short) 0, 1L); } public static , V extends ACell> Index create(K k, V v) { MapEntry me = MapEntry.create(k, v); long hexLength = effectiveLength(k); return new Index(hexLength, me, EMPTY_CHILDREN, (short) 0, 1L); } public static , V extends ACell> Index of(Object k, Object v) { return create(RT.cvm(k),RT.cvm(v)); } @SuppressWarnings("unchecked") public static , V extends ACell> Index of(Object... kvs) { int n = kvs.length; if (Utils.isOdd(n)) throw new IllegalArgumentException("Even number of key + values required"); Index result = (Index) EMPTY; for (int i = 0; i < n; i += 2) { V value=RT.cvm(kvs[i + 1]); result = result.assoc((K) kvs[i], value); } return (Index) result; } @Override public boolean isCanonical() { return true; } @Override public final boolean isCVMValue() { return true; } @Override public boolean isDataValue() { return true; } @SuppressWarnings("unchecked") @Override public Index updateRefs(IRefFunction func) { MapEntry newEntry = Ref.update(entry,func); Ref>[] newChildren = Ref.updateRefs(children, func); if ((entry == newEntry) && (children == newChildren)) return this; Index result= new Index(depth, newEntry, (Ref[])newChildren, mask, count); result.attachEncoding(encoding); // this is an optimisation to avoid re-encoding return result; } @Override public V get(K key) { MapEntry me = getEntry(key); if (me == null) return null; return me.getValue(); } @Override public MapEntry getEntry(K key) { long kl = key.hexLength(); long pl = depth; if (kl < pl) return null; // key is too short to start with current prefix if (kl == pl) { if (entry!=null) { K ekey=entry.getKey(); if (keyMatch(key,ekey)) return entry; // we matched this key exactly! } if (pl cc = getChild(digit); if (cc == null) return null; return cc.getEntry(key); } /** * Gets the child for a specific digit, or null if not found * * @param digit * @return */ private Index getChild(int digit) { int i = Bits.indexForDigit(digit, mask); if (i < 0) return null; return (Index) children[i].getValue(); } @Override public int getRefCount() { // note entry might be null return Cells.refCount(entry) + children.length; } @SuppressWarnings("unchecked") @Override public Ref getRef(int i) { if (entry != null) { int erc = entry.getRefCount(); if (i < erc) return entry.getRef(i); i -= erc; } int cl = children.length; if (i < cl) return (Ref) children[i]; throw new IndexOutOfBoundsException("No ref for index:" + i); } @SuppressWarnings("unchecked") public Index assoc(ACell key, ACell value) { if (!(key instanceof ABlobLike)) return null; return assocEntry(MapEntry.create((K)key, (V)value)); } @SuppressWarnings({ "unchecked", "rawtypes" }) @Override public Index dissoc(K k) { if (count <= 1) { if (count == 0) return this; // Must already be empty singleton if (keyMatch(k,entry.getKey())) { return empty(); } return this; // leave existing entry in place } long pDepth = depth; // hex depth of this node including prefix long kl = effectiveLength(k);; // hex length of key to dissoc if (kl < pDepth) { // no match for sure, so no change return this; } if (kl == pDepth) { // need to check for match with current entry if (entry == null) return this; if (!keyMatch(k,entry.getKey())) return this; // at this point have matched entry exactly. So need to remove it safely while // preserving invariants if (children.length == 1) { Index c = (Index) children[0].getValue(); return c; } else { // Clearing current entry, keeping existing children (must be 2+) return new Index(depth, null, children, mask, count - 1); } } // dissoc beyond current prefix length, so need to check children int digit = k.getHexDigit(pDepth); int childIndex = Bits.indexForDigit(digit, mask); if (childIndex < 0) return this; // key miss // we know we need to replace a child Index oldChild = (Index) children[childIndex].getValue(); Index newChild = oldChild.dissoc(k); Index r=this.withChild(digit, oldChild, newChild); return r; } /** * Tests if two keys match (up to the maximum index key depth) * @param a First key * @param b second key * @return True if keys match */ public static >boolean keyMatch(K a, K b) { long n=a.count(); if (n getPrefix() { if (entry!=null) return entry.getKey(); int n=children.length; if (n==0) return Blob.EMPTY; return children[0].getValue().getPrefix(); } @Override protected void accumulateEntrySet(Set> h) { for (int i = 0; i < children.length; i++) { children[i].getValue().accumulateEntrySet(h); } if (entry != null) h.add(entry); } @Override protected void accumulateKeySet(Set h) { for (int i = 0; i < children.length; i++) { children[i].getValue().accumulateKeySet(h); } if (entry != null) h.add(entry.getKey()); } @Override protected void accumulateValues(java.util.List al) { // add this entry first, since we want lexicographic order if (entry != null) al.add(entry.getValue()); for (int i = 0; i < children.length; i++) { children[i].getValue().accumulateValues(al); } } @Override public void forEach(BiConsumer action) { if (entry != null) action.accept(entry.getKey(), entry.getValue()); for (int i = 0; i < children.length; i++) { children[i].getValue().forEach(action); } } @Override public Index assocEntry(MapEntry e) { return assocEntry(e,0); } @SuppressWarnings({ "unchecked", "rawtypes" }) private Index assocEntry(MapEntry e, long match) { if (count == 0L) return create(e); if (count == 1L) { assert (mask == (short) 0); // should be no children if (entry.keyEquals(e)) { if (entry == e) return this; // recreate, preserving current depth return create(e); } } ACell maybeValidKey=e.getKey(); if (!(maybeValidKey instanceof ABlobLike)) return null; // invalid key type! ABlobLike k = (ABlobLike)maybeValidKey; long newKeyLength = effectiveLength(k);; // hex length of new key, up to MAX_DEPTH long mkl; // matched key length ABlobLike prefix=getPrefix(); // prefix of current node (valid up to pDepth) if (newKeyLength >= depth) { // constrain relevant key length by match with current prefix mkl = match + k.hexMatch(prefix, match, depth-match); } else { mkl = match + k.hexMatch(prefix, match, newKeyLength - match); } if (mkl < depth) { // we collide at a point shorter than the current prefix length if (mkl == newKeyLength) { // new key is subset of the current prefix, so split prefix at key position mkl // doesn't need to adjust child depths, since they are splitting at the same // point int splitDigit = prefix.getHexDigit(mkl); short splitMask = (short) (1 << splitDigit); Index result = new Index(mkl, e, new Ref[] { this.getRef() }, splitMask, count + 1); return result; } else { // we need to fork the current prefix in two at position mkl Index branch1 = this; Index branch2 = create(e); int d1 = prefix.getHexDigit(mkl); int d2 = k.getHexDigit(mkl); if (d1 > d2) { // swap to get in right order Index temp = branch1; branch1 = branch2; branch2 = temp; } Ref[] newChildren = new Ref[] { branch1.getRef(), branch2.getRef() }; short newMask = (short) ((1 << d1) | (1 << d2)); Index fork = new Index(mkl, null, newChildren, newMask, count + 1L); return fork; } } assert (newKeyLength >= depth); if (newKeyLength == depth) { // we must have matched the current entry exactly if (entry == null) { // just add entry at this position return new Index(depth, e, (Ref[]) children, mask, count + 1); } if (entry == e) return this; // swap entry, no need to change count return new Index(depth, e, (Ref[]) children, mask, count); } // at this point we have matched full prefix, but new key length is longer. // so we need to update (or add) exactly one child int childDigit = k.getHexDigit(depth); Index oldChild = getChild(childDigit); Index newChild; if (oldChild == null) { newChild = create(e); // Must be at least 1 beyond current prefix. Safe because pDepth < MAX_DEPTH } else { newChild = oldChild.assocEntry(e); } return withChild(childDigit, oldChild, newChild); // can't be null since associng } /** * Updates this Index with a new child. * * Either oldChild or newChild may be null. Empty maps are treated as null. * * @param childDigit Digit for new child * @param newChild * @return Index with child removed, or null if Index was deleted entirely */ @SuppressWarnings({ "rawtypes", "unchecked", "null"}) private Index withChild(int childDigit, Index oldChild, Index newChild) { // consider empty children as null if (oldChild == EMPTY) oldChild = null; if (newChild == EMPTY) newChild = null; if (oldChild == newChild) return this; int n = children.length; // we need a new child array Ref[] newChildren = children; if (oldChild == null) { // definitely need a new entry newChildren = new Ref[n + 1]; int newPos = Bits.positionForDigit(childDigit, mask); short newMask = (short) (mask | (1 << childDigit)); System.arraycopy(children, 0, newChildren, 0, newPos); // earlier entries newChildren[newPos] = newChild.getRef(); System.arraycopy(children, newPos, newChildren, newPos + 1, n - newPos); // later entries return new Index(depth, entry, newChildren, newMask, count + newChild.count()); } else { // dealing with an existing child if (newChild == null) { // need to delete an existing child int delPos = Bits.positionForDigit(childDigit, mask); // handle special case where entry is null and we need to promote the one remaining child if (entry == null) { if (n == 2) { Index rm = (Index) children[1 - delPos].getValue(); return rm; } } newChildren = new Ref[n - 1]; short newMask = (short) (mask & ~(1 << childDigit)); System.arraycopy(children, 0, newChildren, 0, delPos); // earlier entries System.arraycopy(children, delPos + 1, newChildren, delPos, n - delPos - 1); // later entries return new Index(depth, entry, newChildren, newMask, count - oldChild.count()); } else { // need to replace a child int childPos = Bits.positionForDigit(childDigit, mask); newChildren = children.clone(); newChildren[childPos] = newChild.getRef(); long newCount = count + newChild.count() - oldChild.count(); return new Index(depth, entry, newChildren, mask, newCount); } } } @Override public R reduceValues(BiFunction func, R initial) { if (entry != null) initial = func.apply(initial, entry.getValue()); int n = children.length; for (int i = 0; i < n; i++) { initial = children[i].getValue().reduceValues(func, initial); } return initial; } @Override public R reduceEntries(BiFunction, ? extends R> func, R initial) { if (entry != null) initial = func.apply(initial, entry); int n = children.length; for (int i = 0; i < n; i++) { initial = children[i].getValue().reduceEntries(func, initial); } return initial; } @Override public Index filterValues(Predicate pred) { Index r=this; for (int i=0; i<16; i++) { if (r==null) break; // might be null from dissoc Index oldChild=r.getChild(i); if (oldChild==null) continue; Index newChild=oldChild.filterValues(pred); r=r.withChild(i, oldChild, newChild); } // check entry at this level. A child might have moved here during the above loop! if (r!=null) { if ((r.entry!=null)&&!pred.test(r.entry.getValue())) r=r.dissoc(r.entry.getKey()); } // check if whole Index was emptied if (r==null) { // everything deleted, but need return empty(); } return r; } @Override public int encode(byte[] bs, int pos) { bs[pos++]=Tag.INDEX; return encodeRaw(bs,pos); } @Override public int encodeRaw(byte[] bs, int pos) { pos = Format.writeVLCCount(bs,pos, count); if (count == 0) return pos; // nothing more to know... this must be the empty singleton pos = MapEntry.encodeCompressed(entry,bs,pos); // entry may be null if (count == 1) return pos; // must be a single entry // We only have a meaningful depth if more than one entry pos = Format.writeVLCCount(bs,pos, depth); // finally write children pos = Utils.writeShort(bs,pos,mask); int n = children.length; for (int i = 0; i < n; i++) { pos = encodeChild(bs,pos,i); } return pos; } private int encodeChild(byte[] bs, int pos, int i) { Ref> cref = children[i]; return cref.encode(bs, pos); // TODO: maybe compress single entries? // AIndex c=cref.getValue(); // if (c.count==1) { // MapEntry me=c.entryAt(0); // pos = me.getRef().encode(bs, pos); // } else { // pos = cref.encode(bs,pos); // } // return pos; } @Override public int estimatedEncodingSize() { return 100 + (children.length*2+1) * Format.MAX_EMBEDDED_LENGTH; } @SuppressWarnings({ "unchecked", "rawtypes" }) public static , V extends ACell> Index read(Blob b, int pos) throws BadFormatException { long count = Format.readVLCCount(b,pos+1); if (count < 0) throw new BadFormatException("Negative count!"); if (count == 0) return (Index) EMPTY; int epos=pos+1+Format.getVLCCountLength(count); byte etype=b.byteAt(epos++); MapEntry me; if (etype==Tag.NULL) { me=null; } else if (etype==Tag.VECTOR){ Ref kr=Format.readRef(b,epos); epos+=kr.getEncodingLength(); Ref vr=Format.readRef(b,epos); epos+=vr.getEncodingLength(); me=MapEntry.createRef(kr, vr); if (count == 1) { // single entry map, doesn't need separate depth encoding long depth=kr.isEmbedded()?kr.getValue().hexLength():MAX_DEPTH; Index result = new Index(depth, me, EMPTY_CHILDREN, (short) 0, 1L); result.attachEncoding(b.slice(pos, epos)); return result; } } else { throw new BadFormatException("Invalid MapEntry tag in Index: "+etype); } Index result; long depth = Format.readVLCCount(b,epos); if (depth < 0) throw new BadFormatException("Negative depth!"); if (depth >=MAX_DEPTH) { if (depth==MAX_DEPTH) throw new BadFormatException("More than one entry and MAX_DEPTH"); throw new BadFormatException("Excessive depth!"); } epos+=Format.getVLCCountLength(depth); // Need to include children short mask = b.shortAt(epos); epos+=2; int n = Utils.bitCount(mask); Ref[] children = new Ref[n]; for (int i = 0; i < n; i++) { Ref cr=Format.readRef(b,epos); epos+=cr.getEncodingLength(); children[i] =cr; } result= new Index(depth, me, children, mask, count); result.attachEncoding(b.slice(pos, epos)); return result; } @Override protected MapEntry getEntryByHash(Hash hash) { throw new UnsupportedOperationException(); } @SuppressWarnings("unchecked") @Override public void validate() throws InvalidDataException { super.validate(); if ((depth<0)||(depth>MAX_DEPTH)) throw new InvalidDataException("Invalid index depth",this); if (entry!=null) { ABlobLike k=RT.ensureBlobLike(entry.getKey()); if (k==null) throw new InvalidDataException("Invalid entry key type: "+Utils.getClassName(entry.getKey()),this); if (depth!=effectiveLength(k)) throw new InvalidDataException("Entry at inconsistent depth",this); } ABlobLike prefix=getPrefix(); if (depth>effectiveLength(prefix)) throw new InvalidDataException("depth longer than common prefix",this); long ecount = (entry == null) ? 0 : 1; int n = children.length; for (int i = 0; i < n; i++) { ACell o = children[i].getValue(); if (!(o instanceof Index)) throw new InvalidDataException("Illegal Index child type: " + Utils.getClass(o), this); Index c = (Index) o; long ccount=c.count(); if (ccount==0) { throw new InvalidDataException("Child "+i+" should not be empty! At depth "+depth,this); } if (c.getDepth() <= getDepth()) { throw new InvalidDataException("Child must have greater depth than parent", this); } ABlobLike childPrefix=c.getPrefix(); long ml=prefix.hexMatch(childPrefix, 0, depth); if (ml prefix) { return Math.min(MAX_DEPTH, prefix.hexLength()); } /** * Gets the depth of this Index node, i.e. the hex length of the common prefix (up to MAX_DEPTH) * * @return */ long getDepth() { return depth; } @Override public void validateCell() throws InvalidDataException { if (count == 0) { if (this != EMPTY) throw new InvalidDataException("Non-singleton empty Index", this); return; } else if (count == 1) { if (entry == null) throw new InvalidDataException("Single entry Index with null entry?", this); if (mask != 0) throw new InvalidDataException("Single entry Index with child mask?", this); return; } long pDepth=getDepth(); if (pDepth>MAX_DEPTH) throw new InvalidDataException("Excessive Prefix Depth beyond MAX_DEPTH", this); if (pDepth==MAX_DEPTH) { if (count!=1) throw new InvalidDataException("Can only have a single entry at MAX_DEPTH",this); } // at least count 2 from this point int cn = Utils.bitCount(mask); if (cn != children.length) throw new InvalidDataException( "Illegal mask: " + Utils.toHexString(mask) + " for given number of children: " + children.length, this); if (entry != null) { entry.validateCell(); long entryKeyLength=entry.getKey().hexLength(); if (entryKeyLengthMAX_DEPTH) { if (pDepth!=MAX_DEPTH) throw new InvalidDataException("Key too long at this prefix depth",this); } if (cn == 0) throw new InvalidDataException("Index with entry and count=" + count + " must have children", this); } else { if (cn <= 1) throw new InvalidDataException( "Index with no entry and count=" + count + " must have two or more children", this); } } @SuppressWarnings("unchecked") @Override public Index empty() { return (Index) EMPTY; } @SuppressWarnings("unchecked") public static , V extends ACell> Index none() { return (Index) EMPTY; } @Override public MapEntry entryAt(long ix) { if (entry != null) { if (ix == 0L) return entry; ix -= 1; } int n = children.length; for (int i = 0; i < n; i++) { Index c = children[i].getValue(); long cc = c.count(); if (ix < cc) return c.entryAt(ix); ix -= cc; } throw new IndexOutOfBoundsException((int)ix); } /** * Slices this Index, starting at the specified position * * Removes n leading entries from this Index, in key order. * * @param start Start position of entries to keep * @return Updated Index with leading entries removed, or null if invalid slice */ @Override public Index slice(long start) { return slice(start,count); } /** * Returns a slice of this Index * * @param start Start position of slice (inclusive) * @param end End position of slice (exclusive) * @return Slice of Index, or null if invalid slice */ @Override public Index slice(long start, long end) { if ((start<0)||(end>count)) return null; if (end bm = this; for (long i=count-1; i>=end; i--) { MapEntry me = bm.entryAt(i); bm = bm.dissoc(me.getKey()); } for (long i = 0; i < start; i++) { MapEntry me = bm.entryAt(0); bm = bm.dissoc(me.getKey()); } return bm; } @SuppressWarnings("unchecked") @Override public boolean equals(ACell a) { if (this == a) return true; // important optimisation for e.g. hashmap equality if (!(a instanceof Index)) return false; // Must be a Index return equals((Index)a); } /** * Checks this Index for equality with another Index * * @param a Index to compare with * @return true if maps are equal, false otherwise. */ public boolean equals(Index a) { if (a==null) return false; long n=this.count(); if (n != a.count()) return false; if (this.mask!=a.mask) return false; if (!Cells.equals(this.entry, a.entry)) return false; return getHash().equals(a.getHash()); } @Override public byte getTag() { return Tag.INDEX; } @Override public ACell toCanonical() { return this; } @Override public boolean containsValue(ACell value) { if ((entry!=null)&&Cells.equals(value, entry.getValue())) return true; for (Ref> cr : children) { if (cr.getValue().containsValue(value)) return true; } return false; } @SuppressWarnings("unchecked") public static , K extends ABlobLike, V extends ACell> R create(HashMap map) { Index result=(Index) EMPTY; for (Map.Entry me: map.entrySet()) { result=result.assoc(me.getKey(), me.getValue()); if (result==null) return null; } return (R) result; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy