package convex.core.data;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.function.BiFunction;
import java.util.function.Predicate;
import convex.core.exceptions.BadFormatException;
import convex.core.exceptions.InvalidDataException;
import convex.core.lang.RT;
import convex.core.util.Bits;
import convex.core.util.Utils;
/**
* Index node implementation, providing an efficient radix tree based immutable data structure for indexed access and sorting.
*
* Supporting:
* <ul>
* <li>An optional prefix string</li>
* <li>An optional entry with this exact prefix</li>
* <li>Up to 16 child entries at the next level of depth</li>
* </ul>
*
* @param <K> Type of Keys
* @param <V> Type of values
*/
public final class Index<K extends ABlobLike<?>, V extends ACell> extends AIndex<K, V> {
@SuppressWarnings({ "unchecked", "rawtypes" })
public static final Ref<Index>[] EMPTY_CHILDREN = new Ref[0];
/**
* Maximum depth of index, in hex digits
*/
private static final int MAX_DEPTH=64;
/**
* Maximum usable size of keys, in bytes
*/
private static final int MAX_KEY_BYTES=MAX_DEPTH/2;
/**
* Empty Index singleton
*/
public static final Index<ABlobLike<?>, ?> EMPTY = Cells.intern(new Index(0, null, EMPTY_CHILDREN,(short) 0, 0L));
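// Illustrative usage sketch (not part of the original source): building and querying an Index via
// the public factory and lookup methods defined below. Blob.fromHex and RT.cvm are existing
// convex-core utilities; the keys and values shown are arbitrary examples.
//
//   Index<Blob, ACell> ix = Index.of(Blob.fromHex("12"), 1L, Blob.fromHex("1234"), 2L);
//   ACell v = ix.get(Blob.fromHex("1234"));            // CVM value for 2L
//   ix = ix.assoc(Blob.fromHex("99"), RT.cvm("foo"));  // immutable update, returns a new Index
//   long n = ix.count();                               // 3 entries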
/**
* Child entries, i.e. nodes with keys where this node is a common prefix. Only contains children where mask is set.
* Each child must contain at least one entry.
*/
private final Ref<Index<K, V>>[] children;
/**
* Entry for this node of the radix tree. Invariant assumption that the prefix
* is correct. May be null if there is no entry at this node.
*/
private final MapEntry<K, V> entry;
/**
* Mask of child entries, 16 bits for each hex digit that may be present.
*/
private final short mask;
/**
* Depth of radix tree entry in number of hex digits.
*/
private final long depth;
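// Illustrative example (not from the original source) of how the fields above combine: for the
// keys 0x12, 0x1234 and 0x1299 the root node has depth=2 (shared prefix "12"), entry holding the
// mapping for 0x12, mask=0x0208 (bits 3 and 9 set, the hex digits following the prefix), and two
// children at depth 4 holding the entries for 0x1234 and 0x1299.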
@SuppressWarnings({ "rawtypes", "unchecked" })
protected Index(long depth, MapEntry<K, V> entry, Ref[] entries,
short mask, long count) {
super(count);
this.depth = depth;
this.entry = entry;
this.children = (Ref[]) entries;
this.mask = mask;
}
@SuppressWarnings("rawtypes")
public static <K extends ABlobLike<?>, V extends ACell> Index<K, V> unsafeCreate(long depth, MapEntry<K, V> entry, Ref[] entries,
int mask, long count) {
return new Index(depth,entry,entries,(short)mask,count);
}
@SuppressWarnings("unchecked")
public static <K extends ABlobLike<?>, V extends ACell> Index<K, V> create(MapEntry<K, V> me) {
ACell k=me.getKey();
if (!(k instanceof ABlobLike)) return null; // check in case invalid key type
long depth = effectiveLength((K)k);
return new Index(depth, me, EMPTY_CHILDREN, (short) 0, 1L);
}
public static <K extends ABlobLike<?>, V extends ACell> Index<K, V> create(K k, V v) {
MapEntry<K, V> me = MapEntry.create(k, v);
long hexLength = effectiveLength(k);
return new Index(hexLength, me, EMPTY_CHILDREN, (short) 0, 1L);
}
public static <K extends ABlobLike<?>, V extends ACell> Index<K, V> of(Object k, Object v) {
return create(RT.cvm(k),RT.cvm(v));
}
@SuppressWarnings("unchecked")
public static <K extends ABlobLike<?>, V extends ACell> Index<K, V> of(Object... kvs) {
int n = kvs.length;
if (Utils.isOdd(n)) throw new IllegalArgumentException("Even number of key + values required");
Index<K, V> result = (Index<K, V>) EMPTY;
for (int i = 0; i < n; i += 2) {
V value=RT.cvm(kvs[i + 1]);
result = result.assoc((K) kvs[i], value);
}
return (Index<K, V>) result;
}
@Override
public boolean isCanonical() {
return true;
}
@Override public final boolean isCVMValue() {
return true;
}
@Override
public boolean isDataValue() {
return true;
}
@SuppressWarnings("unchecked")
@Override
public Index<K, V> updateRefs(IRefFunction func) {
MapEntry<K, V> newEntry = Ref.update(entry,func);
Ref<Index<K, V>>[] newChildren = Ref.updateRefs(children, func);
if ((entry == newEntry) && (children == newChildren)) return this;
Index<K, V> result= new Index(depth, newEntry, (Ref[])newChildren, mask, count);
result.attachEncoding(encoding); // this is an optimisation to avoid re-encoding
return result;
}
@Override
public V get(K key) {
MapEntry me = getEntry(key);
if (me == null) return null;
return me.getValue();
}
@Override
public MapEntry<K, V> getEntry(K key) {
long kl = key.hexLength();
long pl = depth;
if (kl < pl) return null; // key is too short to start with current prefix
if (kl == pl) {
if (entry!=null) {
K ekey=entry.getKey();
if (keyMatch(key,ekey)) return entry; // we matched this key exactly!
}
if (pl<MAX_DEPTH) return null; // no match possible at this depth
}
// key extends beyond the current prefix, so check the child for the next hex digit
int digit = key.getHexDigit(pl);
Index<K, V> cc = getChild(digit);
if (cc == null) return null;
return cc.getEntry(key);
}
/**
* Gets the child for a specific digit, or null if not found
*
* @param digit Hex digit (0-15) of the child to get
* @return Child Index for the given digit, or null if not present
*/
private Index<K, V> getChild(int digit) {
int i = Bits.indexForDigit(digit, mask);
if (i < 0) return null;
return (Index<K, V>) children[i].getValue();
}
@Override
public int getRefCount() {
// note entry might be null
return Cells.refCount(entry) + children.length;
}
@SuppressWarnings("unchecked")
@Override
public Ref<ACell> getRef(int i) {
if (entry != null) {
int erc = entry.getRefCount();
if (i < erc) return entry.getRef(i);
i -= erc;
}
int cl = children.length;
if (i < cl) return (Ref) children[i];
throw new IndexOutOfBoundsException("No ref for index:" + i);
}
@SuppressWarnings("unchecked")
public Index<K, V> assoc(ACell key, ACell value) {
if (!(key instanceof ABlobLike)) return null;
return assocEntry(MapEntry.create((K)key, (V)value));
}
@SuppressWarnings({ "unchecked", "rawtypes" })
@Override
public Index<K, V> dissoc(K k) {
if (count <= 1) {
if (count == 0) return this; // Must already be empty singleton
if (keyMatch(k,entry.getKey())) {
return empty();
}
return this; // leave existing entry in place
}
long pDepth = depth; // hex depth of this node including prefix
long kl = effectiveLength(k); // hex length of key to dissoc
if (kl < pDepth) {
// no match for sure, so no change
return this;
}
if (kl == pDepth) {
// need to check for match with current entry
if (entry == null) return this;
if (!keyMatch(k,entry.getKey())) return this;
// at this point have matched entry exactly. So need to remove it safely while
// preserving invariants
if (children.length == 1) {
Index<K, V> c = (Index<K, V>) children[0].getValue();
return c;
} else {
// Clearing current entry, keeping existing children (must be 2+)
return new Index(depth, null, children, mask, count - 1);
}
}
// dissoc beyond current prefix length, so need to check children
int digit = k.getHexDigit(pDepth);
int childIndex = Bits.indexForDigit(digit, mask);
if (childIndex < 0) return this; // key miss
// we know we need to replace a child
Index<K, V> oldChild = (Index<K, V>) children[childIndex].getValue();
Index<K, V> newChild = oldChild.dissoc(k);
Index<K, V> r=this.withChild(digit, oldChild, newChild);
return r;
}
/**
* Tests if two keys match (up to the maximum index key depth)
* @param a First key
* @param b second key
* @return True if keys match
*/
public static <K extends ABlobLike<?>> boolean keyMatch(K a, K b) {
long n=a.count();
if (n<MAX_KEY_BYTES) {
// keys shorter than the maximum key length must be equal in length and content to match
return (b.count()==n)&&(a.hexMatch(b,0,n*2)==(n*2));
}
// longer keys are compared on the first MAX_DEPTH hex digits only
if (b.count()<MAX_KEY_BYTES) return false;
return a.hexMatch(b,0,MAX_DEPTH)==MAX_DEPTH;
}
public ABlobLike<?> getPrefix() {
if (entry!=null) return entry.getKey();
int n=children.length;
if (n==0) return Blob.EMPTY;
return children[0].getValue().getPrefix();
}
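// Illustrative note (not from the original source): keys are only compared up to MAX_DEPTH hex
// digits (MAX_KEY_BYTES = 32 bytes), so two keys that share their first 32 bytes are treated as
// the same key by keyMatch above, e.g.
//
//   Blob k1 = Blob.fromHex("00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff" + "01");
//   Blob k2 = Blob.fromHex("00112233445566778899aabbccddeeff00112233445566778899aabbccddeeff" + "02");
//   boolean same = Index.keyMatch(k1, k2); // true: only the first 64 hex digits are compared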
@Override
protected void accumulateEntrySet(Set<Map.Entry<K, V>> h) {
for (int i = 0; i < children.length; i++) {
children[i].getValue().accumulateEntrySet(h);
}
if (entry != null) h.add(entry);
}
@Override
protected void accumulateKeySet(Set<K> h) {
for (int i = 0; i < children.length; i++) {
children[i].getValue().accumulateKeySet(h);
}
if (entry != null) h.add(entry.getKey());
}
@Override
protected void accumulateValues(java.util.List<V> al) {
// add this entry first, since we want lexicographic order
if (entry != null) al.add(entry.getValue());
for (int i = 0; i < children.length; i++) {
children[i].getValue().accumulateValues(al);
}
}
@Override
public void forEach(BiConsumer<? super K, ? super V> action) {
if (entry != null) action.accept(entry.getKey(), entry.getValue());
for (int i = 0; i < children.length; i++) {
children[i].getValue().forEach(action);
}
}
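// Illustrative usage sketch (not from the original source): iterating over all entries in key order.
//
//   ix.forEach((k, v) -> System.out.println(k + " = " + v));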
@Override
public Index<K, V> assocEntry(MapEntry<K, V> e) {
return assocEntry(e,0);
}
@SuppressWarnings({ "unchecked", "rawtypes" })
private Index<K, V> assocEntry(MapEntry<K, V> e, long match) {
if (count == 0L) return create(e);
if (count == 1L) {
assert (mask == (short) 0); // should be no children
if (entry.keyEquals(e)) {
if (entry == e) return this;
// recreate, preserving current depth
return create(e);
}
}
ACell maybeValidKey=e.getKey();
if (!(maybeValidKey instanceof ABlobLike)) return null; // invalid key type!
ABlobLike<?> k = (ABlobLike<?>)maybeValidKey;
long newKeyLength = effectiveLength(k); // hex length of new key, up to MAX_DEPTH
long mkl; // matched key length
ABlobLike<?> prefix=getPrefix(); // prefix of current node (valid up to current depth)
if (newKeyLength >= depth) {
// constrain relevant key length by match with current prefix
mkl = match + k.hexMatch(prefix, match, depth-match);
} else {
mkl = match + k.hexMatch(prefix, match, newKeyLength - match);
}
if (mkl < depth) {
// we collide at a point shorter than the current prefix length
if (mkl == newKeyLength) {
// new key is subset of the current prefix, so split prefix at key position mkl
// doesn't need to adjust child depths, since they are splitting at the same
// point
int splitDigit = prefix.getHexDigit(mkl);
short splitMask = (short) (1 << splitDigit);
Index<K, V> result = new Index(mkl, e, new Ref[] { this.getRef() }, splitMask, count + 1);
return result;
} else {
// we need to fork the current prefix in two at position mkl
Index<K, V> branch1 = this;
Index<K, V> branch2 = create(e);
int d1 = prefix.getHexDigit(mkl);
int d2 = k.getHexDigit(mkl);
if (d1 > d2) {
// swap to get in right order
Index<K, V> temp = branch1;
branch1 = branch2;
branch2 = temp;
}
Ref[] newChildren = new Ref[] { branch1.getRef(), branch2.getRef() };
short newMask = (short) ((1 << d1) | (1 << d2));
Index<K, V> fork = new Index(mkl, null, newChildren, newMask, count + 1L);
return fork;
}
}
assert (newKeyLength >= depth);
if (newKeyLength == depth) {
// we must have matched the current entry exactly
if (entry == null) {
// just add entry at this position
return new Index(depth, e, (Ref[]) children, mask, count + 1);
}
if (entry == e) return this;
// swap entry, no need to change count
return new Index(depth, e, (Ref[]) children, mask, count);
}
// at this point we have matched full prefix, but new key length is longer.
// so we need to update (or add) exactly one child
int childDigit = k.getHexDigit(depth);
Index<K, V> oldChild = getChild(childDigit);
Index<K, V> newChild;
if (oldChild == null) {
newChild = create(e); // Must be at least 1 beyond current prefix. Safe because pDepth < MAX_DEPTH
} else {
newChild = oldChild.assocEntry(e);
}
return withChild(childDigit, oldChild, newChild); // can't be null since associng
}
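// Illustrative walk-through (not from the original source) of the fork case above: associng the
// key 0x1299 into a single-entry Index holding 0x1234 gives mkl=2 (the shared prefix "12" is
// shorter than the existing depth 4), d1=3 and d2=9, so a new parent node is created at depth 2
// with no entry, mask=(1<<3)|(1<<9)=0x0208, and the two single-entry nodes as its children.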
/**
* Updates this Index with a new child.
*
* Either oldChild or newChild may be null. Empty maps are treated as null.
*
* @param childDigit Hex digit (0-15) at which the child is stored
* @param oldChild Existing child at this digit, or null if none
* @param newChild Replacement child, or null to remove
* @return Updated Index, or null if the Index was deleted entirely
*/
@SuppressWarnings({ "rawtypes", "unchecked", "null"})
private Index<K, V> withChild(int childDigit, Index<K, V> oldChild, Index<K, V> newChild) {
// consider empty children as null
if (oldChild == EMPTY) oldChild = null;
if (newChild == EMPTY) newChild = null;
if (oldChild == newChild) return this;
int n = children.length;
// we need a new child array
Ref[] newChildren = children;
if (oldChild == null) {
// definitely need a new entry
newChildren = new Ref[n + 1];
int newPos = Bits.positionForDigit(childDigit, mask);
short newMask = (short) (mask | (1 << childDigit));
System.arraycopy(children, 0, newChildren, 0, newPos); // earlier entries
newChildren[newPos] = newChild.getRef();
System.arraycopy(children, newPos, newChildren, newPos + 1, n - newPos); // later entries
return new Index(depth, entry, newChildren, newMask,
count + newChild.count());
} else {
// dealing with an existing child
if (newChild == null) {
// need to delete an existing child
int delPos = Bits.positionForDigit(childDigit, mask);
// handle special case where entry is null and we need to promote the one remaining child
if (entry == null) {
if (n == 2) {
Index<K, V> rm = (Index<K, V>) children[1 - delPos].getValue();
return rm;
}
}
newChildren = new Ref[n - 1];
short newMask = (short) (mask & ~(1 << childDigit));
System.arraycopy(children, 0, newChildren, 0, delPos); // earlier entries
System.arraycopy(children, delPos + 1, newChildren, delPos, n - delPos - 1); // later entries
return new Index(depth, entry, newChildren, newMask,
count - oldChild.count());
} else {
// need to replace a child
int childPos = Bits.positionForDigit(childDigit, mask);
newChildren = children.clone();
newChildren[childPos] = newChild.getRef();
long newCount = count + newChild.count() - oldChild.count();
return new Index(depth, entry, newChildren, mask, newCount);
}
}
}
@Override
public <R> R reduceValues(BiFunction<? super R, ? super V, ? extends R> func, R initial) {
if (entry != null) initial = func.apply(initial, entry.getValue());
int n = children.length;
for (int i = 0; i < n; i++) {
initial = children[i].getValue().reduceValues(func, initial);
}
return initial;
}
@Override
public <R> R reduceEntries(BiFunction<? super R, MapEntry<K, V>, ? extends R> func, R initial) {
if (entry != null) initial = func.apply(initial, entry);
int n = children.length;
for (int i = 0; i < n; i++) {
initial = children[i].getValue().reduceEntries(func, initial);
}
return initial;
}
@Override
public Index<K, V> filterValues(Predicate<V> pred) {
Index<K, V> r=this;
for (int i=0; i<16; i++) {
if (r==null) break; // might be null from dissoc
Index<K, V> oldChild=r.getChild(i);
if (oldChild==null) continue;
Index<K, V> newChild=oldChild.filterValues(pred);
r=r.withChild(i, oldChild, newChild);
}
// check entry at this level. A child might have moved here during the above loop!
if (r!=null) {
if ((r.entry!=null)&&!pred.test(r.entry.getValue())) r=r.dissoc(r.entry.getKey());
}
// check if whole Index was emptied
if (r==null) {
// everything was deleted, but we still need to return a valid empty Index
return empty();
}
return r;
}
@Override
public int encode(byte[] bs, int pos) {
bs[pos++]=Tag.INDEX;
return encodeRaw(bs,pos);
}
@Override
public int encodeRaw(byte[] bs, int pos) {
pos = Format.writeVLCCount(bs,pos, count);
if (count == 0) return pos; // nothing more to know... this must be the empty singleton
pos = MapEntry.encodeCompressed(entry,bs,pos); // entry may be null
if (count == 1) return pos; // must be a single entry
// We only have a meaningful depth if more than one entry
pos = Format.writeVLCCount(bs,pos, depth);
// finally write children
pos = Utils.writeShort(bs,pos,mask);
int n = children.length;
for (int i = 0; i < n; i++) {
pos = encodeChild(bs,pos,i);
}
return pos;
}
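// Informal sketch (not from the original source) of the encoding produced by encode/encodeRaw above:
//
//   [Tag.INDEX] [VLC count] [compressed entry or null] [VLC depth] [16-bit mask] [child ref]*
//
// Everything after the count is omitted when count==0, and the depth, mask and child refs are
// omitted when count==1, matching the early returns above.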
private int encodeChild(byte[] bs, int pos, int i) {
Ref<Index<K, V>> cref = children[i];
return cref.encode(bs, pos);
// TODO: maybe compress single entries?
// AIndex c=cref.getValue();
// if (c.count==1) {
// MapEntry me=c.entryAt(0);
// pos = me.getRef().encode(bs, pos);
// } else {
// pos = cref.encode(bs,pos);
// }
// return pos;
}
@Override
public int estimatedEncodingSize() {
return 100 + (children.length*2+1) * Format.MAX_EMBEDDED_LENGTH;
}
@SuppressWarnings({ "unchecked", "rawtypes" })
public static <K extends ABlobLike<?>, V extends ACell> Index<K, V> read(Blob b, int pos) throws BadFormatException {
long count = Format.readVLCCount(b,pos+1);
if (count < 0) throw new BadFormatException("Negative count!");
if (count == 0) return (Index) EMPTY;
int epos=pos+1+Format.getVLCCountLength(count);
byte etype=b.byteAt(epos++);
MapEntry<K, V> me;
if (etype==Tag.NULL) {
me=null;
} else if (etype==Tag.VECTOR){
Ref<K> kr=Format.readRef(b,epos);
epos+=kr.getEncodingLength();
Ref<V> vr=Format.readRef(b,epos);
epos+=vr.getEncodingLength();
me=MapEntry.createRef(kr, vr);
if (count == 1) {
// single entry map, doesn't need separate depth encoding
long depth=kr.isEmbedded()?kr.getValue().hexLength():MAX_DEPTH;
Index<K, V> result = new Index(depth, me, EMPTY_CHILDREN, (short) 0, 1L);
result.attachEncoding(b.slice(pos, epos));
return result;
}
} else {
throw new BadFormatException("Invalid MapEntry tag in Index: "+etype);
}
Index<K, V> result;
long depth = Format.readVLCCount(b,epos);
if (depth < 0) throw new BadFormatException("Negative depth!");
if (depth >=MAX_DEPTH) {
if (depth==MAX_DEPTH) throw new BadFormatException("More than one entry and MAX_DEPTH");
throw new BadFormatException("Excessive depth!");
}
epos+=Format.getVLCCountLength(depth);
// Need to include children
short mask = b.shortAt(epos);
epos+=2;
int n = Utils.bitCount(mask);
Ref[] children = new Ref[n];
for (int i = 0; i < n; i++) {
Ref cr=Format.readRef(b,epos);
epos+=cr.getEncodingLength();
children[i] =cr;
}
result= new Index(depth, me, children, mask, count);
result.attachEncoding(b.slice(pos, epos));
return result;
}
@Override
protected MapEntry<K, V> getEntryByHash(Hash hash) {
throw new UnsupportedOperationException();
}
@SuppressWarnings("unchecked")
@Override
public void validate() throws InvalidDataException {
super.validate();
if ((depth<0)||(depth>MAX_DEPTH)) throw new InvalidDataException("Invalid index depth",this);
if (entry!=null) {
ABlobLike<?> k=RT.ensureBlobLike(entry.getKey());
if (k==null) throw new InvalidDataException("Invalid entry key type: "+Utils.getClassName(entry.getKey()),this);
if (depth!=effectiveLength(k)) throw new InvalidDataException("Entry at inconsistent depth",this);
}
ABlobLike<?> prefix=getPrefix();
if (depth>effectiveLength(prefix)) throw new InvalidDataException("depth longer than common prefix",this);
long ecount = (entry == null) ? 0 : 1;
int n = children.length;
for (int i = 0; i < n; i++) {
ACell o = children[i].getValue();
if (!(o instanceof Index))
throw new InvalidDataException("Illegal Index child type: " + Utils.getClass(o), this);
Index<K, V> c = (Index<K, V>) o;
long ccount=c.count();
if (ccount==0) {
throw new InvalidDataException("Child "+i+" should not be empty! At depth "+depth,this);
}
if (c.getDepth() <= getDepth()) {
throw new InvalidDataException("Child must have greater depth than parent", this);
}
ABlobLike<?> childPrefix=c.getPrefix();
long ml=prefix.hexMatch(childPrefix, 0, depth);
if (ml<depth) throw new InvalidDataException("Child "+i+" does not share prefix up to depth "+depth,this);
ecount += ccount;
}
if (count != ecount) throw new InvalidDataException("Bad entry count: " + ecount + " expected: " + count, this);
}
private static long effectiveLength(ABlobLike<?> prefix) {
return Math.min(MAX_DEPTH, prefix.hexLength());
}
/**
* Gets the depth of this Index node, i.e. the hex length of the common prefix (up to MAX_DEPTH)
*
* @return Depth of this node in hex digits
*/
long getDepth() {
return depth;
}
@Override
public void validateCell() throws InvalidDataException {
if (count == 0) {
if (this != EMPTY) throw new InvalidDataException("Non-singleton empty Index", this);
return;
} else if (count == 1) {
if (entry == null) throw new InvalidDataException("Single entry Index with null entry?", this);
if (mask != 0) throw new InvalidDataException("Single entry Index with child mask?", this);
return;
}
long pDepth=getDepth();
if (pDepth>MAX_DEPTH) throw new InvalidDataException("Excessive Prefix Depth beyond MAX_DEPTH", this);
if (pDepth==MAX_DEPTH) {
if (count!=1) throw new InvalidDataException("Can only have a single entry at MAX_DEPTH",this);
}
// at least count 2 from this point
int cn = Utils.bitCount(mask);
if (cn != children.length) throw new InvalidDataException(
"Illegal mask: " + Utils.toHexString(mask) + " for given number of children: " + children.length, this);
if (entry != null) {
entry.validateCell();
long entryKeyLength=entry.getKey().hexLength();
if (entryKeyLength<pDepth) throw new InvalidDataException("Entry key too short for prefix depth",this);
if (entryKeyLength>MAX_DEPTH) {
if (pDepth!=MAX_DEPTH) throw new InvalidDataException("Key too long at this prefix depth",this);
}
if (cn == 0)
throw new InvalidDataException("Index with entry and count=" + count + " must have children", this);
} else {
if (cn <= 1) throw new InvalidDataException(
"Index with no entry and count=" + count + " must have two or more children", this);
}
}
@SuppressWarnings("unchecked")
@Override
public Index<K, V> empty() {
return (Index<K, V>) EMPTY;
}
@SuppressWarnings("unchecked")
public static <K extends ABlobLike<?>, V extends ACell> Index<K, V> none() {
return (Index<K, V>) EMPTY;
}
@Override
public MapEntry<K, V> entryAt(long ix) {
if (entry != null) {
if (ix == 0L) return entry;
ix -= 1;
}
int n = children.length;
for (int i = 0; i < n; i++) {
Index<K, V> c = children[i].getValue();
long cc = c.count();
if (ix < cc) return c.entryAt(ix);
ix -= cc;
}
throw new IndexOutOfBoundsException((int)ix);
}
/**
* Slices this Index, starting at the specified position
*
* Removes n leading entries from this Index, in key order.
*
* @param start Start position of entries to keep
* @return Updated Index with leading entries removed, or null if invalid slice
*/
@Override
public Index<K, V> slice(long start) {
return slice(start,count);
}
/**
* Returns a slice of this Index
*
* @param start Start position of slice (inclusive)
* @param end End position of slice (exclusive)
* @return Slice of Index, or null if invalid slice
*/
@Override
public Index<K, V> slice(long start, long end) {
if ((start<0)||(end>count)) return null;
if (end<start) return null;
Index<K, V> bm = this;
for (long i=count-1; i>=end; i--) {
MapEntry me = bm.entryAt(i);
bm = bm.dissoc(me.getKey());
}
for (long i = 0; i < start; i++) {
MapEntry me = bm.entryAt(0);
bm = bm.dissoc(me.getKey());
}
return bm;
}
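// Illustrative usage sketch (not from the original source): entries are held in key order, so
// slice selects a contiguous run of entries by position.
//
//   Index<Blob, ACell> ix = Index.of(Blob.fromHex("11"), 1L, Blob.fromHex("22"), 2L, Blob.fromHex("33"), 3L);
//   Index<Blob, ACell> mid = ix.slice(1, 2); // keeps only the entry for key 0x22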
@SuppressWarnings("unchecked")
@Override
public boolean equals(ACell a) {
if (this == a) return true; // important optimisation for e.g. hashmap equality
if (!(a instanceof Index)) return false;
// Must be an Index
return equals((Index<K, V>)a);
}
/**
* Checks this Index for equality with another Index
*
* @param a Index to compare with
* @return true if maps are equal, false otherwise.
*/
public boolean equals(Index<K, V> a) {
if (a==null) return false;
long n=this.count();
if (n != a.count()) return false;
if (this.mask!=a.mask) return false;
if (!Cells.equals(this.entry, a.entry)) return false;
return getHash().equals(a.getHash());
}
@Override
public byte getTag() {
return Tag.INDEX;
}
@Override
public ACell toCanonical() {
return this;
}
@Override
public boolean containsValue(ACell value) {
if ((entry!=null)&&Cells.equals(value, entry.getValue())) return true;
for (Ref<Index<K, V>> cr : children) {
if (cr.getValue().containsValue(value)) return true;
}
return false;
}
@SuppressWarnings("unchecked")
public static <R extends AIndex<K, V>, K extends ABlobLike<?>, V extends ACell> R create(HashMap<K, V> map) {
Index<K, V> result=(Index<K, V>) EMPTY;
for (Map.Entry<K, V> me: map.entrySet()) {
result=result.assoc(me.getKey(), me.getValue());
if (result==null) return null;
}
return (R) result;
}
}