scala.collection.immutable.HashSet.scala Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of spark-core Show documentation
Show all versions of spark-core Show documentation
Shaded version of Apache Spark 2.x.x for Presto
The newest version!
/* __ *\
** ________ ___ / / ___ Scala API **
** / __/ __// _ | / / / _ | (c) 2003-2013, LAMP/EPFL **
** __\ \/ /__/ __ |/ /__/ __ | http://scala-lang.org/ **
** /____/\___/_/ |_/____/_/ | | **
** |/ **
\* */
package scala
package collection
package immutable
import generic._
import scala.collection.parallel.immutable.ParHashSet
import scala.collection.GenSet
import scala.annotation.tailrec
/** This class implements immutable sets using a hash trie.
*
* '''Note:''' The builder of this hash set may return specialized representations for small sets.
*
* @tparam A the type of the elements contained in this hash set.
*
* @author Martin Odersky
* @author Tiark Rompf
* @version 2.8
* @since 2.3
* @define Coll `immutable.HashSet`
* @define coll immutable hash set
*/
@SerialVersionUID(2L)
@deprecatedInheritance("The implementation details of immutable hash sets make inheriting from them unwise.", "2.11.0")
class HashSet[A] extends AbstractSet[A]
with Set[A]
with GenericSetTemplate[A, HashSet]
with SetLike[A, HashSet[A]]
with CustomParallelizable[A, ParHashSet[A]]
with Serializable
{
import HashSet.{nullToEmpty, bufferSize, LeafHashSet}
override def companion: GenericCompanion[HashSet] = HashSet
//class HashSet[A] extends Set[A] with SetLike[A, HashSet[A]] {
override def par = ParHashSet.fromTrie(this)
override def size: Int = 0
override def empty = HashSet.empty[A]
def iterator: Iterator[A] = Iterator.empty
override def foreach[U](f: A => U): Unit = ()
def contains(e: A): Boolean = get0(e, computeHash(e), 0)
override def subsetOf(that: GenSet[A]) = that match {
case that:HashSet[A] =>
// call the specialized implementation with a level of 0 since both this and that are top-level hash sets
subsetOf0(that, 0)
case _ =>
// call the generic implementation
super.subsetOf(that)
}
/**
* A specialized implementation of subsetOf for when both this and that are HashSet[A] and we can take advantage
* of the tree structure of both operands and the precalculated hashcodes of the HashSet1 instances.
* @param that the other set
* @param level the level of this and that hashset
* The purpose of level is to keep track of how deep we are in the tree.
* We need this information for when we arrive at a leaf and have to call get0 on that
* The value of level is 0 for a top-level HashSet and grows in increments of 5
* @return true if all elements of this set are contained in that set
*/
protected def subsetOf0(that: HashSet[A], level: Int) = {
// The default implementation is for the empty set and returns true because the empty set is a subset of all sets
true
}
override def + (e: A): HashSet[A] = updated0(e, computeHash(e), 0)
override def + (elem1: A, elem2: A, elems: A*): HashSet[A] =
this + elem1 + elem2 ++ elems
override def union(that: GenSet[A]): HashSet[A] = that match {
case that: HashSet[A] =>
val buffer = new Array[HashSet[A]](bufferSize(this.size + that.size))
nullToEmpty(union0(that, 0, buffer, 0))
case _ => super.union(that)
}
override def intersect(that: GenSet[A]): HashSet[A] = that match {
case that: HashSet[A] =>
val buffer = new Array[HashSet[A]](bufferSize(this.size min that.size))
nullToEmpty(intersect0(that, 0, buffer, 0))
case _ => super.intersect(that)
}
override def diff(that: GenSet[A]): HashSet[A] = that match {
case that: HashSet[A] =>
val buffer = new Array[HashSet[A]](bufferSize(this.size))
nullToEmpty(diff0(that, 0, buffer, 0))
case _ => super.diff(that)
}
/**
* Union with a leaf HashSet at a given level.
* @param that a leaf HashSet
* @param level the depth in the tree. We need this when we have to create a branch node on top of this and that
* @return The union of this and that at the given level. Unless level is zero, the result is not a self-contained
* HashSet but needs to be stored at the correct depth
*/
private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = {
// the default implementation is for the empty set, so we just return that
that
}
/**
* Union with a HashSet at a given level
* @param that a HashSet
* @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree
* @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes
* @param offset0 the first offset into the buffer in which we are allowed to write
* @return The union of this and that at the given level. Unless level is zero, the result is not a self-contained
* HashSet but needs to be stored at the correct depth
*/
private[immutable] def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
// the default implementation is for the empty set, so we just return that
that
}
/**
* Intersection with another hash set at a given level
* @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree
* @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes
* @param offset0 the first offset into the buffer in which we are allowed to write
* @return The intersection of this and that at the given level. Unless level is zero, the result is not a
* self-contained HashSet but needs to be stored at the correct depth
*/
private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
// the default implementation is for the empty set, so we just return the empty set
null
}
/**
* Diff with another hash set at a given level
* @param level the depth in the tree. We need to keep track of the level to know how deep we are in the tree
* @param buffer a temporary buffer that is used for temporarily storing elements when creating new branch nodes
* @param offset0 the first offset into the buffer in which we are allowed to write
* @return The diff of this and that at the given level. Unless level is zero, the result is not a
* self-contained HashSet but needs to be stored at the correct depth
*/
private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
// the default implementation is for the empty set, so we just return the empty set
null
}
def - (e: A): HashSet[A] =
nullToEmpty(removed0(e, computeHash(e), 0))
override def filter(p: A => Boolean) = {
val buffer = new Array[HashSet[A]](bufferSize(size))
nullToEmpty(filter0(p, false, 0, buffer, 0))
}
override def filterNot(p: A => Boolean) = {
val buffer = new Array[HashSet[A]](bufferSize(size))
nullToEmpty(filter0(p, true, 0, buffer, 0))
}
protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = null
protected def elemHashCode(key: A) = key.##
protected final def improve(hcode: Int) = {
var h: Int = hcode + ~(hcode << 9)
h = h ^ (h >>> 14)
h = h + (h << 4)
h ^ (h >>> 10)
}
private[collection] def computeHash(key: A) = improve(elemHashCode(key))
protected def get0(key: A, hash: Int, level: Int): Boolean = false
def updated0(key: A, hash: Int, level: Int): HashSet[A] =
new HashSet.HashSet1(key, hash)
protected def removed0(key: A, hash: Int, level: Int): HashSet[A] = this
protected def writeReplace(): AnyRef = new HashSet.SerializationProxy(this)
override def toSet[B >: A]: Set[B] = this.asInstanceOf[HashSet[B]]
}
/** $factoryInfo
* @define Coll `immutable.HashSet`
* @define coll immutable hash set
*
* @author Tiark Rompf
* @since 2.3
* @define Coll `immutable.HashSet`
* @define coll immutable hash set
* @define mayNotTerminateInf
* @define willNotTerminateInf
*/
object HashSet extends ImmutableSetFactory[HashSet] {
/** $setCanBuildFromInfo */
implicit def canBuildFrom[A]: CanBuildFrom[Coll, A, HashSet[A]] = setCanBuildFrom[A]
private object EmptyHashSet extends HashSet[Any] { }
private[collection] def emptyInstance: HashSet[Any] = EmptyHashSet
// utility method to create a HashTrieSet from two leaf HashSets (HashSet1 or HashSetCollision1) with non-colliding hash code)
private def makeHashTrieSet[A](hash0:Int, elem0:HashSet[A], hash1:Int, elem1:HashSet[A], level:Int) : HashTrieSet[A] = {
val index0 = (hash0 >>> level) & 0x1f
val index1 = (hash1 >>> level) & 0x1f
if(index0 != index1) {
val bitmap = (1 << index0) | (1 << index1)
val elems = new Array[HashSet[A]](2)
if(index0 < index1) {
elems(0) = elem0
elems(1) = elem1
} else {
elems(0) = elem1
elems(1) = elem0
}
new HashTrieSet[A](bitmap, elems, elem0.size + elem1.size)
} else {
val elems = new Array[HashSet[A]](1)
val bitmap = (1 << index0)
val child = makeHashTrieSet(hash0, elem0, hash1, elem1, level + 5)
elems(0) = child
new HashTrieSet[A](bitmap, elems, child.size)
}
}
/**
* Common superclass of HashSet1 and HashSetCollision1, which are the two possible leaves of the Trie
*/
private[HashSet] sealed abstract class LeafHashSet[A] extends HashSet[A] {
private[HashSet] def hash:Int
}
class HashSet1[A](private[HashSet] val key: A, private[HashSet] val hash: Int) extends LeafHashSet[A] {
override def size = 1
override def get0(key: A, hash: Int, level: Int): Boolean =
(hash == this.hash && key == this.key)
override def subsetOf0(that: HashSet[A], level: Int) = {
// check if that contains this.key
// we use get0 with our key and hash at the correct level instead of calling contains,
// which would not work since that might not be a top-level HashSet
// and in any case would be inefficient because it would require recalculating the hash code
that.get0(key, hash, level)
}
override def updated0(key: A, hash: Int, level: Int): HashSet[A] =
if (hash == this.hash && key == this.key) this
else {
if (hash != this.hash) {
makeHashTrieSet(this.hash, this, hash, new HashSet1(key, hash), level)
} else {
// 32-bit hash collision (rare, but not impossible)
new HashSetCollision1(hash, ListSet.empty + this.key + key)
}
}
override private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = that match {
case that if that.hash != this.hash =>
// different hash code, so there is no need to investigate further.
// Just create a branch node containing the two.
makeHashTrieSet(this.hash, this, that.hash, that, level)
case that: HashSet1[A] =>
if (this.key == that.key) {
this
} else {
// 32-bit hash collision (rare, but not impossible)
new HashSetCollision1[A](hash, ListSet.empty + this.key + that.key)
}
case that: HashSetCollision1[A] =>
val ks1 = that.ks + key
// Could use eq check (faster) if ListSet was guaranteed to return itself
if (ks1.size == that.ks.size) {
that
} else {
new HashSetCollision1[A](hash, ks1)
}
}
override private[immutable] def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int) = {
// switch to the Leaf version of union
// we can exchange the arguments because union is symmetrical
that.union0(this, level)
}
override private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] =
if (that.get0(key, hash, level)) this else null
override private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] =
if (that.get0(key, hash, level)) null else this
override def removed0(key: A, hash: Int, level: Int): HashSet[A] =
if (hash == this.hash && key == this.key) null else this
override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] =
if (negate ^ p(key)) this else null
override def iterator: Iterator[A] = Iterator(key)
override def foreach[U](f: A => U): Unit = f(key)
}
private[immutable] class HashSetCollision1[A](private[HashSet] val hash: Int, val ks: ListSet[A]) extends LeafHashSet[A] {
override def size = ks.size
override def get0(key: A, hash: Int, level: Int): Boolean =
if (hash == this.hash) ks.contains(key) else false
override def subsetOf0(that: HashSet[A], level: Int) = {
// we have to check each element
// we use get0 with our hash at the correct level instead of calling contains,
// which would not work since that might not be a top-level HashSet
// and in any case would be inefficient because it would require recalculating the hash code
ks.forall(key => that.get0(key, hash, level))
}
override def updated0(key: A, hash: Int, level: Int): HashSet[A] =
if (hash == this.hash) new HashSetCollision1(hash, ks + key)
else makeHashTrieSet(this.hash, this, hash, new HashSet1(key, hash), level)
override def union0(that: LeafHashSet[A], level: Int): HashSet[A] = that match {
case that if that.hash != this.hash =>
// different hash code, so there is no need to investigate further.
// Just create a branch node containing the two.
makeHashTrieSet(this.hash, this, that.hash, that, level)
case that: HashSet1[A] =>
val ks1 = ks + that.key
// Could use eq check (faster) if ListSet was guaranteed to return itself
if (ks1.size == ks.size) {
this
} else {
// create a new HashSetCollision with the existing hash
// we don't have to check for size=1 because union is never going to remove elements
new HashSetCollision1[A](hash, ks1)
}
case that: HashSetCollision1[A] =>
val ks1 = this.ks ++ that.ks
ks1.size match {
case size if size == this.ks.size =>
// could this check be made faster by doing an eq check?
// I am not sure we can rely on ListSet returning itself when all elements are already in the set,
// so it seems unwise to rely on it.
this
case size if size == that.ks.size =>
// we have to check this as well, since we don't want to create a new instance if this is a subset of that
that
case _ =>
// create a new HashSetCollision with the existing hash
// we don't have to check for size=1 because union is never going to remove elements
new HashSetCollision1[A](hash, ks1)
}
}
override def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
case that: LeafHashSet[A] =>
// switch to the simpler Tree/Leaf implementation
this.union0(that, level)
case that: HashTrieSet[A] =>
// switch to the simpler Tree/Leaf implementation
// we can swap this and that because union is symmetrical
that.union0(this, level)
case _ => this
}
override private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
// filter the keys, taking advantage of the fact that we know their hash code
val ks1 = ks.filter(that.get0(_, hash, level))
ks1.size match {
case 0 =>
// the empty set
null
case size if size == this.size =>
// unchanged
// We do this check first since even if the result is of size 1 since
// it is preferable to return the existing set for better structural sharing
this
case size if size == that.size =>
// the other set
// We do this check first since even if the result is of size 1 since
// it is preferable to return the existing set for better structural sharing
that
case 1 =>
// create a new HashSet1 with the hash we already know
new HashSet1(ks1.head, hash)
case _ =>
// create a new HashSetCollision with the hash we already know and the new keys
new HashSetCollision1(hash, ks1)
}
}
override private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
val ks1 = ks.filterNot(that.get0(_, hash, level))
ks1.size match {
case 0 =>
// the empty set
null
case size if size == this.size =>
// unchanged
// We do this check first since even if the result is of size 1 since
// it is preferable to return the existing set for better structural sharing
this
case 1 =>
// create a new HashSet1 with the hash we already know
new HashSet1(ks1.head, hash)
case _ =>
// create a new HashSetCollision with the hash we already know and the new keys
new HashSetCollision1(hash, ks1)
}
}
override def removed0(key: A, hash: Int, level: Int): HashSet[A] =
if (hash == this.hash) {
val ks1 = ks - key
ks1.size match {
case 0 =>
// the empty set
null
case 1 =>
// create a new HashSet1 with the hash we already know
new HashSet1(ks1.head, hash)
case size if size == ks.size =>
// Should only have HSC1 if size > 1
this
case _ =>
// create a new HashSetCollision with the hash we already know and the new keys
new HashSetCollision1(hash, ks1)
}
} else this
override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
val ks1 = if(negate) ks.filterNot(p) else ks.filter(p)
ks1.size match {
case 0 =>
null
case 1 =>
new HashSet1(ks1.head, hash)
case x if x == ks.size =>
this
case _ =>
new HashSetCollision1(hash, ks1)
}
}
override def iterator: Iterator[A] = ks.iterator
override def foreach[U](f: A => U): Unit = ks.foreach(f)
private def writeObject(out: java.io.ObjectOutputStream) {
// this cannot work - reading things in might produce different
// hash codes and remove the collision. however this is never called
// because no references to this class are ever handed out to client code
// and HashTrieSet serialization takes care of the situation
sys.error("cannot serialize an immutable.HashSet where all items have the same 32-bit hash code")
//out.writeObject(kvs)
}
private def readObject(in: java.io.ObjectInputStream) {
sys.error("cannot deserialize an immutable.HashSet where all items have the same 32-bit hash code")
//kvs = in.readObject().asInstanceOf[ListSet[A]]
//hash = computeHash(kvs.)
}
}
/**
* A branch node of the HashTrieSet with at least one and up to 32 children.
*
* @param bitmap encodes which element corresponds to which child
* @param elems the up to 32 children of this node.
* the number of children must be identical to the number of 1 bits in bitmap
* @param size0 the total number of elements. This is stored just for performance reasons.
* @tparam A the type of the elements contained in this hash set.
*
* How levels work:
*
* When looking up or adding elements, the part of the hashcode that is used to address the children array depends
* on how deep we are in the tree. This is accomplished by having a level parameter in all internal methods
* that starts at 0 and increases by 5 (32 = 2^5) every time we go deeper into the tree.
*
* hashcode (binary): 00000000000000000000000000000000
* level=0 (depth=0) ^^^^^
* level=5 (depth=1) ^^^^^
* level=10 (depth=2) ^^^^^
* ...
*
* Be careful: a non-toplevel HashTrieSet is not a self-contained set, so e.g. calling contains on it will not work!
* It relies on its depth in the Trie for which part of a hash to use to address the children, but this information
* (the level) is not stored due to storage efficiency reasons but has to be passed explicitly!
*
* How bitmap and elems correspond:
*
* A naive implementation of a HashTrieSet would always have an array of size 32 for children and leave the unused
* children empty (null). But that would be very wasteful regarding memory. Instead, only non-empty children are
* stored in elems, and the bitmap is used to encode which elem corresponds to which child bucket. The lowest 1 bit
* corresponds to the first element, the second-lowest to the second, etc.
*
* bitmap (binary): 00010000000000000000100000000000
* elems: [a,b]
* children: ---b----------------a-----------
*/
class HashTrieSet[A](private val bitmap: Int, private[collection] val elems: Array[HashSet[A]], private val size0: Int)
extends HashSet[A] {
assert(Integer.bitCount(bitmap) == elems.length)
// assertion has to remain disabled until SI-6197 is solved
// assert(elems.length > 1 || (elems.length == 1 && elems(0).isInstanceOf[HashTrieSet[_]]))
override def size = size0
override def get0(key: A, hash: Int, level: Int): Boolean = {
val index = (hash >>> level) & 0x1f
val mask = (1 << index)
if (bitmap == - 1) {
elems(index & 0x1f).get0(key, hash, level + 5)
} else if ((bitmap & mask) != 0) {
val offset = Integer.bitCount(bitmap & (mask-1))
elems(offset).get0(key, hash, level + 5)
} else
false
}
override def updated0(key: A, hash: Int, level: Int): HashSet[A] = {
val index = (hash >>> level) & 0x1f
val mask = (1 << index)
val offset = Integer.bitCount(bitmap & (mask-1))
if ((bitmap & mask) != 0) {
val sub = elems(offset)
val subNew = sub.updated0(key, hash, level + 5)
if (sub eq subNew) this
else {
val elemsNew = new Array[HashSet[A]](elems.length)
Array.copy(elems, 0, elemsNew, 0, elems.length)
elemsNew(offset) = subNew
new HashTrieSet(bitmap, elemsNew, size + (subNew.size - sub.size))
}
} else {
val elemsNew = new Array[HashSet[A]](elems.length + 1)
Array.copy(elems, 0, elemsNew, 0, offset)
elemsNew(offset) = new HashSet1(key, hash)
Array.copy(elems, offset, elemsNew, offset + 1, elems.length - offset)
val bitmapNew = bitmap | mask
new HashTrieSet(bitmapNew, elemsNew, size + 1)
}
}
override private[immutable] def union0(that: LeafHashSet[A], level: Int): HashSet[A] = {
val index = (that.hash >>> level) & 0x1f
val mask = (1 << index)
val offset = Integer.bitCount(bitmap & (mask - 1))
if ((bitmap & mask) != 0) {
val sub = elems(offset)
val sub1 = sub.union0(that, level + 5)
if (sub eq sub1) this
else {
val elems1 = new Array[HashSet[A]](elems.length)
Array.copy(elems, 0, elems1, 0, elems.length)
elems1(offset) = sub1
new HashTrieSet(bitmap, elems1, size + (sub1.size - sub.size))
}
} else {
val elems1 = new Array[HashSet[A]](elems.length + 1)
Array.copy(elems, 0, elems1, 0, offset)
elems1(offset) = that
Array.copy(elems, offset, elems1, offset + 1, elems.length - offset)
val bitmap1 = bitmap | mask
new HashTrieSet(bitmap1, elems1, size + that.size)
}
}
override private[immutable] def union0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
case that if that eq this =>
// shortcut for when that is this
// this happens often for nodes deeper in the tree, especially when that and this share a common "heritage"
// e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B
// then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking
// at these nodes.
this
case that: LeafHashSet[A] =>
// when that is a leaf, we can switch to the simpler Tree/Leaf implementation
this.union0(that, level)
case that: HashTrieSet[A] =>
val a = this.elems
var abm = this.bitmap
var ai = 0
val b = that.elems
var bbm = that.bitmap
var bi = 0
// fetch a new temporary array that is guaranteed to be big enough (32 elements)
var offset = offset0
var rs = 0
// loop as long as there are bits left in either abm or bbm
while ((abm | bbm) != 0) {
// lowest remaining bit in abm
val alsb = abm ^ (abm & (abm - 1))
// lowest remaining bit in bbm
val blsb = bbm ^ (bbm & (bbm - 1))
if (alsb == blsb) {
val sub1 = a(ai).union0(b(bi), level + 5, buffer, offset)
rs += sub1.size
buffer(offset) = sub1
offset += 1
// clear lowest remaining one bit in abm and increase the a index
abm &= ~alsb
ai += 1
// clear lowest remaining one bit in bbm and increase the b index
bbm &= ~blsb
bi += 1
} else if (unsignedCompare(alsb - 1, blsb - 1)) {
// alsb is smaller than blsb, or alsb is set and blsb is 0
// in any case, alsb is guaranteed to be set here!
val sub1 = a(ai)
rs += sub1.size
buffer(offset) = sub1
offset += 1
// clear lowest remaining one bit in abm and increase the a index
abm &= ~alsb
ai += 1
} else {
// blsb is smaller than alsb, or blsb is set and alsb is 0
// in any case, blsb is guaranteed to be set here!
val sub1 = b(bi)
rs += sub1.size
buffer(offset) = sub1
offset += 1
// clear lowest remaining one bit in bbm and increase the b index
bbm &= ~blsb
bi += 1
}
}
if (rs == this.size) {
// if the result would be identical to this, we might as well return this
this
} else if (rs == that.size) {
// if the result would be identical to that, we might as well return that
that
} else {
// we don't have to check whether the result is a leaf, since union will only make the set larger
// and this is not a leaf to begin with.
val length = offset - offset0
val elems = new Array[HashSet[A]](length)
System.arraycopy(buffer, offset0, elems, 0, length)
new HashTrieSet(this.bitmap | that.bitmap, elems, rs)
}
case _ => this
}
override private[immutable] def intersect0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
case that if that eq this =>
// shortcut for when that is this
// this happens often for nodes deeper in the tree, especially when that and this share a common "heritage"
// e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B
// then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking
// at these nodes!
this
case that: LeafHashSet[A] =>
// when that is a leaf, we can switch to the simpler Tree/Leaf implementation
// it is OK to swap the arguments because intersect is symmetric
// (we can't do this in case of diff, which is not symmetric)
that.intersect0(this, level, buffer, offset0)
case that: HashTrieSet[A] =>
val a = this.elems
var abm = this.bitmap
var ai = 0
val b = that.elems
var bbm = that.bitmap
var bi = 0
// if the bitmasks do not overlap, the result is definitely empty so we can abort here
if ((abm & bbm) == 0)
return null
// fetch a new temporary array that is guaranteed to be big enough (32 elements)
var offset = offset0
var rs = 0
var rbm = 0
// loop as long as there are bits left that are set in both abm and bbm
while ((abm & bbm) != 0) {
// highest remaining bit in abm
val alsb = abm ^ (abm & (abm - 1))
// highest remaining bit in bbm
val blsb = bbm ^ (bbm & (bbm - 1))
if (alsb == blsb) {
val sub1 = a(ai).intersect0(b(bi), level + 5, buffer, offset)
if (sub1 ne null) {
rs += sub1.size
rbm |= alsb
buffer(offset) = sub1
offset += 1
}
// clear lowest remaining one bit in abm and increase the a index
abm &= ~alsb;
ai += 1
// clear lowest remaining one bit in bbm and increase the b index
bbm &= ~blsb;
bi += 1
} else if (unsignedCompare(alsb - 1, blsb - 1)) {
// alsb is smaller than blsb, or alsb is set and blsb is 0
// in any case, alsb is guaranteed to be set here!
// clear lowest remaining one bit in abm and increase the a index
abm &= ~alsb;
ai += 1
} else {
// blsb is smaller than alsb, or blsb is set and alsb is 0
// in any case, blsb is guaranteed to be set here!
// clear lowest remaining one bit in bbm and increase the b index
bbm &= ~blsb;
bi += 1
}
}
if (rbm == 0) {
// if the result bitmap is empty, the result is the empty set
null
} else if (rs == size0) {
// if the result has the same number of elements as this, it must be identical to this,
// so we might as well return this
this
} else if (rs == that.size0) {
// if the result has the same number of elements as that, it must be identical to that,
// so we might as well return that
that
} else {
val length = offset - offset0
if (length == 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]])
buffer(offset0)
else {
val elems = new Array[HashSet[A]](length)
System.arraycopy(buffer, offset0, elems, 0, length)
new HashTrieSet[A](rbm, elems, rs)
}
}
case _ => null
}
override private[immutable] def diff0(that: HashSet[A], level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = that match {
case that if that eq this =>
// shortcut for when that is this
// this happens often for nodes deeper in the tree, especially when that and this share a common "heritage"
// e.g. you have a large set A and do some small operations (adding and removing elements) to it to create B
// then A and B will have the vast majority of nodes in common, and this eq check will allow not even looking
// at these nodes!
null
case that: HashSet1[A] =>
removed0(that.key, that.hash, level)
case that: HashTrieSet[A] =>
val a = this.elems
var abm = this.bitmap
var ai = 0
val b = that.elems
var bbm = that.bitmap
var bi = 0
// fetch a new temporary array that is guaranteed to be big enough (32 elements)
var offset = offset0
var rs = 0
var rbm = 0
// loop until there are no more bits in abm
while(abm!=0) {
// highest remaining bit in abm
val alsb = abm ^ (abm & (abm - 1))
// highest remaining bit in bbm
val blsb = bbm ^ (bbm & (bbm - 1))
if (alsb == blsb) {
val sub1 = a(ai).diff0(b(bi), level + 5, buffer, offset)
if (sub1 ne null) {
rs += sub1.size
rbm |= alsb
buffer(offset) = sub1
offset += 1
}
// clear lowest remaining one bit in abm and increase the a index
abm &= ~alsb; ai += 1
// clear lowest remaining one bit in bbm and increase the b index
bbm &= ~blsb; bi += 1
} else if (unsignedCompare(alsb - 1, blsb - 1)) {
// alsb is smaller than blsb, or alsb is set and blsb is 0
// in any case, alsb is guaranteed to be set here!
val sub1 = a(ai)
rs += sub1.size
rbm |= alsb
buffer(offset) = sub1; offset += 1
// clear lowest remaining one bit in abm and increase the a index
abm &= ~alsb; ai += 1
} else {
// blsb is smaller than alsb, or blsb is set and alsb is 0
// in any case, blsb is guaranteed to be set here!
// clear lowest remaining one bit in bbm and increase the b index
bbm &= ~blsb; bi += 1
}
}
if (rbm == 0) {
null
} else if (rs == this.size0) {
// if the result has the same number of elements as this, it must be identical to this,
// so we might as well return this
this
} else {
val length = offset - offset0
if (length == 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]])
buffer(offset0)
else {
val elems = new Array[HashSet[A]](length)
System.arraycopy(buffer, offset0, elems, 0, length)
new HashTrieSet[A](rbm, elems, rs)
}
}
case that: HashSetCollision1[A] =>
// we remove the elements using removed0 so we can use the fact that we know the hash of all elements
// to be removed
@tailrec def removeAll(s:HashSet[A], r:ListSet[A]) : HashSet[A] =
if(r.isEmpty || (s eq null)) s
else removeAll(s.removed0(r.head, that.hash, level), r.tail)
removeAll(this, that.ks)
case _ => this
}
override def removed0(key: A, hash: Int, level: Int): HashSet[A] = {
val index = (hash >>> level) & 0x1f
val mask = (1 << index)
val offset = Integer.bitCount(bitmap & (mask-1))
if ((bitmap & mask) != 0) {
val sub = elems(offset)
val subNew = sub.removed0(key, hash, level + 5)
if (sub eq subNew) this
else if (subNew eq null) {
val bitmapNew = bitmap ^ mask
if (bitmapNew != 0) {
val elemsNew = new Array[HashSet[A]](elems.length - 1)
Array.copy(elems, 0, elemsNew, 0, offset)
Array.copy(elems, offset + 1, elemsNew, offset, elems.length - offset - 1)
val sizeNew = size - sub.size
// if we have only one child, which is not a HashTrieSet but a self-contained set like
// HashSet1 or HashSetCollision1, return the child instead
if (elemsNew.length == 1 && !elemsNew(0).isInstanceOf[HashTrieSet[_]])
elemsNew(0)
else
new HashTrieSet(bitmapNew, elemsNew, sizeNew)
} else
null
} else if(elems.length == 1 && !subNew.isInstanceOf[HashTrieSet[_]]) {
subNew
} else {
val elemsNew = new Array[HashSet[A]](elems.length)
Array.copy(elems, 0, elemsNew, 0, elems.length)
elemsNew(offset) = subNew
val sizeNew = size + (subNew.size - sub.size)
new HashTrieSet(bitmap, elemsNew, sizeNew)
}
} else {
this
}
}
override def subsetOf0(that: HashSet[A], level: Int): Boolean = if (that eq this) true else that match {
case that: HashTrieSet[A] if this.size0 <= that.size0 =>
// create local mutable copies of members
var abm = this.bitmap
val a = this.elems
var ai = 0
val b = that.elems
var bbm = that.bitmap
var bi = 0
if ((abm & bbm) == abm) {
// I tried rewriting this using tail recursion, but the generated java byte code was less than optimal
while(abm!=0) {
// highest remaining bit in abm
val alsb = abm ^ (abm & (abm - 1))
// highest remaining bit in bbm
val blsb = bbm ^ (bbm & (bbm - 1))
// if both trees have a bit set at the same position, we need to check the subtrees
if (alsb == blsb) {
// we are doing a comparison of a child of this with a child of that,
// so we have to increase the level by 5 to keep track of how deep we are in the tree
if (!a(ai).subsetOf0(b(bi), level + 5))
return false
// clear lowest remaining one bit in abm and increase the a index
abm &= ~alsb; ai += 1
}
// clear lowermost remaining one bit in bbm and increase the b index
// we must do this in any case
bbm &= ~blsb; bi += 1
}
true
} else {
// the bitmap of this contains more one bits than the bitmap of that,
// so this can not possibly be a subset of that
false
}
case _ =>
// if the other set is a HashTrieSet but has less elements than this, it can not be a subset
// if the other set is a HashSet1, we can not be a subset of it because we are a HashTrieSet with at least two children (see assertion)
// if the other set is a HashSetCollision1, we can not be a subset of it because we are a HashTrieSet with at least two different hash codes
// if the other set is the empty set, we are not a subset of it because we are not empty
false
}
override protected def filter0(p: A => Boolean, negate: Boolean, level: Int, buffer: Array[HashSet[A]], offset0: Int): HashSet[A] = {
// current offset
var offset = offset0
// result size
var rs = 0
// bitmap for kept elems
var kept = 0
// loop over all elements
var i = 0
while (i < elems.length) {
val result = elems(i).filter0(p, negate, level + 5, buffer, offset)
if (result ne null) {
buffer(offset) = result
offset += 1
// add the result size
rs += result.size
// mark the bit i as kept
kept |= (1 << i)
}
i += 1
}
if (offset == offset0) {
// empty
null
} else if (rs == size0) {
// unchanged
this
} else if (offset == offset0 + 1 && !buffer(offset0).isInstanceOf[HashTrieSet[A]]) {
// leaf
buffer(offset0)
} else {
// we have to return a HashTrieSet
val length = offset - offset0
val elems1 = new Array[HashSet[A]](length)
System.arraycopy(buffer, offset0, elems1, 0, length)
val bitmap1 = if (length == elems.length) {
// we can reuse the original bitmap
bitmap
} else {
// calculate new bitmap by keeping just bits in the kept bitmask
keepBits(bitmap, kept)
}
new HashTrieSet(bitmap1, elems1, rs)
}
}
override def iterator = new TrieIterator[A](elems.asInstanceOf[Array[Iterable[A]]]) {
final override def getElem(cc: AnyRef): A = cc.asInstanceOf[HashSet1[A]].key
}
override def foreach[U](f: A => U): Unit = {
var i = 0
while (i < elems.length) {
elems(i).foreach(f)
i += 1
}
}
}
/**
* Calculates the maximum buffer size given the maximum possible total size of the trie-based collection
* @param size the maximum size of the collection to be generated
* @return the maximum buffer size
*/
@inline private def bufferSize(size: Int): Int = (size + 6) min (32 * 7)
/**
* In many internal operations the empty set is represented as null for performance reasons. This method converts
* null to the empty set for use in public methods
*/
@inline private def nullToEmpty[A](s: HashSet[A]): HashSet[A] = if (s eq null) empty[A] else s
/**
* Utility method to keep a subset of all bits in a given bitmap
*
* Example
* bitmap (binary): 00000001000000010000000100000001
* keep (binary): 1010
* result (binary): 00000001000000000000000100000000
*
* @param bitmap the bitmap
* @param keep a bitmask containing which bits to keep
* @return the original bitmap with all bits where keep is not 1 set to 0
*/
private def keepBits(bitmap: Int, keep: Int): Int = {
var result = 0
var current = bitmap
var kept = keep
while (kept != 0) {
// lowest remaining bit in current
val lsb = current ^ (current & (current - 1))
if ((kept & 1) != 0) {
// mark bit in result bitmap
result |= lsb
}
// clear lowest remaining one bit in abm
current &= ~lsb
// look at the next kept bit
kept >>>= 1
}
result
}
// unsigned comparison
@inline private[this] def unsignedCompare(i: Int, j: Int) =
(i < j) ^ (i < 0) ^ (j < 0)
@SerialVersionUID(2L) private class SerializationProxy[A,B](@transient private var orig: HashSet[A]) extends Serializable {
private def writeObject(out: java.io.ObjectOutputStream) {
val s = orig.size
out.writeInt(s)
for (e <- orig) {
out.writeObject(e)
}
}
private def readObject(in: java.io.ObjectInputStream) {
orig = empty
val s = in.readInt()
for (i <- 0 until s) {
val e = in.readObject().asInstanceOf[A]
orig = orig + e
}
}
private def readResolve(): AnyRef = orig
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy