// dev.tauri.choam.internal.mcas.emcas.Emcas.scala Maven / Gradle / Ivy
/*
* SPDX-License-Identifier: Apache-2.0
* Copyright 2016-2024 Daniel Urban and contributors listed in NOTICE.txt
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package dev.tauri.choam
package internal
package mcas
package emcas
import java.lang.ref.{ Reference, WeakReference }
private[mcas] object Emcas {

  /**
   * The `Emcas` instance obtained from `Mcas.internalEmcas`
   * (exposed here for testing).
   */
  val inst: Emcas = Mcas.internalEmcas
}
/**
* Efficient Multi-word Compare and Swap (EMCAS):
* https://arxiv.org/pdf/2008.02527.pdf
*/
private[mcas] final class Emcas extends GlobalContext { global =>
/*
* This implementation has a few important
* differences from the one described in the paper.
*
*
* ### Markers ###
*
* The paper which describes EMCAS omits detailed
* description of the memory management technique.
* However, as mentioned in the paper, memory management
* is important not just for the performance, but for the
* correctness of the algorithm.
*
* A finished EMCAS operation leaves `EmcasWordDesc`s
* in the `MemoryLocation`s it touches. These can only
* be removed (i.e., detached, replaced by the final value
* or a new descriptor) if no other thread (helpers, or the
* original thread, in case a helper completes the op) uses
* the `EmcasWordDesc` any more. If they are removed while
* still in use, that can cause ABA-problems. (Also, if
* existing descriptor objects would be reused, it would cause
* other problems too. However, currently only fresh
* descriptors are used.)
*
* A specific scenario which would cause a problem:
* - T1 starts an op [(r1, "a", "b"), (r2, "x", "y")]
* - Checks version and current value of r1, ok, so it
* installs the WD into r1.
* - Checks version and current value of r2, ok.
* - T1 "pauses" right before the CAS to install WD
* into r2.
* - T2 helps, finalizes the op.
* - T2 executes unrelated op, which changes r2 back
* to "x", finalizes.
* - T2 detaches the WD from r2, so now r2="x".
* - T1 continues execution, executes the CAS to
* install WD into r2, which succeeds (since r2="x").
* But this is incorrect, the version of r2 changed
* since it checked.
* So versions by themselves don't save us from ABA-problems,
* because we can't CAS them together with the values.
* TODO: We could, if we had a double-word-CAS (or would implement
* TODO: something like RDCSS or GCAS).
* But instead we use marks...
*
* To guarantee that in-use descriptors are never replaced,
* every thread (the original and any helpers) must always
* hold a strong reference to the "mark(er)" associated with
* the descriptor. This way, if the marker is collected by
* the JVM GC, we can be sure, that the corresponding descriptor
* can be replaced.
*
* Markers are manipulated with the `unsafeGetMarkerV` and
* `unsafeCasMarkerV` methods of `MemoryLocation`. The
* marker of a `MemoryLocation` can be in the following states:
*
* null - `unsafeGetMarkerV` returns null, i.e.,
* not even a `WeakReference` object exists.
*
* empty - `unsafeGetMarkerV` returns an empty,
* i.e., cleared `WeakReference` object.
*
* full - `unsafeGetMarkerV` returns a full, i.e.,
* not cleared `WeakReference` object.
*
* The other methods (e.g., `unsafeGetV`) manipulate the "content"
* of the `MemoryLocation`. If the content is a "value", that can be
* freely replaced (with a CAS) by a descriptor during an operation.
* (But the new descriptor must have a mark.) The content can have the
* following possible states:
*
* value - a user-supplied value (possibly including `null`);
* this state includes anything that is not a `EmcasWordDesc`.
*
* descriptor - a (non-null) `EmcasWordDesc` object.
*
* Thus, the possible states (and their meaning) of a
* `MemoryLocation` are as follows:
*
* content = value
* marker = null
* Meaning: this is the initial state, or the one
* after full cleanup. A new descriptor can be freely
* stored, but before that, a new mark also needs to be
* installed.
*
* content = value
* marker = empty
* Meaning: a descriptor previously was in the location,
* but it was already replaced. The empty marker can be
* freely deleted or replaced by a new one.
*
* content = value
* marker = full
* Meaning: a new marker has been installed, but then the
* descriptor wasn't installed (due to a race; see comment at
* the end of `tryWord`); the marker can be reused next time.
*
* content = descriptor
* marker = null
* Meaning: the descriptor is not in use any more, and the
* `WeakReference` object was also cleared up; otherwise
* see below.
*
* content = descriptor
* marker = empty
* Meaning: the descriptor is not in use any more, and can
* be replaced by the final value, or another descriptor
* (but before installing a new descriptor, a new marker must
* be also installed).
*
* content = descriptor
* marker = full
* Meaning: the descriptor is (possibly) in use, and cannot be
* replaced, except CAS-ing another descriptor with the
* same mark in its place.
*
*
* ### Versions ###
*
* To provide opacity, we need to validate previously read
* values when we read from a new ref. To be able to do this
* quickly, refs also store their version. The EMCAS impl.
* has a global commit counter (commit-ts, commit version),
* which is a `Long` (a few values are reserved and have a special
* meaning, see `Version.isValid`). The version of a ref is the
* commit version which last changed the ref (or `Version.Start` if it
* was never changed). This system is based on the one in SwissTM
* (https://web.archive.org/web/20220215230304/https://www.researchgate.net/profile/Aleksandar-Dragojevic/publication/37470225_Stretching_Transactional_Memory/links/0912f50d430e2cf991000000/Stretching-Transactional-Memory.pdf),
* although this implementation is lock-free.
*
* On top of this version-based validation system, we implement
* an optimization from the paper "Commit Phase in Timestamp-based STM"
* (https://web.archive.org/web/20220302005715/https://www.researchgate.net/profile/Zoran-Budimlic/publication/221257687_Commit_phase_in_timestamp-based_stm/links/004635254086f87ab9000000/Commit-phase-in-timestamp-based-stm.pdf).
* We allow ops to *share* a commit-ts (if they do not conflict).
* Our implementation is a lock-free version of algorithm "V1" from
* the paper.
*
* TODO: Consider using V4 (instead of V1) from the paper.
*
* The proof of correctness in the paper needs some changes for our system:
* - The proof of "Lemma 1" considers three possible scenarios:
* - The first one is not possible for us not due to read-locking,
* but because at t₂ the validation performed by T₁ would help
* T₂ finish, and then the validation would fail (since O is
* already in the log of T₁).
* - The second one works the same way, except that if T₂ hasn't
* committed yet, the OPEN by T₁ will help it commit.
* - In the third scenario, if t₄ < t₅, then the OPEN at t₄ will
* help the commit (so in fact t₄ = t₅), and revalidate T₁, so
* that's fine. The other 2 cases work essentially the same.
* (Note: the third scenario in the paper seems to have a typo,
* "t₃ ≤ t₄ < t₂" should be "t₂ < t₄ ≤ t₃".)
* - The proof of "Theorem 1" works essentially the same.
*
* Versions (both of a ref and the global one) are always
* monotonically increasing.
*
* To support reading/writing of versions, a ref has the
* `unsafeGetVersionV` and `unsafeCmpxchgVersionV`
* methods. However, the version accessible by these is
* only correct, if the ref currently stores a value (and not
* a descriptor, see above). If it stores a descriptor, the
* current (logical) version is the one in the descriptor
* (the old or new version, depending on the state of the op).
* Thus, refs can have the following states:
*
* content = value
* version = any valid version
* Meaning: the currently physically stored version is
* the same as the logical version.
*
* content = descriptor with parent status `McasStatus.Active`
* version = (don't care)
* Meaning: the logical version is the OLD version in the desc
* (although, we never use the old version in this case,
* instead we always help the active op; this is
* important to allow version sharing); this is an op
* which is still active
*
* content = descriptor with parent status `McasStatus.FailedVal`
* version = (don't care)
* Meaning: the logical version is the OLD version in the desc;
* this is an op which already failed
*
* content = descriptor with any parent status `s` for which
* `EmcasStatus.isSuccessful(s)` is true
* version = (don't care)
* Meaning: the logical version is the NEW version in the desc,
* which is stored indirectly: the version is the parent
* status itself; this is a successful op
*/
// TODO: Most accesses here (and elsewhere) are volatile;
// TODO: figure out if we can use acq/rel, and still remain
// TODO: correct.
// Listing 2 in the paper:
/**
* A specialized version of `readInternal` from the paper
*
* Only returns the actual value (after possibly helping).
* Cannot be called from an ongoing MCAS operation (but
* can be called when we're only reading).
*
* (The other version of `readInternal`, specialized for
* an ongoing MCAS operation is inlined into `tryWord` below,
* see the `while` loop.)
*
* @param ref: The [[MemoryLocation]] to read from.
* @param ctx: The [[ThreadContext]] of the current thread.
* @param replace: Pass `false` to not do any replacing/clearing.
*/
private[this] final def readValue[A](ref: MemoryLocation[A], ctx: EmcasThreadContext, replace: Boolean): LogEntry[A] = {

  // `held` is the marker we are strongly holding (`null` if we
  // don't hold one yet); `lastVer` is the version most recently
  // read from `ref` (it is only used if `ref` contains a value).
  @tailrec
  def loop(held: AnyRef, lastVer: Long): LogEntry[A] = {
    ref.unsafeGetV() match {
      case wd: EmcasWordDesc[_] if (held ne null) =>
        // we found a descriptor while already holding a marker
        val owner = wd.parent
        val status = owner.getStatusV()
        if (status != McasStatus.Active) {
          // finalized op: its outcome selects the value and the version
          val failed = (status == McasStatus.FailedVal) || (status == EmcasStatus.CycleDetected)
          val result = if (failed) wd.cast[A].ov else wd.cast[A].nv
          val resultVer = if (failed) wd.oldVersion else status
          Reference.reachabilityFence(held)
          LogEntry(ref, ov = result, nv = result, version = resultVer)
        } else {
          // the other op is still active: help it, then retry
          helpMCASnoMCAS(owner, ctx = ctx)
          loop(held = held, lastVer = lastVer)
        }
      case wd: EmcasWordDesc[_] =>
        // we found a descriptor, and we're not holding a marker yet
        val weakref = ref.unsafeGetMarkerV()
        val candidate = if (weakref eq null) null else weakref.get()
        if (candidate ne null) {
          // now we're holding it; re-read the descriptor:
          loop(held = candidate, lastVer = lastVer)
        } else {
          // no marker (the weakref is either cleared or removed),
          // so the descriptor can be detached
          val owner = wd.parent
          val status = owner.getStatusV()
          if (status != McasStatus.Active) {
            // finalized op
            val failed = (status == McasStatus.FailedVal) || (status == EmcasStatus.CycleDetected)
            val result = if (failed) wd.cast[A].ov else wd.cast[A].nv
            val resultVer = if (failed) wd.oldVersion else status
            // there is no marker, so we may replace the descriptor:
            this.maybeReplaceDescriptor[A](
              ref,
              wd.cast[A],
              result,
              weakref = weakref,
              replace = replace,
              currentVersion = resultVer,
            )
            LogEntry(ref, ov = result, nv = result, version = resultVer)
          } else {
            // an active op without a marker (this can happen if
            // a thread died during an op): help it, then retry
            helpMCASnoMCAS(owner, ctx = ctx)
            loop(held = null, lastVer = lastVer)
          }
        }
      case a =>
        // a value; accept it only if the version around it was stable
        val nowVer = ref.unsafeGetVersionV()
        if (lastVer != nowVer) {
          loop(held = null, lastVer = nowVer)
        } else {
          LogEntry(ref, ov = a, nv = a, version = lastVer)
        }
    }
  }

  loop(held = null, lastVer = ref.unsafeGetVersionV())
}
/**
 * Forwards to `replaceDescriptor` if (and only if)
 * `replace` is `true`; otherwise does nothing.
 */
private[this] final def maybeReplaceDescriptor[A](
  ref: MemoryLocation[A],
  ov: EmcasWordDesc[A],
  nv: A,
  weakref: WeakReference[AnyRef],
  replace: Boolean,
  currentVersion: Long,
): Unit = {
  if (replace) replaceDescriptor[A](ref = ref, ov = ov, nv = nv, weakref = weakref, currentVersion = currentVersion)
}
/**
 * Tries to replace the finalized descriptor `ov` in `ref`
 * with the final value `nv`, after making sure that the
 * version stored in `ref` is `currentVersion`.
 *
 * @param ref the location which contained `ov`
 * @param ov the finalized descriptor to replace
 * @param nv the final value to store instead of `ov`
 * @param weakref the (possibly `null`) marker `WeakReference`
 *                to clean up after the replacement
 * @param currentVersion the version which belongs to `nv`
 */
private[this] final def replaceDescriptor[A](
  ref: MemoryLocation[A],
  ov: EmcasWordDesc[A],
  nv: A,
  weakref: WeakReference[AnyRef],
  currentVersion: Long,
): Unit = {
  assert(currentVersion >= ov.oldVersion)
  // The version must be written back into the ref *before*
  // the descriptor is replaced (otherwise the ref would have
  // an invalid, possibly really old version). It is written
  // with a CAS, so a newer version written by another op is
  // never overwritten (versions only ever increase).
  val storedVersion = ref.unsafeGetVersionV()
  val versionIsCurrent: Boolean = if (storedVersion == currentVersion) {
    // already correct, nothing to write
    true
  } else if (storedVersion > currentVersion) {
    // a concurrent write to a newer version; the newer op
    // installs its own descriptor, so we have nothing to do
    false
  } else {
    val witness = ref.unsafeCmpxchgVersionV(storedVersion, currentVersion)
    if (witness == storedVersion) {
      // we've successfully updated the version
      true
    } else {
      // someone else has written the version concurrently,
      // we leave replacing the descriptor to them
      assert(witness >= currentVersion)
      false
    }
  }
  if (versionIsCurrent) {
    // If this CAS fails, someone else has already replaced
    // the descriptor (with the final value, or by starting
    // another op); either way, that's fine.
    ref.unsafeCmpxchgR(ov.castToData, nv) : Unit
    // Possibly also clean up the weakref:
    cleanWeakRef(ref, weakref)
  }
}
/**
 * Removes the (already cleared) `weakref` from the marker
 * of `ref`; does nothing if `weakref` is `null`.
 */
private[this] final def cleanWeakRef[A](ref: MemoryLocation[A], weakref: WeakReference[AnyRef]): Unit = {
  if (weakref eq null) {
    // there is no `WeakReference` object to remove
    ()
  } else {
    assert(weakref.get() eq null)
    // Deleting the empty `WeakReference` helps the GC.
    // A failed CAS means that a new op has already
    // installed a new weakref, which we must leave alone.
    ref.unsafeCmpxchgMarkerR(weakref, null) : Unit
  }
}
// TODO: this could have an optimized version, without creating a hwd
/**
 * Reads the current value of `ref`: the `nv` of the
 * log entry returned by `readIntoHwd`.
 */
private[mcas] final def readDirect[A](ref: MemoryLocation[A], ctx: EmcasThreadContext): A =
  readIntoHwd(ref, ctx).nv
/**
 * Reads `ref` into a new `LogEntry`, by calling
 * `readValue` with `replace = true`.
 */
private[mcas] final def readIntoHwd[A](ref: MemoryLocation[A], ctx: EmcasThreadContext): LogEntry[A] =
  this.readValue[A](ref, ctx, replace = true)
/**
 * Reads the current version of `ref` (not on behalf of
 * an ongoing MCAS); the result is asserted to be a
 * valid version.
 */
private[mcas] final def readVersion[A](ref: MemoryLocation[A], ctx: EmcasThreadContext): Long = {
  val version: Long = readVersionInternal(
    ref,
    ctx,
    forMCAS = false,
    seen = 0L,
    instRo = false,
  )
  assert(Version.isValid(version))
  version
}
/**
 * Reads the current (logical) version of `ref`.
 *
 * If `ref` contains the descriptor of an active op, that
 * op is helped first, then the read is retried. For a
 * finalized op the result is the old version in the
 * descriptor (if the op failed or had a cycle), or the
 * status of the op (if it was successful).
 *
 * @param forMCAS whether we're called from an ongoing MCAS
 * @param seen passed on to `helpMCASforMCAS` (only if `forMCAS`)
 * @param instRo passed on to `helpMCASforMCAS` (only if `forMCAS`)
 * @return the version; or `EmcasStatus.CycleDetected` if
 *         `forMCAS` is `true` and `helpMCASforMCAS` returned `true`.
 */
@tailrec
private[this] final def readVersionInternal[A](
  ref: MemoryLocation[A],
  ctx: EmcasThreadContext,
  forMCAS: Boolean,
  seen: Long,
  instRo: Boolean,
): Long = {
  val verBefore = ref.unsafeGetVersionV()
  ref.unsafeGetV() match {
    case wd: EmcasWordDesc[_] =>
      // TODO: we may need to hold the marker here!
      val owner = wd.parent
      val status = owner.getStatusV()
      if (status != McasStatus.Active) {
        // finalized op
        val failed = (status == McasStatus.FailedVal) || (status == EmcasStatus.CycleDetected)
        if (failed) wd.oldVersion else status
      } else if (forMCAS && helpMCASforMCAS(owner, ctx = ctx, seen = seen, instRo = instRo)) {
        // Note: `forMCAS` is true here, so we can return a reserved version
        EmcasStatus.CycleDetected
      } else {
        // if we're not in an MCAS, we haven't helped yet:
        if (!forMCAS) {
          helpMCASnoMCAS(owner, ctx = ctx)
        }
        // the active op was helped, retry:
        readVersionInternal(ref, ctx, forMCAS = forMCAS, seen = seen, instRo = instRo)
      }
    case _ =>
      // a value; the version is only usable if it was stable around the read
      if (verBefore == ref.unsafeGetVersionV()) {
        verBefore
      } else {
        readVersionInternal(ref, ctx, forMCAS = forMCAS, seen = seen, instRo = instRo)
      }
  }
}
/** Returns `true` iff the helper MCAS should retry (due to a cycle) */
private[this] final def helpMCASforMCAS(
  desc: EmcasDescriptor,
  ctx: EmcasThreadContext,
  seen: Long,
  instRo: Boolean, // the helper's `instRo`!
): Boolean = {
  // the helped op is always executed first:
  val helpedResult = MCAS(desc, ctx, seen)
  // A cycle in the helped op only matters to the helper if
  // the helper itself could be part of it; with `instRo`
  // it certainly isn't, so then it doesn't need to retry.
  (helpedResult == EmcasStatus.CycleDetected) && (!instRo)
}
/**
 * Helps `desc` when the caller is not itself inside
 * an ongoing MCAS; the result of the helping is ignored.
 */
private[this] final def helpMCASnoMCAS(desc: EmcasDescriptor, ctx: EmcasThreadContext): Unit = {
  // Outside of an ongoing MCAS a detected cycle is not
  // our problem: we only need the descriptor out of our
  // way (the thread which started the op that ran into
  // the cycle WILL care about it, and will retry). So we
  // help with an empty `seen` and `instRo = true`, and
  // discard the result:
  helpMCASforMCAS(
    desc,
    ctx,
    seen = 0L,
    instRo = true,
  ) : Unit
}
/**
* Performs an MCAS operation ("Listing 3" in the EMCAS paper).
*
* @param desc The main descriptor.
* @param ctx The [[EmcasThreadContext]] of the current thread.
* @param seen A Bloom filter, which contains the `EmcasDescriptor`s
* we have seen so far (during the recursive helping).
* If it (seemingly) contains `desc`, then we return
* with `CycleDetected`. Otherwise we add `desc` to the
* set for further helping. (If `desc.instRo` is `true`,
* `desc` is excluded from this cycle detection, because
* it is certainly not part of a cycle.)
* @return The result of the MCAS operation on `desc`: the new version
* iff it was successful, `FailedVal` iff it failed, and `CycleDetected`
* iff a cycle was detected during helping (can only happen if
* `desc.instRo` is `false`). Postcondition: `desc` has
* already been finalized with the result which is returned.
*/
private[this] final def MCAS(desc: EmcasDescriptor, ctx: EmcasThreadContext, seen: Long): Long = {
// read once; if `true`, cycle detection is skipped for `desc` (see below)
val instRo = desc.instRo
// Tries to install `wordDesc` into its address, helping any
// other active op whose descriptor is found there. The result
// is one of `McasStatus.Successful`, `McasStatus.FailedVal`,
// `EmcasStatus.Break` or `EmcasStatus.CycleDetected`.
@tailrec
def tryWord[A](wordDesc: EmcasWordDesc[A], newSeen: Long): Long = {
var content: A = nullOf[A]
var value: A = nullOf[A]
var weakref: WeakReference[AnyRef] = null
var mark: AnyRef = null
val address = wordDesc.address
var version: Long = address.unsafeGetVersionV()
var go = true
// Read `content`, and `value` if necessary;
// this is a specialized and inlined version
// of `readInternal` from the paper. We're
// using a `while` loop instead of a tail-recursive
// function (like in the paper), because we may
// need both `content` and `value`, and returning
// them would require allocating a tuple (like in
// the paper).
while (go) {
content = address.unsafeGetV()
content match {
case wd: EmcasWordDesc[_] =>
if (mark eq null) {
// not holding it yet
weakref = address.unsafeGetMarkerV()
mark = if (weakref ne null) weakref.get() else null
if (mark ne null) {
// continue with another iteration, and re-read the
// descriptor, while holding the mark
} else { // mark eq null
// the old descriptor is unused, could be detached
val parent = wd.parent
val parentStatus = parent.getStatusV()
if (parentStatus == McasStatus.Active) {
// active op without a mark: this can
// happen if a thread died during an op
if (wd eq wordDesc) {
// this is us!
// already points to the right place, early return:
return McasStatus.Successful // scalafix:ok
} else {
// we help the active op (who is not us)
if (helpMCASforMCAS(parent, ctx = ctx, seen = newSeen, instRo = instRo)) {
// oops, we'll have to finalize ourselves with CycleDetected too
// TODO: Do we really have to? The other one was finalized, doesn't that break the cycle?
return EmcasStatus.CycleDetected // scalafix:ok
} // else: then continue with another iteration
}
} else { // finalized op
if ((parentStatus == McasStatus.FailedVal) || (parentStatus == EmcasStatus.CycleDetected)) {
value = wd.cast[A].ov
version = wd.oldVersion
} else { // successful
value = wd.cast[A].nv
version = parentStatus
}
go = false
}
}
} else { // mark ne null
if (wd eq wordDesc) {
// this is us!
// already points to the right place, early return:
return McasStatus.Successful // scalafix:ok
} else {
// At this point, we're sure that `wd` belongs to another op
// (not `desc`), because otherwise it would've been equal to
// `wordDesc` (we're assuming that any EmcasWordDesc only
// appears at most once in an EmcasDescriptor).
val parent = wd.parent
val parentStatus = parent.getStatusV()
if (parentStatus == McasStatus.Active) {
// Help the other op; note: we're not "helping" ourselves
// for sure, see the comment above.
if (helpMCASforMCAS(parent, ctx = ctx, seen = newSeen, instRo = instRo)) {
// oops, we'll have to finalize ourselves with CycleDetected too
// TODO: Do we really have to? The other one was finalized, doesn't that break the cycle?
return EmcasStatus.CycleDetected // scalafix:ok
} // else: we helped, but we still don't have the value, so the loop must retry
} else if ((parentStatus == McasStatus.FailedVal) || (parentStatus == EmcasStatus.CycleDetected)) {
value = wd.cast[A].ov
version = wd.oldVersion
go = false
} else { // successful
value = wd.cast[A].nv
version = parentStatus
go = false
}
}
}
case a =>
value = a
val version2 = address.unsafeGetVersionV()
if (version == version2) {
// ok, we have a version that belongs to `value`
go = false
weakref = address.unsafeGetMarkerV()
// we found a value (i.e., not a descriptor)
if (weakref ne null) {
// in rare cases, `mark` could be non-null here
// (see below); but that is not a problem, we
// hold it here, and will use it for our descriptor
mark = weakref.get()
} else {
// we need to clear a possible non-null mark from
// a previous iteration when we found a descriptor:
mark = null
}
} else {
// couldn't read consistent versions for
// the value, will try again; start from
// the latest version we've read:
version = version2
}
}
}
// just to be safe:
assert((mark eq null) || (mark eq weakref.get()))
assert(Version.isValid(version))
// At this point `value` and `version` hold what we've read from
// `address`; compare them to what `wordDesc` expects:
val wordDescOv = wordDesc.ov
if (equ(wordDescOv, EmcasDescriptorBase.CLEARED)) {
Reference.reachabilityFence(mark)
// we have been finalized (by a helping thread), no reason to continue
EmcasStatus.Break
} else if (!equ(value, wordDescOv)) {
Reference.reachabilityFence(mark)
// Expected value is different:
McasStatus.FailedVal
} else if (version != wordDesc.oldVersion) {
Reference.reachabilityFence(mark)
// The expected value is the same,
// but the expected version isn't:
McasStatus.FailedVal
} else if (desc.getStatusV() != McasStatus.Active) {
Reference.reachabilityFence(mark)
// we have been finalized (by a helping thread), no reason to continue
EmcasStatus.Break
} else {
// before installing our descriptor, make sure a valid mark exists:
val weakRefOk = if (mark eq null) {
assert((weakref eq null) || (weakref.get() eq null))
// there was no old descriptor, or it was already unused;
// we'll need a new mark:
mark = ctx.getReusableMarker()
val weakref2 = ctx.getReusableWeakRef()
assert(weakref2.get() eq mark)
address.unsafeCasMarkerV(weakref, weakref2)
// if this fails, we'll retry, see below
} else {
// we have a valid mark from reading
true
}
// If *right now* (after the CAS), another thread, which started
// reading before we installed a new weakref above, finishes its
// read, and detaches the *previous* descriptor (since we
// haven't installed ours yet, and that one was unused);
// then the following CAS will fail (not a problem), and
// on our next retry, we may see a ref with a value *and*
// a non-empty weakref (but this case is also handled, see above).
if (weakRefOk && address.unsafeCasV(content, wordDesc.castToData)) {
Reference.reachabilityFence(mark)
McasStatus.Successful
} else {
// either we couldn't install the new mark, or
// the CAS on the `Ref` failed; in either case,
// we'll retry:
Reference.reachabilityFence(mark)
tryWord(wordDesc, newSeen)
}
}
} // tryWord
// Goes through `words` in order, calling `tryWord` for every
// `EmcasWordDesc`, and skipping every `LogEntry`. It stops at the
// first result of `tryWord` which is not `Successful` (and returns
// that result), or at a `null` element (returning `Break`).
// Otherwise it returns `Successful`, or `Active` if there was
// at least one skipped `LogEntry`.
def acquire(words: Array[WdLike[_]], newSeen: Long): Long = {
@tailrec
def go(words: Array[WdLike[_]], next: Int, len: Int, needsValidation: Boolean): Long = {
if (next < len) {
words(next) match {
case null =>
// Another thread already finalized the descriptor,
// and cleaned up this word descriptor (hence the `null`);
// thus, we should not continue:
EmcasStatus.Break
case wd: EmcasWordDesc[_] =>
assert(instRo || (!wd.readOnly))
val twr = tryWord(wd, newSeen)
assert(
(twr == McasStatus.Successful) ||
(twr == McasStatus.FailedVal) ||
(twr == EmcasStatus.Break) ||
(twr == EmcasStatus.CycleDetected)
)
if (twr == McasStatus.Successful) {
go(words, next = next + 1, len = len, needsValidation = needsValidation)
} else {
twr
}
case wd: LogEntry[_] =>
// read-only WD, which we don't
// need to install; continue, but
// we'll need to revalidate later:
assert(wd.readOnly)
go(words, next = next + 1, len = len, needsValidation = true)
}
} else {
if (needsValidation) {
// this is ugly, but we use Active to signify that we'll need to validate:
McasStatus.Active
} else {
McasStatus.Successful
}
}
}
if (words ne null) {
go(words, next = 0, len = words.length, needsValidation = false)
} else {
// Already finalized descriptor, see above
EmcasStatus.Break
}
} // acquire
// Goes through `words` in order, and for every `LogEntry` compares
// the version read by `readVersionInternal` to the old version in
// the entry; every `EmcasWordDesc` is skipped. Returns `Successful`
// if every comparison succeeded; otherwise `FailedVal`,
// `CycleDetected`, or (when finding a `null`) `Break`.
def validate(words: Array[WdLike[_]], newSeen: Long): Long = {
@tailrec
def go(words: Array[WdLike[_]], next: Int, len: Int): Long = {
if (next < len) {
words(next) match {
case null =>
// already finalized
EmcasStatus.Break
case _: EmcasWordDesc[_] =>
// this WD has already been installed by `acquire`
go(words, next = next + 1, len = len)
case wd: LogEntry[_] =>
assert(wd.readOnly)
// revalidate:
val currVer = this.readVersionInternal(wd.address, ctx, forMCAS = true, seen = newSeen, instRo = false)
if (currVer == wd.oldVersion) {
// OK, continue:
go(words, next = next + 1, len = len)
} else if (currVer == EmcasStatus.CycleDetected) {
EmcasStatus.CycleDetected
} else {
// validation failed:
McasStatus.FailedVal
}
}
} else {
McasStatus.Successful
}
}
assert(!instRo)
if (words ne null) {
go(words, next = 0, len = words.length)
} else {
// already finalized
EmcasStatus.Break
}
} // validate
// Returns the final status of `desc`; used when we've got a
// `Break`, i.e., when `desc` was already finalized by someone else.
def getFinalResultFromHelper(): Long = {
val readStatus = desc.getStatusA() // optimistic read
val result = if (readStatus != McasStatus.Active) {
readStatus
} else {
// we don't see it yet, need to force
// (see the long comment below)
desc.cmpxchgStatus(McasStatus.Active, McasStatus.FailedVal)
}
assert(
Version.isValid(result) ||
(result == McasStatus.FailedVal) ||
(result == EmcasStatus.CycleDetected)
)
result
}
// the Bloom filter we pass on when helping (will also contain `desc`, if `!instRo`)
var seen2: Long = seen
val r = if (!instRo) {
// Cycle detection: we need this to preserve
// lock-freedom. Because if we have 2 EMCAS like
// this: [(r1, "a", "b"), (r2, "x", "x")]
// and [(r1, "a", "a"), (r2, "x", "y")],
// then both can ACQUIRE, and then when
// revalidating, both would try to help
// recursively themselves. That's an
// infinite loop (or possibly a stack overflow).
BloomFilter64.insertIfAbsent(seen, desc.hashCode) match {
case 0L =>
// We (probably) detected a cycle, need to fall
// back to `instRo = true`. Bloom filter is
// probabilistic, so there is some chance that
// there is no actual cycle; but falling back
// doesn't affect correctness, only performance.
// (Actually, not falling back when needed _would_
// affect correctness.)
// Note: we still have to finalize `desc` with
// the `CycleDetected` result.
EmcasStatus.CycleDetected
case bf =>
// fine, no cycle, we've added `desc` to `seen`
seen2 = bf
acquire(desc.getWordDescArrOrNull(), seen2)
}
} else {
// we're installing every WD, no chance of cycles:
acquire(desc.getWordDescArrOrNull(), seen2)
}
assert(
(r == McasStatus.Successful) ||
(r == McasStatus.FailedVal) ||
(r == EmcasStatus.Break) ||
(r == EmcasStatus.CycleDetected) ||
(r == McasStatus.Active) // means we'll need to validate
)
if (r == EmcasStatus.Break) {
// Someone else finalized the descriptor, we must read its status;
// however, a volatile read is NOT sufficient here, because it
// might not actually see the finalized status. Indeed, it COULD
// return `Active`, which is useless here.
//
// The reason for this problem is the plain mode (non-volatile) writes
// to the array elements in `desc` (when clearing with `null`s).
// Those are not properly synchronized. And a volatile read of status
// would not save us here: volatile-reading the new value written
// by a successful volatile-CAS creates a happens-before relationship
// (JLS 17.4.4.); however, volatile-reading the OLD value does NOT
// create a "happens-after" relationship. So, here, after plain-reading
// a `null` from the array, we could volatile-read `Active` (and this
// actually happens with JCStress).
//
// So, instead of a volatile read, we do a volatile-CAS (with
// `cmpxchgStatus`) which must necessarily fail, but creates the
// necessary ordering constraints to get the actual current status
// (as the witness value of the CAS).
//
// To see why this is correct, there are 2 (possible) cases: this
// `cmpxchgStatus` either reads (1) `Active`, or (2) non-`Active`.
//
// If (1), it reads `Active`, then we atomically write `FailedVal`,
// *and* the finalizing CAS in another thread MUST READ this
// `FailedVal`, as 2 such CAS-es cannot both succeed. Thus, this
// CAS happens-before the finalizing CAS, which happens-before
// writing the `null` to the array. As reading `null` happened-before
// this CAS, reading the `null` happens-before writing it. Which
// is not allowed (JLS 17.4.5.). Thus, case (1) is actually impossible.
//
// So case (2) is what must happen: this `cmpxchgStatus` reads a
// non-`Active` value as the witness value (which is what we need).
//
// (Besides reading `null`, another reason for the `Break` could be
// that we already volatile-read a non-`Active` status. In that case
// the `cmpxchgStatus` will also fail, and we will get the final
// status as the witness. Which is fine.)
getFinalResultFromHelper()
} else {
// `r2` is the status we'll try to finalize `desc` with (or `Break`):
val r2 = if ((r == McasStatus.Successful) || (r == McasStatus.Active)) {
val needsValidation = (r == McasStatus.Active)
assert((!instRo) || (!needsValidation))
if (!needsValidation) {
// successfully installed every descriptor (ACQUIRE)
// we'll need a new commit-ts, which we will
// CAS into the descriptor:
retrieveFreshTs()
} else {
// successfully installed all read-write
// descriptors (ACQUIRE), but still need to
// validate our read-set (the read-only
// descriptors, which were not installed):
val vr = validate(desc.getWordDescArrOrNull(), newSeen = seen2)
assert(
(vr == McasStatus.Successful) ||
(vr == McasStatus.FailedVal) ||
(vr == EmcasStatus.Break) ||
(vr == EmcasStatus.CycleDetected)
)
if (vr == EmcasStatus.Break) {
// we're already finalized, see the long comment above
EmcasStatus.Break
} else if (vr == McasStatus.Successful) {
// validation succeeded; we'll need a new
// commit-ts, which we will
// CAS into the descriptor:
retrieveFreshTs()
} else if (vr == EmcasStatus.CycleDetected) {
EmcasStatus.CycleDetected
} else {
// validation failed
McasStatus.FailedVal
}
}
} else {
r
}
if (r2 == EmcasStatus.Break) {
// we're already finalized, see the long comment above
getFinalResultFromHelper()
} else {
val finalRes = r2
assert(
Version.isValid(finalRes) ||
(finalRes == McasStatus.FailedVal) ||
(finalRes == EmcasStatus.CycleDetected)
)
// try to finalize `desc`; only one such CAS (ours or a helper's) can succeed:
val witness: Long = desc.cmpxchgStatus(McasStatus.Active, finalRes)
if (witness == McasStatus.Active) {
// we finalized the descriptor
desc.wasFinalized(finalRes)
if (Consts.statsEnabled) {
ctx.recordEmcasFinalizedO()
if (finalRes == EmcasStatus.CycleDetected) {
// TODO: Note: Our Bloom filter `seen2` isn't necessarily
// TODO: correct here, since it could be that it wasn't this
// TODO: op who detected the cycle, but we could have detected
// TODO: it during helping. This is not a big problem, since
// TODO: the Bloom filter size is just for statistical/informational
// TODO: purposes. (We could fix this, if we somehow got back the
// TODO: filter from helping. But we only get back a `Long`, which
// TODO: is `CycleDetected` in this case.)
// We finalized `desc` with a cycle, so record it for stats:
ctx.recordCycleDetected(BloomFilter64.estimatedSize(seen2))
}
}
finalRes
} else {
// someone else already finalized the descriptor, we return its status:
assert(
Version.isValid(witness) ||
(witness == McasStatus.FailedVal) ||
(witness == EmcasStatus.CycleDetected)
)
witness
}
}
}
} // MCAS
/**
 * Obtains the version, which will (tentatively)
 * serve as the commit-version (commit-ts) of the
 * current op. The returned version may or may
 * not be one which was created by this call.
 *
 * (Note: the op might still end up with a
 * different commit-version, if we later fail
 * to CAS the returned version into the status
 * of the descriptor. That happens when another
 * helper thread wins the race, and finalizes
 * the op before us.)
 *
 * A "fresh" version number is obtained by
 * possibly incrementing the global version.
 * However, this method doesn't necessarily
 * need to *actually* perform the increment.
 * It is sufficient to *observe* an increment;
 * the increment itself could be performed by
 * another thread (even by a completely
 * unrelated op).
 *
 * This is safe, because this method runs only
 * *after* every word descriptor of the current
 * op was successfully installed into the refs.
 * Thus, any concurrent read of any of these refs
 * would help to finalize *us*, before doing
 * anything with the ref. (In effect, it is as if
 * we've "acquired" the refs. Of course, since
 * we're lock-free, nobody will be blocked by
 * this; they will simply help us first.)
 *
 * This is also the mechanism, which makes it
 * possible for unrelated (i.e., non-conflicting,
 * disjoint) ops to *share* a commit-version.
 * For unrelated ops this is safe, because (as
 * explained above) by the time the version is
 * incremented, the refs are already "acquired".
 * Related (i.e., conflicting) ops will never
 * share a version number (that would be unsafe).
 * This is guaranteed, because it is not possible
 * to install 2 conflicting word descriptors into
 * one ref (the loser helps the winner to
 * finalize, then retries).
 */
private[this] final def retrieveFreshTs(): Long = {
  val first = global.getCommitTs()
  val second = global.getCommitTs()
  if (first == second) {
    // no change was observed between the 2 reads,
    // so we try to do the increment ourselves:
    val next = first + Version.Incr
    assert(Version.isValid(next)) // detect version overflow
    // TODO: could this be `getAndAdd`? is it faster?
    val witness = global.cmpxchgCommitTs(first, next)
    if (witness != first) {
      // the CAS failed, but that means that
      // someone else has incremented it:
      assert(witness > first)
      witness
    } else {
      // the CAS succeeded, the new version is ours:
      next
    }
  } else {
    // someone else changed the version between our 2 reads:
    assert(second > first)
    second
  }
}
/** Delegates to `currentContextInternal()` of this instance. */
final override def currentContext(): Mcas.ThreadContext = {
  global.currentContextInternal()
}
/**
 * Decides whether `ctx` is the current context.
 *
 * If `ctx` is an `EmcasThreadContext`, the decision
 * is delegated to its own `isCurrentContext()` method.
 * Anything else (including `null`) yields `false`.
 *
 * @param ctx the context to check
 * @return the result of `ctx.isCurrentContext()` for an
 *         `EmcasThreadContext`; `false` otherwise
 */
final override def isCurrentContext(ctx: Mcas.ThreadContext): Boolean = {
  // a typed pattern does the type test and the cast in one step:
  ctx match {
    case etc: EmcasThreadContext => etc.isCurrentContext()
    case _ => false
  }
}
/** Always `true` for this implementation. */
private[choam] final override def isThreadSafe: Boolean =
  true
/** Passes its arguments unchanged to `tryPerformDebug`, and returns its result. */
private[mcas] final def tryPerformInternal(desc: AbstractDescriptor, ctx: EmcasThreadContext, optimism: Long): Long =
  tryPerformDebug(desc, ctx, optimism)
/**
 * Tries to perform the operation described by `desc`.
 *
 * An empty `desc` succeeds immediately. Otherwise an
 * `EmcasDescriptor` is created from it, and `MCAS` is
 * executed on that. If `MCAS` reports a (possible) cycle,
 * it is executed once more on the fallback descriptor
 * (which has `instRo` set).
 *
 * @param desc the descriptor to perform; if it is
 *             non-empty, it must not be `readOnly`
 * @param ctx the thread context, passed on to `MCAS`
 * @param optimism must be exactly `0L` (`instRo = true`)
 *                 or `1L` (`instRo = false`)
 * @return `McasStatus.Successful` on success;
 *         `McasStatus.FailedVal` if `MCAS` failed with that
 *         status; `Version.Reserved` if a cycle was detected,
 *         and then the fallback also failed with `FailedVal`
 * @throws java.lang.IllegalArgumentException if `desc` is
 *         non-empty and `optimism` is neither `0L` nor `1L`
 */
private[mcas] final def tryPerformDebug(desc: AbstractDescriptor, ctx: EmcasThreadContext, optimism: Long): Long = {
  if (desc.nonEmpty) {
    assert(!desc.readOnly)
    // We match on the `Long` itself: narrowing it with
    // `.toInt` would silently drop the upper 32 bits, so
    // an invalid value (e.g., `1L << 32`) would be accepted
    // as if it was 0.
    val instRo = optimism match {
      case 0L => true
      case 1L => false
      case _ => throw new IllegalArgumentException(s"invalid optimism: ${optimism}")
    }
    val fullDesc = new EmcasDescriptor(desc, instRo = instRo)
    if (fullDesc.getWordsP() ne null) {
      val res = MCAS(desc = fullDesc, ctx = ctx, seen = 0L)
      if (EmcasStatus.isSuccessful(res)) {
        // `Emcas` stores a version in the descriptor,
        // to signify success; however, here we return
        // a constant, to follow the `Mcas` API:
        McasStatus.Successful
      } else if (res == EmcasStatus.CycleDetected) {
        assert(!instRo)
        // we detected a (possible) cycle, so
        // we'll fall back to the method which
        // is certainly lock free (always installing
        // every WD, even the read-only ones):
        val fallback = fullDesc.fallback
        assert(fallback.instRo)
        val fbRes = MCAS(desc = fallback, ctx = ctx, seen = 0L)
        if (EmcasStatus.isSuccessful(fbRes)) {
          McasStatus.Successful
        } else {
          // now we can't get CycleDetected for sure
          assert(fbRes == McasStatus.FailedVal)
          // but we signal, that previously there WAS a cycle:
          Version.Reserved
        }
      } else {
        assert(res == McasStatus.FailedVal)
        McasStatus.FailedVal
      }
    } else {
      // The `readOnly` status of the `AbstractDescriptor`
      // is only an approximation; if every non-read-only
      // HWD "becomes" read-only, `desc.readOnly` could still
      // be false. We detect this when copying the HAMT
      // into an array, and return a `null` array. This
      // happened here; since the descriptor is read-only,
      // and we validated every read, we're done (i.e.,
      // this is a read-only reaction, we just didn't
      // realize it until now).
      McasStatus.Successful
    }
  } else {
    // nothing to do for an empty descriptor
    McasStatus.Successful
  }
}
/**
 * Only for testing!
 *
 * Repeatedly reads `ref`, until it contains something
 * other than an `EmcasWordDesc`, and returns that value.
 * Gives up after `max` iterations, and returns
 * `nullOf[A]` in that case.
 *
 * While waiting, it periodically checks for interruption,
 * and sleeps, calls `System.gc()` or allocates an array
 * (see the comments in the loop).
 */
@throws[InterruptedException]
private[emcas] final def spinUntilCleanup[A](ref: MemoryLocation[A], max: Long = Long.MaxValue): A = {
  val threadCtx = this.currentContextInternal()
  var iterations: Long = 0L
  while (iterations < max) {
    ref.unsafeGetV() match {
      case wordDesc: EmcasWordDesc[_] =>
        // If the CAS is still in progress, we just retry.
        // If it is finalized, but there was no cleanup
        // yet, we also do a read before retrying:
        if (wordDesc.parent.getStatusV() != McasStatus.Active) {
          readDirect(ref, ctx = threadCtx) : Unit
        }
      case value =>
        // no descriptor any more, it has been cleaned up:
        return value // scalafix:ok
    }
    Thread.onSpinWait()
    iterations += 1L
    // the rest only happens on every 128th iteration:
    if ((iterations % 128L) == 0L) {
      if (Thread.interrupted()) {
        throw new InterruptedException
      }
      if ((iterations % 1024L) != 0L) {
        Thread.sleep(32L)
      } else if ((iterations % 0x100000L) != 0L) {
        // every 1024th iteration:
        System.gc()
      } else {
        // every 0x100000th iteration: the GC is
        // really not doing what we want it to, so
        // we do a new allocation, maybe that
        // causes the GC to run:
        val garbage = new java.util.concurrent.atomic.AtomicLongArray(8192)
        garbage.compareAndExchange(4321, 0L, 42L)
      }
    }
  }
  // we ran out of iterations:
  nullOf[A]
}
// JMX MBean for stats (registered only if stats are enabled):
if (Consts.statsEnabled) {
  // the name is the common prefix, followed by the
  // identity hash code of this instance (as 8 hex digits):
  val mbeanName = new javax.management.ObjectName(
    f"${GlobalContextBase.emcasJmxStatsNamePrefix}%s-${System.identityHashCode(this)}%08x"
  )
  val mbeanServer = java.lang.management.ManagementFactory.getPlatformMBeanServer()
  mbeanServer.registerMBean(new EmcasJmxStats(this), mbeanName)
  // TODO: we never unregister this...
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy