org.basex.query.up.atomic.AtomicUpdateCache Maven / Gradle / Ivy
package org.basex.query.up.atomic;
import java.util.*;
import org.basex.data.*;
import org.basex.util.*;
import org.basex.util.hash.*;
/**
* Implementation of the Atomic Update Cache (AUC).
*
* A container/list for atomic updates. Updates must be added from the lowest to
* the highest PRE value (regarding the location of the update). Updates are finally
* applied by this container from the highest to the lowest PRE value (reverse document
* order) to support efficient structural bulk updates etc.
*
* If a collection of updates is carried out via the AUC there are several
* benefits:
*
*
* - Efficient distance adjustments after structural changes.
* - Tree-Aware Updates (TAU): identification of superfluous updates (like updating
* the descendants of a deleted node).
* - Resolution of text node adjacency.
* - Merging of atomic updates to reduce number of I/Os.
*
*
* To avoid ambiguity it is not allowed to add:
*
* - more than one destructive update like {@link Delete} or {@link Replace} operating
* on the same node.
* - more than one {@link Rename} or {@link UpdateValue} operating
* on the same node.
* - sequences like [delete X, insert N at X]: This sequence would be carried out back
* to front: first the insert, then the delete. This would lead to the inserted node N
* being deleted by the 'delete X' statement. The correct order for this sequence would
* be [insert N at X, delete X].
* - and so forth ... see check() function for details.
*
*
* Updates are added in a streaming fashion where the most recently added update is
* remembered. This avoids additional traversals of the AUC during consistency checks and
* further optimizations.
*
* @author BaseX Team 2005-22, BSD License
* @author Lukas Kircher
*/
public final class AtomicUpdateCache {
/** List of structural updates (nodes are inserted to / deleted from the table). */
private final List structUpdates = new ArrayList<>(1);
/** Value / non-structural updates like rename. */
private final List valueUpdates = new ArrayList<>(1);
/** Most recently added update buffer. Used to merge/discard updates and to detect
* inconsistencies on-the-fly eliminating the need to traverse all updates. */
private BasicUpdate recent;
/** Most recently added structural atomic update - if there is any. Used to calculate accumulated
* pre value shifts on-the-fly, as {@link BasicUpdate} don't carry this information. */
private BasicUpdate recentStruct;
/** Target data reference. */
public final Data data;
/**
* Constructor.
* @param data target data reference
*/
public AtomicUpdateCache(final Data data) {
this.data = data;
}
/**
* Adds a DELETE atomic to the list.
* @param pre PRE value of the target node/update location
*/
public void addDelete(final int pre) {
considerAtomic(Delete.getInstance(data, pre), false);
}
/**
* Adds an INSERT atomic to the list.
* @param pre PRE value of the target node/update location
* @param par new parent of the inserted nodes
* @param clip insertion sequence data clip
*/
public void addInsert(final int pre, final int par, final DataClip clip) {
considerAtomic(clip.data.kind(clip.start) == Data.ATTR
? InsertAttr.getInstance(pre, par, clip) : Insert.getInstance(pre, par, clip), false);
}
/**
* Adds a REPLACE atomic to the list.
* @param pre PRE value of the target node/update location
* @param clip insertion sequence data clip
*/
public void addReplace(final int pre, final DataClip clip) {
considerAtomic(Replace.getInstance(data, pre, clip), false);
}
/**
* Adds a RENAME atomic to the list.
* @param pre PRE value of the target node/update location
* @param name new name for the target node
* @param uri new uri for the target node
*/
public void addRename(final int pre, final byte[] name, final byte[] uri) {
considerAtomic(Rename.getInstance(data, pre, name, uri), false);
}
/**
* Adds an UPDATEVALUE atomic to the list.
* @param pre PRE value of the target node/update location
* @param value new value for the target node
*/
public void addUpdateValue(final int pre, final byte[] value) {
considerAtomic(UpdateValue.getInstance(data, pre, value), false);
}
/**
* Resets the list.
*/
public void clear() {
structUpdates.clear();
valueUpdates.clear();
recent = null;
recentStruct = null;
}
/**
* Adds an update to the corresponding list.
* @param candidate atomic update
* @param slack skip consistency checks etc. if {@code true} (used during text node merging)
*/
private void considerAtomic(final BasicUpdate candidate, final boolean slack) {
// fill the one-atomic-update buffer
if(recent == null) {
recent = candidate;
if(recent instanceof StructuralUpdate) recentStruct = candidate;
return;
}
if(candidate instanceof StructuralUpdate && recentStruct != null) {
((StructuralUpdate) candidate).accumulatedShifts += recentStruct.accumulatedShifts();
}
// prepare & optimize incoming update
if(slack) {
add(candidate, false);
} else {
check(recent, candidate);
if(treeAwareUpdates(recent, candidate)) return;
final BasicUpdate m = recent.merge(data, candidate);
if(m != null) add(m, true);
else add(candidate, false);
}
}
/**
* Adds the given update to the updates/buffer depending on the type and whether it's
* been merged or not.
*
* @param update update
* @param merged if true, the given update has been merged w/ the recent one
*/
private void add(final BasicUpdate update, final boolean merged) {
if(update == null) return;
if(!merged) {
if(recent instanceof StructuralUpdate) structUpdates.add((StructuralUpdate) recent);
else valueUpdates.add(recent);
}
recent = update;
if(update instanceof StructuralUpdate) recentStruct = update;
}
/**
* Flushes the buffer that contains the most previously added atomic update.
*/
private void flush() {
if(recent != null) {
add(recent, false);
recent = null;
recentStruct = null;
}
}
/**
* Returns the number of structural updates.
* @return number of structural updates
*/
public int updatesSize() {
flush();
return structUpdates.size() + valueUpdates.size();
}
/**
* Checks the given sequence of two updates for violations.
*
* Updates must be ordered strictly from the lowest to the highest PRE value.
* Deletes must follow inserts.
*
* A single node must not be affected by more than one {@link Rename},
* {@link UpdateValue} operation.
*
* A single node must not be affected by more than one destructive operation. These
* operations include {@link Replace}, {@link Delete}.
*
* @param bu1 first update in sequence
* @param bu2 second update in sequence
*/
private static void check(final BasicUpdate bu1, final BasicUpdate bu2) {
// check order of location PRE, must be strictly ordered low-to-high
if(bu2.location < bu1.location)
throw Util.notExpected("Invalid order at location " + bu1.location);
if(bu2.location == bu1.location) {
// check invalid sequence of {@link Delete}, {@link Insert}
// - the inserted node would directly be deleted without this restriction
if(bu2 instanceof Insert || bu2 instanceof InsertAttr)
if(bu1 instanceof Delete)
throw Util.notExpected("Invalid sequence of delete, insert at location " + bu1.location);
else if(bu1 instanceof Replace)
throw Util.notExpected("Invalid sequence of replace, insert at location " + bu1.location);
// check multiple {@link Delete}, {@link Replace}
if(bu2.destructive() && bu1.destructive())
throw Util.notExpected("Multiple deletes/replaces on node " + bu1.location);
// check multiple {@link Rename}
if(bu2 instanceof Rename && bu1 instanceof Rename)
throw Util.notExpected("Multiple renames on node " + bu1.location);
// check multiple {@link UpdateValue}
if(bu2 instanceof UpdateValue && bu1 instanceof UpdateValue)
throw Util.notExpected("Multiple updates on node " + bu1.location);
/* Check invalid order of destructive/non-destructive updates to support TAU
* cases like: : node X would be deleted and then X+1 renamed,
* as this shifts down to X.
*/
if(bu2.destructive() && !(bu1 instanceof StructuralUpdate))
throw Util.notExpected("Invalid sequence of value update and destructive update at" +
" location " + bu1.location);
}
}
/**
* Checks if the second update is superfluous. An update is considered to be superfluous
* if it targets a position in the subtree of a to-be-removed node.
* @param bu1 first update in sequence
* @param bu2 second update in sequence
* @return true if second update superfluous
*/
private boolean treeAwareUpdates(final BasicUpdate bu1, final BasicUpdate bu2) {
if(bu1.destructive()) {
// we determine the lowest and highest PRE values of a superfluous update
final int pre = bu1.location;
final int fol = pre + data.size(pre, data.kind(pre));
/* CASE 1: candidate operates on the subtree of T and appends a node to the end of
* the subtree (target PRE may be equal)...
* CASE 2: operates within subtree of T */
return bu2.location <= fol && (bu2 instanceof Insert || bu2 instanceof InsertAttr) &&
bu2.parent >= pre && bu2.parent < fol || bu2.location < fol;
}
return false;
}
/**
* Executes the updates. Resolving text node adjacency can be skipped if adjacent text
* nodes are not to be expected.
* @param mergeTexts adjacent text nodes are to be expected and must be merged
*/
public void execute(final boolean mergeTexts) {
data.updateDists = false;
applyUpdates();
adjustDistances();
if(mergeTexts) resolveTextAdjacency();
data.updateDists = true;
clear();
}
/**
* Carries out structural updates.
*/
public void applyUpdates() {
// check if previous update still in buffer
flush();
// value updates applied front-to-back, doens't matter as there are no row shifts
for(final BasicUpdate update : valueUpdates) update.apply(data);
// structural updates are applied back-to-front
for(int i = structUpdates.size() - 1; i >= 0; i--) structUpdates.get(i).apply(data);
}
/**
* Adjusts distances to restore parent-child relationships that have been invalidated
* by structural updates.
*
* Each structural update (insert/delete) leads to a shift of higher PRE values. This
* invalidates parent-child relationships. Distances are only adjusted after all
* structural updates have been carried out to make sure each node (that has to be
* updated) is only touched once.
*/
private void adjustDistances() {
// check if any distance has changed at all
boolean shifts = false;
for(final StructuralUpdate update : structUpdates) {
if(update.accumulatedShifts != 0) {
shifts = true;
break;
}
}
if(!shifts) return;
final IntSet updatedNodes = new IntSet();
for(final StructuralUpdate update : structUpdates) {
/* Update distance for the affected node and all following siblings of nodes
* on the ancestor-or-self axis. */
int pre = update.preOfAffectedNode + update.accumulatedShifts;
while(pre < data.meta.size && !updatedNodes.contains(pre)) {
final int kind = data.kind(pre);
data.dist(pre, kind, calculateNewDistance(pre, kind));
updatedNodes.add(pre);
pre += data.size(pre, kind);
}
}
}
/**
* Calculates the new distance value for the given node after updates have been applied.
* @param pre the new PRE value of the node after structural updates have been applied
* @param kind the KIND value
* @return new distance for the given PRE node
*/
private int calculateNewDistance(final int pre, final int kind) {
int distanceBefore = data.dist(pre, kind);
final int preBefore = calculatePreValue(pre, true);
// document distances are not stored in table but calculated on the fly (always pre+1)
if(kind == Data.DOC) distanceBefore = preBefore + 1;
final int parentBefore = preBefore - distanceBefore;
final int parentAfter = calculatePreValue(parentBefore, false);
return pre - parentAfter;
}
/**
* Calculates the PRE value of a given node before/after updates.
*
* Finds all updates that affect the given node N. The result is than calculated based
* on N and the accumulated PRE value shifts introduced by these updates.
*
* If a node has been inserted at position X and this method is used to calculate the
* PRE value of X before updates, X is the result. As the node at position X has not
* existed before the insertion, its PRE value is unchanged. If in contrast the PRE
* value is calculated after updates, the result is X+1, as the node with the original
* position X has been shifted by the insertion at position X.
*
* Make sure accumulated shifts have been calculated before calling this method!
*
* @param pre PRE value
* @param beforeUpdates calculate PRE value before shifts/updates have been applied
* @return index of update, or -1
*/
public int calculatePreValue(final int pre, final boolean beforeUpdates) {
// find update that affects the given PRE value
int i = find(pre, beforeUpdates);
// given PRE not changed by updates
if(i == -1) return pre;
// refine the search to determine accumulated shifts for the given PRE
i = refine(structUpdates, i, beforeUpdates);
final int acm = structUpdates.get(i).accumulatedShifts;
return beforeUpdates ? pre - acm : pre + acm;
}
/**
* Used to find the update that holds the accumulated shift value that is needed to
* recalculate the given PRE value. In a low-to-high ordered list this is the right-most
* update with a target PRE value smaller or equal the given PRE value, v.v.
*
* Finds the position of the update that affects the given PRE value P.
* If there are multiple updates whose affected PRE value equals P, the search
* has to be further refined as this method returns only the first match.
* @param pre given PRE value
* @param beforeUpdates compare based on PRE values before/after updates
* @return index of update
*/
private int find(final int pre, final boolean beforeUpdates) {
int left = 0;
int right = structUpdates.size() - 1;
while(left <= right) {
if(left == right) {
if(recalculate(structUpdates, left, beforeUpdates) <= pre) return left;
return -1;
}
if(right - left == 1) {
if(recalculate(structUpdates, right, beforeUpdates) <= pre) return right;
if(recalculate(structUpdates, left, beforeUpdates) <= pre) return left;
return -1;
}
final int middle = left + right >>> 1;
final int value = recalculate(structUpdates, middle, beforeUpdates);
if(value == pre) return middle;
else if(value > pre) right = middle - 1;
else left = middle;
}
// empty array
return -1;
}
/**
* Finds the update with the lowest index in the given list that affects the same
* PRE value as the update with the given index.
* @param updates list of updates
* @param index of update
* @param beforeUpdates find update for PRE values before updates have been applied
* @return update with the highest index that invalidates the distance of the given
* node
*/
private static int refine(final List updates, final int index,
final boolean beforeUpdates) {
int u = index;
final int value = recalculate(updates, u++, beforeUpdates);
final int us = updates.size();
while(u < us && recalculate(updates, u, beforeUpdates) == value) u++;
return u - 1;
}
/**
* Recalculates the PRE value of the first node whose distance is affected by the
* given update.
* @param updates list of updates
* @param index index of the update
* @param beforeUpdates calculate PRE value before or after updates
* @return PRE value
*/
private static int recalculate(final List updates, final int index,
final boolean beforeUpdates) {
final StructuralUpdate u = updates.get(index);
return u.preOfAffectedNode + (beforeUpdates ? u.accumulatedShifts : 0);
}
/**
* Resolves unwanted text node adjacency which can result from structural changes in
* the database. Adjacent text nodes are two text nodes A and B, where
* PRE(B)=PRE(A)+1 and PARENT(A)=PARENT(B).
*/
private void resolveTextAdjacency() {
// Text node merges are also gathered on a separate list to leverage optimizations.
final List deletes = new LinkedList<>();
// keep track of the visited locations to avoid superfluous checks
int smallestVisited = Integer.MAX_VALUE;
// Text nodes have to be merged from the highest to the lowest pre value
for(int i = structUpdates.size() - 1; i >= 0; i--) {
final StructuralUpdate u = structUpdates.get(i);
final DataClip insseq = u.getInsertionData();
// calculate the new location of the update, here we have to check for adjacency
final int newLocation = u.location + u.accumulatedShifts - u.shifts;
final int beforeNewLocation = newLocation - 1;
// check surroundings of this location for adjacent text nodes depending on the
// kind of update, first the one with higher PRE values (due to shifts!)
// ... for insert/replace ...
if(insseq != null) {
// calculate the current following node
final int followingNode = newLocation + insseq.size();
final int beforeFollowingNode = followingNode - 1;
// check the nodes at the end of/after the insertion sequence
if(beforeFollowingNode < smallestVisited) {
final Delete del = mergeTextNodes(beforeFollowingNode);
if(del != null) deletes.add(0, del);
smallestVisited = beforeFollowingNode;
}
}
// check nodes for delete and for insert before the updated location
if(beforeNewLocation < smallestVisited) {
final Delete del = mergeTextNodes(beforeNewLocation);
if(del != null) deletes.add(0, del);
smallestVisited = beforeNewLocation;
}
}
final AtomicUpdateCache auc = new AtomicUpdateCache(data);
for(final Delete delete : deletes) auc.considerAtomic(delete, true);
deletes.clear();
auc.applyUpdates();
auc.adjustDistances();
auc.clear();
}
/**
* Returns atomic text node merging operations if necessary for the given node PRE and
* its right neighbor PRE+1.
* @param pre node PRE value
* @return list of text merging operations or {@code null}
*/
private Delete mergeTextNodes(final int pre) {
final int s = data.meta.size;
final int b = pre + 1;
// don't leave table
if(pre >= s || b >= s || pre < 0 || b < 0) return null;
// only merge texts and neighboring texts
if(data.kind(pre) != Data.TEXT || data.kind(b) != Data.TEXT ||
data.parent(pre, Data.TEXT) != data.parent(b, Data.TEXT)) return null;
// apply text node updates on the fly and throw them away
UpdateValue.getInstance(data, pre, Token.concat(data.text(pre, true),
data.text(b, true))).
apply(data);
// deletes must be cached to add them front-to-back to atomic update list
return Delete.getInstance(data, b);
}
}