/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.plugins.document;
import java.io.Closeable;
import java.io.IOException;
import java.util.EnumSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import com.google.common.base.Function;
import com.google.common.base.Joiner;
import com.google.common.base.Predicate;
import com.google.common.base.Stopwatch;
import com.google.common.collect.Iterators;
import com.google.common.collect.Maps;
import com.google.common.collect.Sets;
import org.apache.jackrabbit.oak.commons.sort.StringSort;
import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Condition;
import org.apache.jackrabbit.oak.plugins.document.UpdateOp.Key;
import org.apache.jackrabbit.oak.plugins.document.util.Utils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.StandardSystemProperty.LINE_SEPARATOR;
import static com.google.common.collect.Iterables.all;
import static com.google.common.collect.Iterators.partition;
import static com.google.common.util.concurrent.Atomics.newReference;
import static java.util.Collections.singletonMap;
import static org.apache.jackrabbit.oak.plugins.document.Collection.NODES;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.MODIFIED_IN_SECS;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.COMMIT_ROOT_ONLY;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.SplitDocType.DEFAULT_LEAF;
import static org.apache.jackrabbit.oak.plugins.document.UpdateOp.Condition.newEqualsCondition;
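/**
 * Garbage collector for the {@code DocumentNodeStore}. It removes documents
 * that represent nodes deleted before a given maximum revision age, together
 * with their previous documents, and purges garbage-collectable split
 * document types. Only one GC job may run at a time.
 * <p>
 * A minimal usage sketch, assuming an existing {@code DocumentNodeStore ns}
 * and a matching {@code VersionGCSupport gcSupport}:
 * <pre>
 *     VersionGarbageCollector vgc = new VersionGarbageCollector(ns, gcSupport);
 *     VersionGCStats stats = vgc.gc(24, TimeUnit.HOURS);
 * </pre>
 */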
public class VersionGarbageCollector {
//Kept less than MongoDocumentStore.IN_CLAUSE_BATCH_SIZE to avoid re-partitioning
private static final int DELETE_BATCH_SIZE = 450;
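//Progress is logged every time this many documents have been processed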
private static final int PROGRESS_BATCH_SIZE = 10000;
private static final Key KEY_MODIFIED = new Key(MODIFIED_IN_SECS, null);
private final DocumentNodeStore nodeStore;
private final DocumentStore ds;
private final VersionGCSupport versionStore;
private int overflowToDiskThreshold = 100000;
private final AtomicReference<GCJob> collector = newReference();
private static final Logger log = LoggerFactory.getLogger(VersionGarbageCollector.class);
/**
* Split document types which can be safely garbage collected
*/
private static final Set<NodeDocument.SplitDocType> GC_TYPES = EnumSet.of(
DEFAULT_LEAF, COMMIT_ROOT_ONLY);
VersionGarbageCollector(DocumentNodeStore nodeStore,
VersionGCSupport gcSupport) {
this.nodeStore = nodeStore;
this.versionStore = gcSupport;
this.ds = nodeStore.getDocumentStore();
}
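/**
 * Runs revision garbage collection and removes documents of nodes that
 * were deleted longer than {@code maxRevisionAge} ago, as well as split
 * documents that are no longer needed.
 *
 * @param maxRevisionAge the minimum age of garbage to collect.
 * @param unit the unit of {@code maxRevisionAge}.
 * @return statistics about this GC run.
 * @throws IOException if a GC job is already running or the run fails.
 */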
public VersionGCStats gc(long maxRevisionAge, TimeUnit unit) throws IOException {
long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
GCJob job = new GCJob(maxRevisionAgeInMillis);
if (collector.compareAndSet(null, job)) {
try {
return job.run();
} finally {
collector.set(null);
}
} else {
throw new IOException("Revision garbage collection is already running");
}
}
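/**
 * Cancels a currently running GC job, if any. The job checks the cancel
 * flag periodically and stops at the next opportunity.
 */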
public void cancel() {
GCJob job = collector.get();
if (job != null) {
job.cancel();
}
}
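/**
 * Sets the number of document ids that are kept in memory before the
 * backing {@link StringSort} buffers overflow to disk.
 *
 * @param overflowToDiskThreshold the new threshold.
 */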
public void setOverflowToDiskThreshold(int overflowToDiskThreshold) {
this.overflowToDiskThreshold = overflowToDiskThreshold;
}
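/**
 * Statistics about a single revision garbage collection run.
 */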
public static class VersionGCStats {
boolean ignoredGCDueToCheckPoint;
boolean canceled;
int deletedDocGCCount;
int splitDocGCCount;
int intermediateSplitDocGCCount;
final Stopwatch collectDeletedDocs = Stopwatch.createUnstarted();
final Stopwatch deleteDeletedDocs = Stopwatch.createUnstarted();
final Stopwatch collectAndDeleteSplitDocs = Stopwatch.createUnstarted();
@Override
public String toString() {
return "VersionGCStats{" +
"ignoredGCDueToCheckPoint=" + ignoredGCDueToCheckPoint +
", canceled=" + canceled +
", deletedDocGCCount=" + deletedDocGCCount +
", splitDocGCCount=" + splitDocGCCount +
", intermediateSplitDocGCCount=" + intermediateSplitDocGCCount +
", timeToCollectDeletedDocs=" + collectDeletedDocs +
", timeTakenToDeleteDeletedDocs=" + deleteDeletedDocs +
", timeTakenToCollectAndDeleteSplitDocs=" + collectAndDeleteSplitDocs +
'}';
}
}
private class GCJob {
private final long maxRevisionAgeMillis;
private AtomicBoolean cancel = new AtomicBoolean();
GCJob(long maxRevisionAgeMillis) {
this.maxRevisionAgeMillis = maxRevisionAgeMillis;
}
VersionGCStats run() throws IOException {
return gc(maxRevisionAgeMillis);
}
void cancel() {
log.info("Canceling revision garbage collection.");
cancel.set(true);
}
private VersionGCStats gc(long maxRevisionAgeInMillis) throws IOException {
Stopwatch sw = Stopwatch.createStarted();
VersionGCStats stats = new VersionGCStats();
final long oldestRevTimeStamp = nodeStore.getClock().getTime() - maxRevisionAgeInMillis;
final RevisionVector headRevision = nodeStore.getHeadRevision();
log.info("Starting revision garbage collection. Revisions older than [{}] will be " +
"removed", Utils.timestampToString(oldestRevTimeStamp));
//Check for any registered checkpoint which prevent the GC from running
Revision checkpoint = nodeStore.getCheckpoints().getOldestRevisionToKeep();
if (checkpoint != null && checkpoint.getTimestamp() < oldestRevTimeStamp) {
log.info("Ignoring revision garbage collection because a valid " +
"checkpoint [{}] was found, which is older than [{}].",
checkpoint.toReadableString(),
Utils.timestampToString(oldestRevTimeStamp)
);
stats.ignoredGCDueToCheckPoint = true;
return stats;
}
collectDeletedDocuments(stats, headRevision, oldestRevTimeStamp);
collectSplitDocuments(stats, oldestRevTimeStamp);
sw.stop();
stats.canceled = cancel.get();
log.info("Revision garbage collection finished in {}. {}", sw, stats);
return stats;
}
private void collectSplitDocuments(VersionGCStats stats, long oldestRevTimeStamp) {
stats.collectAndDeleteSplitDocs.start();
versionStore.deleteSplitDocuments(GC_TYPES, oldestRevTimeStamp, stats);
stats.collectAndDeleteSplitDocs.stop();
}
private void collectDeletedDocuments(VersionGCStats stats,
RevisionVector headRevision,
long oldestRevTimeStamp)
throws IOException {
int docsTraversed = 0;
DeletedDocsGC gc = new DeletedDocsGC(headRevision, cancel);
try {
stats.collectDeletedDocs.start();
Iterable<NodeDocument> itr = versionStore.getPossiblyDeletedDocs(oldestRevTimeStamp);
try {
for (NodeDocument doc : itr) {
// continue with GC?
if (cancel.get()) {
break;
}
// Check if the node is actually deleted at the current revision.
// Since the node has not been modified since oldestRevTimeStamp,
// it cannot have been revived within the past maxRevisionAge,
// so deleting it is safe.
docsTraversed++;
if (docsTraversed % PROGRESS_BATCH_SIZE == 0){
log.info("Iterated through {} documents so far. {} found to be deleted",
docsTraversed, gc.getNumDocuments());
}
gc.possiblyDeleted(doc);
}
} finally {
Utils.closeIfCloseable(itr);
}
stats.collectDeletedDocs.stop();
if (gc.getNumDocuments() == 0){
return;
}
stats.deleteDeletedDocs.start();
gc.removeDocuments(stats);
stats.deleteDeletedDocs.stop();
} finally {
gc.close();
}
}
}
/**
* A helper class to remove documents for deleted nodes.
*/
private class DeletedDocsGC implements Closeable {
private final RevisionVector headRevision;
private final AtomicBoolean cancel;
private final StringSort docIdsToDelete = newStringSort();
private final StringSort prevDocIdsToDelete = newStringSort();
private final Set<String> exclude = Sets.newHashSet();
private boolean sorted = false;
public DeletedDocsGC(@Nonnull RevisionVector headRevision,
@Nonnull AtomicBoolean cancel) {
this.headRevision = checkNotNull(headRevision);
this.cancel = checkNotNull(cancel);
}
/**
* @return the number of documents gathered so far that have been
* identified as garbage via {@link #possiblyDeleted(NodeDocument)}.
* This number does not include the previous documents.
*/
long getNumDocuments() {
return docIdsToDelete.getSize();
}
/**
* Informs the GC that the given document is possibly deleted. The
* implementation will check if the node still exists at the head
* revision passed to the constructor of this GC. The implementation
* will keep track of documents representing deleted nodes and remove
* them together with the associated previous documents.
*
* @param doc the candidate document.
*/
void possiblyDeleted(NodeDocument doc)
throws IOException {
// construct an id that also contains
// the _modified time of the document
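// e.g. id "2:/foo/bar" with _modified 1425 becomes "2:/foo/bar/1425" (illustrative values)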
String id = doc.getId() + "/" + doc.getModified();
// check if id is valid
try {
Utils.getDepthFromId(id);
} catch (IllegalArgumentException e) {
log.warn("Invalid GC id {} for document {}", id, doc);
return;
}
if (doc.getNodeAtRevision(nodeStore, headRevision, null) == null) {
addDocument(id);
// Collect id of all previous docs also
addPreviousDocuments(previousDocIdsFor(doc));
}
}
/**
* Removes the documents that have been identified as garbage. This
* also includes previous documents. This method will only remove
* documents that have not been modified since they were passed to
* {@link #possiblyDeleted(NodeDocument)}.
*
* @param stats to track the number of removed documents.
*/
void removeDocuments(VersionGCStats stats) throws IOException {
stats.deletedDocGCCount += removeDeletedDocuments();
// FIXME: this is incorrect because that method also removes intermediate docs
stats.splitDocGCCount += removeDeletedPreviousDocuments();
}
public void close() {
try {
docIdsToDelete.close();
} catch (IOException e) {
log.warn("Failed to close docIdsToDelete", e);
}
try {
prevDocIdsToDelete.close();
} catch (IOException e) {
log.warn("Failed to close prevDocIdsToDelete", e);
}
}
//------------------------------< internal >----------------------------
private Iterator<String> previousDocIdsFor(NodeDocument doc) {
Map<Revision, Range> prevRanges = doc.getPreviousRanges(true);
if (prevRanges.isEmpty()) {
return Iterators.emptyIterator();
} else if (all(prevRanges.values(), FIRST_LEVEL)) {
// all previous document ids can be constructed from the
// previous ranges map. this works for first level previous
// documents only.
final String path = doc.getPath();
return Iterators.transform(prevRanges.entrySet().iterator(),
new Function<Map.Entry<Revision, Range>, String>() {
@Override
public String apply(Map.Entry<Revision, Range> input) {
int h = input.getValue().getHeight();
return Utils.getPreviousIdFor(path, input.getKey(), h);
}
});
} else {
// need to fetch the previous documents to get their ids
return Iterators.transform(doc.getAllPreviousDocs(),
new Function<NodeDocument, String>() {
@Override
public String apply(NodeDocument input) {
return input.getId();
}
});
}
}
private void addDocument(String id) throws IOException {
docIdsToDelete.add(id);
}
private long getNumPreviousDocuments() {
return prevDocIdsToDelete.getSize() - exclude.size();
}
private void addPreviousDocuments(Iterator<String> ids) throws IOException {
while (ids.hasNext()) {
prevDocIdsToDelete.add(ids.next());
}
}
private Iterator<String> getDocIdsToDelete() throws IOException {
ensureSorted();
return docIdsToDelete.getIds();
}
private void concurrentModification(NodeDocument doc) {
Iterator<NodeDocument> it = doc.getAllPreviousDocs();
while (it.hasNext()) {
exclude.add(it.next().getId());
}
}
private Iterator<String> getPrevDocIdsToDelete() throws IOException {
ensureSorted();
return Iterators.filter(prevDocIdsToDelete.getIds(),
new Predicate<String>() {
@Override
public boolean apply(String input) {
return !exclude.contains(input);
}
});
}
private int removeDeletedDocuments() throws IOException {
Iterator<String> docIdsToDelete = getDocIdsToDelete();
log.info("Proceeding to delete [{}] documents", getNumDocuments());
Iterator<List<String>> idListItr = partition(docIdsToDelete, DELETE_BATCH_SIZE);
int deletedCount = 0;
int lastLoggedCount = 0;
int recreatedCount = 0;
while (idListItr.hasNext() && !cancel.get()) {
Map<String, Map<Key, Condition>> deletionBatch = Maps.newLinkedHashMap();
for (String s : idListItr.next()) {
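// split "<id>/<modified>" back into the document id and its expected _modified value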
int idx = s.lastIndexOf('/');
String id = s.substring(0, idx);
long modified = -1;
try {
modified = Long.parseLong(s.substring(idx + 1));
} catch (NumberFormatException e) {
log.warn("Invalid _modified {} for {}", s.substring(idx + 1), id);
}
deletionBatch.put(id, singletonMap(KEY_MODIFIED, newEqualsCondition(modified)));
}
if (log.isDebugEnabled()) {
StringBuilder sb = new StringBuilder("Performing batch deletion of documents with following ids. \n");
Joiner.on(LINE_SEPARATOR.value()).appendTo(sb, deletionBatch.keySet());
log.debug(sb.toString());
}
int nRemoved = ds.remove(NODES, deletionBatch);
if (nRemoved < deletionBatch.size()) {
// some nodes were re-created while the GC was running;
// find the documents that still exist
for (String id : deletionBatch.keySet()) {
NodeDocument d = ds.find(NODES, id);
if (d != null) {
concurrentModification(d);
}
}
recreatedCount += (deletionBatch.size() - nRemoved);
}
deletedCount += nRemoved;
log.debug("Deleted [{}] documents so far", deletedCount);
if (deletedCount + recreatedCount - lastLoggedCount >= PROGRESS_BATCH_SIZE){
lastLoggedCount = deletedCount + recreatedCount;
double progress = lastLoggedCount * 1.0 / getNumDocuments() * 100;
String msg = String.format("Deleted %d (%1.2f%%) documents so far", deletedCount, progress);
log.info(msg);
}
}
return deletedCount;
}
private int removeDeletedPreviousDocuments() throws IOException {
log.info("Proceeding to delete [{}] previous documents", getNumPreviousDocuments());
int deletedCount = 0;
int lastLoggedCount = 0;
Iterator<List<String>> idListItr =
partition(getPrevDocIdsToDelete(), DELETE_BATCH_SIZE);
while (idListItr.hasNext() && !cancel.get()) {
List<String> deletionBatch = idListItr.next();
deletedCount += deletionBatch.size();
if (log.isDebugEnabled()) {
StringBuilder sb = new StringBuilder("Performing batch deletion of previous documents with following ids. \n");
Joiner.on(LINE_SEPARATOR.value()).appendTo(sb, deletionBatch);
log.debug(sb.toString());
}
ds.remove(NODES, deletionBatch);
log.debug("Deleted [{}] previous documents so far", deletedCount);
if (deletedCount - lastLoggedCount >= PROGRESS_BATCH_SIZE){
lastLoggedCount = deletedCount;
double progress = deletedCount * 1.0 / (prevDocIdsToDelete.getSize() - exclude.size()) * 100;
String msg = String.format("Deleted %d (%1.2f%%) previous documents so far", deletedCount, progress);
log.info(msg);
}
}
return deletedCount;
}
private void ensureSorted() throws IOException {
if (!sorted) {
docIdsToDelete.sort();
prevDocIdsToDelete.sort();
sorted = true;
}
}
}
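/**
 * Creates a {@link StringSort} for document ids that overflows to disk
 * when it holds more than {@code overflowToDiskThreshold} entries.
 */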
@Nonnull
private StringSort newStringSort() {
return new StringSort(overflowToDiskThreshold,
NodeDocumentIdComparator.INSTANCE);
}
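// Matches previous document ranges of height 0, i.e. first-level previous documents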
private static final Predicate<Range> FIRST_LEVEL = new Predicate<Range>() {
@Override
public boolean apply(@Nullable Range input) {
return input != null && input.height == 0;
}
};
}