
org.apache.jackrabbit.oak.plugins.document.JournalGarbageCollector

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.plugins.document;

import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Stopwatch;

/**
 * The JournalGarbageCollector can clean up JournalEntries that are older than a
 * particular age.
 * <p>
 * It would typically be invoked in conjunction with the VersionGarbageCollector,
 * but must not be confused with that one - 'journal' refers to the separate
 * collection that contains the changed paths per background write, used for
 * observation.
 */
public class JournalGarbageCollector {

    // copied from VersionGarbageCollector:
    private static final int DELETE_BATCH_SIZE = 450;

    private final DocumentStore ds;

    private static final Logger log = LoggerFactory.getLogger(JournalGarbageCollector.class);

    public JournalGarbageCollector(DocumentNodeStore nodeStore) {
        this.ds = nodeStore.getDocumentStore();
    }

    /**
     * Deletes entries in the journal that are older than the given
     * maxRevisionAge.
     *
     * @param maxRevisionAge entries older than this age will be removed
     * @param unit           the time unit for maxRevisionAge
     * @return the number of entries that have been removed
     */
    public int gc(long maxRevisionAge, TimeUnit unit) {
        long maxRevisionAgeInMillis = unit.toMillis(maxRevisionAge);
        if (log.isDebugEnabled()) {
            log.debug("gc: Journal garbage collection starts with maxAge: {} min.",
                    TimeUnit.MILLISECONDS.toMinutes(maxRevisionAgeInMillis));
        }
        Stopwatch sw = Stopwatch.createStarted();

        // The journal has ids of the following format:
        //   1-0000014db9aaf710-00000001
        // where the first number is the cluster node id.
        // This format prevents us from doing one generic query
        // for all 'old' entries, as the document store can only
        // query for a sequential list of entries (and the cluster
        // node id here partitions the set of entries that we have
        // to delete).
        // To account for that, we simply iterate over all cluster
        // node ids and clean them up individually.
        // Note that there are possible alternatives, such as
        // letting each node clean up its own old entries, but the
        // chosen approach is also quite simple: it can be started
        // on any instance - but best on only one. If it is run on
        // multiple instances concurrently, they will compete at
        // deletion, which is suboptimal for performance, but does
        // no harm.

        // 1. get the list of cluster node ids
        final List<ClusterNodeInfoDocument> clusterNodeInfos = ClusterNodeInfoDocument.all(ds);
        int numDeleted = 0;
        for (ClusterNodeInfoDocument clusterNodeInfoDocument : clusterNodeInfos) {
            // The current algorithm simply looks at all cluster nodes,
            // irrespective of whether they are active or inactive.
            // This could be optimized for inactive ones: at some point,
            // all journal entries of an inactive node would have been
            // cleaned up, and from then on we could stop including those
            // long-time-inactive nodes. That 'long time' aspect would
            // have to be tracked though, to be sure we don't leave
            // garbage behind - so it is simpler to quickly run the query
            // even for long-time-inactive nodes.
            final int clusterNodeId = clusterNodeInfoDocument.getClusterId();

            // 2. iterate over that list and do a query with
            //    a limit of 'batch size'
            boolean branch = false;
            long startPointer = 0;
            while (true) {
                String fromKey = JournalEntry.asId(
                        new Revision(startPointer, 0, clusterNodeId, branch));
                String toKey = JournalEntry.asId(
                        new Revision(System.currentTimeMillis() - maxRevisionAgeInMillis,
                                Integer.MAX_VALUE, clusterNodeId, branch));
                int limit = DELETE_BATCH_SIZE;
                List<JournalEntry> deletionBatch = ds.query(Collection.JOURNAL, fromKey, toKey, limit);
                if (deletionBatch.size() > 0) {
                    ds.remove(Collection.JOURNAL, asKeys(deletionBatch));
                    numDeleted += deletionBatch.size();
                }
                if (deletionBatch.size() < limit) {
                    if (!branch) {
                        // do the same for branch revisions: this restarts
                        // at the beginning with branch set to true
                        // and eventually finishes too
                        startPointer = 0;
                        branch = true;
                        continue;
                    }
                    break;
                }
                startPointer = deletionBatch.get(deletionBatch.size() - 1).getRevisionTimestamp();
            }
        }

        sw.stop();

        log.info("gc: Journal garbage collection took {}, deleted {} entries that were older than {} min.",
                sw, numDeleted, TimeUnit.MILLISECONDS.toMinutes(maxRevisionAgeInMillis));

        return numDeleted;
    }

    private List<String> asKeys(List<JournalEntry> deletionBatch) {
        final List<String> keys = new ArrayList<String>(deletionBatch.size());
        for (JournalEntry e : deletionBatch) {
            keys.add(e.getId());
        }
        return keys;
    }
}
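For context, a minimal usage sketch follows. It assumes an already-initialized DocumentNodeStore named nodeStore (a hypothetical variable, not part of the source above) and picks a 24-hour cutoff purely for illustration; the collector itself does not prescribe a default age.

    // Sketch only: 'nodeStore' is an assumed, pre-built DocumentNodeStore.
    JournalGarbageCollector journalGC = new JournalGarbageCollector(nodeStore);
    // Remove journal entries older than 24 hours (illustrative cutoff, not a default).
    int removed = journalGC.gc(24, TimeUnit.HOURS);

Because concurrent runs on several cluster nodes only compete at deletion, as the comments in gc() note, scheduling this call periodically on a single instance is sufficient and cheapest.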




