All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.oak.segment.file.CompactionGainEstimate Maven / Gradle / Ivy

There is a newer version: 1.74.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.segment.file;

import static org.apache.jackrabbit.oak.api.Type.BINARIES;
import static org.apache.jackrabbit.oak.commons.IOUtils.humanReadableByteCount;

import java.io.File;
import java.util.UUID;

import com.google.common.base.Supplier;
import com.google.common.hash.BloomFilter;
import com.google.common.hash.Funnel;
import com.google.common.hash.PrimitiveSink;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.segment.RecordIdSet;
import org.apache.jackrabbit.oak.segment.SegmentBlob;
import org.apache.jackrabbit.oak.segment.SegmentId;
import org.apache.jackrabbit.oak.segment.SegmentNodeState;
import org.apache.jackrabbit.oak.segment.SegmentPropertyState;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;

/**
 * Estimates how much disk space compaction could release.
 * <p>
 * On construction the node tree below the given root is traversed and the
 * ids of all segments it references are recorded in a Bloom filter. The
 * instance is then used as a {@link TarEntryVisitor}: every visited entry
 * adds to the total size, and entries whose id might be contained in the
 * filter also add to the reachable size. The gain is the share of the total
 * size that is not reachable.
 * <p>
 * Because a Bloom filter can report false positives, the reachable size may
 * be overestimated and the resulting gain correspondingly underestimated.
 */
class CompactionGainEstimate implements TarEntryVisitor, GCEstimation {

    /** Feeds both 64-bit halves of a {@link UUID} into the Bloom filter hash. */
    private static final Funnel<UUID> UUID_FUNNEL = new Funnel<UUID>() {
        @Override
        public void funnel(UUID from, PrimitiveSink into) {
            into.putLong(from.getMostSignificantBits());
            into.putLong(from.getLeastSignificantBits());
        }
    };

    /** Ids of the segments found to be referenced from the root node. */
    private final BloomFilter<UUID> uuids;

    /** Minimum gain (in percent) at which {@link #gcNeeded()} reports {@code true}. */
    private final int gainThreshold;

    /** Accumulated size of all entries passed to {@link #visit}. */
    private long totalSize = 0;

    /** Accumulated size of the visited entries whose id might be referenced. */
    private long reachableSize = 0;

    private boolean gcNeeded;

    private String gcInfo = "unknown";

    /** Set once {@link #run()} has computed {@link #gcNeeded} and {@link #gcInfo}. */
    private boolean finished = false;

    /**
     * Create a new instance of gain estimator. The estimation process can be stopped
     * by switching the supplier {@code stop} to {@code true}, in which case the returned
     * estimates are undefined.
     *
     * @param node  root node state
     * @param estimatedBulkCount  expected number of insertions used to size the Bloom filter
     * @param stop  stop signal
     * @param gainThreshold  gain percentage at or above which compaction is considered needed
     */
    CompactionGainEstimate(SegmentNodeState node, int estimatedBulkCount,
            Supplier<Boolean> stop, int gainThreshold) {
        uuids = BloomFilter.create(UUID_FUNNEL, estimatedBulkCount);
        this.gainThreshold = gainThreshold;
        collectReferencedSegments(node, new RecordIdSet(), stop);
    }

    /**
     * Recursively records the segment ids referenced by {@code node}, its
     * properties (including the bulk segments of their values) and its child
     * nodes. Nodes whose record id is already in {@code visited} are skipped,
     * and the traversal stops descending once {@code stop} yields {@code true}.
     *
     * @param node     node to inspect
     * @param visited  record ids of the nodes handled so far
     * @param stop     stop signal
     */
    private void collectReferencedSegments(SegmentNodeState node, RecordIdSet visited,
            Supplier<Boolean> stop) {
        if (!stop.get() && visited.addIfNotPresent(node.getRecordId())) {
            collectUUID(node.getRecordId().getSegmentId());
            for (PropertyState property : node.getProperties()) {
                if (property instanceof SegmentPropertyState) {
                    collectUUID(((SegmentPropertyState) property)
                            .getRecordId().getSegmentId());
                }

                // Get the underlying value as stream so we can collect
                // the segments ids involved in storing the value.
                // This works as primitives are stored as strings and strings
                // as binaries of their UTF-8 encoding.
                for (Blob blob : property.getValue(BINARIES)) {
                    for (SegmentId id : SegmentBlob.getBulkSegmentIds(blob)) {
                        collectUUID(id);
                    }
                }
            }
            for (ChildNodeEntry child : node.getChildNodeEntries()) {
                collectReferencedSegments((SegmentNodeState) child.getNodeState(),
                        visited, stop);
            }
        }
    }

    /**
     * Adds the given segment id to the Bloom filter of referenced segments.
     *
     * @param segmentId  id of a referenced segment
     */
    private void collectUUID(SegmentId segmentId) {
        uuids.put(new UUID(
            segmentId.getMostSignificantBits(),
            segmentId.getLeastSignificantBits()));
    }

    /**
     * Returns a percentage estimate (scale 0-100) for how much disk space
     * running compaction (and cleanup) could potentially release.
     *
     * @return percentage of disk space that could be freed with compaction,
     *         or 0 if no entries have been visited
     */
    public long estimateCompactionGain() {
        if (totalSize == 0) {
            // Nothing visited: avoid dividing by zero.
            return 0;
        }
        return 100 * (totalSize - reachableSize) / totalSize;
    }

    /**
     * Computes {@link #gcNeeded} and {@link #gcInfo} from the sizes collected
     * so far. Only the first call has an effect; later calls return
     * immediately, so entries visited afterwards do not change the result.
     */
    private void run() {
        if (finished) {
            return;
        }
        long gain = estimateCompactionGain();
        gcNeeded = gain >= gainThreshold;
        if (gcNeeded) {
            gcInfo = describeGain(
                    "Gain is %s%% or %s/%s (%s/%s bytes), so running compaction", gain);
        } else {
            if (totalSize == 0) {
                gcInfo = "Skipping compaction for now as repository consists of a single tar file only";
            } else {
                gcInfo = describeGain(
                        "Gain is %s%% or %s/%s (%s/%s bytes), so skipping compaction for now", gain);
            }
        }
        finished = true;
    }

    /**
     * Formats {@code template} with the gain followed by the reachable and
     * total sizes, first in human readable form and then in bytes.
     */
    private String describeGain(String template, long gain) {
        return String.format(template,
                gain,
                humanReadableByteCount(reachableSize),
                humanReadableByteCount(totalSize),
                reachableSize, totalSize);
    }

    /**
     * @return {@code true} if the estimated gain reaches the gain threshold
     */
    @Override
    public boolean gcNeeded() {
        if (!finished) {
            run();
        }
        return gcNeeded;
    }

    /**
     * @return a message describing the estimated gain and the resulting decision
     */
    @Override
    public String gcLog() {
        if (!finished) {
            run();
        }
        return gcInfo;
    }

    // ---------------------------------------------------< TarEntryVisitor >--

    /**
     * Accounts for a single tar entry: its entry size (as computed by
     * {@code TarReader.getEntrySize}) is added to the total size and, if the
     * id formed by {@code msb} and {@code lsb} might be among the referenced
     * segments, to the reachable size as well. {@code file} and
     * {@code offset} are not used.
     */
    @Override
    public void visit(long msb, long lsb, File file, int offset, int size) {
        UUID uuid = new UUID(msb, lsb);
        int entrySize = TarReader.getEntrySize(size);
        totalSize += entrySize;
        if (uuids.mightContain(uuid)) {
            reachableSize += entrySize;
        }
    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy