org.apache.jackrabbit.oak.plugins.segment.InMemoryCompactionMap Maven / Gradle / Ivy

Go to download
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.plugins.segment;

import static com.google.common.collect.Maps.newHashMap;
import static com.google.common.collect.Maps.newTreeMap;
import static com.google.common.collect.Sets.newTreeSet;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.decode;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.encode;

import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;

import javax.annotation.Nonnull;

/**
 * Immutable, space-optimized mapping of compacted record identifiers.
 * Used to optimize record equality comparisons across a compaction operation
 * without holding references to the {@link SegmentId} instances of the old,
 * compacted segments.
 * 
 * The data structure used by this class consists of four parts:
 * 

 *   The {@link #recent} map of recently compacted entries is maintained
 *       while the compaction is in progress and new entries need to be added.
 *       These entries are periodically compressed into the more
 *       memory-efficient structure described below.
 *   
The {@link #msbs} and {@link #lsbs} arrays store the identifiers
 *       of all old, compacted segments. The identifiers are stored in
 *       increasing order, with the i'th identifier stored in the
 *       {@code msbs[i]} and {@code lsbs[i]} slots. Interpolation search
 *       is used to quickly locate any given identifier.
 *   
Each compacted segment identifier is associated with a list of
 *       mapping entries that point from a record offset within that
 *       segment to the new identifier of the compacted record. The
 *       {@link #entryIndex} array is used to to locate these lists within
 *       the larger entry arrays described below. The list of entries for
 *       the i'th identifier consists of entries from {@code entryIndex[i]}
 *       (inclusive) to {@code entryIndex[i+1]} (exclusive). An extra
 *       sentinel slot is added at the end of the array to make the above
 *       rule work also for the last compacted segment identifier.
 *   
The mapping entries are stored in the {@link #beforeOffsets},
 *       {@link #afterSegmentIds} and {@link #afterOffsets} arrays. Once the
 *       list of entries for a given compacted segment is found, the
 *       before record offsets are scanned to find a match. If a match is
 *       found, the corresponding compacted record will be identified by the
 *       respective after segment identifier and offset.
 * 
 * 
 * Assuming each compacted segment contains {@code n} compacted records on
 * average, the amortized size of each entry in this mapping is about
 * {@code 20/n + 8} bytes, assuming compressed pointers.
 */
public class InMemoryCompactionMap implements PartialCompactionMap {

    /**
     * Number of map entries to keep until compressing this map.
     */
    private static final int COMPRESS_INTERVAL = Integer.getInteger("compress-interval", 100000);

    private final SegmentTracker tracker;

    private Map recent = newHashMap();

    private long[] msbs = new long[0];
    private long[] lsbs = new long[0];
    private short[] beforeOffsets = new short[0];

    private int[] entryIndex = new int[0];
    private short[] afterOffsets = new short[0];

    private int[] afterSegmentIds = new int[0];
    private long[] afterMsbs = new long[0];
    private long[] afterLsbs = new long[0];

    InMemoryCompactionMap(@Nonnull SegmentTracker tracker) {
        this.tracker = tracker;
    }

    @Override
    public boolean wasCompactedTo(@Nonnull RecordId before, @Nonnull RecordId after) {
        return after.equals(get(before));
    }

    @Override
    public boolean wasCompacted(@Nonnull UUID id) {
        return findEntry(id.getMostSignificantBits(), id.getLeastSignificantBits()) != -1;
    }

    @Override
    public RecordId get(@Nonnull RecordId before) {
        RecordId after = recent.get(before);
        if (after != null) {
            return after;
        }

        //empty map
        if (msbs.length == 0) {
            return null;
        }

        SegmentId segmentId = before.getSegmentId();
        long msb = segmentId.getMostSignificantBits();
        long lsb = segmentId.getLeastSignificantBits();
        int offset = before.getOffset();

        int entry = findEntry(msb, lsb);
        if (entry != -1) {
            int index = entryIndex[entry];
            int limit = entryIndex[entry + 1];
            for (int i = index; i < limit; i++) {
                int o = decode(beforeOffsets[i]);
                if (o == offset) {
                    // found it!
                    return new RecordId(asSegmentId(i), decode(afterOffsets[i]));
                } else if (o > offset) {
                    return null;
                }
            }
        }

        return null;
    }

    @Nonnull
    private SegmentId asSegmentId(int index) {
        int idx = afterSegmentIds[index];
        return new SegmentId(tracker, afterMsbs[idx], afterLsbs[idx]);
    }

    @Nonnull
    private static UUID asUUID(SegmentId id) {
        return new UUID(id.getMostSignificantBits(),
                id.getLeastSignificantBits());
    }

    @Override
    public void put(@Nonnull RecordId before, @Nonnull RecordId after) {
        if (get(before) != null) {
            throw new IllegalArgumentException();
        }
        recent.put(before, after);
        if (recent.size() >= COMPRESS_INTERVAL) {
            compress();
        }
    }

    @Override
    public void remove(@Nonnull Set uuids) {
        compress(uuids);
    }

    @Override
    public void compress() {
        compress(Collections.emptySet());
    }

    @Override
    public long getSegmentCount() {
        return msbs.length;
    }

    @Override
    public long getRecordCount() {
        return afterOffsets.length;
    }

    @Override
    public boolean isEmpty() {
        return afterOffsets.length == 0 && recent.isEmpty();
    }

    private void compress(@Nonnull Set removed) {
        if (recent.isEmpty() && removed.isEmpty()) {
            // no-op
            return;
        }

        Set uuids = newTreeSet();
        int newSize = 0;
        Map> mapping = newTreeMap();
        for (Entry entry : recent.entrySet()) {
            RecordId before = entry.getKey();

            SegmentId id = before.getSegmentId();
            UUID uuid = new UUID(
                    id.getMostSignificantBits(),
                    id.getLeastSignificantBits());
            if (uuids.add(uuid) && !removed.contains(uuid)) {
                newSize++;
            }

            Map map = mapping.get(uuid);
            if (map == null) {
                map = newTreeMap();
                mapping.put(uuid, map);
            }
            map.put(before.getOffset(), entry.getValue());
        }

        for (int i = 0; i < msbs.length; i++) {
            UUID uuid = new UUID(msbs[i], lsbs[i]);
            if (uuids.add(uuid) && !removed.contains(uuid)) {
                newSize++;
            }
        }

        long[] newMsbs = new long[newSize];
        long[] newLsbs = new long[newSize];
        int[] newEntryIndex = new int[newSize + 1];

        int newEntries = beforeOffsets.length + recent.size();
        short[] newBeforeOffsets = new short[newEntries];
        short[] newAfterOffsets = new short[newEntries];

        int[] newAfterSegmentIds = new int[newEntries];
        Map newAfterSegments = newHashMap();

        int newIndex = 0;
        int newEntry = 0;
        int oldEntry = 0;
        for (UUID uuid : uuids) {
            long msb = uuid.getMostSignificantBits();
            long lsb = uuid.getLeastSignificantBits();

            if (removed.contains(uuid)) {
                if (oldEntry < msbs.length
                        && msbs[oldEntry] == msb
                        && lsbs[oldEntry] == lsb) {
                    oldEntry++;
                }
                continue;
            }

            // offset -> record
            Map newSegment = mapping.get(uuid);
            if (newSegment == null) {
                newSegment = newTreeMap();
            }

            if (oldEntry < msbs.length
                    && msbs[oldEntry] == msb
                    && lsbs[oldEntry] == lsb) {
                int index = entryIndex[oldEntry];
                int limit = entryIndex[oldEntry + 1];
                for (int i = index; i < limit; i++) {
                    newSegment.put(decode(beforeOffsets[i]), new RecordId(
                            asSegmentId(i), decode(afterOffsets[i])));
                }
                oldEntry++;
            }

            newMsbs[newEntry] = msb;
            newLsbs[newEntry] = lsb;
            newEntryIndex[newEntry++] = newIndex;
            for (Entry entry : newSegment.entrySet()) {
                int key = entry.getKey();
                RecordId id = entry.getValue();
                newBeforeOffsets[newIndex] = encode(key);
                newAfterOffsets[newIndex] = encode(id.getOffset());

                UUID aUUID = asUUID(id.getSegmentId());
                int aSIdx;
                if (newAfterSegments.containsKey(aUUID)) {
                    aSIdx = newAfterSegments.get(aUUID);
                } else {
                    aSIdx = newAfterSegments.size();
                    newAfterSegments.put(aUUID, aSIdx);
                }
                newAfterSegmentIds[newIndex] = aSIdx;

                newIndex++;
            }
        }

        newEntryIndex[newEntry] = newIndex;

        this.msbs = newMsbs;
        this.lsbs = newLsbs;
        this.entryIndex = newEntryIndex;

        if (newIndex < newBeforeOffsets.length) {
            this.beforeOffsets = Arrays.copyOf(newBeforeOffsets, newIndex);
            this.afterOffsets = Arrays.copyOf(newAfterOffsets, newIndex);
            this.afterSegmentIds = Arrays.copyOf(newAfterSegmentIds, newIndex);
        } else {
            this.beforeOffsets = newBeforeOffsets;
            this.afterOffsets = newAfterOffsets;
            this.afterSegmentIds = newAfterSegmentIds;
        }

        this.afterMsbs = new long[newAfterSegments.size()];
        this.afterLsbs = new long[newAfterSegments.size()];
        for (Entry entry : newAfterSegments.entrySet()) {
            this.afterMsbs[entry.getValue()] = entry.getKey()
                    .getMostSignificantBits();
            this.afterLsbs[entry.getValue()] = entry.getKey()
                    .getLeastSignificantBits();
        }

        recent = newHashMap();
    }

    /**
     * Finds the given segment identifier (UUID) within the list of
     * identifiers of compacted segments tracked by this instance.
     * Since the UUIDs are randomly generated and we keep the list
     * sorted, we can use interpolation search to achieve
     * {@code O(log log n)} lookup performance.
     *
     * @param msb most significant bits of the UUID
     * @param lsb least significant bits of the UUID
     * @return entry index, or {@code -1} if not found
     */
    private final int findEntry(long msb, long lsb) {
        int lowIndex = 0;
        int highIndex = msbs.length - 1;

        // Use floats to prevent integer overflow during interpolation.
        // Lost accuracy is no problem, since we use interpolation only
        // as a guess of where the target value is located and the actual
        // comparisons are still done using the original values.
        float lowValue = Long.MIN_VALUE;
        float highValue = Long.MAX_VALUE;
        float targetValue = msb;

        while (lowIndex <= highIndex) {
            int guessIndex = lowIndex;
            float valueRange = highValue - lowValue;
            if (valueRange >= 1) { // no point in interpolating further
                // Math.round() also prevents IndexOutOfBoundsExceptions
                // caused by possible inaccuracy in the float computations.
                guessIndex += Math.round(
                        (highIndex - lowIndex) * (targetValue - lowValue)
                        / valueRange);
            }

            long m = msbs[guessIndex];
            if (msb < m) {
                highIndex = guessIndex - 1;
                highValue = m;
            } else if (msb > m) {
                lowIndex = guessIndex + 1;
                lowValue = m;
            } else {
                // getting close...
                long l = lsbs[guessIndex];
                if (lsb < l) {
                    highIndex = guessIndex - 1;
                    highValue = m;
                } else if (lsb > l) {
                    highIndex = guessIndex + 1;
                    highValue = m;
                } else {
                    // found it!
                    return guessIndex;
                }
            }
        }

        // not found
        return -1;
    }

    @Override
    public long getEstimatedWeight() {
        // estimation of the object including empty 'recent' map
        long total = 168;

        // msbs
        total += 24 + msbs.length * 8;
        // lsbs
        total += 24 + lsbs.length * 8;
        // beforeOffsets
        total += 24 + beforeOffsets.length * 2;

        // entryIndex
        total += 24 + entryIndex.length * 4;
        // afterOffsets
        total += 24 + afterOffsets.length * 2;

        // afterSegmentIds
        total += 24 + afterSegmentIds.length * 4;
        // afterMsbs
        total += 24 + afterMsbs.length * 8;
        // afterLsbs
        total += 24 + afterLsbs.length * 8;

        return total;
    }

}