All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.oak.plugins.segment.InMemoryCompactionMap Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.plugins.segment;

import static com.google.common.collect.Maps.newHashMap;
import static com.google.common.collect.Maps.newTreeMap;
import static com.google.common.collect.Sets.newTreeSet;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.decode;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.encode;

import java.util.Arrays;
import java.util.Collections;
import java.util.Map;
import java.util.Map.Entry;
import java.util.Set;
import java.util.UUID;

import javax.annotation.Nonnull;

/**
 * Immutable, space-optimized mapping of compacted record identifiers.
 * Used to optimize record equality comparisons across a compaction operation
 * without holding references to the {@link SegmentId} instances of the old,
 * compacted segments.
 * 

* The data structure used by this class consists of four parts: *

    *
  1. The {@link #recent} map of recently compacted entries is maintained * while the compaction is in progress and new entries need to be added. * These entries are periodically compressed into the more * memory-efficient structure described below. *
  2. The {@link #msbs} and {@link #lsbs} arrays store the identifiers * of all old, compacted segments. The identifiers are stored in * increasing order, with the i'th identifier stored in the * {@code msbs[i]} and {@code lsbs[i]} slots. Interpolation search * is used to quickly locate any given identifier. *
  3. Each compacted segment identifier is associated with a list of * mapping entries that point from a record offset within that * segment to the new identifier of the compacted record. The * {@link #entryIndex} array is used to to locate these lists within * the larger entry arrays described below. The list of entries for * the i'th identifier consists of entries from {@code entryIndex[i]} * (inclusive) to {@code entryIndex[i+1]} (exclusive). An extra * sentinel slot is added at the end of the array to make the above * rule work also for the last compacted segment identifier. *
  4. The mapping entries are stored in the {@link #beforeOffsets}, * {@link #afterSegmentIds} and {@link #afterOffsets} arrays. Once the * list of entries for a given compacted segment is found, the * before record offsets are scanned to find a match. If a match is * found, the corresponding compacted record will be identified by the * respective after segment identifier and offset. *
*

* Assuming each compacted segment contains {@code n} compacted records on * average, the amortized size of each entry in this mapping is about * {@code 20/n + 8} bytes, assuming compressed pointers. */ public class InMemoryCompactionMap implements PartialCompactionMap { /** * Number of map entries to keep until compressing this map. */ private static final int COMPRESS_INTERVAL = Integer.getInteger("compress-interval", 100000); private final SegmentTracker tracker; private Map recent = newHashMap(); private long[] msbs = new long[0]; private long[] lsbs = new long[0]; private short[] beforeOffsets = new short[0]; private int[] entryIndex = new int[0]; private short[] afterOffsets = new short[0]; private int[] afterSegmentIds = new int[0]; private long[] afterMsbs = new long[0]; private long[] afterLsbs = new long[0]; InMemoryCompactionMap(@Nonnull SegmentTracker tracker) { this.tracker = tracker; } @Override public boolean wasCompactedTo(@Nonnull RecordId before, @Nonnull RecordId after) { return after.equals(get(before)); } @Override public boolean wasCompacted(@Nonnull UUID id) { return findEntry(id.getMostSignificantBits(), id.getLeastSignificantBits()) != -1; } @Override public RecordId get(@Nonnull RecordId before) { RecordId after = recent.get(before); if (after != null) { return after; } //empty map if (msbs.length == 0) { return null; } SegmentId segmentId = before.getSegmentId(); long msb = segmentId.getMostSignificantBits(); long lsb = segmentId.getLeastSignificantBits(); int offset = before.getOffset(); int entry = findEntry(msb, lsb); if (entry != -1) { int index = entryIndex[entry]; int limit = entryIndex[entry + 1]; for (int i = index; i < limit; i++) { int o = decode(beforeOffsets[i]); if (o == offset) { // found it! return new RecordId(asSegmentId(i), decode(afterOffsets[i])); } else if (o > offset) { return null; } } } return null; } @Nonnull private SegmentId asSegmentId(int index) { int idx = afterSegmentIds[index]; return new SegmentId(tracker, afterMsbs[idx], afterLsbs[idx]); } @Nonnull private static UUID asUUID(SegmentId id) { return new UUID(id.getMostSignificantBits(), id.getLeastSignificantBits()); } @Override public void put(@Nonnull RecordId before, @Nonnull RecordId after) { if (get(before) != null) { throw new IllegalArgumentException(); } recent.put(before, after); if (recent.size() >= COMPRESS_INTERVAL) { compress(); } } @Override public void remove(@Nonnull Set uuids) { compress(uuids); } @Override public void compress() { compress(Collections.emptySet()); } @Override public long getSegmentCount() { return msbs.length; } @Override public long getRecordCount() { return afterOffsets.length; } @Override public boolean isEmpty() { return afterOffsets.length == 0 && recent.isEmpty(); } private void compress(@Nonnull Set removed) { if (recent.isEmpty() && removed.isEmpty()) { // no-op return; } Set uuids = newTreeSet(); int newSize = 0; Map> mapping = newTreeMap(); for (Entry entry : recent.entrySet()) { RecordId before = entry.getKey(); SegmentId id = before.getSegmentId(); UUID uuid = new UUID( id.getMostSignificantBits(), id.getLeastSignificantBits()); if (uuids.add(uuid) && !removed.contains(uuid)) { newSize++; } Map map = mapping.get(uuid); if (map == null) { map = newTreeMap(); mapping.put(uuid, map); } map.put(before.getOffset(), entry.getValue()); } for (int i = 0; i < msbs.length; i++) { UUID uuid = new UUID(msbs[i], lsbs[i]); if (uuids.add(uuid) && !removed.contains(uuid)) { newSize++; } } long[] newMsbs = new long[newSize]; long[] newLsbs = new long[newSize]; int[] newEntryIndex = new int[newSize + 1]; int newEntries = beforeOffsets.length + recent.size(); short[] newBeforeOffsets = new short[newEntries]; short[] newAfterOffsets = new short[newEntries]; int[] newAfterSegmentIds = new int[newEntries]; Map newAfterSegments = newHashMap(); int newIndex = 0; int newEntry = 0; int oldEntry = 0; for (UUID uuid : uuids) { long msb = uuid.getMostSignificantBits(); long lsb = uuid.getLeastSignificantBits(); if (removed.contains(uuid)) { if (oldEntry < msbs.length && msbs[oldEntry] == msb && lsbs[oldEntry] == lsb) { oldEntry++; } continue; } // offset -> record Map newSegment = mapping.get(uuid); if (newSegment == null) { newSegment = newTreeMap(); } if (oldEntry < msbs.length && msbs[oldEntry] == msb && lsbs[oldEntry] == lsb) { int index = entryIndex[oldEntry]; int limit = entryIndex[oldEntry + 1]; for (int i = index; i < limit; i++) { newSegment.put(decode(beforeOffsets[i]), new RecordId( asSegmentId(i), decode(afterOffsets[i]))); } oldEntry++; } newMsbs[newEntry] = msb; newLsbs[newEntry] = lsb; newEntryIndex[newEntry++] = newIndex; for (Entry entry : newSegment.entrySet()) { int key = entry.getKey(); RecordId id = entry.getValue(); newBeforeOffsets[newIndex] = encode(key); newAfterOffsets[newIndex] = encode(id.getOffset()); UUID aUUID = asUUID(id.getSegmentId()); int aSIdx; if (newAfterSegments.containsKey(aUUID)) { aSIdx = newAfterSegments.get(aUUID); } else { aSIdx = newAfterSegments.size(); newAfterSegments.put(aUUID, aSIdx); } newAfterSegmentIds[newIndex] = aSIdx; newIndex++; } } newEntryIndex[newEntry] = newIndex; this.msbs = newMsbs; this.lsbs = newLsbs; this.entryIndex = newEntryIndex; if (newIndex < newBeforeOffsets.length) { this.beforeOffsets = Arrays.copyOf(newBeforeOffsets, newIndex); this.afterOffsets = Arrays.copyOf(newAfterOffsets, newIndex); this.afterSegmentIds = Arrays.copyOf(newAfterSegmentIds, newIndex); } else { this.beforeOffsets = newBeforeOffsets; this.afterOffsets = newAfterOffsets; this.afterSegmentIds = newAfterSegmentIds; } this.afterMsbs = new long[newAfterSegments.size()]; this.afterLsbs = new long[newAfterSegments.size()]; for (Entry entry : newAfterSegments.entrySet()) { this.afterMsbs[entry.getValue()] = entry.getKey() .getMostSignificantBits(); this.afterLsbs[entry.getValue()] = entry.getKey() .getLeastSignificantBits(); } recent = newHashMap(); } /** * Finds the given segment identifier (UUID) within the list of * identifiers of compacted segments tracked by this instance. * Since the UUIDs are randomly generated and we keep the list * sorted, we can use interpolation search to achieve * {@code O(log log n)} lookup performance. * * @param msb most significant bits of the UUID * @param lsb least significant bits of the UUID * @return entry index, or {@code -1} if not found */ private final int findEntry(long msb, long lsb) { int lowIndex = 0; int highIndex = msbs.length - 1; // Use floats to prevent integer overflow during interpolation. // Lost accuracy is no problem, since we use interpolation only // as a guess of where the target value is located and the actual // comparisons are still done using the original values. float lowValue = Long.MIN_VALUE; float highValue = Long.MAX_VALUE; float targetValue = msb; while (lowIndex <= highIndex) { int guessIndex = lowIndex; float valueRange = highValue - lowValue; if (valueRange >= 1) { // no point in interpolating further // Math.round() also prevents IndexOutOfBoundsExceptions // caused by possible inaccuracy in the float computations. guessIndex += Math.round( (highIndex - lowIndex) * (targetValue - lowValue) / valueRange); } long m = msbs[guessIndex]; if (msb < m) { highIndex = guessIndex - 1; highValue = m; } else if (msb > m) { lowIndex = guessIndex + 1; lowValue = m; } else { // getting close... long l = lsbs[guessIndex]; if (lsb < l) { highIndex = guessIndex - 1; highValue = m; } else if (lsb > l) { highIndex = guessIndex + 1; highValue = m; } else { // found it! return guessIndex; } } } // not found return -1; } @Override public long getEstimatedWeight() { // estimation of the object including empty 'recent' map long total = 168; // msbs total += 24 + msbs.length * 8; // lsbs total += 24 + lsbs.length * 8; // beforeOffsets total += 24 + beforeOffsets.length * 2; // entryIndex total += 24 + entryIndex.length * 4; // afterOffsets total += 24 + afterOffsets.length * 2; // afterSegmentIds total += 24 + afterSegmentIds.length * 4; // afterMsbs total += 24 + afterMsbs.length * 8; // afterLsbs total += 24 + afterLsbs.length * 8; return total; } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy