org.apache.jackrabbit.oak.plugins.segment.SegmentTracker Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.jackrabbit.oak.plugins.segment;
import static com.google.common.collect.Lists.newLinkedList;
import static com.google.common.collect.Queues.newArrayDeque;
import static com.google.common.collect.Sets.newHashSet;
import static com.google.common.collect.Sets.newIdentityHashSet;
import java.security.SecureRandom;
import java.util.LinkedList;
import java.util.Queue;
import java.util.Set;
import java.util.concurrent.atomic.AtomicReference;
import javax.annotation.Nonnull;
import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* Tracker of references to segment identifiers and segment instances
* that are currently kept in memory.
*/
public class SegmentTracker {
/** Logger instance */
private static final Logger log =
LoggerFactory.getLogger(SegmentTracker.class);
private static final long MSB_MASK = ~(0xfL << 12);
private static final long VERSION = (0x4L << 12);
private static final long LSB_MASK = ~(0xfL << 60);
private static final long DATA = 0xAL << 60;
private static final long BULK = 0xBL << 60;
private static final int MB = 1024 * 1024;
private static final int DEFAULT_MEMORY_CACHE_SIZE = 256;
/**
* The random number source for generating new segment identifiers.
*/
private final SecureRandom random = new SecureRandom();
private final SegmentStore store;
private final SegmentWriter writer;
/**
* Serialized map that contains the link between old record
* identifiers and identifiers of the corresponding records
* after compaction.
*/
private final AtomicReference compactionMap =
new AtomicReference(new CompactionMap(1));
private final long cacheSize;
/**
* Hash table of weak references to segment identifiers that are
* currently being accessed. The size of the table is always a power
* of two, which optimizes the {@link #expand()} operation. The table is
* indexed by the random identifier bits, which guarantees uniform
* distribution of entries. Each table entry is either {@code null}
* (when there are no matching identifiers) or a list of weak references
* to the matching identifiers.
*/
private final SegmentIdTable[] tables = new SegmentIdTable[32];
private final LinkedList segments = newLinkedList();
private long currentSize = 0;
public SegmentTracker(SegmentStore store, int cacheSizeMB) {
for (int i = 0; i < tables.length; i++) {
tables[i] = new SegmentIdTable(this);
}
this.store = store;
this.writer = new SegmentWriter(store, this);
this.cacheSize = cacheSizeMB * MB;
}
public SegmentTracker(SegmentStore store) {
this(store, DEFAULT_MEMORY_CACHE_SIZE);
}
public SegmentWriter getWriter() {
return writer;
}
public SegmentStore getStore() {
return store;
}
Segment getSegment(SegmentId id) {
Segment segment = store.readSegment(id);
setSegment(id, segment);
return segment;
}
void setSegment(SegmentId id, Segment segment) {
// done before synchronization to allow concurrent segment access
// while we update the cache below
id.setSegment(segment);
synchronized (this) {
long size = segment.getCacheSize();
segments.addFirst(segment);
currentSize += size;
log.debug("Added segment {} to tracker cache ({} bytes)",
id, size);
while (currentSize > cacheSize && segments.size() > 1) {
Segment last = segments.removeLast();
SegmentId lastId = last.getSegmentId();
if (last.accessed()) {
segments.addFirst(last);
log.debug("Segment {} was recently used, keeping in cache",
lastId);
} else {
long lastSize = last.getCacheSize();
lastId.setSegment(null);
currentSize -= lastSize;
log.debug("Removed segment {} from tracker cache ({} bytes)",
lastId, lastSize);
}
}
}
}
public void setCompactionMap(CompactionMap compaction) {
compactionMap.set(compaction);
}
@Nonnull
CompactionMap getCompactionMap() {
return compactionMap.get();
}
/**
* Returns all segment identifiers that are currently referenced in memory.
*
* @return referenced segment identifiers
*/
public synchronized Set getReferencedSegmentIds() {
Set ids = newHashSet();
for (SegmentIdTable table : tables) {
table.collectReferencedIds(ids);
}
return ids;
}
/**
* Finds all external blob references that are currently accessible
* in this repository and adds them to the given collector. Useful
* for collecting garbage in an external data store.
*
* Note that this method only collects blob references that are already
* stored in the repository (at the time when this method is called), so
* the garbage collector will need some other mechanism for tracking
* in-memory references and references stored while this method is
* running.
*/
public void collectBlobReferences(ReferenceCollector collector) {
Set processed = newIdentityHashSet();
Queue queue = newArrayDeque(getReferencedSegmentIds());
writer.flush(); // force the current segment to have root record info
while (!queue.isEmpty()) {
SegmentId id = queue.remove();
if (id.isDataSegmentId() && processed.add(id)) {
Segment segment = id.getSegment();
segment.collectBlobReferences(collector);
for (SegmentId refid : segment.getReferencedIds()) {
if (refid.isDataSegmentId() && !processed.contains(refid)) {
queue.add(refid);
}
}
}
}
}
/**
*
* @param msb
* @param lsb
* @return the segment id
*/
public SegmentId getSegmentId(long msb, long lsb) {
int index = ((int) msb) & (tables.length - 1);
return tables[index].getSegmentId(msb, lsb);
}
SegmentId newDataSegmentId() {
return newSegmentId(DATA);
}
SegmentId newBulkSegmentId() {
return newSegmentId(BULK);
}
private SegmentId newSegmentId(long type) {
long msb = (random.nextLong() & MSB_MASK) | VERSION;
long lsb = (random.nextLong() & LSB_MASK) | type;
return getSegmentId(msb, lsb);
}
}