
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.segment.file;
import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Lists.newArrayListWithCapacity;
import static com.google.common.collect.Maps.newHashMap;
import static com.google.common.collect.Maps.newLinkedHashMap;
import static com.google.common.collect.Maps.newTreeMap;
import static com.google.common.collect.Sets.newHashSet;
import static com.google.common.collect.Sets.newHashSetWithExpectedSize;
import static java.nio.ByteBuffer.wrap;
import static java.util.Collections.singletonList;
import static org.apache.jackrabbit.oak.segment.Segment.REF_COUNT_OFFSET;
import static org.apache.jackrabbit.oak.segment.Segment.getGcGeneration;
import static org.apache.jackrabbit.oak.segment.SegmentId.isDataSegmentId;
import static org.apache.jackrabbit.oak.segment.file.TarWriter.GRAPH_MAGIC;
import java.io.Closeable;
import java.io.File;
import java.io.IOException;
import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.SortedMap;
import java.util.UUID;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.zip.CRC32;
import javax.annotation.Nonnull;
import com.google.common.base.Predicate;
import org.apache.commons.io.FileUtils;
import org.apache.jackrabbit.oak.plugins.blob.ReferenceCollector;
import org.apache.jackrabbit.oak.segment.SegmentGraph.SegmentGraphVisitor;
import org.apache.jackrabbit.oak.segment.SegmentId;
import org.apache.jackrabbit.oak.segment.SegmentStore;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
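/**
* Reads the segment entries, the optional pre-compiled segment graph, and
* the index of a TarMK tar file. Instances are created through the static
* {@code open} methods, which pick (or, if necessary, recover) a file
* generation with a valid index.
* <p>
* A minimal usage sketch (the file name is hypothetical):
* <pre>{@code
* TarReader reader = TarReader.open(new File("data00000a.tar"), false);
* try {
*     for (UUID uuid : reader.getUUIDs()) {
*         ByteBuffer buffer = reader.readEntry(
*                 uuid.getMostSignificantBits(),
*                 uuid.getLeastSignificantBits());
*         // process the segment data in buffer...
*     }
* } finally {
*     reader.close();
* }
* }</pre>
*/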
class TarReader implements Closeable {
/** Logger instance */
private static final Logger log = LoggerFactory.getLogger(TarReader.class);
private static final Logger GC_LOG = LoggerFactory.getLogger(TarReader.class.getName() + "-GC");
/** Magic byte sequence at the end of the index block. */
private static final int INDEX_MAGIC = TarWriter.INDEX_MAGIC;
/**
* Pattern of the segment entry names. Note the trailing (\\..*)? group
* that's included for compatibility with possible future extensions.
*/
private static final Pattern NAME_PATTERN = Pattern.compile(
"([0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12})"
+ "(\\.([0-9a-f]{8}))?(\\..*)?");
/** The tar file block size. */
private static final int BLOCK_SIZE = TarWriter.BLOCK_SIZE;
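/**
* Returns the size that an entry with the given data size occupies on disk:
* one header block plus the data, padded up to the next block boundary.
*
* @param size size of the entry data in bytes
* @return total on-disk size of the entry in bytes
*/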
static int getEntrySize(int size) {
return BLOCK_SIZE + size + TarWriter.getPaddingSize(size);
}
static TarReader open(File file, boolean memoryMapping) throws IOException {
TarReader reader = openFirstFileWithValidIndex(
singletonList(file), memoryMapping);
if (reader != null) {
return reader;
} else {
throw new IOException("Failed to open tar file " + file);
}
}
/**
* Creates a TarReader instance for reading content from a tar file.
* If there exist multiple generations of the same tar file, they are
* all passed to this method. The latest generation with a valid tar
* index (which is a good indication of general validity of the file)
* is opened and the other generations are removed to clean things up.
* If none of the generations has a valid index, then something must
* have gone wrong and we'll try to recover as much content as we can
* from the existing tar generations.
*
* @param files map from generation character to the tar file of that generation
* @param memoryMapping whether to use memory mapped file access
* @return reader for the opened or recovered tar file
* @throws IOException if opening and recovery both fail
*/
static TarReader open(Map<Character, File> files, boolean memoryMapping)
throws IOException {
SortedMap<Character, File> sorted = newTreeMap();
sorted.putAll(files);
List<File> list = newArrayList(sorted.values());
Collections.reverse(list);
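// e.g. {'a' -> data00000a.tar, 'b' -> data00000b.tar} is traversed in
// the order [data00000b.tar, data00000a.tar], newest generation first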
TarReader reader = openFirstFileWithValidIndex(list, memoryMapping);
if (reader != null) {
return reader;
}
// no generation has a valid index, so recover as much as we can
log.warn("Could not find a valid tar index in {}, recovering...", list);
LinkedHashMap<UUID, byte[]> entries = newLinkedHashMap();
for (File file : sorted.values()) {
collectFileEntries(file, entries, true);
}
// regenerate the first generation based on the recovered data
File file = sorted.values().iterator().next();
generateTarFile(entries, file);
reader = openFirstFileWithValidIndex(singletonList(file), memoryMapping);
if (reader != null) {
return reader;
} else {
throw new IOException("Failed to open recovered tar file " + file);
}
}
static TarReader openRO(Map<Character, File> files, boolean memoryMapping,
boolean recover) throws IOException {
// for a read-only store, only try the latest generation of a given
// tar file to prevent any rollback or rewrite
File file = files.get(Collections.max(files.keySet()));
TarReader reader = openFirstFileWithValidIndex(singletonList(file),
memoryMapping);
if (reader != null) {
return reader;
}
if (recover) {
log.warn(
"Could not find a valid tar index in {}, recovering read-only",
file);
// collect the entries (without touching the original file) and
// write them into an artificial '.ro.bak' tar file
LinkedHashMap<UUID, byte[]> entries = newLinkedHashMap();
collectFileEntries(file, entries, false);
file = findAvailGen(file, ".ro.bak");
generateTarFile(entries, file);
reader = openFirstFileWithValidIndex(singletonList(file),
memoryMapping);
if (reader != null) {
return reader;
}
}
throw new IOException("Failed to open tar file " + file);
}
/**
* Collects all entries from the given file and optionally backs up the
* file by renaming it to a ".bak" extension.
*
* @param file tar file to recover entries from
* @param entries map to which the recovered entries are added
* @param backup whether to back up the original file
* @throws IOException if backing up the file fails
*/
private static void collectFileEntries(File file,
LinkedHashMap<UUID, byte[]> entries, boolean backup)
throws IOException {
log.info("Recovering segments from tar file {}", file);
try {
RandomAccessFile access = new RandomAccessFile(file, "r");
try {
recoverEntries(file, access, entries);
} finally {
access.close();
}
} catch (IOException e) {
log.warn("Could not read tar file {}, skipping...", file, e);
}
if (backup) {
backupSafely(file);
}
}
/**
* Regenerates a tar file from a list of entries.
*
* @param entries entries to write, in order
* @param file the tar file to generate
* @throws IOException if the tar file could not be written
*/
private static void generateTarFile(LinkedHashMap<UUID, byte[]> entries,
File file) throws IOException {
log.info("Regenerating tar file {}", file);
TarWriter writer = new TarWriter(file);
for (Map.Entry<UUID, byte[]> entry : entries.entrySet()) {
UUID uuid = entry.getKey();
byte[] data = entry.getValue();
int generation = getGcGeneration(wrap(data), uuid);
writer.writeEntry(
uuid.getMostSignificantBits(),
uuid.getLeastSignificantBits(),
data, 0, data.length, generation);
}
writer.close();
}
/**
* Back up this tar file for manual inspection. Something went wrong
* earlier, so we want to prevent the data from being accidentally
* removed or overwritten.
*
* @param file the tar file to back up
* @throws IOException if the backup could not be created
*/
private static void backupSafely(File file) throws IOException {
File backup = findAvailGen(file, ".bak");
log.info("Backing up {} to {}", file, backup.getName());
if (!file.renameTo(backup)) {
log.warn("Renaming failed, so using copy to backup {}", file);
FileUtils.copyFile(file, backup);
if (!file.delete()) {
throw new IOException(
"Could not remove broken tar file " + file);
}
}
}
/**
* Find the next available generation number so that a generated file
* doesn't overwrite another existing file.
*
* @param file the file to find an available generation for
* @param ext the file extension to append
*/
private static File findAvailGen(File file, String ext) {
File parent = file.getParentFile();
String name = file.getName();
File backup = new File(parent, name + ext);
for (int i = 2; backup.exists(); i++) {
backup = new File(parent, name + "." + i + ext);
}
return backup;
}
private static TarReader openFirstFileWithValidIndex(List<File> files, boolean memoryMapping) {
for (File file : files) {
String name = file.getName();
try {
RandomAccessFile access = new RandomAccessFile(file, "r");
try {
ByteBuffer index = loadAndValidateIndex(access, name);
if (index == null) {
log.info("No index found in tar file {}, skipping...", name);
} else {
// found a file with a valid index, drop the others
for (File other : files) {
if (other != file) {
log.info("Removing unused tar file {}", other.getName());
other.delete();
}
}
if (memoryMapping) {
try {
FileAccess mapped = new FileAccess.Mapped(access);
// re-read the index, now with memory mapping
int indexSize = index.remaining();
index = mapped.read(
mapped.length() - indexSize - 16 - 1024,
indexSize);
return new TarReader(file, mapped, index);
} catch (IOException e) {
log.warn("Failed to mmap tar file {}. Falling back to normal file " +
"IO, which will negatively impact repository performance. " +
"This problem may have been caused by restrictions on the " +
"amount of virtual memory available to the JVM. Please make " +
"sure that a 64-bit JVM is being used and that the process " +
"has access to unlimited virtual memory (ulimit option -v).",
name, e);
}
}
FileAccess random = new FileAccess.Random(access);
// prevent the finally block from closing the file
// as the returned TarReader will take care of that
access = null;
return new TarReader(file, random, index);
}
} finally {
if (access != null) {
access.close();
}
}
} catch (IOException e) {
log.warn("Could not read tar file {}, skipping...", name, e);
}
}
return null;
}
/**
* Tries to read an existing index from the given tar file. The index is
* returned if it is found and looks valid (correct checksum, passes
* sanity checks).
*
* @param file tar file
* @param name name of the tar file, for logging purposes
* @return tar index, or {@code null} if not found or not valid
* @throws IOException if the tar file could not be read
*/
private static ByteBuffer loadAndValidateIndex(
RandomAccessFile file, String name)
throws IOException {
long length = file.length();
if (length % BLOCK_SIZE != 0
|| length < 6 * BLOCK_SIZE
|| length > Integer.MAX_VALUE) {
log.warn("Unexpected size {} of tar file {}", length, name);
return null; // unexpected file size
}
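// On-disk layout: the index entries (count * TarEntry.SIZE bytes,
// sorted by UUID) are immediately followed by a 16 byte metadata
// record (crc32, count, bytes, magic), all stored as the payload of
// a trailing ".idx" tar entry just before the two final zero blocks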
// read the index metadata just before the two final zero blocks
ByteBuffer meta = ByteBuffer.allocate(16);
file.seek(length - 2 * BLOCK_SIZE - 16);
file.readFully(meta.array());
int crc32 = meta.getInt();
int count = meta.getInt();
int bytes = meta.getInt();
int magic = meta.getInt();
if (magic != INDEX_MAGIC) {
return null; // magic byte mismatch
}
if (count < 1 || bytes < count * TarEntry.SIZE + 16 || bytes % BLOCK_SIZE != 0) {
log.warn("Invalid index metadata in tar file {}", name);
return null; // impossible entry and/or byte counts
}
// this involves seeking backwards in the file, which might not
// perform well, but that's OK since we only do this once per file
ByteBuffer index = ByteBuffer.allocate(count * TarEntry.SIZE);
file.seek(length - 2 * BLOCK_SIZE - 16 - count * TarEntry.SIZE);
file.readFully(index.array());
index.mark();
CRC32 checksum = new CRC32();
long limit = length - 2 * BLOCK_SIZE - bytes - BLOCK_SIZE;
long lastmsb = Long.MIN_VALUE;
long lastlsb = Long.MIN_VALUE;
byte[] entry = new byte[TarEntry.SIZE];
for (int i = 0; i < count; i++) {
index.get(entry);
checksum.update(entry);
ByteBuffer buffer = wrap(entry);
long msb = buffer.getLong();
long lsb = buffer.getLong();
int offset = buffer.getInt();
int size = buffer.getInt();
if (lastmsb > msb || (lastmsb == msb && lastlsb > lsb)) {
log.warn("Incorrect index ordering in tar file {}", name);
return null;
} else if (lastmsb == msb && lastlsb == lsb && i > 0) {
log.warn("Duplicate index entry in tar file {}", name);
return null;
} else if (offset < 0 || offset % BLOCK_SIZE != 0) {
log.warn("Invalid index entry offset in tar file {}", name);
return null;
} else if (size < 1 || offset + size > limit) {
log.warn("Invalid index entry size in tar file {}", name);
return null;
}
lastmsb = msb;
lastlsb = lsb;
}
if (crc32 != (int) checksum.getValue()) {
log.warn("Invalid index checksum in tar file {}", name);
return null; // checksum mismatch
}
index.reset();
return index;
}
/**
* Scans through the tar file, looking for all segment entries.
*
* @param file the tar file, for logging purposes
* @param access random access handle to the tar file
* @param entries map to which the recovered entries are added
* @throws IOException if the tar file could not be read
*/
private static void recoverEntries(
File file, RandomAccessFile access,
LinkedHashMap<UUID, byte[]> entries) throws IOException {
byte[] header = new byte[BLOCK_SIZE];
while (access.getFilePointer() + BLOCK_SIZE <= access.length()) {
// read the tar header block
access.readFully(header);
// compute the header checksum
int sum = 0;
for (int i = 0; i < BLOCK_SIZE; i++) {
sum += header[i] & 0xff;
}
// identify possible zero block
if (sum == 0 && access.getFilePointer() + 2 * BLOCK_SIZE == access.length()) {
return; // found the zero blocks at the end of the file
}
// replace the actual stored checksum with spaces for comparison
for (int i = 148; i < 148 + 8; i++) {
sum -= header[i] & 0xff;
sum += ' ';
}
byte[] checkbytes = String.format("%06o\0 ", sum).getBytes(UTF_8);
for (int i = 0; i < checkbytes.length; i++) {
if (checkbytes[i] != header[148 + i]) {
log.warn("Invalid entry checksum at offset {} in tar file {}",
access.getFilePointer() - BLOCK_SIZE, file);
break; // warn only once per header, then attempt recovery anyway
}
}
// read the entry name and size from the header; a checksum mismatch
// above is only logged, and the entry is still processed
ByteBuffer buffer = wrap(header);
String name = readString(buffer, 100);
buffer.position(124);
int size = readNumber(buffer, 12);
if (access.getFilePointer() + size > access.length()) {
// the size field claims more data than the file contains
log.warn("Partial entry {} in tar file {}, ignoring...", name, file);
return;
}
Matcher matcher = NAME_PATTERN.matcher(name);
if (matcher.matches()) {
UUID id = UUID.fromString(matcher.group(1));
String checksum = matcher.group(3);
if (checksum != null || !entries.containsKey(id)) {
byte[] data = new byte[size];
access.readFully(data);
// skip possible padding to stay at block boundaries
long position = access.getFilePointer();
long remainder = position % BLOCK_SIZE;
if (remainder != 0) {
access.seek(position + (BLOCK_SIZE - remainder));
}
if (checksum != null) {
CRC32 crc = new CRC32();
crc.update(data);
if (crc.getValue() != Long.parseLong(checksum, 16)) {
log.warn("Checksum mismatch in entry {} of tar file {}, skipping...",
name, file);
continue;
}
}
entries.put(id, data);
}
} else if (!name.equals(file.getName() + ".idx")) {
log.warn("Unexpected entry {} in tar file {}, skipping...",
name, file);
long position = access.getFilePointer() + size;
long remainder = position % BLOCK_SIZE;
if (remainder != 0) {
position += BLOCK_SIZE - remainder;
}
access.seek(position);
}
}
}
private final File file;
private final FileAccess access;
private final ByteBuffer index;
private volatile boolean closed;
private volatile boolean hasGraph;
private TarReader(File file, FileAccess access, ByteBuffer index) {
this.file = file;
this.access = access;
this.index = index;
}
long size() {
return file.length();
}
/**
* Returns the number of segments in this tar file.
*
* @return number of segments
*/
int count() {
return index.capacity() / TarEntry.SIZE;
}
/**
* Iterates over all entries in this tar file and calls
* {@link TarEntryVisitor#visit(long, long, File, int, int)} on them.
*
* @param visitor entry visitor
*/
void accept(TarEntryVisitor visitor) {
int position = index.position();
while (position < index.limit()) {
visitor.visit(
index.getLong(position),
index.getLong(position + 8),
file,
index.getInt(position + 16),
index.getInt(position + 20));
position += TarEntry.SIZE;
}
}
Set<UUID> getUUIDs() {
Set<UUID> uuids = newHashSetWithExpectedSize(index.remaining() / TarEntry.SIZE);
int position = index.position();
while (position < index.limit()) {
uuids.add(new UUID(
index.getLong(position),
index.getLong(position + 8)));
position += TarEntry.SIZE;
}
return uuids;
}
boolean containsEntry(long msb, long lsb) {
return findEntry(msb, lsb) != -1;
}
/**
* If the given segment is in this file, returns the byte buffer that
* allows reading it.
* <p>
* Whether this actually reads from the file depends on whether memory
* mapped files are used or not.
*
* @param msb the most significant bits of the segment id
* @param lsb the least significant bits of the segment id
* @return the byte buffer, or {@code null} if the segment is not in this file
*/
ByteBuffer readEntry(long msb, long lsb) throws IOException {
int position = findEntry(msb, lsb);
if (position != -1) {
return access.read(
index.getInt(position + 16),
index.getInt(position + 20));
} else {
return null;
}
}
/**
* Find the given segment in this tar file, using the tar index.
*
* @param msb the most significant bits of the segment id
* @param lsb the least significant bits of the segment id
* @return the position of the matching entry within the index, or -1 if not found
*/
private int findEntry(long msb, long lsb) {
// The segment identifiers are randomly generated with uniform
// distribution, so we can use interpolation search to find the
// matching entry in the index. The average runtime is O(log log n).
int lowIndex = 0;
int highIndex = index.remaining() / TarEntry.SIZE - 1;
float lowValue = Long.MIN_VALUE;
float highValue = Long.MAX_VALUE;
float targetValue = msb;
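// the float arithmetic below is only a heuristic for picking a good
// guess index; correctness follows from the binary-search style
// updates of lowIndex and highIndex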
while (lowIndex <= highIndex) {
int guessIndex = lowIndex + Math.round(
(highIndex - lowIndex)
* (targetValue - lowValue)
/ (highValue - lowValue));
int position = index.position() + guessIndex * TarEntry.SIZE;
long m = index.getLong(position);
if (msb < m) {
highIndex = guessIndex - 1;
highValue = m;
} else if (msb > m) {
lowIndex = guessIndex + 1;
lowValue = m;
} else {
// getting close...
long l = index.getLong(position + 8);
if (lsb < l) {
highIndex = guessIndex - 1;
highValue = m;
} else if (lsb > l) {
lowIndex = guessIndex + 1;
lowValue = m;
} else {
// found it!
return position;
}
}
}
// not found
return -1;
}
@Nonnull
private TarEntry[] getEntries() {
TarEntry[] entries = new TarEntry[index.remaining() / TarEntry.SIZE];
int position = index.position();
for (int i = 0; position < index.limit(); i++) {
entries[i] = new TarEntry(
index.getLong(position),
index.getLong(position + 8),
index.getInt(position + 16),
index.getInt(position + 20),
index.getInt(position + 24));
position += TarEntry.SIZE;
}
Arrays.sort(entries, TarEntry.OFFSET_ORDER);
return entries;
}
@Nonnull
private List<UUID> getReferences(TarEntry entry, UUID id, Map<UUID, List<UUID>> graph) throws IOException {
if (graph != null) {
List<UUID> uuids = graph.get(id);
return uuids == null ? Collections.<UUID>emptyList() : uuids;
} else {
// a pre-compiled graph is not available, so read the
// references directly from this segment
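// the segment header and the reference ids (16 bytes each, at most
// 255 of them) all fit within the first 16 * 256 bytes of the segment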
ByteBuffer segment = access.read(
entry.offset(),
Math.min(entry.size(), 16 * 256));
int pos = segment.position();
int refCount = segment.get(pos + REF_COUNT_OFFSET) & 0xff;
int refEnd = pos + 16 * (refCount + 1);
List<UUID> refIds = newArrayList();
for (int refPos = pos + 16; refPos < refEnd; refPos += 16) {
refIds.add(new UUID(
segment.getLong(refPos),
segment.getLong(refPos + 8)));
}
return refIds;
}
}
/**
* Build the graph of segments reachable from an initial set of segments.
*
* @param roots the initial set of segments; updated while the graph is followed
* @param visitor visitor receiving callbacks while following the segment graph
* @throws IOException if the tar file could not be read
*/
public void traverseSegmentGraph(
@Nonnull Set<UUID> roots,
@Nonnull SegmentGraphVisitor visitor) throws IOException {
checkNotNull(roots);
checkNotNull(visitor);
Map<UUID, List<UUID>> graph = getGraph(false);
TarEntry[] entries = getEntries();
for (int i = entries.length - 1; i >= 0; i--) {
TarEntry entry = entries[i];
UUID id = new UUID(entry.msb(), entry.lsb());
if (roots.remove(id) && isDataSegmentId(entry.lsb())) {
// this is a referenced data segment, so follow the graph
for (UUID refId : getReferences(entry, id, graph)) {
visitor.accept(id, refId);
roots.add(refId);
}
} else {
// this segment is not referenced anywhere
visitor.accept(id, null);
}
}
}
/**
* Calculate the ids of the segments directly referenced from {@code referencedIds}
* through forward references.
*
* @param referencedIds The initial set of ids to start from. On return it
* contains the set of direct forward references.
* @throws IOException if the tar file could not be read
*/
void calculateForwardReferences(Set<UUID> referencedIds) throws IOException {
Map<UUID, List<UUID>> graph = getGraph(false);
TarEntry[] entries = getEntries();
for (int i = entries.length - 1; i >= 0; i--) {
TarEntry entry = entries[i];
UUID id = new UUID(entry.msb(), entry.lsb());
if (referencedIds.remove(id)) {
if (isDataSegmentId(entry.lsb())) {
referencedIds.addAll(getReferences(entry, id, graph));
}
}
}
}
/**
* Collect the references of those blobs that are reachable from any segment with a
* generation at or above {@code minGeneration}.
*
* @param store segment store used to resolve segment ids
* @param collector collector receiving the blob references
* @param minGeneration minimum gc generation of segments to consider
*/
void collectBlobReferences(SegmentStore store, ReferenceCollector collector, int minGeneration) {
for (TarEntry entry : getEntries()) {
if (entry.generation() >= minGeneration) {
// FIXME OAK-4201: Add an index of binary references in a tar file
// Fetch the blob references from the tar index instead reading them from the segment
SegmentId id = store.newSegmentId(entry.msb(), entry.lsb());
id.getSegment().collectBlobReferences(collector);
}
}
}
/**
* Collect reclaimable segments.
* A data segment is reclaimable iff its generation matches the
* {@code reclaimGeneration} predicate.
* A bulk segment is reclaimable if it is neither contained in
* {@code bulkRefs} nor referenced from a non-reclaimable data segment
* in this file.
*
* @param bulkRefs bulk segment gc roots; updated with the bulk references
* of surviving data segments
* @param reclaim set to which reclaimable segments are added
* @param reclaimGeneration reclaim generation predicate for data segments
* @throws IOException if the tar file could not be read
*/
void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, Predicate<Integer> reclaimGeneration)
throws IOException {
Map<UUID, List<UUID>> graph = getGraph(true);
TarEntry[] entries = getEntries();
for (int i = entries.length - 1; i >= 0; i--) {
TarEntry entry = entries[i];
UUID id = new UUID(entry.msb(), entry.lsb());
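// reclaim bulk segments that are no longer referenced and data
// segments whose generation matches the reclaim predicate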
if ((!isDataSegmentId(entry.lsb()) && !bulkRefs.remove(id)) ||
(isDataSegmentId(entry.lsb()) && reclaimGeneration.apply(entry.generation()))) {
// non referenced bulk segment or old data segment
reclaim.add(id);
} else {
if (isDataSegmentId(entry.lsb())) {
for (UUID refId : getReferences(entry, id, graph)) {
if (!isDataSegmentId(refId.getLeastSignificantBits())) {
// keep the extra check for bulk segments for the case where a
// pre-compiled graph is not available and getReferences also
// includes data references
if (!reclaim.remove(id)) {
bulkRefs.add(refId);
}
}
}
}
}
}
}
/**
* Remove reclaimable segments and collect the segments that were
* actually reclaimed.
*
* @param reclaim segments to reclaim
* @param reclaimed set to which the actually reclaimed segments are added
* @return a new reader on the cleaned-up file, {@code this} if no
* clean up was performed, or {@code null} if no entries remain
* @throws IOException if the tar file could not be read or rewritten
*/
TarReader sweep(@Nonnull Set<UUID> reclaim, @Nonnull Set<UUID> reclaimed) throws IOException {
String name = file.getName();
log.debug("Cleaning up {}", name);
Set<UUID> cleaned = newHashSet();
int size = 0;
int count = 0;
TarEntry[] entries = getEntries();
for (int i = 0; i < entries.length; i++) {
TarEntry entry = entries[i];
UUID id = new UUID(entry.msb(), entry.lsb());
if (reclaim.contains(id)) {
cleaned.add(id);
entries[i] = null;
} else {
size += getEntrySize(entry.size());
count += 1;
}
}
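// account for the trailing index entry (count index entries plus the
// 16 byte metadata record) and the two final zero blocks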
size += getEntrySize(TarEntry.SIZE * count + 16);
size += 2 * BLOCK_SIZE;
if (count == 0) {
log.debug("None of the entries of {} are referenceable.", name);
logCleanedSegments(cleaned);
return null;
}
if (size >= access.length() * 3 / 4 && hasGraph()) {
// the space savings are not worth it at less than 25%,
// unless this tar file lacks a pre-compiled segment graph
// in which case we'll always generate a new tar file with
// the graph to speed up future garbage collection runs.
log.debug("Not enough space savings. ({}/{}). Skipping clean up of {}",
access.length() - size, access.length(), name);
return this;
}
if (!hasGraph()) {
log.warn("Recovering {}, which is missing its graph.", name);
}
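// tar file names end in a generation character between 'a' and 'z'
// (e.g. "data00000a.tar"); the surviving entries are written to the
// next generation of the same file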
int pos = name.length() - "a.tar".length();
char generation = name.charAt(pos);
if (generation == 'z') {
log.debug("No garbage collection after reaching generation z: {}", name);
return this;
}
File newFile = new File(
file.getParentFile(),
name.substring(0, pos) + (char) (generation + 1) + ".tar");
log.debug("Writing new generation {}", newFile.getName());
TarWriter writer = new TarWriter(newFile);
for (TarEntry entry : entries) {
if (entry != null) {
byte[] data = new byte[entry.size()];
access.read(entry.offset(), entry.size()).get(data);
writer.writeEntry(
entry.msb(), entry.lsb(), data, 0, entry.size(), entry.generation());
}
}
writer.close();
TarReader reader = openFirstFileWithValidIndex(
singletonList(newFile), access.isMemoryMapped());
if (reader != null) {
logCleanedSegments(cleaned);
reclaimed.addAll(cleaned);
return reader;
} else {
log.warn("Failed to open cleaned up tar file {}", file);
return this;
}
}
private void logCleanedSegments(Set<UUID> cleaned) {
StringBuilder uuids = new StringBuilder();
String newLine = System.getProperty("line.separator", "\n") + " ";
int c = 0;
String sep = "";
for (UUID uuid : cleaned) {
uuids.append(sep);
if (c++ % 4 == 0) {
uuids.append(newLine);
}
uuids.append(uuid);
sep = ", ";
}
GC_LOG.info("TarMK cleaned segments from {}: {}", file.getName(), uuids);
}
/**
* @return {@code true} iff this reader has been closed
* @see #close()
*/
boolean isClosed() {
return closed;
}
@Override
public void close() throws IOException {
closed = true;
access.close();
}
//-----------------------------------------------------------< private >--
/**
* Loads and parses the optional pre-compiled graph entry from this tar
* file.
*
* @param bulkOnly if {@code true}, only bulk segment references are included
* @return the parsed graph, or {@code null} if one was not found
* @throws IOException if the tar file could not be read
*/
Map<UUID, List<UUID>> getGraph(boolean bulkOnly) throws IOException {
ByteBuffer graph = loadGraph();
if (graph == null) {
return null;
} else {
return parseGraph(graph, bulkOnly);
}
}
private boolean hasGraph() {
if (!hasGraph) {
try {
loadGraph();
} catch (IOException ignore) { }
}
return hasGraph;
}
/**
* Loads the optional pre-compiled graph entry from this tar file.
*
* @return graph buffer, or {@code null} if one was not found
* @throws IOException if the tar file could not be read
*/
private ByteBuffer loadGraph() throws IOException {
// read the graph metadata just before the tar index entry
int pos = access.length() - 2 * BLOCK_SIZE - getEntrySize(index.remaining() + 16);
ByteBuffer meta = access.read(pos - 16, 16);
int crc32 = meta.getInt();
int count = meta.getInt();
int bytes = meta.getInt();
int magic = meta.getInt();
if (magic != GRAPH_MAGIC) {
return null; // magic byte mismatch
}
if (count < 0 || bytes < count * 16 + 16 || BLOCK_SIZE + bytes > pos) {
log.warn("Invalid graph metadata in tar file {}", file);
return null; // impossible uuid and/or byte counts
}
// this involves seeking backwards in the file, which might not
// perform well, but that's OK since we only do this once per file
ByteBuffer graph = access.read(pos - bytes, bytes);
byte[] b = new byte[bytes - 16];
graph.mark();
graph.get(b);
graph.reset();
CRC32 checksum = new CRC32();
checksum.update(b);
if (crc32 != (int) checksum.getValue()) {
log.warn("Invalid graph checksum in tar file {}", file);
return null; // checksum mismatch
}
hasGraph = true;
return graph;
}
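/**
* Parses the pre-compiled graph buffer: a table of {@code count} UUIDs,
* followed by adjacency lists of int indexes into that table (each list
* terminated by -1), with the 16 byte metadata record (crc32, count,
* bytes, magic) at the end of the buffer.
*/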
private static Map<UUID, List<UUID>> parseGraph(ByteBuffer graphByteBuffer, boolean bulkOnly) {
int count = graphByteBuffer.getInt(graphByteBuffer.limit() - 12);
ByteBuffer buffer = graphByteBuffer.duplicate();
buffer.limit(graphByteBuffer.limit() - 16);
List<UUID> uuids = newArrayListWithCapacity(count);
for (int i = 0; i < count; i++) {
uuids.add(new UUID(buffer.getLong(), buffer.getLong()));
}
Map<UUID, List<UUID>> graph = newHashMap();
while (buffer.hasRemaining()) {
UUID uuid = uuids.get(buffer.getInt());
List<UUID> list = newArrayList();
int refid = buffer.getInt();
while (refid != -1) {
UUID ref = uuids.get(refid);
if (!bulkOnly || !isDataSegmentId(ref.getLeastSignificantBits())) {
list.add(ref);
}
refid = buffer.getInt();
}
graph.put(uuid, list);
}
return graph;
}
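/**
* Reads a NUL-terminated string from a fixed-size tar header field.
*/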
private static String readString(ByteBuffer buffer, int fieldSize) {
byte[] b = new byte[fieldSize];
buffer.get(b);
int n = 0;
while (n < fieldSize && b[n] != 0) {
n++;
}
return new String(b, 0, n, UTF_8);
}
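/**
* Parses an octal number from a fixed-size tar header field, stopping
* at the first byte that is not an octal digit.
*/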
private static int readNumber(ByteBuffer buffer, int fieldSize) {
byte[] b = new byte[fieldSize];
buffer.get(b);
int number = 0;
for (int i = 0; i < fieldSize; i++) {
int digit = b[i] & 0xff;
if ('0' <= digit && digit <= '7') {
number = number * 8 + digit - '0';
} else {
break;
}
}
return number;
}
File getFile() {
return file;
}
//------------------------------------------------------------< Object >--
@Override
public String toString() {
return file.toString();
}
}