// org.eclipse.jgit.storage.dfs.DfsPackCompactor (Maven / Gradle / Ivy)
/*
* Copyright (C) 2011, Google Inc.
* and other copyright owners as documented in the project's IP log.
*
* This program and the accompanying materials are made available
* under the terms of the Eclipse Distribution License v1.0 which
* accompanies this distribution, is reproduced below, and is
* available at http://www.eclipse.org/org/documents/edl-v10.php
*
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or
* without modification, are permitted provided that the following
* conditions are met:
*
* - Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* - Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* - Neither the name of the Eclipse Foundation, Inc. nor the
* names of its contributors may be used to endorse or promote
* products derived from this software without specific prior
* written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
* CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
* ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
package org.eclipse.jgit.storage.dfs;
import static org.eclipse.jgit.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.storage.pack.PackExt.INDEX;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.file.PackIndex;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackWriter;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;
/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient because new delta compression is disabled.
 * <p>
 * This approach is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without expending
 * a lot of time repacking the entire repository.
 */
public class DfsPackCompactor {
    /** Repository whose object database supplies source packs and receives the result. */
    private final DfsRepository repo;

    /**
     * Packs selected for compaction. Sorted in place by
     * {@link #addObjectsToPack(PackWriter, DfsReader, ProgressMonitor)}.
     */
    private final List<DfsPackFile> srcPacks;

    /** Descriptions of the packs created by {@link #compact(ProgressMonitor)}. */
    private final List<DfsPackDescription> newPacks;

    /** Writer statistics, appended in the same order as {@link #newPacks}. */
    private final List<PackWriter.Statistics> newStats;

    /**
     * Size threshold in bytes; {@link #autoAdd()} selects packs whose pack
     * file is strictly smaller than this value.
     */
    private int autoAddSize;

    /**
     * Initialize a pack compactor.
     *
     * @param repository
     *            repository objects to be packed will be read from.
     */
    public DfsPackCompactor(DfsRepository repository) {
        repo = repository;
        autoAddSize = 5 * 1024 * 1024; // 5 MiB
        srcPacks = new ArrayList<DfsPackFile>();
        newPacks = new ArrayList<DfsPackDescription>(1);
        newStats = new ArrayList<PackWriter.Statistics>(1);
    }

    /**
     * Add a pack to be compacted.
     * <p>
     * All of the objects in this pack will be copied into the resulting pack.
     * The resulting pack will order objects according to the source pack's own
     * description ordering (which is based on creation date), and then by the
     * order the objects appear in the source pack.
     *
     * @param pack
     *            a pack to combine into the resulting pack.
     * @return {@code this}
     */
    public DfsPackCompactor add(DfsPackFile pack) {
        srcPacks.add(pack);
        return this;
    }

    /**
     * Automatically select packs to be included, and add them.
     * <p>
     * Packs are selected based on size, smaller packs get included while bigger
     * ones are omitted. A pack is selected when the size of its pack file is
     * strictly below the auto-add threshold (5 MiB as set by the constructor).
     *
     * @return {@code this}
     * @throws IOException
     *             existing packs cannot be read.
     */
    public DfsPackCompactor autoAdd() throws IOException {
        DfsObjDatabase objdb = repo.getObjectDatabase();
        for (DfsPackFile pack : objdb.getPacks()) {
            DfsPackDescription d = pack.getPackDescription();
            if (d.getFileSize(PACK) < autoAddSize)
                add(pack);
        }
        return this;
    }

    /**
     * Compact the pack files together.
     * <p>
     * If the selected source packs contribute no objects, nothing is written
     * and the method returns without creating a pack. Otherwise a new pack and
     * its index are written, then committed to the object database together
     * with the list of source packs to replace. If writing or committing
     * fails, the new pack is rolled back.
     *
     * @param pm
     *            progress monitor to receive updates on as packing may take a
     *            while, depending on the size of the repository. May be
     *            {@code null}, in which case progress is not reported.
     * @throws IOException
     *             the packs cannot be compacted.
     */
    public void compact(ProgressMonitor pm) throws IOException {
        if (pm == null)
            pm = NullProgressMonitor.INSTANCE;

        DfsObjDatabase objdb = repo.getObjectDatabase();
        DfsReader ctx = (DfsReader) objdb.newReader();
        try {
            // Existing representations are reused as-is; no new deltas are
            // computed, which is what makes compaction cheaper than a repack.
            PackConfig pc = new PackConfig(repo);
            pc.setIndexVersion(2);
            pc.setDeltaCompress(false);
            pc.setReuseDeltas(true);
            pc.setReuseObjects(true);

            PackWriter pw = new PackWriter(pc, ctx);
            try {
                pw.setDeltaBaseAsOffset(true);
                pw.setReuseDeltaCommits(false);

                addObjectsToPack(pw, ctx, pm);
                if (pw.getObjectCount() == 0)
                    return;

                boolean rollback = true;
                DfsPackDescription pack = objdb.newPack(COMPACT);
                try {
                    writePack(objdb, pack, pw, pm);
                    writeIndex(objdb, pack, pw);

                    // Capture the statistics before releasing the writer; the
                    // null assignment stops the outer finally from releasing
                    // it a second time.
                    PackWriter.Statistics stats = pw.getStatistics();
                    pw.release();
                    pw = null;

                    pack.setPackStats(stats);
                    objdb.commitPack(Collections.singletonList(pack), toPrune());
                    newPacks.add(pack);
                    newStats.add(stats);
                    rollback = false;
                } finally {
                    if (rollback)
                        objdb.rollbackPack(Collections.singletonList(pack));
                }
            } finally {
                if (pw != null)
                    pw.release();
            }
        } finally {
            ctx.release();
        }
    }

    /** @return all of the source packs that fed into this compaction. */
    public List<DfsPackDescription> getSourcePacks() {
        return toPrune();
    }

    /** @return new packs created by this compaction. */
    public List<DfsPackDescription> getNewPacks() {
        return newPacks;
    }

    /** @return statistics corresponding to the {@link #getNewPacks()}. */
    public List<PackWriter.Statistics> getNewPackStatistics() {
        return newStats;
    }

    /**
     * Build a fresh list holding the description of every source pack, in the
     * current order of {@link #srcPacks}.
     *
     * @return descriptions of the source packs; a new list on every call.
     */
    private List<DfsPackDescription> toPrune() {
        int cnt = srcPacks.size();
        List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
        for (DfsPackFile pack : srcPacks)
            all.add(pack.getPackDescription());
        return all;
    }

    /**
     * Hand every object of every source pack to the writer, at most once each.
     * <p>
     * Sorts {@link #srcPacks} in place by pack description, then walks each
     * pack's index and adds the objects in pack-offset order. One unit of
     * progress is reported per object added.
     *
     * @param pw
     *            writer receiving the objects.
     * @param ctx
     *            reader used to access the source packs and their indexes.
     * @param pm
     *            progress monitor; must not be {@code null}.
     * @throws IOException
     *             a source pack or its index cannot be read.
     * @throws IncorrectObjectTypeException
     *             propagated from the object type lookup.
     */
    private void addObjectsToPack(PackWriter pw, DfsReader ctx,
            ProgressMonitor pm) throws IOException,
            IncorrectObjectTypeException {
        // Sort packs by description ordering, this places newer packs before
        // older packs, allowing the PackWriter to be handed newer objects
        // first and older objects last.
        Collections.sort(srcPacks, new Comparator<DfsPackFile>() {
            public int compare(DfsPackFile a, DfsPackFile b) {
                return a.getPackDescription().compareTo(b.getPackDescription());
            }
        });

        RevWalk rw = new RevWalk(ctx);
        RevFlag added = rw.newFlag("ADDED"); //$NON-NLS-1$

        pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
        for (DfsPackFile src : srcPacks) {
            // Collect the objects of this pack that have not been handed to
            // the writer by an earlier pack.
            List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>();
            for (PackIndex.MutableEntry ent : src.getPackIndex(ctx)) {
                ObjectId id = ent.toObjectId();
                RevObject obj = rw.lookupOrNull(id);
                if (obj == null || !obj.has(added))
                    want.add(new ObjectIdWithOffset(id, ent.getOffset()));
            }

            // Sort objects by the order they appear in the pack file, for
            // two benefits. Scanning object type information is faster when
            // the pack is traversed in order, and this allows the PackWriter
            // to be given the new objects in a relatively sane newest-first
            // ordering without additional logic, like unpacking commits and
            // walking a commit queue.
            Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
                public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
                    // Compare directly rather than subtracting, so the result
                    // cannot be corrupted by long overflow.
                    if (a.offset < b.offset)
                        return -1;
                    return a.offset == b.offset ? 0 : 1;
                }
            });

            // Only pack each object at most once into the output file. The
            // PackWriter will later select a representation to reuse, which
            // may be the version in this pack, or may be from another pack if
            // the object was copied here to complete a thin pack and is larger
            // than a delta from another pack. This is actually somewhat common
            // if an object is modified frequently, such as the top level tree.
            for (ObjectIdWithOffset id : want) {
                int type = src.getObjectType(ctx, id.offset);
                RevObject obj = rw.lookupAny(id, type);
                if (!obj.has(added)) {
                    pm.update(1);
                    pw.addObject(obj);
                    obj.add(added);
                }
            }
        }
        pm.endTask();
    }

    /**
     * Write the pack stream for {@code pack} and record its object count and
     * byte size on the description.
     *
     * @param objdb
     *            database providing the output stream.
     * @param pack
     *            description of the pack being created; updated in place.
     * @param pw
     *            writer holding the objects to emit.
     * @param pm
     *            progress monitor, used for both monitor arguments of the
     *            writer.
     * @throws IOException
     *             the pack cannot be written.
     */
    private void writePack(DfsObjDatabase objdb, DfsPackDescription pack,
            PackWriter pw, ProgressMonitor pm) throws IOException {
        DfsOutputStream out = objdb.writeFile(pack, PACK);
        try {
            CountingOutputStream cnt = new CountingOutputStream(out);
            pw.writePack(pm, pm, cnt);
            pack.setObjectCount(pw.getObjectCount());
            pack.setFileSize(PACK, cnt.getCount());
        } finally {
            out.close();
        }
    }

    /**
     * Write the index for {@code pack} and record its byte size on the
     * description.
     *
     * @param objdb
     *            database providing the output stream.
     * @param pack
     *            description of the pack being created; updated in place.
     * @param pw
     *            writer that produced the pack.
     * @throws IOException
     *             the index cannot be written.
     */
    private void writeIndex(DfsObjDatabase objdb, DfsPackDescription pack,
            PackWriter pw) throws IOException {
        DfsOutputStream out = objdb.writeFile(pack, INDEX);
        try {
            CountingOutputStream cnt = new CountingOutputStream(out);
            pw.writeIndex(cnt);
            pack.setFileSize(INDEX, cnt.getCount());
        } finally {
            out.close();
        }
    }

    /** An object id paired with the offset of that object in its source pack. */
    private static class ObjectIdWithOffset extends ObjectId {
        /** Offset of the object within the source pack, as reported by the pack index. */
        final long offset;

        ObjectIdWithOffset(AnyObjectId id, long ofs) {
            super(id);
            offset = ofs;
        }
    }
}