io.bdeploy.bhive.BHivePoolOrganizer Maven / Gradle / Ivy

Public API including dependencies, ready to be used for integrations and plugins.

package io.bdeploy.bhive;

import java.io.IOException;
import java.nio.file.Path;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.SortedSet;
import java.util.TreeMap;
import java.util.concurrent.Future;
import java.util.concurrent.atomic.LongAdder;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import io.bdeploy.bhive.BHive.Operation;
import io.bdeploy.bhive.model.ObjectId;
import io.bdeploy.bhive.objects.ObjectDatabase;
import io.bdeploy.bhive.objects.ObjectReferenceDatabase;
import io.bdeploy.bhive.op.ManifestListOperation;
import io.bdeploy.bhive.op.ObjectListOperation;
import io.bdeploy.bhive.remote.jersey.BHiveRegistry;
import io.bdeploy.common.ActivityReporter;
import io.bdeploy.common.actions.Actions;
import io.bdeploy.common.util.FutureHelper;
import io.bdeploy.common.util.PathHelper;
import io.bdeploy.common.util.ZipHelper;
import io.bdeploy.jersey.actions.Action;
import io.bdeploy.jersey.actions.ActionExecution;
import io.bdeploy.jersey.actions.ActionService;
import io.bdeploy.jersey.actions.ActionService.ActionHandle;

/**
 * Capable of reorganizing a pool, given a set of input {@link BHive}s.
 */
public class BHivePoolOrganizer {

    private static final int PRUNE_LOOP_COUNT = 10000;

    private static final Logger log = LoggerFactory.getLogger(BHivePoolOrganizer.class);

    private final Path pool;
    private final int usageThreshold;
    private final ActionService as;

    private BHivePoolOrganizer(Path pool, int usageThreshold, ActionService as) {
        this.pool = pool.toAbsolutePath().normalize();
        this.usageThreshold = usageThreshold;
        this.as = as;
    }

    /**
     * Reorganizes the pool to match the given {@link BHive}s.
     * <p>
     * ATTENTION: The complete set of {@link BHive}s using this pool needs to be specified as this operation is destructive. It
     * will remove objects from the pool which are not referenced anymore from any of the given {@link BHive}s.
     *
     * @param bhives the set of BHives using this pool.
     */
    private void reorganize(Collection<BHive> bhives) {
        Path refDbRoot = pool.resolve(".refdb");
        if (PathHelper.exists(refDbRoot)) {
            PathHelper.deleteRecursiveRetry(refDbRoot);
        }
        ObjectReferenceDatabase orefdb = new ObjectReferenceDatabase(refDbRoot, new ActivityReporter.Null());
        Map<String, BHive> hivesById = new TreeMap<>();
        bhives.forEach(h -> hivesById.put(h.getUri().toString(), h));

        log.info("Begin re-organization of {} hives using {}", bhives.size(), pool);
        try (ActionHandle org = as.start(new Action(Actions.REORGANIZE_POOL, null, null, pool.toString()),
                ActionExecution.fromSystem())) {
            // very low level as we are the only ones allowed to *write* to the pool
            ObjectDatabase poolDb = new ObjectDatabase(pool, pool.resolve("tmp"), new ActivityReporter.Null(), null);

            for (var entry : hivesById.entrySet()) {
                String id = entry.getKey();
                BHive hive = entry.getValue();

                if (ZipHelper.isZipUri(hive.getUri())) {
                    log.warn("Ignoring ZIP hive in pool reorganization: {}", id);
                    continue;
                }

                if (log.isDebugEnabled()) {
                    log.debug("Finding references in {}", id);
                }

                // fetch *all* required objects regardless of origin. ignore missing in case a manifest was deleted after listing it.
                var hiveObjects = hive.execute(new ObjectListOperation().addManifest(hive.execute(new ManifestListOperation()))
                        .ignoreMissingManifest(true));

                // now mark all objects that remain as referenced. we mark *all*, also the duplicates
                hiveObjects.forEach(o -> orefdb.addReference(o, id));

                if (log.isDebugEnabled()) {
                    log.debug("Marked {} referenced objects in {}", hiveObjects.size(), id);
                }
            }

            if (log.isDebugEnabled()) {
                log.debug("Reading all referenced objects");
            }

            log.info("Moving with threshold {}", usageThreshold);

            LongAdder count = new LongAdder();
            Map<String, List<ObjectId>> toRemove = new TreeMap<>();

            // walk through all referenced objects, move to pool as required, and remove from the origins in batches.
            Consumer<ObjectId> processOne = id -> {
                SortedSet<String> references = orefdb.read(id);
                boolean isPooled = poolDb.hasObject(id);
                if (references.size() >= usageThreshold || isPooled) {
                    List<BHive> refHives = references.stream().map(hivesById::get).collect(Collectors.toList());

                    // if reference count >= usage threshold and not yet in pool -> move
                    if (!isPooled) {
                        moveToPool(poolDb, id, refHives);
                    }

                    // however it came to the pool, we need to prune it from all hives, remember the id.
                    references.forEach(r -> toRemove.computeIfAbsent(r, k -> new ArrayList<>()).add(id));
                    count.increment();
                }

                // every 10000 objects we perform the prune on the hives just to save on memory.
                long current = count.sum();
                if (current > 0 && current % PRUNE_LOOP_COUNT == 0) {
                    removeFromHives(hivesById, current, toRemove);
                }
            };

            // walk through all objects, processing them one by one.
            orefdb.walkAllObjects(processOne);

            // final batch of removals.
            if (!toRemove.isEmpty()) {
                removeFromHives(hivesById, count.sum(), toRemove);
            }

            log.info("Checked or moved {} referenced objects to the pool", count.sum());

            // cleanup of pool. look at all objects in pool - if it does not exist in the orefdb -> remove.
            // pruning the pool is way easier than pruning a bhive! we do not need to care about concurrency.
            LongAdder remCount = new LongAdder();
            Consumer<ObjectId> remover = id -> {
                if (!orefdb.hasObject(id)) {
                    poolDb.removeObject(id);
                    if (log.isDebugEnabled()) {
                        log.debug("Removed {} from pool", id);
                    }
                    remCount.increment();
                }
            };
            poolDb.walkAllObjects(remover);

            if (log.isDebugEnabled()) {
                log.debug("Removed {} objects from the pool during cleanup", remCount.sum());
            }
        } catch (Exception e) {
            log.error("Cannot reorganize pool {}", pool, e);
        } finally {
            // delete the temporary reference counter db.
            PathHelper.deleteRecursiveRetry(refDbRoot);
        }

        log.info("Finished re-organization.");
    }

    private static void removeFromHives(Map<String, BHive> hivesById, long current, Map<String, List<ObjectId>> toRemove) {
        if (log.isTraceEnabled()) {
            log.trace("...{}", current);
        }
        for (var entry : toRemove.entrySet()) {
            if (log.isTraceEnabled()) {
                log.trace("Assuring absence of {} objects from {}, already in pool", entry.getValue().size(), entry.getKey());
            }
            hivesById.get(entry.getKey()).execute(new InternalRemoveObjectsOperation().setObjectIds(entry.getValue()));
        }
        toRemove.clear();
    }

    private static void moveToPool(ObjectDatabase poolDb, ObjectId id, List<BHive> origins) {
        // try to find a valid file.
        for (BHive origin : origins) {
            Path source = origin.execute(new InternalFindValidSourceOperation().setObjectId(id));
            if (source == null) {
                continue;
            }

            try {
                // first copy to pool, then remove from origins
                ObjectId calculated = poolDb.addObject(source);
                if (!id.equals(calculated)) {
                    // uh oh. this *should* never happen as we found a valid matching file in the origin.
                    throw new IllegalStateException("Cannot move " + id + " to pool, calculated " + calculated);
                }
            } catch (IOException e) {
                throw new IllegalStateException("Cannot move " + id, e);
            }

            // now remove from the origins.
            origins.forEach(h -> h.execute(new InternalRemoveObjectsOperation().setObjectIds(Collections.singletonList(id))));

            if (log.isDebugEnabled()) {
                log.debug("Moved {} to pool", id);
            }

            // done.
            return;
        }

        // if we got here no hive had a valid source for the object -> bad!
        throw new IllegalStateException("No origin BHive was able to supply a valid object for " + id);
    }

    /**
     * Convenience method which figures out common pool directories over a given set of hives and reorganizes each pool.
     *
     * @param registry the registry with all BHives to be examined.
     * @param usageThreshold the minimum number of hives which must reference an object for it to be considered for pooling.
     * @param as the {@link ActionService} used to track the re-organization action.
     */
    public static void reorganizeAll(BHiveRegistry registry, int usageThreshold, ActionService as) {
        Map<Path, List<BHive>> pools = new TreeMap<>();
        for (var entry : registry.getAll().entrySet()) {
            BHive hive = entry.getValue();
            if (hive.isPooling() && hive.getPoolPath() != null) {
                pools.computeIfAbsent(hive.getPoolPath(), k -> new ArrayList<>()).add(hive);
            }
        }

        for (var entry : pools.entrySet()) {
            log.info("Re-organizing pool {} using {} BHives and threshold {}", entry.getKey(), entry.getValue().size(),
                    usageThreshold);
            new BHivePoolOrganizer(entry.getKey(), usageThreshold, as).reorganize(entry.getValue());
        }
    }

    /**
     * Copies all objects required by the given hive's manifests into the given target {@link ObjectDatabase}.
     *
     * @param hive the hive with enabled pooling
     * @param target the target {@link ObjectDatabase} which should be populated with all required objects.
     */
    public static void unpoolHive(BHive hive, ObjectDatabase target) {
        if (!hive.isPooling()) {
            return; // nothing to do.
        }

        log.info("Begin unpooling {}", hive.getUri());

        // fetch *all* required objects regardless of origin. ignore missing in case a manifest was deleted after listing it.
        var reqObjects = hive.execute(
                new ObjectListOperation().addManifest(hive.execute(new ManifestListOperation())).ignoreMissingManifest(true));

        log.info("Moving and checking {} objects", reqObjects.size());

        // copy every single one of the objects to the target - it will avoid copy on its own if it already has the object.
        hive.execute(new InternalCopyObjectsOperation().setTarget(target).setObjectIds(reqObjects));
    }

    private static final class InternalFindValidSourceOperation extends Operation<Path> {

        private ObjectId oid;

        @Override
        public Path call() {
            if (oid == null) {
                throw new IllegalArgumentException("oid must be set");
            }
            Boolean valid = getObjectManager().db(db -> db.checkObject(oid));
            if (!Boolean.TRUE.equals(valid)) {
                return null;
            }
            return getObjectManager().db(db -> db.getObjectFile(oid));
        }

        public InternalFindValidSourceOperation setObjectId(ObjectId id) {
            this.oid = id;
            return this;
        }
    }

    private static final class InternalRemoveObjectsOperation extends Operation<Void> {

        private Collection<ObjectId> oid;

        @Override
        public Void call() {
            List<Future<?>> ops = new ArrayList<>();
            getObjectManager().db(db -> {
                oid.forEach(o -> ops.add(submitFileOperation(() -> {
                    try {
                        db.removeObject(o);
                    } catch (Exception e) {
                        // cannot remove, maybe in use? ignore the issue as the next re-org will try again.
                        if (log.isDebugEnabled()) {
                            log.debug("Cannot remove " + o, e);
                        }
                    }
                })));
                return null;
            });
            FutureHelper.awaitAll(ops);
            return null;
        }

        public InternalRemoveObjectsOperation setObjectIds(Collection<ObjectId> ids) {
            this.oid = ids;
            return this;
        }
    }

    private static final class InternalCopyObjectsOperation extends Operation<Void> {

        private Collection<ObjectId> oid;
        private ObjectDatabase target;

        @Override
        public Void call() {
            List<Future<?>> ops = new ArrayList<>();
            getObjectManager().db(db -> {
                oid.forEach(o -> ops.add(submitFileOperation(() -> {
                    try {
                        target.addObject(db.getObjectFile(o));
                    } catch (Exception e) {
                        throw new IllegalStateException("Cannot copy object " + o, e);
                    }
                })));
                return null;
            });
            FutureHelper.awaitAll(ops);
            return null;
        }

        public InternalCopyObjectsOperation setObjectIds(Collection<ObjectId> ids) {
            this.oid = ids;
            return this;
        }

        public InternalCopyObjectsOperation setTarget(ObjectDatabase db) {
            this.target = db;
            return this;
        }
    }
}
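For orientation, below is a minimal, hypothetical driver showing how the two public entry points might be invoked. The signatures of reorganizeAll and unpoolHive, as well as the ObjectDatabase constructor, are taken from the class above; how a BHiveRegistry and ActionService are obtained is deployment-specific and simply assumed to be provided by the hosting server. The sketch's class and method names are invented for illustration.

import java.nio.file.Path;

import io.bdeploy.bhive.BHive;
import io.bdeploy.bhive.BHivePoolOrganizer;
import io.bdeploy.bhive.objects.ObjectDatabase;
import io.bdeploy.bhive.remote.jersey.BHiveRegistry;
import io.bdeploy.common.ActivityReporter;
import io.bdeploy.jersey.actions.ActionService;

public class PoolMaintenanceSketch {

    // registry and actions are assumed to be supplied by the hosting server's context.
    static void periodicReorganization(BHiveRegistry registry, ActionService actions) {
        // Groups all pooling-enabled hives by their pool directory and reorganizes
        // each pool. The threshold of 2 is an example value: an object is moved into
        // the pool once at least two hives reference it.
        BHivePoolOrganizer.reorganizeAll(registry, 2, actions);
    }

    static void detachFromPool(BHive hive, Path targetDir) {
        // Copies every object referenced by the hive's manifests (pooled or local)
        // into a standalone object database, e.g. in preparation for disabling
        // pooling. Uses the same ObjectDatabase constructor as the organizer itself.
        ObjectDatabase target = new ObjectDatabase(targetDir, targetDir.resolve("tmp"),
                new ActivityReporter.Null(), null);
        BHivePoolOrganizer.unpoolHive(hive, target);
    }
}

Note that reorganize itself is deliberately private: pools are only reorganized through reorganizeAll, which guarantees that all hives sharing a pool directory are passed in together, satisfying the destructive contract described in its javadoc.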




