// com.vertispan.j2cl.build.DiskCache Maven / Gradle / Ivy
/*
* Copyright © 2021 j2cl-maven-plugin authors
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.vertispan.j2cl.build;
import com.google.gson.GsonBuilder;
import com.vertispan.j2cl.build.impl.CollectedTaskInputs;
import com.vertispan.j2cl.build.task.CachedPath;
import io.methvin.watcher.PathUtils;
import io.methvin.watcher.hashing.FileHash;
import io.methvin.watcher.hashing.FileHasher;
import io.methvin.watcher.hashing.Murmur3F;
import io.methvin.watchservice.MacOSXListeningWatchService;
import io.methvin.watchservice.WatchablePath;
import java.io.File;
import java.io.IOException;
import java.io.UncheckedIOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.WatchService;
import java.nio.file.*;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.FileTime;
import java.time.Instant;
import java.util.*;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.CopyOnWriteArrayList;
import java.util.concurrent.Executor;
import java.util.stream.Collectors;
/**
* Manages the cached task inputs and outputs, without direct knowledge of the project or task apis.
*/
public abstract class DiskCache {
/**
 * True when the {@code os.name} system property contains "mac". Used to pick the mac-specific
 * watch service and watchable wrapper. Lower-cased with {@link Locale#ROOT} so the check does
 * not depend on the JVM's default locale.
 */
private static final boolean IS_MAC = System.getProperty("os.name").toLowerCase(Locale.ROOT).contains("mac");
/**
 * Target period, in milliseconds, of the loop that refreshes the last-modified time of running
 * task directories. Overridable with {@code -Dj2cl.diskcache.mark_active_update_delay_ms}.
 */
private static final int MARK_ACTIVE_UPDATE_DELAY = Integer.getInteger("j2cl.diskcache.mark_active_update_delay_ms", 1000);
/**
 * Age, in seconds, after which a task directory that this process does not own and that has not
 * been modified is treated as stale. Overridable with {@code -Dj2cl.diskcache.max_stale_age}.
 */
private static final int MAX_STALE_AGE = Integer.getInteger("j2cl.diskcache.max_stale_age", 10);
/**
 * Handle on one task directory in the cache. Exposes the paths derived from that directory and
 * lets the owner of the work record its outcome.
 */
public class CacheResult {
    private final Path taskDir;

    public CacheResult(Path taskDir) {
        this.taskDir = taskDir;
    }

    /** @return the task directory this result refers to */
    public Path taskDir() {
        return taskDir;
    }

    /** @return the log file path for this task, as defined by the enclosing cache */
    public Path logFile() {
        //TODO finish building a logger that will write to this
        return DiskCache.this.logFile(taskDir);
    }

    /** @return the output directory path for this task, as defined by the enclosing cache */
    public Path outputDir() {
        return DiskCache.this.outputDir(taskDir);
    }

    /**
     * @return the hashed output recorded for this task
     * @throws IllegalStateException if no output has been recorded for the task directory yet
     */
    public TaskOutput output() {
        TaskOutput taskOutput = knownOutputs.get(taskDir);
        if (taskOutput == null) {
            throw new IllegalStateException("Output not yet ready for " + taskDir);
        }
        return taskOutput;
    }

    /** @return the cache summary path for this task, as defined by the enclosing cache */
    public Path cachedSummary() {
        return DiskCache.this.cacheSummary(taskDir);
    }

    /**
     * Records the task as successful and stops the liveness updates for its directory. The
     * directory is removed from the running set even if recording the result throws, so that it
     * is not kept looking alive forever.
     */
    public void markSuccess() {
        try {
            markFinished(this);
        } finally {
            runningTasks.remove(taskDir);
        }
    }

    /**
     * Records the task as failed and stops the liveness updates for its directory. The directory
     * is removed from the running set even if recording the result throws.
     */
    public void markFailure() {
        try {
            markFailed(this);
        } finally {
            runningTasks.remove(taskDir);
        }
    }

    /** Adds the task directory to the running set, whose entries the liveness thread touches. */
    public void markBegun() {
        runningTasks.add(taskDir);
    }

    /** Deletes the task directory and its contents; an I/O failure is printed and otherwise ignored. */
    public void cancel() {
        try {
            deleteRecursively(taskDir);
        } catch (IOException e) {
            // log and return, might be useful for debugging, but not recoverable
            e.printStackTrace();
        }
    }
}
protected final File cacheDir;
private final Executor executor;
/**
* A single watch service to monitor all changes to the cache dir, under the assumption that
* the entire cache directory is on a single filesystem.
*/
private final WatchService service;
/**
* A thread to monitor for changes, notify waiting work as necessary.
*/
private final Thread watchThread = new Thread(this::checkForWork, "DiskCacheThread");
private Map knownOutputs = new ConcurrentHashMap<>();
private Map lastSuccessfulOutputs = new ConcurrentHashMap<>();
private final Map knownMarkers = new ConcurrentHashMap<>();
private final Map> taskFutures = new ConcurrentHashMap<>();
private final List runningTasks = new CopyOnWriteArrayList<>();
private final Thread livenessThread = new Thread(this::markActive, "DiskCacheLivenessThread");
/**
 * Creates the cache directory if necessary, opens a watch service for it, and starts the watch
 * and liveness threads.
 *
 * @param cacheDir root directory of the cache
 * @param executor executor used to deliver success notifications to listeners
 * @throws IllegalArgumentException if {@code cacheDir} is not a directory after attempting to create it
 * @throws IOException if the watch service cannot be created
 */
public DiskCache(File cacheDir, Executor executor) throws IOException {
    this.cacheDir = cacheDir;
    this.executor = executor;
    // mkdirs() returns false both on failure and when the directory already exists, so the
    // result is checked by inspecting the path afterwards.
    cacheDir.mkdirs();
    // isDirectory() is false for a missing path as well as for an existing non-directory,
    // which covers both failure modes named in the message.
    if (!cacheDir.isDirectory()) {
        throw new IllegalArgumentException("Can't use " + cacheDir + ", failed to create it, or already exists and isn't a directory");
    }
    if (IS_MAC) {
        service = new MacOSXListeningWatchService();
    } else {
        service = cacheDir.toPath().getFileSystem().newWatchService();
    }
    watchThread.start();
    livenessThread.start();
}
private void checkForWork() {
try {
WatchKey key;
while ((key = service.take()) != null) {
for (WatchEvent> event : key.pollEvents()) {
if (event.kind() == StandardWatchEventKinds.ENTRY_CREATE) {
// task ended one way or the other
Path taskDir = pathFromWatchable(key.watchable());
Path createdPath = taskDir.resolve((Path) event.context());
Set listeners = taskFutures.get(taskDir);
if (createdPath.equals(successMarker(taskDir))) {
try {
knownOutputs.put(taskDir, makeOutput(taskDir));
listeners.forEach(PendingCacheResult::success);
} catch (UncheckedIOException ioException) {
// failure to hash is pretty terrible, we're in trouble
ioException.printStackTrace();
listeners.forEach(l -> l.error(ioException));
}
} else if (createdPath.equals(failureMarker(taskDir))) {
listeners.forEach(PendingCacheResult::failure);
} //else this is the log file
} else if (event.kind() == StandardWatchEventKinds.ENTRY_DELETE) {
// task was canceled, we should attempt to take over
Path taskDir = (Path) event.context();
Set listeners = taskFutures.get(taskDir);
// Attempt to re-create the directory (note that we might not be the only process watching
// for this work to complete), then alert only the first listener to attempt the work again.
if (taskDir.toFile().mkdir()) {
Files.createDirectory(outputDir(taskDir));
Files.createFile(logFile(taskDir));
listeners.iterator().next().ready();
}
listeners.forEach(l -> l.error(new IllegalStateException("Existing task was canceled, not yet supported")));
}
}
key.reset();
}
} catch (InterruptedException e) {
// asked to shut down, time to stop
// TODO mark all pending work as canceled?
} catch (IOException e) {
// disaster, can't interact with the cache, stop and give up
// TODO mark all pending work as canceled?
} catch (ClosedWatchServiceException e) {
if(!livenessThread.getState().equals(Thread.State.TERMINATED)) {
throw new Error(e);
}
// This is purely noise in the log and doesn't indicate an actual
// error, so it can be safely ignored. Please see:
// https://github.com/Vertispan/j2clmavenplugin/issues/188
}
}
/**
 * Body of the liveness thread. Roughly every {@code MARK_ACTIVE_UPDATE_DELAY} milliseconds it
 * sets the last-modified time of every running task directory to the current time. The loop
 * stops once the thread has been interrupted, whether the interrupt arrives during the sleep
 * or while an iteration is overrunning its delay (in which case no sleep happens).
 */
private void markActive() {
    while (!Thread.currentThread().isInterrupted()) {
        long startLoop = System.currentTimeMillis();
        FileTime now = FileTime.from(Instant.now());
        runningTasks.forEach(path -> {
            try {
                Files.setLastModifiedTime(path, now);
            } catch (IOException e) {
                // race, probably the file was deleted, leaving it in the collection
                // for now, the failing task will cause the entry to be deleted.
                e.printStackTrace();
            }
        });
        try {
            // Subtract the time spent touching files so the period stays close to the target
            long remainingDelay = MARK_ACTIVE_UPDATE_DELAY - (System.currentTimeMillis() - startLoop);
            if (remainingDelay > 0) {
                Thread.sleep(remainingDelay);
            } else {
                System.out.println("Negative remaining delay, continuing " + now);
            }
        } catch (InterruptedException e) {
            return;// done
        }
    }
}
/**
 * Extracts the filesystem path behind a watch key's watchable.
 *
 * @param watchable either a {@code WatchablePath} wrapper or a plain {@link Path}
 * @return the wrapped file for a {@code WatchablePath}, otherwise the path itself
 * @throws UnsupportedOperationException if the watchable is of any other type
 */
private Path pathFromWatchable(Watchable watchable) {
    final Path result;
    if (watchable instanceof WatchablePath) {
        WatchablePath wrapper = (WatchablePath) watchable;
        result = wrapper.getFile();
    } else if (watchable instanceof Path) {
        result = (Path) watchable;
    } else {
        throw new UnsupportedOperationException("Can't handle watchable of type " + watchable.getClass());
    }
    return result;
}
/**
 * Builds the {@code TaskOutput} for a task by hashing everything under its output directory.
 *
 * @param taskDir the task directory whose output directory should be hashed
 * @return a new output wrapping the hashed entries
 */
private TaskOutput makeOutput(Path taskDir) {
    return new TaskOutput(hashContents(outputDir(taskDir)));
}
/**
 * A single hashed file: its path relative to a parent directory, that parent directory, and the
 * hash of the file. Ordering considers only the relative path, while equality also considers
 * the parent directory and the hash.
 */
public static class CacheEntry implements Comparable<CacheEntry>, CachedPath {
    /** Relative path to the results dir or its original source dir */
    private final Path sourcePath;
    /** Absolute path to the results dir. Not to be serialized to disk. */
    private final Path absoluteParent;
    /** Hash of the file, so we can notice changes, or hash the tree. */
    private final FileHash hash;

    /**
     * @param sourcePath path of the file; if absolute it is stored relative to {@code absoluteParent}
     * @param absoluteParent directory that {@code sourcePath} is resolved against
     * @param hash hash of the file
     */
    public CacheEntry(Path sourcePath, Path absoluteParent, FileHash hash) {
        if (sourcePath.isAbsolute()) {
            this.sourcePath = absoluteParent.relativize(sourcePath);
        } else {
            this.sourcePath = sourcePath;
        }
        this.absoluteParent = absoluteParent;
        this.hash = hash;
    }

    @Override
    public Path getSourcePath() {
        return sourcePath;
    }

    public Path getAbsoluteParent() {
        return absoluteParent;
    }

    @Override
    public Path getAbsolutePath() {
        return absoluteParent.resolve(sourcePath);
    }

    /**
     * Internal API, as this is not at this time used by any caller.
     */
    public FileHash getHash() {
        return hash;
    }

    /** Orders entries by relative path only; the parent directory and hash are not compared. */
    @Override
    public int compareTo(CacheEntry cacheEntry) {
        return sourcePath.compareTo(cacheEntry.sourcePath);
    }

    @Override
    public boolean equals(Object o) {
        if (this == o) return true;
        if (o == null || getClass() != o.getClass()) return false;
        CacheEntry that = (CacheEntry) o;
        if (!sourcePath.equals(that.sourcePath)) return false;
        if (!absoluteParent.equals(that.absoluteParent)) return false;
        return hash.equals(that.hash);
    }

    @Override
    public int hashCode() {
        int result = sourcePath.hashCode();
        result = 31 * result + absoluteParent.hashCode();
        result = 31 * result + hash.hashCode();
        return result;
    }

    @Override
    public String toString() {
        return "CacheEntry{" +
                "sourcePath=" + sourcePath +
                ", absoluteParent=" + absoluteParent +
                ", hash=" + hash +
                '}';
    }
}
/**
 * Helper like PathUtils.initWatcherState to produce the relative paths of any files
 * in a path, and their corresponding hashes.
 *
 * @param path root of the tree to hash; a path that does not exist yields an empty collection
 * @return one entry per file that could be hashed, each relative to {@code path}
 * @throws UncheckedIOException if walking the tree fails
 */
public static Collection<CacheEntry> hashContents(Path path) {
    Set<CacheEntry> fileHashes = new HashSet<>();
    if (Files.exists(path)) {
        FileHasher fileHasher = FileHasher.DEFAULT_FILE_HASHER;
        try {
            Files.walkFileTree(
                    path,
                    new SimpleFileVisitor<Path>() {
                        @Override
                        public FileVisitResult visitFile(Path file, BasicFileAttributes attrs) {
                            if (attrs.isDirectory()) {
                                // ignore directories
                                return FileVisitResult.CONTINUE;
                            }
                            FileHash hash = PathUtils.hash(fileHasher, file);
                            if (hash == null) {
                                //file could have been deleted or was otherwise unreadable
                                //TODO how do we handle this? For now skipping as PathUtils does
                            } else {
                                fileHashes.add(new CacheEntry(file, path, hash));
                            }
                            return FileVisitResult.CONTINUE;
                        }
                    });
        } catch (IOException ioException) {
            throw new UncheckedIOException(ioException);
        }
    }
    return fileHashes;
}
/**
 * Shuts the cache down: interrupts both background threads, closes the watch service, and
 * deletes the directories of any tasks still marked as running.
 * <p>
 * The liveness thread is joined before the watch service is closed. This way it has terminated
 * by the time the watch thread can observe the closed service, and it is no longer touching
 * the running task directories while they are being deleted.
 *
 * @throws IOException if closing the watch service or deleting a task directory fails
 * @throws InterruptedException if the calling thread is interrupted while waiting for a thread to end
 */
public void close() throws IOException, InterruptedException {
    livenessThread.interrupt();
    watchThread.interrupt();
    livenessThread.join();
    service.close();
    watchThread.join();
    for (Path path : runningTasks) {
        deleteRecursively(path);
    }
}
/**
 * Deletes a file, or a directory together with everything beneath it. Symbolic links are never
 * followed: a link is removed itself, whether or not its target exists, and a link to a
 * directory is not descended into.
 *
 * @param path the file or directory to delete; a path that does not exist is ignored
 * @throws IOException if listing a directory or deleting an entry fails
 */
private void deleteRecursively(Path path) throws IOException {
    // NOFOLLOW_LINKS so that a dangling symlink still counts as existing and gets removed;
    // otherwise it would be skipped and deleting its parent directory would fail.
    if (!Files.exists(path, LinkOption.NOFOLLOW_LINKS)) {
        return;
    }
    if (Files.isDirectory(path, LinkOption.NOFOLLOW_LINKS)) {
        try (DirectoryStream<Path> entries = Files.newDirectoryStream(path)) {
            for (Path entry : entries) {
                deleteRecursively(entry);
            }
        }
    }
    Files.deleteIfExists(path);
}
/**
 * Serializes the identity of a task - project key, output type, task implementation and version,
 * its inputs and the configs it used - to pretty-printed JSON (nulls included).
 * <p>
 * Inputs that share a project key and output type are merged into one entry whose file hashes
 * are the union of the group's hashes.
 *
 * @param inputs the collected inputs describing the task
 * @return the JSON text of the task summary
 * @throws IllegalStateException if two inputs in one group report different hashes for the same file
 */
private String taskSummaryContents(CollectedTaskInputs inputs) {
    TaskSummaryDiskFormat summary = new TaskSummaryDiskFormat();
    summary.setProjectKey(inputs.getProject().getKey());
    summary.setOutputType(inputs.getTaskFactory().getOutputType());
    summary.setTaskImpl(inputs.getTaskFactory().getClass().getName());
    summary.setTaskImplVersion(inputs.getTaskFactory().getVersion());

    // Bucket the inputs by "<projectKey>-<outputType>"
    var inputsByProjectAndType = inputs.getInputs().stream()
            .map(Input::makeDiskFormat)
            .collect(Collectors.groupingBy(i -> i.getProjectKey() + "-" + i.getOutputType()));

    summary.setInputs(inputsByProjectAndType.values().stream()
            .map(group -> {
                TaskSummaryDiskFormat.InputDiskFormat merged = new TaskSummaryDiskFormat.InputDiskFormat();
                // every member of the group has the same key and type, so the first one will do
                merged.setProjectKey(group.get(0).getProjectKey());
                merged.setOutputType(group.get(0).getOutputType());
                merged.setFileHashes(
                        group.stream()
                                .flatMap(member -> member.getFileHashes().entrySet().stream())
                                .collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue, (left, right) -> {
                                    if (!left.equals(right)) {
                                        throw new IllegalStateException("Two hashes for one file! " + left + " vs " + right);
                                    }
                                    return left;
                                }))
                );
                return merged;
            })
            .collect(Collectors.toUnmodifiableList()));

    summary.setConfigs(inputs.getUsedConfigs());

    return new GsonBuilder()
            .serializeNulls()
            .setPrettyPrinting()
            .create()
            .toJson(summary);
}
/**
 * Resolves the directory used for one task. Called with the project key, the hex hash of the
 * task summary, and the task's output type.
 */
protected abstract Path taskDir(String projectName, String hashString, String outputType);
/** Path of the file that is created to record that the task in {@code taskDir} succeeded. */
protected abstract Path successMarker(Path taskDir);
/** Path of the file that is created to record that the task in {@code taskDir} failed. */
protected abstract Path failureMarker(Path taskDir);
/** Path of the log file for the task; created as an empty file when the task directory is claimed. */
protected abstract Path logFile(Path taskDir);
/** Path of the task's output directory; created when the task directory is claimed and hashed on success. */
protected abstract Path outputDir(Path taskDir);
/** Path of the file that receives the serialized task summary. */
protected abstract Path cacheSummary(Path taskDir);
/**
 * Callbacks through which a caller of {@code waitForTask} learns what happened to the task it
 * asked about. {@code PendingCacheResult} invokes at most one of these per registration;
 * {@code onSuccess} is dispatched through the cache's executor, the others are invoked directly
 * on the thread that detected the outcome.
 */
interface Listener {
/** This listener now owns the task directory and should do the work itself. */
void onReady(CacheResult result);
/** Someone else did it, but failed for some reason, not re-runnable */
void onFailure(CacheResult result);
/** Someone else tried to do it, but ran into an error, possibly recoverable if we try again */
void onError(Throwable throwable);
/** Someone else finished it, successfully, notify listeners */
void onSuccess(CacheResult result);
}
/**
 * One listener's registration for the outcome of a task directory. Whichever notification
 * arrives first wins: the registration is removed from the pending set and every later
 * notification is ignored.
 */
public class PendingCacheResult implements Cancelable {
    private final Path taskDir;
    private final Listener listener;
    private boolean done;

    public PendingCacheResult(Path taskDir, Listener listener) {
        this.taskDir = taskDir;
        this.listener = listener;
    }

    /**
     * Unregisters this instance unless a notification was already delivered. Callers must hold
     * this instance's monitor.
     *
     * @return {@code true} if the caller should go on to notify the listener
     */
    private boolean claim() {
        if (done) {
            return false;
        }
        remove();
        return true;
    }

    /** Unregisters from the pending set and blocks all further notifications. */
    private void remove() {
        // mop up so that this won't be called/retained any more
        //TODO this shouldn't be necessary if all the calls to remove() already mean removing this
        taskFutures.get(taskDir).remove(this);
        // ensure we won't call any listener method
        done = true;
    }

    /** Reports an error to the listener on the calling thread. */
    private synchronized void error(Throwable throwable) {
        if (claim()) {
            listener.onError(throwable);
        }
    }

    /** Reports success to the listener, dispatched through the cache's executor. */
    private synchronized void success() {
        if (claim()) {
            executor.execute(() -> {
                listener.onSuccess(new CacheResult(taskDir));
            });
        }
    }

    /** Tells the listener, on the calling thread, that it should do the work. */
    private synchronized void ready() {
        if (claim()) {
            listener.onReady(new CacheResult(taskDir));
        }
    }

    /** Reports failure to the listener on the calling thread. */
    private synchronized void failure() {
        if (claim()) {
            listener.onFailure(new CacheResult(taskDir));
        }
    }

    /**
     * Caller is no longer interested in starting the work, and if no one is, we should avoid
     * trying to acquire the lock.
     */
    public synchronized void cancel() {
        remove();
        // TODO notify that we're not listening any more
    }
}
/**
 * Registers interest in a task and reports its state to the listener without blocking for the
 * work itself. The task directory is derived from a hash of the serialized task summary, so
 * identical inputs map to the same directory.
 * <ul>
 *   <li>If this call manages to create the task directory, it owns the work:
 *       {@code onReady} is invoked before this method returns.</li>
 *   <li>If a success or failure marker already exists, the matching notification is issued
 *       right away.</li>
 *   <li>Otherwise the listener stays registered and is notified by the watch thread when a
 *       marker appears.</li>
 * </ul>
 * An I/O problem while interacting with the cache is reported through {@code onError}; it says
 * nothing about the requested work itself.
 *
 * @param taskDetails details about the work being requested to either find existing work or
 *                    make a new location for it
 * @param listener an instance to be notified of the state of the task. If onReady is called, the work
 *                 may not be canceled
 */
public void waitForTask(CollectedTaskInputs taskDetails, Listener listener) {
    assert taskDetails.getInputs().stream().allMatch(Input::hasContents);
    Murmur3F murmur3F = new Murmur3F();
    byte[] taskSummaryContents = taskSummaryContents(taskDetails).getBytes(StandardCharsets.UTF_8);
    murmur3F.update(taskSummaryContents);
    String hashString = murmur3F.getValueHexString();

    final Path taskDir = taskDir(taskDetails.getProject().getKey(), hashString, taskDetails.getTaskFactory().getOutputType());
    PendingCacheResult cancelable = new PendingCacheResult(taskDir, listener);
    taskFutures.computeIfAbsent(taskDir, ignore -> Collections.newSetFromMap(new ConcurrentHashMap<>())).add(cancelable);
    try {
        // make sure the parent dir exists, we'll need it to one way or the other
        if (!taskDir.getParent().toFile().exists()) {
            Files.createDirectories(taskDir.getParent());
        }

        // first check if it isn't already on disk
        // try to create the task directory - if we succeed, we own it (this is atomic), if we fail, someone else already made it and we wait for them to finish
        //TODO one more check here that we even want to make this and start the work
        if (taskDir.toFile().mkdir()) {
            // caller can begin work right away
            populateTaskDir(taskDir, taskSummaryContents);
            cancelable.ready();
            return;
        }

        // caller will need to wait until the current owner completes it
        //TODO register the future instance so the service can let us know when it is up

        // set up markers in case we finish registration very fast
        Path successMarker = successMarker(taskDir);
        Path failureMarker = failureMarker(taskDir);
        knownMarkers.put(successMarker, taskDir);
        knownMarkers.put(failureMarker, taskDir);

        // register to watch if a marker is made so we can get a call back, then check for existing markers
        WatchKey key = registerWatchCreate(taskDir);

        // check once more if we can take over the task dir, if we raced with the registration
        //TODO one more check here that we even want to make this and start the work
        if (taskDir.toFile().mkdir()) {
            //TODO mark this as "nevermind" further?
            key.cancel();
            // same contents as the first claim path, including the cache summary
            populateTaskDir(taskDir, taskSummaryContents);
            cancelable.ready();
            return;
        }

        if (successMarker.toFile().exists()) {
            // make sure we know it was successful
            knownOutputs.computeIfAbsent(taskDir, this::makeOutput);
            // already finished, success, no need to actually wait
            cancelable.success();
            //TODO mark as "nevermind" further?
            key.cancel();
            return;
        }
        if (failureMarker.toFile().exists()) {
            // already finished, failure, no need to actually wait
            cancelable.failure();
            //TODO mark as "nevermind" further?
            key.cancel();
            return;
        }

        // This task dir isn't owned by this process, and it might be stale
        if (!runningTasks.contains(taskDir)) {
            FileTime lastModifiedTime = Files.getLastModifiedTime(taskDir);
            FileTime limit = FileTime.from(Instant.now().minusSeconds(MAX_STALE_AGE));
            if (lastModifiedTime.compareTo(limit) < 0) {
                //directory hasn't been updated, it must be stale, take over
                System.out.println("STALE BUILD DETECTED - build was stale after " + MAX_STALE_AGE + " seconds, deleting it to take over: " + taskDir);
                System.out.println("File was last modified at " + lastModifiedTime);
                System.out.println("Expected it to be after " + limit);
                // NOTE(review): nothing below re-creates the directory or notifies the listener,
                // and the only watch registered here is ENTRY_CREATE on the directory just
                // deleted - confirm how the takeover is expected to complete.
                deleteRecursively(taskDir);
            }
        }
    } catch (IOException ioException) {
        cancelable.error(new IOException("Error when interacting with the disk cache", ioException));
    }
    // we're waiting for real now, give up on this thread
}

/** Fills a freshly claimed task directory with its output dir, empty log file and cache summary. */
private void populateTaskDir(Path taskDir, byte[] taskSummaryContents) throws IOException {
    Files.createDirectory(outputDir(taskDir));
    Files.createFile(logFile(taskDir));
    Files.write(cacheSummary(taskDir), taskSummaryContents);
}
/**
 * Registers a task directory with the shared watch service for entry-creation events. On mac
 * the path is wrapped in a {@code WatchablePath}, elsewhere the path is registered directly.
 *
 * @param taskDir the directory to watch
 * @return the key representing the registration
 * @throws IOException if the registration fails
 */
private WatchKey registerWatchCreate(Path taskDir) throws IOException {
    final Watchable target = IS_MAC ? new WatchablePath(taskDir) : taskDir;
    return target.register(this.service, StandardWatchEventKinds.ENTRY_CREATE);
}
/**
 * Records a task as successfully finished: hashes and remembers its output first, then creates
 * the success marker file.
 *
 * @param successfulResult the result whose task directory completed successfully
 * @throws UncheckedIOException if the success marker cannot be created
 */
public void markFinished(CacheResult successfulResult) {
    final Path finishedDir = successfulResult.taskDir;
    try {
        TaskOutput hashedOutput = makeOutput(finishedDir);
        this.knownOutputs.put(finishedDir, hashedOutput);
        Path marker = successMarker(finishedDir);
        Files.createFile(marker);
    } catch (IOException ioException) {
        //TODO need to basically stop everything if we can't write files to cache
        throw new UncheckedIOException(ioException);
    }
}
/**
 * Records a task as failed by creating its failure marker file, then prints a stack trace of
 * the current call site to standard error.
 *
 * @param failedResult the result whose task directory failed
 * @throws UncheckedIOException if the failure marker cannot be created
 */
public void markFailed(CacheResult failedResult) {
    final Path marker = failureMarker(failedResult.taskDir);
    try {
        Files.createFile(marker);
    } catch (IOException ioException) {
        //TODO need to basically stop everything if we can't write files to cache
        throw new UncheckedIOException(ioException);
    }
    // only reached when the marker was written: shows who reported the failure
    new RuntimeException().printStackTrace();
}
/**
 * Looks up an existing task directory and, if found, makes sure its output has been hashed and
 * recorded.
 *
 * @param taskDir the task directory to look up
 * @return a result for the directory, or empty if neither the directory nor its success marker exists
 */
public Optional<CacheResult> getCacheResult(Path taskDir) {
    // NOTE(review): this accepts a directory that exists without a success marker, and then
    // hashes whatever output is present - confirm "||" rather than "&&" is intended.
    if (Files.exists(taskDir) || Files.exists(successMarker(taskDir))) {
        CacheResult result = new CacheResult(taskDir);
        knownOutputs.computeIfAbsent(taskDir, this::makeOutput);
        return Optional.of(result);
    }
    return Optional.empty();
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy