All downloads are free. The search and download functionality uses the official Maven repository.

io.datakernel.remotefs.CachedFsClient Maven / Gradle / Ivy

Go to download

Package provides tools for building efficient, scalable remote file servers. It utilizes CSP for fast and reliable file transfer.

The newest version!
/*
 * Copyright (C) 2015-2019 SoftIndex LLC.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.datakernel.remotefs;

import io.datakernel.async.function.AsyncSupplier;
import io.datakernel.async.function.AsyncSuppliers;
import io.datakernel.async.service.EventloopService;
import io.datakernel.bytebuf.ByteBuf;
import io.datakernel.common.MemSize;
import io.datakernel.common.exception.StacklessException;
import io.datakernel.common.ref.RefLong;
import io.datakernel.common.time.CurrentTimeProvider;
import io.datakernel.csp.ChannelConsumer;
import io.datakernel.csp.ChannelSupplier;
import io.datakernel.csp.ChannelSuppliers;
import io.datakernel.csp.process.ChannelSplitter;
import io.datakernel.eventloop.Eventloop;
import io.datakernel.promise.Promise;
import io.datakernel.promise.Promises;
import org.jetbrains.annotations.NotNull;

import java.util.Comparator;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

import static io.datakernel.common.Preconditions.checkArgument;
import static io.datakernel.common.Preconditions.checkState;

/**
 * A cached filesystem client, which is an implementation of {@link FsClient}.
 * It works on top of two other {@link FsClient}s.
 * The first one is the main client, which is potentially slow but contains the necessary data. Typically it is a
 * {@link RemoteFsClient} which connects to a remote server.
 * The second one backs the main client and acts as a cache folder. Typically it is a local filesystem client
 * ({@link LocalFsClient}).
 * The cache replacement policy is defined by supplying a {@link Comparator} of {@link FullCacheStat}.
 */
public final class CachedFsClient implements FsClient, EventloopService {
	/**
	 * Share of the cache size limit that eviction trims the cache down to; its complement
	 * ({@code 1 - LOAD_FACTOR}) caps the size of a single download that may be written to the cache.
	 */
	private static final double LOAD_FACTOR = 0.75;
	private final Eventloop eventloop;
	/** Client that uploads, moves and copies are delegated to, and that downloads fall back to. */
	private final FsClient mainClient;
	/** Client that stores the cached copies of files. */
	private final FsClient cacheClient;
	/** Hit statistics of cached files, keyed by file name. */
	private final Map<String, CacheStat> cacheStats = new HashMap<>();
	/** Replacement policy: when space is needed, files that compare lowest are deleted first. */
	private final Comparator<FullCacheStat> comparator;
	/** Space-ensuring routine wrapped with {@link AsyncSuppliers#reuse}. */
	private final AsyncSupplier<Void> ensureSpace = AsyncSuppliers.reuse(this::doEnsureSpace);
	/** Must be set (see {@code with(MemSize)}) before the client is started. */
	private MemSize cacheSizeLimit;
	/** Bytes reserved for downloads that are currently being written into the cache. */
	private long downloadingNowSize;
	/** Tracked size of the cache in bytes; initialised when the client is started. */
	private long totalCacheSize;
	CurrentTimeProvider timeProvider = CurrentTimeProvider.ofSystem();

	// region creators
	/**
	 * Creates a client bound to the given eventloop. Use {@link #create} to obtain instances.
	 *
	 * @param eventloop   eventloop this service reports from {@code getEventloop()}
	 * @param mainClient  client holding the actual data
	 * @param cacheClient client used as the cache storage
	 * @param comparator  cache replacement policy
	 */
	private CachedFsClient(Eventloop eventloop, FsClient mainClient, FsClient cacheClient, Comparator<FullCacheStat> comparator) {
		this.eventloop = eventloop;
		this.mainClient = mainClient;
		this.cacheClient = cacheClient;
		this.comparator = comparator;
	}

	/**
	 * Creates a cached client on the current eventloop ({@link Eventloop#getCurrentEventloop()}).
	 * A cache size limit still has to be supplied via {@code with(MemSize)} before the client is started.
	 *
	 * @param mainClient  client holding the actual data
	 * @param cacheClient client used as the cache storage
	 * @param comparator  cache replacement policy, e.g. {@link #lruCompare()}, {@link #lfuCompare()} or {@link #sizeCompare()}
	 * @return a new, not yet started, cached client
	 */
	public static CachedFsClient create(FsClient mainClient, FsClient cacheClient, Comparator<FullCacheStat> comparator) {
		return new CachedFsClient(Eventloop.getCurrentEventloop(), mainClient, cacheClient, comparator);
	}

	/**
	 * Sets the cache size limit without triggering any eviction.
	 *
	 * @param cacheSizeLimit maximum size of the cache
	 * @return this client, for call chaining
	 */
	public CachedFsClient with(@NotNull MemSize cacheSizeLimit) {
		this.cacheSizeLimit = cacheSizeLimit;
		return this;
	}

	/**
	 * Replaces the time source used for the last-hit timestamps of cache statistics.
	 *
	 * @param timeProvider provider of the current time in milliseconds
	 * @return this client, for call chaining
	 */
	public CachedFsClient with(@NotNull CurrentTimeProvider timeProvider) {
		this.timeProvider = timeProvider;
		return this;
	}
	// endregion

	/**
	 * Changes the cache size limit and immediately makes sure the cache fits into it.
	 *
	 * @param cacheSizeLimit new maximum size of the cache
	 * @return promise that completes once the space check (and any eviction it started) has finished
	 */
	public Promise<Void> setCacheSizeLimit(@NotNull MemSize cacheSizeLimit) {
		this.cacheSizeLimit = cacheSizeLimit;
		return ensureSpace();
	}

	/**
	 * @return the currently configured cache size limit, or {@code null} if none has been set yet
	 */
	public MemSize getCacheSizeLimit() {
		return cacheSizeLimit;
	}

	/**
	 * Computes the actual size of the cache by listing every file of the cache client ({@code "**"})
	 * and summing up the sizes.
	 *
	 * @return promise of the summed size of all listed cache files
	 */
	public Promise<MemSize> getTotalCacheSize() {
		return cacheClient.list("**")
				.map(list -> MemSize.of(list.stream().mapToLong(FileMetadata::getSize).sum()));
	}

	/**
	 * @return the eventloop this client was created with
	 */
	@NotNull
	@Override
	public Eventloop getEventloop() {
		return eventloop;
	}

	/**
	 * Starts the client: measures the current cache size and evicts files if the cache is already over the limit.
	 *
	 * @return promise that completes when the initial space check has finished
	 * @throws IllegalStateException presumably (via {@code checkState}) when no cache size limit has been set
	 */
	@NotNull
	@Override
	public Promise<Void> start() {
		checkState(cacheSizeLimit != null, "Cannot start cached client without specifying cache size limit");
		return getTotalCacheSize()
				.then(size -> {
					// seed the tracked size from what is really stored in the cache
					totalCacheSize = size.toLong();
					return ensureSpace();
				});
	}

	@Override
	public Promise> upload(@NotNull String name, long offset) {
		return mainClient.upload(name, offset);
	}

	@Override
	public Promise> upload(@NotNull String name, long offset, long revision) {
		return mainClient.upload(name, offset, revision);
	}

	/**
	 * Tries to download file either from cache (if present) or from server.
	 *
	 * @param name   name of the file to be downloaded
	 * @param offset from which byte to download the file
	 * @param length how much bytes of the file do download
	 * @return promise for stream supplier of byte buffers
	 */
	@Override
	public Promise> download(@NotNull String name, long offset, long length) {
		checkArgument(offset >= 0, "Data offset must be greater than or equal to zero");
		checkArgument(length >= -1, "Data length must be either -1 or greater than or equal to zero");

		return cacheClient.getMetadata(name)
				.then(cacheMetadata -> {
					if (cacheMetadata == null) {
						if (offset != 0) {
							return mainClient.download(name, offset, length);
						}
						return mainClient.getMetadata(name)
								.then(mainMetadata -> {
									if (mainMetadata == null) {
										return Promise.ofException(new StacklessException(CachedFsClient.class, "File not found: " + name));
									}
									return downloadToCache(name, offset, length, 0, mainMetadata.getSize());
								});
					}
					long sizeInCache = cacheMetadata.getSize();

					if (length != -1 && sizeInCache >= offset + length) {
						return cacheClient.download(name, offset, length)
								.whenComplete((val, e) -> updateCacheStats(name));
					}

					if (offset > sizeInCache) {
						return mainClient.download(name, offset, length);
					}

					return mainClient.getMetadata(name)
							.then(mainMetadata -> {
								if (mainMetadata == null) {
									return cacheClient.download(name, offset, length)
											.whenComplete(() -> updateCacheStats(name));
								}

								long sizeInMain = mainMetadata.getSize();

								if (sizeInCache >= sizeInMain) {
									return cacheClient.download(name, offset, length)
											.whenComplete(() -> updateCacheStats(name));
								}

								if ((length != -1) && (sizeInMain < (offset + length))) {
									String repr = name + "(size=" + sizeInMain + (offset != 0 ? ", offset=" + offset : "") + ", length=" + length;
									return Promise.ofException(new StacklessException(CachedFsClient.class, "Boundaries exceed file size: " + repr));
								}

								return downloadToCache(name, offset, length, sizeInCache, sizeInMain);
							});

				});
	}

	private Promise> downloadToCache(String fileName, long offset, long length, long sizeInCache, long sizeInMain) {
		long size = length == -1 ? length : length + offset - sizeInCache;
		return mainClient.download(fileName, sizeInCache, size)
				.then(supplier -> {
					long toBeCached = sizeInMain - sizeInCache;
					if (downloadingNowSize + toBeCached > cacheSizeLimit.toLong() || toBeCached > cacheSizeLimit.toLong() * (1 - LOAD_FACTOR)) {
						return Promise.of(supplier);
					}
					downloadingNowSize += toBeCached;

					return ensureSpace()
							.map($ -> {
								ChannelSplitter splitter = ChannelSplitter.create(supplier);
								splitter.addOutput()
										.set(ChannelConsumer.ofPromise(cacheClient.upload(fileName, sizeInCache)));
								ChannelSupplier prefix = sizeInCache != 0 ?
										ChannelSupplier.ofPromise(cacheClient.download(fileName, offset, sizeInCache)) :
										ChannelSupplier.of();

								return ChannelSuppliers.concat(prefix, splitter.addOutput().getSupplier())
										.withEndOfStream(eos -> eos
												.both(splitter.getProcessCompletion())
												.then($2 -> {
													totalCacheSize += toBeCached;
													return updateCacheStats(fileName);
												})
												.whenResult($2 -> downloadingNowSize -= toBeCached));
							});
				});
	}

	/**
	 * Moves a file on the main client only; the cache client is not involved.
	 *
	 * @return promise returned by the main client
	 */
	@Override
	public Promise<Void> move(@NotNull String name, @NotNull String target, long targetRevision, long tombstoneRevision) {
		return mainClient.move(name, target, targetRevision, tombstoneRevision);
	}

	/**
	 * Copies a file on the main client only; the cache client is not involved.
	 *
	 * @return promise returned by the main client
	 */
	@Override
	public Promise<Void> copy(@NotNull String name, @NotNull String target, long targetRevision) {
		return mainClient.copy(name, target, targetRevision);
	}

	/**
	 * Lists files that are matched by glob. List is combined from cache folder files and files that are on server.
	 *
	 * @param glob specified in {@link java.nio.file.FileSystem#getPathMatcher NIO path matcher} documentation for glob patterns
	 * @return promise that is a union of the most actual files from cache folder and server
	 */
	@Override
	public Promise> listEntities(@NotNull String glob) {
		return Promises.toList(cacheClient.listEntities(glob), mainClient.listEntities(glob))
				.map(lists -> FileMetadata.flatten(lists.stream()));
	}

	@Override
	public Promise> list(@NotNull String glob) {
		return Promises.toList(cacheClient.list(glob), mainClient.list(glob))
				.map(lists -> FileMetadata.flatten(lists.stream()));
	}

	/**
	 * Deletes file both on server and in cache folder, and forgets its hit statistics.
	 *
	 * @param name     name of the file to delete
	 * @param revision revision passed to both clients
	 * @return promise of {@link Void} that represents successful deletion on both clients
	 */
	@Override
	public Promise<Void> delete(@NotNull String name, long revision) {
		cacheStats.remove(name);
		// NOTE(review): the tracked cache size is not reduced here, so it may over-estimate the
		// real cache size until the client is restarted - confirm whether this is intended
		return Promises.all(cacheClient.delete(name, revision), mainClient.delete(name, revision));
	}

	/**
	 * Stops the client after a final space check, so that the cache is left within its limit.
	 *
	 * @return promise that completes when the space check has finished
	 */
	@NotNull
	@Override
	public Promise<Void> stop() {
		return ensureSpace();
	}

	/**
	 * Records a cache hit for a file: an existing entry gets its hit counter incremented and its
	 * timestamp refreshed, a missing entry is created with a single hit.
	 *
	 * @param fileName name of the file that was hit
	 * @return an already completed promise
	 */
	private Promise<Void> updateCacheStats(String fileName) {
		long now = timeProvider.currentTimeMillis();
		CacheStat cacheStat = cacheStats.get(fileName);
		if (cacheStat == null) {
			cacheStats.put(fileName, new CacheStat(1, now));
		} else {
			cacheStat.numberOfHits++;
			cacheStat.lastHitTimestamp = now;
		}
		return Promise.complete();
	}

	/**
	 * Runs the space check through the {@code ensureSpace} supplier (created with {@code AsyncSuppliers.reuse})
	 * rather than calling {@code doEnsureSpace()} directly.
	 *
	 * @return promise of the space check
	 */
	private Promise<Void> ensureSpace() {
		return ensureSpace.get();
	}

	/**
	 * Evicts files from the cache when the tracked cache size plus the size reserved for running
	 * downloads exceeds the limit.
	 * <p>
	 * All cache files are listed, paired with their hit statistics (zeroes when a file has none) and sorted
	 * by the reversed comparator. Sizes are accumulated in that order; from the file whose size pushes the
	 * running total above {@code cacheSizeLimit * LOAD_FACTOR} onwards, every file is deleted.
	 *
	 * @return promise that completes when all deletions have finished, or a completed promise if no eviction was needed
	 */
	@NotNull
	private Promise<Void> doEnsureSpace() {
		if (totalCacheSize + downloadingNowSize <= cacheSizeLimit.toLong()) {
			return Promise.complete();
		}
		// running total of file sizes in eviction order; updated as a side effect of the filter below
		RefLong size = new RefLong(0);
		return cacheClient.list("**")
				.map(list -> list
						.stream()
						.map(metadata -> {
							CacheStat cacheStat = cacheStats.get(metadata.getName());
							return cacheStat == null ?
									new FullCacheStat(metadata, 0, 0) :
									new FullCacheStat(metadata, cacheStat.numberOfHits, cacheStat.lastHitTimestamp);
						})
						.sorted(comparator.reversed())
						// every size is added whether or not the file passes, so the total only grows
						.filter(fullCacheStat -> size.inc(fullCacheStat.getFileMetadata().getSize()) > cacheSizeLimit.toLong() * LOAD_FACTOR))
				.then(filesToDelete -> Promises.all(filesToDelete
						.map(fullCacheStat -> cacheClient
								.delete(fullCacheStat.getFileMetadata().getName())
								.whenResult($ -> {
									// bookkeeping only after a successful deletion
									totalCacheSize -= fullCacheStat.getFileMetadata().getSize();
									cacheStats.remove(fullCacheStat.getFileMetadata().getName());
								}))));
	}

	/**
	 * Mutable usage counters of a single cached file: how many times it was hit and when the last hit happened.
	 */
	private static final class CacheStat {
		private long numberOfHits;
		private long lastHitTimestamp;

		private CacheStat(long hits, long lastHit) {
			numberOfHits = hits;
			lastHitTimestamp = lastHit;
		}

		@Override
		public String toString() {
			StringBuilder sb = new StringBuilder("CacheStat{");
			sb.append("numberOfHits=").append(numberOfHits);
			sb.append(", lastHitTimestamp=").append(lastHitTimestamp);
			return sb.append('}').toString();
		}
	}

	/**
	 * Immutable snapshot of the statistics of a file in the cache folder, as seen by the replacement policy.
	 * Holds the file's {@link FileMetadata}, the number of cache hits and the time of the last cache hit.
	 */
	public static final class FullCacheStat {
		private final FileMetadata fileMetadata;
		private final long numberOfHits;
		private final long lastHitTimestamp;

		FullCacheStat(FileMetadata metadata, long hits, long lastHit) {
			fileMetadata = metadata;
			numberOfHits = hits;
			lastHitTimestamp = lastHit;
		}

		/** @return metadata of the cached file */
		public FileMetadata getFileMetadata() {
			return this.fileMetadata;
		}

		/** @return number of recorded cache hits */
		public long getNumberOfHits() {
			return this.numberOfHits;
		}

		/** @return timestamp of the last recorded cache hit */
		public long getLastHitTimestamp() {
			return this.lastHitTimestamp;
		}

		@Override
		public String toString() {
			StringBuilder sb = new StringBuilder("FullCacheStat{");
			sb.append("fileMetadata=").append(fileMetadata);
			sb.append(", numberOfHits=").append(numberOfHits);
			sb.append(", lastHitTimestamp=").append(lastHitTimestamp);
			return sb.append('}').toString();
		}
	}

	/**
	 * Default {@link Comparator} to compare files by the time of last usage.
	 * Ties are broken by the number of hits and then by size, with bigger files ordered first.
	 *
	 * @return LRU (Least recently used) {@link Comparator}
	 */
	public static Comparator<FullCacheStat> lruCompare() {
		return Comparator.comparingLong(FullCacheStat::getLastHitTimestamp)
				.thenComparingLong(FullCacheStat::getNumberOfHits)
				.thenComparingLong(fullCacheStat -> -fullCacheStat.getFileMetadata().getSize());
	}

	/**
	 * Default {@link Comparator} to compare files by the number of usages.
	 * Ties are broken by the time of the last hit and then by size, with bigger files ordered first.
	 *
	 * @return LFU (Least frequently used) {@link Comparator}
	 */
	public static Comparator<FullCacheStat> lfuCompare() {
		return Comparator.comparingLong(FullCacheStat::getNumberOfHits)
				.thenComparingLong(FullCacheStat::getLastHitTimestamp)
				.thenComparingLong(fullCacheStat -> -fullCacheStat.getFileMetadata().getSize());
	}

	/**
	 * Default {@link Comparator} to compare files by size; bigger files are ordered before smaller ones.
	 *
	 * @return filesize {@link Comparator}
	 */
	public static Comparator<FullCacheStat> sizeCompare() {
		return Comparator.comparingLong(fullCacheStat -> -fullCacheStat.getFileMetadata().getSize());
	}

	/**
	 * Describes the client by its two underlying clients and the formatted cache size limit.
	 * Safe to call before a limit has been configured, in which case the limit is printed as {@code null}.
	 */
	@Override
	public String toString() {
		return "CachedFsClient{mainClient=" + mainClient + ", cacheClient=" + cacheClient
				+ ", cacheSizeLimit = " + (cacheSizeLimit == null ? null : cacheSizeLimit.format()) + '}';
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy