io.datakernel.remotefs.RemoteFsClusterClient
The package provides tools for building efficient, scalable remote file servers.
It utilizes CSP for fast and reliable file transfer.
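Before the license header and the full source listing, here is a minimal usage sketch (not part of the sources below) showing how a cluster client might be assembled from single-node clients. The partition ids and addresses are hypothetical, and the sketch assumes the single-node factory RemoteFsClient.create(eventloop, address) from the same package:

import io.datakernel.eventloop.Eventloop;
import io.datakernel.remotefs.RemoteFsClient;
import io.datakernel.remotefs.RemoteFsClusterClient;

import java.net.InetSocketAddress;

public final class ClusterClientExample {
    public static void main(String[] args) {
        Eventloop eventloop = Eventloop.create().withCurrentThread();

        // Hypothetical partition addresses; replace with real RemoteFS servers.
        RemoteFsClusterClient cluster = RemoteFsClusterClient.create(eventloop)
                .withPartition("partition-1", RemoteFsClient.create(eventloop, new InetSocketAddress("10.0.0.1", 9000)))
                .withPartition("partition-2", RemoteFsClient.create(eventloop, new InetSocketAddress("10.0.0.2", 9000)))
                .withPartition("partition-3", RemoteFsClient.create(eventloop, new InetSocketAddress("10.0.0.3", 9000)))
                .withReplicationCount(2); // keep every file on two of the three partitions

        // List everything the cluster knows about; results from alive partitions are merged.
        cluster.list("**")
                .whenResult(files -> files.forEach(System.out::println))
                .whenException(Throwable::printStackTrace);

        eventloop.run();
    }
}

The full source of the class follows.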
/*
* Copyright (C) 2015-2019 SoftIndex LLC.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package io.datakernel.remotefs;
import io.datakernel.async.process.Cancellable;
import io.datakernel.async.service.EventloopService;
import io.datakernel.bytebuf.ByteBuf;
import io.datakernel.common.Initializable;
import io.datakernel.common.collection.Try;
import io.datakernel.common.exception.StacklessException;
import io.datakernel.common.tuple.Tuple2;
import io.datakernel.csp.ChannelConsumer;
import io.datakernel.csp.ChannelSupplier;
import io.datakernel.csp.process.ChannelSplitter;
import io.datakernel.eventloop.Eventloop;
import io.datakernel.eventloop.jmx.EventloopJmxMBeanEx;
import io.datakernel.jmx.api.JmxAttribute;
import io.datakernel.promise.Promise;
import io.datakernel.promise.Promises;
import io.datakernel.promise.jmx.PromiseStats;
import org.jetbrains.annotations.NotNull;
import org.jetbrains.annotations.Nullable;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.time.Duration;
import java.util.*;
import java.util.function.BiFunction;
import static io.datakernel.async.util.LogUtils.toLogger;
import static io.datakernel.common.Preconditions.checkArgument;
import static io.datakernel.common.Preconditions.checkState;
import static io.datakernel.csp.ChannelConsumer.getAcknowledgement;
import static io.datakernel.remotefs.ServerSelector.RENDEZVOUS_HASH_SHARDER;
import static java.util.Collections.emptyList;
import static java.util.stream.Collectors.joining;
import static java.util.stream.Collectors.toList;
/**
* An implementation of {@link FsClient} which operates on a map of other clients as a cluster.
* Contains some redundancy and fail-safety capabilities.
*/
public final class RemoteFsClusterClient implements FsClient, Initializable<RemoteFsClusterClient>, EventloopService, EventloopJmxMBeanEx {
private static final Logger logger = LoggerFactory.getLogger(RemoteFsClusterClient.class);
private final Eventloop eventloop;
private final Map<Object, FsClient> clients;
private final Map<Object, FsClient> aliveClients = new HashMap<>();
private final Map<Object, FsClient> deadClients = new HashMap<>();
private int replicationCount = 1;
private ServerSelector serverSelector = RENDEZVOUS_HASH_SHARDER;
// region JMX
private final PromiseStats connectPromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats uploadStartPromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats uploadFinishPromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats downloadStartPromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats downloadFinishPromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats movePromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats copyPromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats listPromise = PromiseStats.create(Duration.ofMinutes(5));
private final PromiseStats deletePromise = PromiseStats.create(Duration.ofMinutes(5));
// endregion
// region creators
private RemoteFsClusterClient(Eventloop eventloop, Map<Object, FsClient> clients) {
this.eventloop = eventloop;
this.clients = clients;
aliveClients.putAll(clients);
}
public static RemoteFsClusterClient create(Eventloop eventloop) {
return new RemoteFsClusterClient(eventloop, new HashMap<>());
}
public static RemoteFsClusterClient create(Eventloop eventloop, Map<Object, FsClient> clients) {
return new RemoteFsClusterClient(eventloop, clients);
}
/**
* Adds the given client with the given partition id to this cluster.
*/
public RemoteFsClusterClient withPartition(Object id, FsClient client) {
clients.put(id, client);
aliveClients.put(id, client);
return this;
}
/**
* Sets the replication count, which determines how many copies of each file should persist across the cluster.
*/
public RemoteFsClusterClient withReplicationCount(int replicationCount) {
checkArgument(1 <= replicationCount && replicationCount <= clients.size(), "Replication count cannot be less than one or greater than the number of clients");
this.replicationCount = replicationCount;
return this;
}
/**
* Sets the server selection strategy based on file name, alive partitions, and replication count.
*/
public RemoteFsClusterClient withServerSelector(@NotNull ServerSelector serverSelector) {
this.serverSelector = serverSelector;
return this;
}
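// A custom strategy can be supplied as a lambda. The sketch below is hypothetical
// (assuming ServerSelector is a functional interface whose selectFrom takes the
// file name, the set of alive partition ids, and the number of shards to pick,
// as used in upload() below) and swaps the default rendezvous hashing for plain
// hash-based ordering:
//
//   cluster.withServerSelector((fileName, shards, topShards) -> shards.stream()
//           .sorted(Comparator.comparingInt(id -> (fileName + id).hashCode()))
//           .limit(topShards)
//           .collect(toList()));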
// endregion
// region getters
@NotNull
@Override
public Eventloop getEventloop() {
return eventloop;
}
public Map<Object, FsClient> getClients() {
return Collections.unmodifiableMap(clients);
}
public Map<Object, FsClient> getAliveClients() {
return Collections.unmodifiableMap(aliveClients);
}
public Map<Object, FsClient> getDeadClients() {
return Collections.unmodifiableMap(deadClients);
}
public ServerSelector getServerSelector() {
return serverSelector;
}
// endregion
/**
* Starts a check process, which pings all partitions and marks them as dead or alive accordingly.
*
* @return promise of the check
*/
public Promise<Void> checkAllPartitions() {
return Promises.all(
clients.entrySet().stream()
.map(entry -> {
Object id = entry.getKey();
return entry.getValue()
.ping()
.mapEx(($, e) -> {
if (e == null) {
markAlive(id);
} else {
markDead(id, e);
}
return null;
});
}))
.whenComplete(toLogger(logger, "checkAllPartitions"));
}
/**
* Starts a check process, which pings all dead partitions to possibly mark them as alive.
* This is the preferred method as it does nothing when no clients are marked as dead,
* and RemoteFS operations themselves do mark nodes as dead on connection failures.
*
* @return promise of the check
*/
public Promise<Void> checkDeadPartitions() {
return Promises.all(
deadClients.entrySet().stream()
.map(entry -> entry.getValue()
.ping()
.mapEx(($, e) -> {
if (e == null) {
markAlive(entry.getKey());
}
return null;
})))
.whenComplete(toLogger(logger, "checkDeadPartitions"));
}
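// Operations themselves mark partitions dead on I/O failures (see markIfDead below),
// so a periodic checkDeadPartitions() is usually enough to bring recovered partitions
// back; checkAllPartitions() pings everything and also serves as the ping() implementation.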
private void markAlive(Object partitionId) {
FsClient client = deadClients.remove(partitionId);
if (client != null) {
logger.info("Partition " + partitionId + " is alive again!");
aliveClients.put(partitionId, client);
}
}
/**
* Marks a partition as dead: no operations will use it, and it will not be offered to the server selector.
* Next call of {@link #checkDeadPartitions()} or {@link #checkAllPartitions()} will ping this partition and possibly
* mark it as alive again.
*
* @param partitionId id of the partition to be marked
* @param e optional exception for logging
* @return true if partition was alive and false otherwise
*/
public boolean markDead(Object partitionId, @Nullable Throwable e) {
FsClient client = aliveClients.remove(partitionId);
if (client != null) {
logger.warn("marking " + partitionId + " as dead (" + e + ')');
deadClients.put(partitionId, client);
return true;
}
return false;
}
private void markIfDead(Object partitionId, Throwable e) {
// marking as dead only on lower-level connection and other I/O exceptions;
// remote fs exceptions are the ones actually received with a ServerError response (so the node is obviously not dead)
if (e.getClass() != StacklessException.class) {
markDead(partitionId, e);
}
}
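// Wraps the completion of a per-partition promise: on I/O-level failure the partition
// is marked dead, and the cause is rethrown as a single StacklessException so callers
// see a uniform "node failed" error regardless of which partition failed.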
private <T> BiFunction<T, Throwable, Promise<T>> wrapDeath(Object partitionId) {
return (res, e) -> {
if (e == null) {
return Promise.of(res);
}
markIfDead(partitionId, e);
return Promise.ofException(new StacklessException(RemoteFsClusterClient.class, "Node failed with exception", e));
};
}
// shortcut for creating a single Exception from a list of possibly failed tries
private static <T> Promise<T> ofFailure(String message, List<? extends Try<?>> failed) {
StacklessException exception = new StacklessException(RemoteFsClusterClient.class, message);
failed.stream()
.map(Try::getExceptionOrNull)
.filter(Objects::nonNull)
.forEach(exception::addSuppressed);
return Promise.ofException(exception);
}
private Promise<ChannelConsumer<ByteBuf>> upload(@NotNull String filename, long offset, @Nullable Long revision) {
List<Object> selected = serverSelector.selectFrom(filename, aliveClients.keySet(), replicationCount);
checkState(!selected.isEmpty(), "Selected no servers to upload file " + filename);
checkState(aliveClients.keySet().containsAll(selected), "Selected an id that is not one of client ids");
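// Selection is deterministic per file name (rendezvous hashing by default),
// so repeated uploads of the same file target the same replica set.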
class ConsumerWithId {
final Object id;
final ChannelConsumer<ByteBuf> consumer;
ConsumerWithId(Object id, ChannelConsumer<ByteBuf> consumer) {
this.id = id;
this.consumer = consumer;
}
}
return Promises.toList(selected.stream()
.map(id -> {
FsClient client = aliveClients.get(id);
return (revision == null ? client.upload(filename, offset) : client.upload(filename, offset, revision))
.thenEx(wrapDeath(id))
.map(consumer -> new ConsumerWithId(id,
consumer.withAcknowledgement(ack ->
ack.whenException(e -> markIfDead(id, e)))))
.toTry();
}))
.then(tries -> {
List<ConsumerWithId> successes = tries.stream()
.filter(Try::isSuccess)
.map(Try::get)
.collect(toList());
if (successes.isEmpty()) {
return ofFailure("Couldn't connect to any partition to upload file " + filename, tries);
}
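// The splitter copies each incoming ByteBuf to one output per connected partition,
// replicating a single upload stream without buffering whole files; lenient() presumably
// lets the split continue when one output fails, with failures surfacing through the
// per-output acknowledgements collected below.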
ChannelSplitter<ByteBuf> splitter = ChannelSplitter.<ByteBuf>create().lenient();
Promise<List<Try<Void>>> uploadResults = Promises.toList(successes.stream()
.map(s1 -> getAcknowledgement(fn ->
splitter.addOutput()
.set(s1.consumer.withAcknowledgement(fn)))
.toTry()));
if (logger.isTraceEnabled()) {
logger.trace("uploading file {} to {}, {}", filename, successes.stream().map(s -> s.id.toString()).collect(joining(", ", "[", "]")), this);
}
ChannelConsumer<ByteBuf> consumer = splitter.getInput().getConsumer();
return Promise.of(consumer.withAcknowledgement(ack -> ack
.then($ -> uploadResults)
.then(ackTries -> {
long successCount = ackTries.stream().filter(Try::isSuccess).count();
// check the number of uploads only here, so even if there were fewer connections
// than replicationCount, they will still upload
if (ackTries.size() < replicationCount) {
return ofFailure("Didn't connect to enough partitions uploading " +
filename + ", only " + successCount + " finished uploads", ackTries);
}
if (successCount < replicationCount) {
return ofFailure("Couldn't finish uploadind file " +
filename + ", only " + successCount + " acknowlegdes received", ackTries);
}
return Promise.complete();
})
.whenComplete(uploadFinishPromise.recordStats())));
})
.whenComplete(uploadStartPromise.recordStats());
}
@Override
public Promise<ChannelConsumer<ByteBuf>> upload(@NotNull String name, long offset) {
return upload(name, offset, null);
}
@Override
public Promise<ChannelConsumer<ByteBuf>> upload(@NotNull String name, long offset, long revision) {
return upload(name, offset, (Long) revision);
}
@Override
public Promise<ChannelSupplier<ByteBuf>> download(@NotNull String name, long offset, long length) {
if (deadClients.size() >= replicationCount) {
return ofFailure("There are more dead partitions than the replication count (" +
deadClients.size() + " dead, replication count is " + replicationCount + "), aborting", emptyList());
}
return Promises.toList(
aliveClients.entrySet().stream()
.map(entry -> {
Object partitionId = entry.getKey();
return entry.getValue().getMetadata(name) // ↓ use nulls as file non-existence indicators
.map(res -> res != null ? new Tuple2<>(partitionId, res) : null)
.thenEx(wrapDeath(partitionId))
.toTry();
}))
.then(tries -> {
List<Tuple2<Object, FileMetadata>> successes = tries.stream() // filter successful connections
.filter(Try::isSuccess)
.map(Try::get)
.collect(toList());
// recheck if our download request marked any partitions as dead
if (deadClients.size() >= replicationCount) {
return ofFailure("There are more dead partitions than replication count(" +
deadClients.size() + " dead, replication count is " + replicationCount + "), aborting", tries);
}
// filter partitions where file was found
List<Tuple2<Object, FileMetadata>> found = successes.stream().filter(Objects::nonNull).collect(toList());
// find any partition with the biggest file size
Optional<Tuple2<Object, FileMetadata>> maybeBest = found.stream()
.max(Comparator.comparing(Tuple2::getValue2, FileMetadata.COMPARATOR));
if (!maybeBest.isPresent()) {
return ofFailure("File not found: " + name, tries);
}
Tuple2<Object, FileMetadata> best = maybeBest.get();
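// Race the download from every partition holding the best revision:
// Promises.any resolves with the first successful supplier and cancels the rest.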
return Promises.any(found.stream()
.filter(piwfs -> piwfs.getValue2().getRevision() == best.getValue2().getRevision())
.map(piwfs -> {
FsClient client = aliveClients.get(piwfs.getValue1());
if (client == null) { // marked as dead already by somebody
return Promise.ofException(new StacklessException(RemoteFsClusterClient.class, "Client " + piwfs.getValue1() + " is not alive"));
}
logger.trace("downloading file {} from {}", name, piwfs.getValue1());
return client.download(name, offset, length)
.whenException(e -> logger.warn("Failed to connect to server with key " + piwfs.getValue1() + " to download file " + name, e))
.thenEx(wrapDeath(piwfs.getValue1()))
.map(supplier -> supplier
.withEndOfStream(eos -> eos
.whenException(e -> markIfDead(piwfs.getValue1(), e))
.whenComplete(downloadFinishPromise.recordStats())));
}), Cancellable::cancel);
})
.whenComplete(downloadStartPromise.recordStats());
}
@Override
public Promise<Void> move(@NotNull String name, @NotNull String target, long targetRevision, long tombstoneRevision) {
if (deadClients.size() >= replicationCount) {
return ofFailure("There are more dead partitions than the replication count (" +
deadClients.size() + " dead, replication count is " + replicationCount + "), aborting", emptyList());
}
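// The move is broadcast to every alive partition; because Promises.all is used,
// a single partition failure fails the whole operation.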
return Promises.all(aliveClients.entrySet().stream().map(e -> e.getValue().move(name, target, targetRevision, tombstoneRevision).thenEx(wrapDeath(e.getKey()))))
.whenComplete(movePromise.recordStats());
}
@Override
public Promise<Void> copy(@NotNull String name, @NotNull String target, long targetRevision) {
if (deadClients.size() >= replicationCount) {
return ofFailure("There are more dead partitions than the replication count (" +
deadClients.size() + " dead, replication count is " + replicationCount + "), aborting", emptyList());
}
return Promises.all(aliveClients.entrySet().stream().map(e -> e.getValue().copy(name, target, targetRevision).thenEx(wrapDeath(e.getKey()))))
.whenComplete(copyPromise.recordStats());
}
@Override
public Promise<Void> delete(@NotNull String name, long revision) {
return Promises.toList(
aliveClients.entrySet().stream()
.map(entry -> entry.getValue().delete(name, revision)
.thenEx(wrapDeath(entry.getKey()))
.toTry()))
.then(tries -> {
if (tries.stream().anyMatch(Try::isSuccess)) { // connected at least to somebody
return Promise.complete();
}
return ofFailure("Couldn't delete on any partition", tries);
})
.whenComplete(deletePromise.recordStats());
}
private Promise<List<FileMetadata>> doList(@NotNull String glob, BiFunction<FsClient, String, Promise<List<FileMetadata>>> list) {
if (deadClients.size() >= replicationCount) {
return ofFailure("There are more dead partitions than the replication count (" +
deadClients.size() + " dead, replication count is " + replicationCount + "), aborting", emptyList());
}
// this is all the same as delete, but with a list of lists of results, flattened and unified
return Promises.toList(
aliveClients.entrySet().stream()
.map(entry -> list.apply(entry.getValue(), glob)
.thenEx(wrapDeath(entry.getKey()))
.toTry()))
.then(tries -> {
// recheck if our list request marked any partitions as dead
if (deadClients.size() >= replicationCount) {
return ofFailure("There are more dead partitions than replication count(" +
deadClients.size() + " dead, replication count is " + replicationCount + "), aborting", tries);
}
return Promise.of(FileMetadata.flatten(tries.stream().filter(Try::isSuccess).map(Try::get)));
})
.whenComplete(listPromise.recordStats());
}
@Override
public Promise<List<FileMetadata>> listEntities(@NotNull String glob) {
return doList(glob, FsClient::listEntities);
}
@Override
public Promise<List<FileMetadata>> list(@NotNull String glob) {
return doList(glob, FsClient::list);
}
@Override
public Promise<Void> ping() {
return checkAllPartitions();
}
@NotNull
@Override
public Promise<Void> start() {
return Promise.complete();
}
@NotNull
@Override
public Promise<Void> stop() {
return Promise.complete();
}
@Override
public String toString() {
return "RemoteFsClusterClient{clients=" + clients + ", dead=" + deadClients.keySet() + '}';
}
// region JMX
@JmxAttribute
public int getReplicationCount() {
return replicationCount;
}
@JmxAttribute
public void setReplicationCount(int replicationCount) {
withReplicationCount(replicationCount);
}
@JmxAttribute
public int getAlivePartitionCount() {
return aliveClients.size();
}
@JmxAttribute
public int getDeadPartitionCount() {
return deadClients.size();
}
@JmxAttribute
public String[] getAlivePartitions() {
return aliveClients.keySet().stream()
.map(Object::toString)
.toArray(String[]::new);
}
@JmxAttribute
public String[] getDeadPartitions() {
return deadClients.keySet().stream()
.map(Object::toString)
.toArray(String[]::new);
}
@JmxAttribute
public PromiseStats getConnectPromise() {
return connectPromise;
}
@JmxAttribute
public PromiseStats getUploadStartPromise() {
return uploadStartPromise;
}
@JmxAttribute
public PromiseStats getUploadFinishPromise() {
return uploadFinishPromise;
}
@JmxAttribute
public PromiseStats getDownloadStartPromise() {
return downloadStartPromise;
}
@JmxAttribute
public PromiseStats getDownloadFinishPromise() {
return downloadFinishPromise;
}
@JmxAttribute
public PromiseStats getMovePromise() {
return movePromise;
}
@JmxAttribute
public PromiseStats getCopyPromise() {
return copyPromise;
}
@JmxAttribute
public PromiseStats getListPromise() {
return listPromise;
}
@JmxAttribute
public PromiseStats getDeletePromise() {
return deletePromise;
}
// endregion
}