// ai.grakn.batch.BatchExecutorClient (client-java artifact, newest version)
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package ai.grakn.batch;

import ai.grakn.API;
import ai.grakn.Keyspace;
import ai.grakn.graql.Query;
import ai.grakn.util.SimpleURI;
import com.codahale.metrics.Meter;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.Timer;
import com.codahale.metrics.Timer.Context;
import com.github.rholder.retry.Attempt;
import com.github.rholder.retry.RetryException;
import com.github.rholder.retry.RetryListener;
import com.github.rholder.retry.Retryer;
import com.github.rholder.retry.RetryerBuilder;
import com.github.rholder.retry.StopStrategies;
import com.github.rholder.retry.WaitStrategies;
import com.netflix.hystrix.HystrixCollapser;
import com.netflix.hystrix.HystrixCollapserKey;
import com.netflix.hystrix.HystrixCollapserProperties;
import com.netflix.hystrix.HystrixCommand;
import com.netflix.hystrix.HystrixCommandGroupKey;
import com.netflix.hystrix.HystrixCommandProperties;
import com.netflix.hystrix.HystrixThreadPoolProperties;
import com.netflix.hystrix.strategy.concurrency.HystrixRequestContext;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import rx.Observable;
import rx.Scheduler;
import rx.schedulers.Schedulers;

import javax.annotation.Nullable;
import java.io.Closeable;
import java.net.ConnectException;
import java.util.Collection;
import java.util.List;
import java.util.UUID;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.Semaphore;
import java.util.concurrent.TimeUnit;
import java.util.function.Consumer;
import java.util.stream.Collectors;

import static com.codahale.metrics.MetricRegistry.name;

/**
 * Client to batch load Graql queries into Grakn that mutate the graph.
 *
 * Provides methods to batch load queries. Optionally can provide a consumer that will execute when
 * a batch finishes loading. BatchExecutorClient will block when the configured resources are being
 * used to execute tasks.
 *
 * @author Domenico Corapi
 */
public class BatchExecutorClient implements Closeable {

    private static final Logger LOG = LoggerFactory.getLogger(BatchExecutorClient.class);

    // Client used to send each batch of queries to the server
    private final GraknClient graknClient;
    // Hystrix request context initialised when this client is constructed and closed in close()
    private final HystrixRequestContext context;

    // We only allow a certain number of queries to be waiting to execute at once for performance reasons
    private final Semaphore queryExecutionSemaphore;

    // Config
    // Collapser timer delay, in milliseconds
    private final int maxDelay;
    // A batch is attempted at most (maxRetries + 1) times
    private final int maxRetries;
    // Number of permits in queryExecutionSemaphore
    private final int maxQueries;
    // Size of both the observable executor and the Hystrix thread pool
    private final int threadPoolCoreSize;
    // Passed to Hystrix as the execution timeout value (the timeout itself is disabled in CommandQueries)
    private final int timeoutMs;
    // Whether the Hystrix request log is enabled for the batch command
    private final boolean requestLogEnabled;

    // Metrics
    private final MetricRegistry metricRegistry;
    private final Meter failureMeter;
    private final Timer addTimer;

    // Execution: the scheduler is backed by the executor, which is shut down in close()
    private final Scheduler scheduler;
    private final ExecutorService executor;

    // Called with every response of a batch that executed successfully; may be unset
    @Nullable
    private Consumer<? super QueryResponse> queryResponseHandler = null;

    // Called with the exception that made a batch fail; may be unset
    @Nullable
    private Consumer<? super Exception> exceptionHandler = null;

    // Unique per client instance; part of the Hystrix collapser key
    private final UUID id = UUID.randomUUID();

    /**
     * Creates a client from the settings collected by a {@link Builder}.
     *
     * A Hystrix request context is initialised first, then the builder values are copied and the
     * executor, scheduler, permit semaphore and metrics are derived from them.
     *
     * @param builder the builder holding the configuration of this client
     */
    private BatchExecutorClient(Builder builder) {
        this.context = HystrixRequestContext.initializeContext();

        // Plain copies of the builder settings
        this.graknClient = builder.graknClient;
        this.maxDelay = builder.maxDelay;
        this.maxRetries = builder.maxRetries;
        this.maxQueries = builder.maxQueries;
        this.metricRegistry = builder.metricRegistry;
        this.timeoutMs = builder.timeoutMs;
        this.threadPoolCoreSize = builder.threadPoolCoreSize;
        this.requestLogEnabled = builder.requestLogEnabled;

        // The observables run on their own pool, which is distinct from the Hystrix pool.
        // Both pools should have comparable sizes, and those sizes should match what the
        // server is able to handle.
        this.executor = Executors.newFixedThreadPool(this.threadPoolCoreSize);
        this.scheduler = Schedulers.from(this.executor);

        // One permit per query that may be in flight at the same time
        this.queryExecutionSemaphore = new Semaphore(this.maxQueries);

        // Metrics
        this.addTimer = this.metricRegistry.timer(name(BatchExecutorClient.class, "add"));
        this.failureMeter = this.metricRegistry.meter(name(BatchExecutorClient.class, "failure"));
    }

    /**
     * Submits a query for batched execution in the given keyspace.
     *
     * Will block until there is space for the query to be submitted: a permit is acquired from the
     * query execution semaphore before the request is handed to the collapser. The "add" timer is
     * started once the permit has been acquired and is stopped when the resulting observable
     * terminates; errors on the observable are counted by the "failure" meter.
     *
     * @param query    the query to execute
     * @param keyspace the keyspace in which the query is executed
     */
    public void add(Query<?> query, Keyspace keyspace) {
        QueryRequest queryRequest = new QueryRequest(query);
        queryRequest.acquirePermit();

        Context contextAddTimer = addTimer.time();
        Observable<QueryResponse> observable = new QueriesObservableCollapser(queryRequest, keyspace)
                .observe()
                .doOnError(error -> failureMeter.mark())
                .subscribeOn(scheduler)
                .doOnTerminate(contextAddTimer::close);

        // We have to subscribe to make the query start loading
        observable.subscribe();
    }

    /**
     * Registers the handler that receives every response of a batch that executed successfully.
     * Replaces any handler registered earlier; {@code null} removes the handler.
     *
     * @param queryResponseHandler consumer called once per query response, or {@code null}
     */
    public void onNext(Consumer<? super QueryResponse> queryResponseHandler) {
        this.queryResponseHandler = queryResponseHandler;
    }

    /**
     * Registers the handler that receives the exception raised when a batch fails to execute.
     * Replaces any handler registered earlier; {@code null} removes the handler.
     *
     * @param exceptionHandler consumer called with the failure of a batch, or {@code null}
     */
    public void onError(Consumer<? super Exception> exceptionHandler) {
        this.exceptionHandler = exceptionHandler;
    }

    // Set once close() has drained the permits; guarded by this object's monitor
    private boolean closed = false;

    /**
     * Will block until all submitted queries have executed, then closes the Hystrix request
     * context and shuts down the executor that runs the observables.
     *
     * The permits acquired here are never handed back, so a second attempt to acquire them would
     * block forever. To honour the {@link Closeable#close()} contract, any call made after the
     * first one has completed returns immediately; a call made while another thread is still
     * closing waits for that thread to finish and then returns.
     */
    @Override
    public synchronized void close() {
        if (closed) {
            return;
        }

        LOG.debug("Closing BatchExecutorClient");

        // Acquire ALL permits. Only possible when all the permits are released.
        // This means this method will only return when ALL the queries are completed.
        LOG.trace("Acquiring all {} permits ({} available)", maxQueries, queryExecutionSemaphore.availablePermits());
        queryExecutionSemaphore.acquireUninterruptibly(maxQueries);
        LOG.trace("Acquired all {} permits ({} available)", maxQueries, queryExecutionSemaphore.availablePermits());

        try {
            context.close();
        } finally {
            // The permits are gone at this point, so the client counts as closed even when closing
            // the context fails; the executor is shut down regardless.
            closed = true;
            executor.shutdownNow();
        }
    }

    /**
     * Creates a builder holding the default settings. A {@link GraknClient} still has to be
     * supplied through {@link Builder#taskClient(GraknClient)}.
     *
     * @return a fresh builder
     */
    public static Builder newBuilder() {
        Builder builder = new Builder();
        return builder;
    }

    /**
     * Creates a builder whose {@link GraknClient} is obtained from the given URI.
     *
     * @param simpleURI the URI passed to {@link GraknClient#of}
     * @return a builder with default settings and the client already set
     */
    @API
    public static Builder newBuilderforURI(SimpleURI simpleURI) {
        Builder builder = new Builder();
        GraknClient client = GraknClient.of(simpleURI);
        return builder.taskClient(client);
    }

    /**
     * Fluent builder for {@link BatchExecutorClient}. Every setter returns the builder itself so
     * that calls can be chained; {@link #build()} creates the client from the current settings.
     *
     * @author Domenico Corapi
     */
    public static final class Builder {

        // No default: has to be provided through taskClient(...)
        private GraknClient graknClient;

        // Defaults used when the corresponding setter is not called
        private int maxDelay = 50;
        private int maxRetries = 5;
        private int threadPoolCoreSize = 8;
        private int timeoutMs = 60_000;
        private int maxQueries = 10_000;
        private boolean requestLogEnabled = false;
        private MetricRegistry metricRegistry = new MetricRegistry();

        private Builder() {
            // Instances are obtained through the static factory methods of BatchExecutorClient
        }

        /**
         * @param val the client used to execute the batches
         * @return this builder
         */
        public Builder taskClient(GraknClient val) {
            this.graknClient = val;
            return this;
        }

        /**
         * @param val the collapser timer delay, in milliseconds
         * @return this builder
         */
        public Builder maxDelay(int val) {
            this.maxDelay = val;
            return this;
        }

        /**
         * @param val the number of retries; a batch is attempted at most {@code val + 1} times
         * @return this builder
         */
        public Builder maxRetries(int val) {
            this.maxRetries = val;
            return this;
        }

        /**
         * @param val the size of the observable executor and of the Hystrix thread pool
         * @return this builder
         */
        public Builder threadPoolCoreSize(int val) {
            this.threadPoolCoreSize = val;
            return this;
        }

        /**
         * @param val the registry in which the client registers its metrics
         * @return this builder
         */
        public Builder metricRegistry(MetricRegistry val) {
            this.metricRegistry = val;
            return this;
        }

        /**
         * @param val the number of permits of the query execution semaphore
         * @return this builder
         */
        public Builder maxQueries(int val) {
            this.maxQueries = val;
            return this;
        }

        /**
         * @param val whether the Hystrix request log is enabled for the batch command
         * @return this builder
         */
        public Builder requestLogEnabled(boolean val) {
            this.requestLogEnabled = val;
            return this;
        }

        /**
         * @return a new client configured with the current settings of this builder
         */
        public BatchExecutorClient build() {
            BatchExecutorClient client = new BatchExecutorClient(this);
            return client;
        }
    }

    /**
     * Used to make queries with the same text different.
     * We need this because we don't want to cache inserts.
     *
     * Every request gets its own random {@link UUID}, which takes part in {@link #equals(Object)}
     * and {@link #hashCode()}; two requests wrapping equal queries are therefore still distinct.
     *
     * This is a non-static class so it can access all fields of {@link BatchExecutorClient}, such as the
     * {@link BatchExecutorClient#queryExecutionSemaphore}. This avoids bugs where Hystrix caches certain parameters
     * or properties like the semaphore: the request is linked directly to the {@link BatchExecutorClient} that
     * created it.
     */
    private class QueryRequest {

        // Both fields define the identity of the request, so they never change after construction
        private final Query<?> query;
        private final UUID id;

        /**
         * @param query the query wrapped by this request
         */
        QueryRequest(Query<?> query) {
            this.query = query;
            this.id = UUID.randomUUID();
        }

        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            QueryRequest that = (QueryRequest) o;
            return (query != null ? query.equals(that.query) : that.query == null) && (id != null
                    ? id.equals(that.id) : that.id == null);
        }

        @Override
        public int hashCode() {
            int result = query != null ? query.hashCode() : 0;
            result = 31 * result + (id != null ? id.hashCode() : 0);
            return result;
        }

        @Override
        public String toString() {
            return "QueryRequest{" +
                    "query=" + query +
                    ", id=" + id +
                    '}';
        }

        /**
         * @return the query wrapped by this request
         */
        public Query<?> getQuery() {
            return query;
        }

        /**
         * Takes one permit from the query execution semaphore of the enclosing client.
         * Blocks, without reacting to interruption, until a permit is available.
         */
        void acquirePermit() {
            assert queryExecutionSemaphore.availablePermits() <= maxQueries :
                    "Number of available permits should never exceed max queries";

            // Acquire permission to execute a query - will block until a permit is available
            LOG.trace("Acquiring a permit for {} ({} available)", id, queryExecutionSemaphore.availablePermits());
            queryExecutionSemaphore.acquireUninterruptibly();
            LOG.trace("Acquired a permit for {} ({} available)", id, queryExecutionSemaphore.availablePermits());
        }

        /**
         * Hands one permit back to the query execution semaphore of the enclosing client.
         */
        void releasePermit() {
            // Release a query execution permit, allowing a new query to execute
            queryExecutionSemaphore.release();

            int availablePermits = queryExecutionSemaphore.availablePermits();
            LOG.trace("Released a permit for {} ({} available)", id, availablePermits);
        }
    }

    // Internal commands

    /*
     * The Batch Executor client uses Hystrix to batch requests. As a positive side effect
     * we get the Hystrix circuit breaker too.
     * Hystrix wraps everything that it does inside a Command. A Command defines what happens
     * when it's run, and optionally a fallback. Here in CommandQueries, we just define the run.
     * The batching is implemented using a Collapser, in our case
     * it's the QueriesObservableCollapser.
     * See the classes Javadocs for more info.
     */


    /**
     * This is the hystrix command for the batch. If collapsing weren't performed
     * we would call this command directly passing a set of queries.
     * Within the collapsing logic, this command is called after a certain timeout
     * expires to batch requests together.
     *
     * @author Domenico Corapi
     */
    private class CommandQueries extends HystrixCommand> {

        static final int QUEUE_MULTIPLIER = 1024;

        private final List queries;
        private final Keyspace keyspace;
        private final Timer graqlExecuteTimer;
        private final Meter attemptMeter;
        private final Retryer retryer;

        CommandQueries(List queries, Keyspace keyspace) {
            super(Setter
                    .withGroupKey(HystrixCommandGroupKey.Factory.asKey("BatchExecutor"))
                    .andThreadPoolPropertiesDefaults(
                            HystrixThreadPoolProperties.Setter()
                                    .withCoreSize(threadPoolCoreSize)
                                    // Sizing these two based on the thread pool core size
                                    .withQueueSizeRejectionThreshold(
                                            threadPoolCoreSize * QUEUE_MULTIPLIER)
                                    .withMaxQueueSize(threadPoolCoreSize * QUEUE_MULTIPLIER))
                    .andCommandPropertiesDefaults(
                            HystrixCommandProperties.Setter()
                                    .withExecutionTimeoutEnabled(false)
                                    .withExecutionTimeoutInMilliseconds(timeoutMs)
                                    .withRequestLogEnabled(requestLogEnabled)));

            this.queries = queries;
            this.keyspace = keyspace;
            this.graqlExecuteTimer = metricRegistry.timer(name(this.getClass(), "execute"));
            this.attemptMeter = metricRegistry.meter(name(this.getClass(), "attempt"));
            this.retryer = RetryerBuilder.newBuilder()
                    .retryIfException(throwable ->
                            throwable instanceof GraknClientException
                                    && ((GraknClientException) throwable).isRetriable())
                    .retryIfExceptionOfType(ConnectException.class)
                    .withWaitStrategy(WaitStrategies.exponentialWait(10, 1, TimeUnit.MINUTES))
                    .withStopStrategy(StopStrategies.stopAfterAttempt(maxRetries + 1))
                    .withRetryListener(new RetryListener() {
                        @Override
                        public  void onRetry(Attempt attempt) {
                            attemptMeter.mark();
                        }
                    })
                    .build();
        }

        @Override
        protected List run() throws GraknClientException {
            List> queryList = queries.stream().map(QueryRequest::getQuery)
                    .collect(Collectors.toList());
            try {
                List responses = retryer.call(() -> {
                    try (Context c = graqlExecuteTimer.time()) {
                        return graknClient.graqlExecute(queryList, keyspace);
                    }
                });

                if (queryResponseHandler != null) {
                    responses.forEach(queryResponseHandler);
                }

                return responses;
            } catch (RetryException | ExecutionException e) {
                Throwable cause = e.getCause();
                if (cause instanceof GraknClientException) {
                    if (exceptionHandler != null) {
                        exceptionHandler.accept((GraknClientException) cause);
                    }
                    throw (GraknClientException) cause;
                } else {
                    RuntimeException exception = new RuntimeException("Unexpected exception while retrying, " + queryList.size() + " queries failed.", e);
                    if (exceptionHandler != null) {
                        exceptionHandler.accept(exception);
                    }
                    throw exception;
                }
            } finally {
                queries.forEach(QueryRequest::releasePermit);
            }
        }
    }

    /**
     * This is the hystrix collapser. It's instantiated with a single query but
     * internally it waits until a timeout expires to batch the requests together.
     *
     * @author Domenico Corapi
     */
    private class QueriesObservableCollapser extends HystrixCollapser, QueryResponse, QueryRequest> {

        private final QueryRequest query;
        private Keyspace keyspace;

        QueriesObservableCollapser(QueryRequest query, Keyspace keyspace) {
            super(Setter
                    .withCollapserKey(hystrixCollapserKey(keyspace))
                    .andCollapserPropertiesDefaults(
                            HystrixCollapserProperties.Setter()
                                    .withRequestCacheEnabled(false)
                                    .withTimerDelayInMilliseconds(maxDelay)
                    )
            );

            this.query = query;
            this.keyspace = keyspace;
        }

        @Override
        public QueryRequest getRequestArgument() {
            return query;
        }

        /**
         * Logic to collapse requests into into CommandQueries
         *
         * @param collapsedRequests Set of requests being collapsed
         * @return returns a command that executed all the requests
         */
        @Override
        protected HystrixCommand> createCommand(
                Collection> collapsedRequests) {

            List requests =
                    collapsedRequests.stream().map(CollapsedRequest::getArgument).collect(Collectors.toList());

            return new CommandQueries(requests, keyspace);
        }

        @Override
        protected void mapResponseToRequests(List batchResponse, Collection> collapsedRequests) {
            int count = 0;
            for (CollapsedRequest request : collapsedRequests) {
                QueryResponse response = batchResponse.get(count++);
                request.setResponse(response);
                request.setComplete();
            }
            metricRegistry.histogram(name(QueriesObservableCollapser.class, "batch", "size")).update(collapsedRequests.size());
        }
    }

    /**
     * Builds the Hystrix collapser key for the given keyspace. The key name contains the id of
     * this client as well as the keyspace.
     *
     * @param keyspace the keyspace the collapsed requests are executed in
     * @return the collapser key for this client and keyspace
     */
    private HystrixCollapserKey hystrixCollapserKey(Keyspace keyspace) {
        String keyName = String.format("QueriesObservableCollapser_%s_%s", id, keyspace);
        return HystrixCollapserKey.Factory.asKey(keyName);
    }
}