// org.icij.extract.solr.SolrMachine Maven / Gradle / Ivy
package org.icij.extract.solr;
import java.util.Collection;
import java.util.ArrayList;
import java.util.List;
import java.util.function.Supplier;
import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ExecutionException;
import java.io.IOException;
import org.apache.solr.common.SolrDocument;
import org.apache.solr.client.solrj.SolrServerException;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
* A multi-threaded document-cycling robot for Solr.
*
* Multiple threads are used to consume from a streaming producer which runs from the current thread.
*
* Memory use is kept under control by throttling the streaming producer.
*
*
*/
public class SolrMachine implements Callable {
private static final Logger logger = LoggerFactory.getLogger(SolrMachine.class);
protected final SolrMachineConsumer consumer;
protected final ExecutorService executor;
private final SolrMachineProducer producer;
private final int parallelism;
public SolrMachine(final SolrMachineConsumer consumer,
final SolrMachineProducer producer, final int parallelism) {
this.consumer = consumer;
this.producer = producer;
this.parallelism = parallelism;
this.executor = Executors.newFixedThreadPool(parallelism + 1);
}
public SolrMachine(final SolrMachineConsumer consumer, final SolrMachineProducer producer) {
this(consumer, producer, Runtime.getRuntime().availableProcessors());
}
public void terminate() throws InterruptedException {
logger.info("Shutting down Solr machine executor.");
executor.shutdown();
do {
logger.info("Awaiting termination of Solr machine.");
} while (!executor.awaitTermination(60, TimeUnit.SECONDS));
logger.info("Solr machine terminated.");
}
@Override
public Long call() throws IOException, SolrServerException, InterruptedException {
final Collection> tasks = new ArrayList<>();
// Add the producer to its own thread.
tasks.add(producer);
// Add the transformers - one per thread.
for (int i = 0; i < parallelism; i++) {
tasks.add(new Worker(producer));
}
final List> futures = executor.invokeAll(tasks);
long accepted = 0;
try {
futures.remove(0).get();
for (Future task : futures) {
accepted += task.get();
}
} catch (ExecutionException e) {
final Throwable cause = e.getCause();
if (cause instanceof SolrServerException) {
throw (SolrServerException) cause;
}
if (cause instanceof IOException) {
throw (IOException) cause;
}
throw new RuntimeException(cause);
}
return accepted;
}
private class Worker implements Callable {
private final Supplier supplier;
Worker(final Supplier supplier) {
this.supplier = supplier;
}
@Override
public Long call() throws Exception {
long accepted = 0;
while (!Thread.currentThread().isInterrupted()) {
SolrDocument document = supplier.get();
// Null value is used as a poison pull to parse workers to exit.
if (null == document) {
break;
}
try {
consumer.accept(document);
accepted++;
// Log run-time exceptions and continue.
} catch (RuntimeException e) {
logger.error(String.format("Could not consume document: \"%s\".", document.getFieldValue(producer
.getIdField())), e);
}
}
return accepted;
}
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy