All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.icij.extract.solr.SolrMachine Maven / Gradle / Ivy

There is a newer version: 7.4.0
Show newest version
package org.icij.extract.solr;

import java.util.Collection;
import java.util.ArrayList;
import java.util.List;

import java.util.function.Supplier;

import java.util.concurrent.Future;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.Callable;
import java.util.concurrent.Executors;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.ExecutionException;

import java.io.IOException;

import org.apache.solr.common.SolrDocument;
import org.apache.solr.client.solrj.SolrServerException;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A multi-threaded document-cycling robot for Solr.
 *
 * Multiple threads are used to consume from a streaming producer which runs from the current thread.
 *
 * Memory use is kept under control by throttling the streaming producer.
 *
 *
 */
public class SolrMachine implements Callable {

	private static final Logger logger = LoggerFactory.getLogger(SolrMachine.class);

	protected final SolrMachineConsumer consumer;
	protected final ExecutorService executor;

	private final SolrMachineProducer producer;
	private final int parallelism;

	public SolrMachine(final SolrMachineConsumer consumer,
		final SolrMachineProducer producer, final int parallelism) {
		this.consumer = consumer;
		this.producer = producer;
		this.parallelism = parallelism;
		this.executor = Executors.newFixedThreadPool(parallelism + 1);
	}

	public SolrMachine(final SolrMachineConsumer consumer, final SolrMachineProducer producer) {
		this(consumer, producer, Runtime.getRuntime().availableProcessors());
	}

	public void terminate() throws InterruptedException {
		logger.info("Shutting down Solr machine executor.");
		executor.shutdown();

		do {
			logger.info("Awaiting termination of Solr machine.");
		} while (!executor.awaitTermination(60, TimeUnit.SECONDS));
		logger.info("Solr machine terminated.");
	}

	@Override
	public Long call() throws IOException, SolrServerException, InterruptedException {
		final Collection> tasks = new ArrayList<>();

		// Add the producer to its own thread.
		tasks.add(producer);

		// Add the transformers - one per thread.
		for (int i = 0; i < parallelism; i++) {
			tasks.add(new Worker(producer));
		}

		final List> futures = executor.invokeAll(tasks);
		long accepted = 0;

		try {
			futures.remove(0).get();

			for (Future task : futures) {
				accepted += task.get();
			}
		} catch (ExecutionException e) {
			final Throwable cause = e.getCause();

			if (cause instanceof SolrServerException) {
				throw (SolrServerException) cause;
			}

			if (cause instanceof IOException) {
				throw (IOException) cause;
			}

			throw new RuntimeException(cause);
		}

		return accepted;
	}

	private class Worker implements Callable {

		private final Supplier supplier;

		Worker(final Supplier supplier) {
			this.supplier = supplier;
		}

		@Override
		public Long call() throws Exception {
			long accepted = 0;

			while (!Thread.currentThread().isInterrupted()) {
				SolrDocument document = supplier.get();

				// Null value is used as a poison pull to parse workers to exit.
				if (null == document) {
					break;
				}

				try {
					consumer.accept(document);
					accepted++;

				// Log run-time exceptions and continue.
				} catch (RuntimeException e) {
					logger.error(String.format("Could not consume document: \"%s\".", document.getFieldValue(producer
							.getIdField())), e);
				}
			}

			return accepted;
		}
	}
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy