package com.inin.analytics.elasticsearch;

import static org.elasticsearch.node.NodeBuilder.nodeBuilder;

import java.util.HashMap;
import java.util.List;
import java.util.Map;

import org.apache.hadoop.mapred.Reporter;
import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsRequest;
import org.elasticsearch.action.admin.cluster.snapshots.get.GetSnapshotsResponse;
import org.elasticsearch.cluster.metadata.IndexMetaData;
import org.elasticsearch.common.settings.ImmutableSettings;
import org.elasticsearch.common.settings.Settings;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.node.Node;
import org.elasticsearch.plugins.PluginsService;
import org.elasticsearch.snapshots.SnapshotInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.google.common.base.Preconditions;

/**
 * Builds an embedded Elasticsearch instance and configures it for you.
 * 
 * @author drew
 *
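 * A minimal usage sketch; the node/cluster names, paths, and repo name below are
 * illustrative placeholders rather than values required by this class:
 *
 * <pre>{@code
 * ESEmbededContainer container = new ESEmbededContainer.Builder()
 *     .withNodeName("embedded-node")
 *     .withNumShardsPerIndex(1)
 *     .withWorkingDir("/tmp/es-working")
 *     .withClusterName("embedded-cluster")
 *     .withSnapshotWorkingLocation("/tmp/es-snapshots")
 *     .withSnapshotRepoName("snapshot-repo")
 *     .build();
 *
 * container.snapshot(Arrays.asList("my-index"), "my-snapshot", "snapshot-repo", null);
 * }</pre>
 *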
 */
public class ESEmbededContainer {
	private Node node;
	private static final long DEFAULT_TIMEOUT_MS = 30 * 60 * 1000; // 30 minutes
	private static final Integer MAX_MERGED_SEGMENT_SIZE_MB = 256;
	private static final Logger logger = LoggerFactory.getLogger(ESEmbededContainer.class);
	
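	/**
	 * Flush, optimize, and snapshot a set of indices using the default timeout of
	 * {@link #DEFAULT_TIMEOUT_MS} milliseconds. Blocks until complete.
	 * 
	 * @param indices indices to snapshot
	 * @param snapshotName name for the snapshot
	 * @param snapshotRepoName snapshot repository to snapshot into
	 * @param reporter Hadoop reporter for progress counters; may be null
	 */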
	public void snapshot(List<String> indices, String snapshotName, String snapshotRepoName, Reporter reporter) {
		snapshot(indices, snapshotName, snapshotRepoName, DEFAULT_TIMEOUT_MS, reporter);
	}
	
	/**
	 * Flush, optimize, and snapshot a set of indices. Blocks until the snapshot completes or the timeout elapses.
	 * 
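	 * A usage sketch (the index, snapshot, and repo names here are illustrative):
	 * <pre>{@code
	 * container.snapshot(Arrays.asList("index-a", "index-b"), "nightly-snapshot", "snapshot-repo", 60 * 60 * 1000L, reporter);
	 * }</pre>
	 * 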
	 * @param indices indices to snapshot
	 * @param snapshotName name for the snapshot
	 * @param snapshotRepoName snapshot repository to snapshot into
	 * @param timeoutMS how long to wait for the snapshot to complete, in milliseconds
	 * @param reporter Hadoop reporter for progress counters; may be null
	 */
	public void snapshot(List<String> indices, String snapshotName, String snapshotRepoName, long timeoutMS, Reporter reporter) {
		/* Flush & optimize before the snapshot.
		 *  
		 * TODO: Long operations could block for longer than the container allows an operation to go
		 * unresponsive before being killed. We should issue the request and poll the future while
		 * waiting for the operation to succeed, updating a counter or similar so the Hadoop
		 * framework knows the process is still alive.
		 */  
		TimeValue v = new TimeValue(timeoutMS);
		for(String index : indices) {
			long start = System.currentTimeMillis();

			// Flush
			node.client().admin().indices().prepareFlush(index).get(v);
			if(reporter != null) {
				reporter.incrCounter(BaseESReducer.JOB_COUNTER.TIME_SPENT_FLUSHING_MS, System.currentTimeMillis() - start);
			}

			// Merge
			start = System.currentTimeMillis();
			node.client().admin().indices().prepareOptimize(index).get(v);
			if(reporter != null) {
				reporter.incrCounter(BaseESReducer.JOB_COUNTER.TIME_SPENT_MERGING_MS, System.currentTimeMillis() - start);
			}
		}

		// Snapshot
		long start = System.currentTimeMillis();
		node.client().admin().cluster().prepareCreateSnapshot(snapshotRepoName, snapshotName).setIndices(indices.toArray(new String[0])).execute();

		// ES snapshot requests ignore timeout values and block no more than 30s :( You have to block & poll to make sure it's done
		blockForSnapshot(snapshotRepoName, indices, timeoutMS);
		
		if(reporter != null) {
			reporter.incrCounter(BaseESReducer.JOB_COUNTER.TIME_SPENT_SNAPSHOTTING_MS, System.currentTimeMillis() - start);
		}

	}

	/** 
	 * Block until the index snapshots are complete or the timeout elapses.
	 *  
	 * @param snapshotRepoName snapshot repository to poll
	 * @param indices indices included in the snapshot
	 * @param timeoutMS how long to poll before giving up, in milliseconds
	 */
	private void blockForSnapshot(String snapshotRepoName, List<String> indices, long timeoutMS) {
		long start = System.currentTimeMillis();
		while(System.currentTimeMillis() - start < timeoutMS) {

			GetSnapshotsResponse repos = node.client().admin().cluster().getSnapshots(new GetSnapshotsRequest(snapshotRepoName)).actionGet();
			for(SnapshotInfo i : repos.getSnapshots()) {
				if(i.state().completed() && i.successfulShards() == i.totalShards() && i.totalShards() >= indices.size()) {
					logger.info("Snapshot completed {} out of {} shards. Snapshot completed: {}.", i.successfulShards(), i.totalShards(), i.state().completed());
					return;
				} else {
					logger.info("Snapshotted {} out of {} shards, polling for completion. Snapshot completed: {}.", i.successfulShards(), i.totalShards(), i.state().completed());
				}
			}
			try {
				// Don't slam ES with snapshot status requests in a tight loop
				Thread.sleep(1000);
			} catch (InterruptedException e) {
				// Restore the interrupt flag and stop polling
				Thread.currentThread().interrupt();
				return;
			}
		}
	}
	
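	/**
	 * Delete a snapshot from a snapshot repository, blocking until the request completes.
	 * 
	 * @param snapshotName name of the snapshot to delete
	 * @param snapshotRepoName snapshot repository holding the snapshot
	 */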
	public void deleteSnapshot(String snapshotName, String snapshotRepoName) {
		node.client().admin().cluster().prepareDeleteSnapshot(snapshotRepoName, snapshotName).execute().actionGet();
	}

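	/**
	 * Fluent builder for {@link ESEmbededContainer}. Node name, shards per index,
	 * working directory, and cluster name are required; the index template and
	 * snapshot repository settings are optional. See the class-level javadoc for
	 * a usage sketch.
	 */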
	public static class Builder {
		private ESEmbededContainer container;
		private String nodeName;
		private Integer numShardsPerIndex;
		private String workingDir;
		private String clusterName;
		private String templateName;
		private String templateSource;
		private String snapshotWorkingLocation;
		private String snapshotRepoName;
		private boolean memoryBackedIndex = false;

		public ESEmbededContainer build() {
			Preconditions.checkNotNull(nodeName);
			Preconditions.checkNotNull(numShardsPerIndex);
			Preconditions.checkNotNull(workingDir);
			Preconditions.checkNotNull(clusterName);

			org.elasticsearch.common.settings.ImmutableSettings.Builder builder = ImmutableSettings.builder()
			.put("http.enabled", false) // Disable the HTTP transport, we'll communicate within the JVM
			.put("processors", 1) // We could experiment with ramping this up to match (# cores - # reducers per node)
			.put(IndexMetaData.SETTING_NUMBER_OF_SHARDS, numShardsPerIndex) 
			.put("node.name", nodeName)
			.put("path.data", workingDir)
			.put("plugins." + PluginsService.LOAD_PLUGIN_FROM_CLASSPATH, true) // Allow plugins if they're bundled in with the uuberjar
			.put("index.refresh_interval", -1) 
			.put("index.translog.flush_threshold_size", "128mb") // Aggressive flushing helps keep the memory footprint below the yarn container max. TODO: Make configurable 
			.put("bootstrap.mlockall", true)
			.put("cluster.routing.allocation.disk.watermark.low", 99) // Nodes don't form a cluster, so routing allocations don't matter
			.put("cluster.routing.allocation.disk.watermark.high", 99)
			.put("index.load_fixed_bitset_filters_eagerly", false)
			.put("indices.store.throttle.type", "none") // Allow indexing to max out disk IO
			.put("indices.memory.index_buffer_size", "5%") // The default 10% is a bit large b/c it's calculated against JVM heap size & not Yarn container allocation. Choosing a good value here could be made smarter.
			.put("index.merge.policy.max_merged_segment", MAX_MERGED_SEGMENT_SIZE_MB + "mb") // The default 5gb segment max size is too large for the typical hadoop node
			//.put("index.merge.policy.max_merge_at_once", 10) 
			.put("index.merge.policy.segments_per_tier", 4)
			.put("index.merge.scheduler.max_thread_count", 1)
			.put("path.repo", snapshotWorkingLocation)
			.put("index.compound_format", false) // Explicitly disable compound files
			//.put("index.codec", "best_compression") // Lucene 5/ES 2.0 feature to play with when that's out
			.put("indices.fielddata.cache.size", "0%");
			
			if(memoryBackedIndex) {
				builder.put("index.store.type", "memory");
			}
			Settings nodeSettings = builder.build();

			// Create the node
			container.setNode(nodeBuilder()
					.client(false) // Not a client-only node; this node stores data
					.local(true) // Restrict ES cluster discovery to this JVM; disables network-based node discovery
					.clusterName(clusterName)
					.settings(nodeSettings)
					.build());

			// Start ES
			container.getNode().start();

			// Configure the cluster with an index template mapping
			if(templateName != null && templateSource != null) {
				container.getNode().client().admin().indices().preparePutTemplate(templateName).setSource(templateSource).get();	
			}

			// Create the snapshot repo
			if(snapshotWorkingLocation != null && snapshotRepoName != null) {
				Map<String, Object> settings = new HashMap<>();
				settings.put("location", snapshotWorkingLocation);
				settings.put("compress", true);
				settings.put("max_snapshot_bytes_per_sec", "400mb"); // The default 20mb/sec is very slow for a local disk to disk snapshot
				container.getNode().client().admin().cluster().preparePutRepository(snapshotRepoName).setType("fs").setSettings(settings).get();
			}

			return container;
		}

		public Builder() {
			container = new ESEmbededContainer();
		}

		public Builder withNodeName(String nodeName) {
			this.nodeName = nodeName;
			return this;
		}

		public Builder withNumShardsPerIndex(Integer numShardsPerIndex) {
			this.numShardsPerIndex = numShardsPerIndex;
			return this;
		}

		public Builder withWorkingDir(String workingDir) {
			this.workingDir = workingDir;
			return this;
		}

		public Builder withClusterName(String clusterName) {
			this.clusterName = clusterName;
			return this;
		}

		public Builder withTemplate(String templateName, String templateSource) {
			this.templateName = templateName;
			this.templateSource = templateSource;
			return this;
		}

		public Builder withSnapshotWorkingLocation(String snapshotWorkingLocation) {
			this.snapshotWorkingLocation = snapshotWorkingLocation;
			return this;
		}

		public Builder withSnapshotRepoName(String snapshotRepoName) {
			this.snapshotRepoName = snapshotRepoName;
			return this;
		}
		
		public Builder withInMemoryBackedIndexes(boolean memoryBackedIndex) {
			this.memoryBackedIndex = memoryBackedIndex;
			return this;
		}

	}

	public Node getNode() {
		return node;
	}

	public void setNode(Node node) {
		this.node = node;
	}

}