Please wait. This can take a few minutes...
Downloading a project requires a lot of server resources. Please understand that we have to cover our server costs. Thank you in advance.
Project price: only $1
You can buy this project and download/modify it as often as you want. Maven / Gradle / Ivy
import org.apache.commons.logging.Log;
import org.elasticsearch.hadoop.EsHadoopIllegalArgumentException;
import org.elasticsearch.hadoop.cfg.ConfigurationOptions;
import org.elasticsearch.hadoop.cfg.FieldPresenceValidation;
import org.elasticsearch.hadoop.cfg.Settings;
import org.elasticsearch.hadoop.serialization.ScrollReader;
import org.elasticsearch.hadoop.serialization.ScrollReaderConfigBuilder;
import org.elasticsearch.hadoop.serialization.builder.ValueReader;
import org.elasticsearch.hadoop.serialization.dto.IndicesAliases;
import org.elasticsearch.hadoop.serialization.dto.NodeInfo;
import org.elasticsearch.hadoop.serialization.dto.ShardInfo;
import org.elasticsearch.hadoop.serialization.dto.mapping.Mapping;
import org.elasticsearch.hadoop.serialization.dto.mapping.MappingSet;
import org.elasticsearch.hadoop.serialization.dto.mapping.MappingUtils;
import org.elasticsearch.hadoop.serialization.field.IndexExtractor;
import org.elasticsearch.hadoop.util.Assert;
import org.elasticsearch.hadoop.util.ClusterInfo;
import org.elasticsearch.hadoop.util.EsMajorVersion;
import org.elasticsearch.hadoop.util.IOUtils;
import org.elasticsearch.hadoop.util.ObjectUtils;
import org.elasticsearch.hadoop.util.SettingsUtils;
import org.elasticsearch.hadoop.util.StringUtils;
import org.elasticsearch.hadoop.util.Version;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Random;
import java.util.Set;
public abstract class RestService implements Serializable {
/**
 * Bundles the three collaborators needed to read one partition - a {@link ScrollReader},
 * a {@link RestRepository} and a {@link SearchRequestBuilder} - and caches the
 * {@link ScrollQuery} produced from them.
 *
 * NOTE(review): this listing has lost its closing braces and parts of some statements
 * during extraction; restore them from the upstream source before compiling.
 */
public static class PartitionReader implements Closeable {
public final ScrollReader scrollReader;
public final RestRepository client;
public final SearchRequestBuilder queryBuilder;
// Created on first call to scrollQuery() and reused afterwards.
private ScrollQuery scrollQuery;
// Guards close() so that its body runs at most once.
private boolean closed = false ;
/**
 * Stores the given collaborators; no scroll is started here.
 */
PartitionReader(ScrollReader scrollReader, RestRepository client, SearchRequestBuilder queryBuilder) {
this .scrollReader = scrollReader;
this .client = client;
this .queryBuilder = queryBuilder;
/**
 * Closes this reader. Repeated calls are no-ops because of the {@code closed} flag.
 */
public void close () {
if (!closed) {
closed = true ;
// NOTE(review): the statements that release the scroll query (and presumably the
// client) are not visible in this listing - confirm against upstream.
if (scrollQuery != null ) {
/**
 * Returns the scroll query for this partition, building it on the first call.
 *
 * @return the cached {@link ScrollQuery}
 */
public ScrollQuery scrollQuery () {
if (scrollQuery == null ) {
// NOTE(review): the right-hand side of this assignment is truncated (everything
// before ", scrollReader)" is missing) - TODO restore.
scrollQuery =, scrollReader);
return scrollQuery;
/**
 * Holder describing one write partition: the {@link RestRepository} to write through,
 * the settings it was created with, and the split's position ({@code number}) out of
 * {@code total} splits.
 */
public static class PartitionWriter implements Closeable {
public final RestRepository repository;
// Index of this split, taken from the constructor's splitIndex argument.
public final long number;
// Total number of splits, taken from the constructor's splitsSize argument.
public final int total;
public final Settings settings;
// Guards close() so that its body runs at most once.
private boolean closed = false ;
/**
 * Stores the given values; performs no I/O itself.
 */
PartitionWriter(Settings settings, long splitIndex, int splitsSize, RestRepository repository) {
this .settings = settings;
this .repository = repository;
this .number = splitIndex;
this .total = splitsSize;
/**
 * Closes this writer. Repeated calls are no-ops because of the {@code closed} flag.
 */
public void close () {
if (!closed) {
closed = true ;
// NOTE(review): the statement that closes the repository is not visible in this
// listing - presumably repository.close(); confirm against upstream.
/**
 * Iterator that walks a list of partition definitions one after another, opening a
 * {@link PartitionReader} for each definition in turn and exposing the entries of the
 * readers' scroll queries as a single sequence.
 *
 * NOTE(review): generic type arguments and closing braces appear to have been stripped
 * from this listing; the element type of {@code definitions} is not visible here.
 */
public static class MultiReaderIterator implements Closeable , Iterator {
private final List definitions;
private final Iterator definitionIterator;
// Reader for the definition currently being consumed; null when the next one must be opened.
private PartitionReader currentReader;
// Scroll obtained from currentReader; null until requested or after it ran dry.
private ScrollQuery currentScroll;
// Set once all definitions are exhausted or close() has run.
private boolean finished = false ;
private final Settings settings;
private final Log log;
/**
 * Captures the definitions and obtains a single iterator over them; no reader is
 * created until the first call that needs one.
 */
MultiReaderIterator(List defs, Settings settings, Log log) {
this .definitions = defs;
definitionIterator = defs.iterator();
this .settings = settings;
this .log = log;
/**
 * Marks the iterator as finished. Does nothing when it is already finished.
 */
public void close () {
if (finished) {
return ;
ScrollQuery sq = getCurrent();
// NOTE(review): the bodies of the two checks below (closing the scroll and the
// reader) are not visible in this listing - confirm against upstream.
if (sq != null ) {
if (currentReader != null ) {
finished = true ;
/**
 * @return {@code true} when the current scroll (advancing across definitions as
 *         needed) has another entry; {@code false} when no scroll is available
 */
public boolean hasNext () {
ScrollQuery sq = getCurrent();
return (sq != null ? sq.hasNext() : false );
/**
 * Finds the first scroll, starting from the current one, that still has entries.
 *
 * @return a scroll whose {@code hasNext()} just returned true, or {@code null} when the
 *         iterator is finished or the definitions are exhausted
 */
private ScrollQuery getCurrent () {
if (finished) {
return null ;
// Loop until a scroll reporting at least one entry is found.
for (boolean hasValue = false ; !hasValue; ) {
if (currentReader == null ) {
if (definitionIterator.hasNext()) {
// NOTE(review): the middle argument is truncated here - presumably the next
// definition from definitionIterator; TODO restore.
currentReader = RestService.createReader(settings,, log);
} else {
// No definitions left: remember that so later calls return immediately.
finished = true ;
return null ;
if (currentScroll == null ) {
currentScroll = currentReader.scrollQuery();
hasValue = currentScroll.hasNext();
if (!hasValue) {
// Current scroll is empty: drop it and its reader so the next pass opens the
// following definition.
currentScroll = null ;
currentReader = null ;
return currentScroll;
/**
 * Returns the next entry of the current scroll.
 *
 * NOTE(review): only the lookup of the current scroll is visible; the return statement
 * is missing from this listing.
 */
public Object[] next() {
ScrollQuery sq = getCurrent();
/**
 * Removal is not supported.
 *
 * @throws UnsupportedOperationException always
 */
public void remove () {
throw new UnsupportedOperationException();
/**
 * Computes the read partitions for the resource configured in {@code settings}.
 *
 * Steps visible in the code: discover and validate the cluster info, discover nodes and
 * apply the client/data/ingest node filters, check whether the read resource exists,
 * fetch its shards and (when there are any) its mappings, optionally validate the
 * mapping against the requested source fields, then delegate to
 * {@link #findSlicePartitions} or {@link #findShardPartitions}.
 *
 * @param settings job settings; the node-related helpers are called with it before the
 *                 repository is created
 * @param log      logger used for trace/debug output
 * @return the partitions produced by the slice- or shard-based strategy
 * @throws EsHadoopIllegalArgumentException when the index is missing and
 *         {@code getIndexReadMissingAsEmpty()} is false
 */
@SuppressWarnings ("unchecked" )
public static List findPartitions (Settings settings, Log log) {
ClusterInfo clusterInfo = InitializationUtils.discoverAndValidateClusterInfo(settings, log);
List nodes = InitializationUtils.discoverNodesIfNeeded(settings, log);
InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
InitializationUtils.filterNonDataNodesIfNeeded(settings, log);
InitializationUtils.filterNonIngestNodesIfNeeded(settings, log);
RestRepository client = new RestRepository(settings);
try {
boolean indexExists = client.resourceExists(true );
// NOTE(review): the generic type of this declaration was mangled by extraction.
List>> shards = null ;
if (!indexExists) {
// NOTE(review): the start of the logging call on the next line is truncated - TODO restore.
if (settings.getIndexReadMissingAsEmpty()) {"Index [%s] missing - treating it as empty" , settings.getResourceRead()));
shards = Collections.emptyList();
} else {
throw new EsHadoopIllegalArgumentException(
String.format("Index [%s] missing and settings [%s] is set to false" , settings.getResourceRead(), ConfigurationOptions.ES_INDEX_READ_MISSING_AS_EMPTY));
} else {
shards = client.getReadTargetShards();
if (log.isTraceEnabled()) {
log.trace("Creating splits for shards " + shards);
// NOTE(review): the start of the logging call on the next line is truncated - TODO restore.
}"Reading from [%s]" , settings.getResourceRead()));
// Mappings are only fetched when there is at least one shard to read.
MappingSet mapping = null ;
if (!shards.isEmpty()) {
mapping = client.getMappings();
if (log.isDebugEnabled()) {
log.debug(String.format("Discovered resolved mapping {%s} for [%s]" , mapping.getResolvedView(), settings.getResourceRead()));
FieldPresenceValidation validation = settings.getReadFieldExistanceValidation();
if (validation.isRequired()) {
MappingUtils.validateMapping(SettingsUtils.determineSourceFields(settings), mapping.getResolvedView(), validation, log);
// Index the discovered nodes by id; stays empty when no nodes were returned.
final Map nodesMap = new HashMap();
if (nodes != null ) {
for (NodeInfo node : nodes) {
nodesMap.put(node.getId(), node);
final List partitions;
// Slice partitions are used only on 5.x or later and only when a per-partition
// document limit is configured; otherwise one partition per shard group.
if (clusterInfo.getMajorVersion().onOrAfter(EsMajorVersion.V_5_X) && settings.getMaxDocsPerPartition() != null ) {
partitions = findSlicePartitions(client.getRestClient(), settings, mapping, nodesMap, shards, log);
} else {
partitions = findShardPartitions(settings, mapping, nodesMap, shards, log);
return partitions;
// NOTE(review): the body of this finally block is not visible - presumably it closes
// the client; confirm against upstream.
} finally {
/**
 * Builds partitions from shard groups, one candidate partition per group.
 *
 * For every group the replicas are scanned to pick up the index name and shard id
 * (the values from the last replica win). A group that yields no index name is either
 * skipped with a warning or causes a failure, depending on
 * {@code settings.getIndexReadAllowRedStatus()}.
 *
 * @param settings   job settings, also passed to the partition builder
 * @param mappingSet mappings for the read resource; may be {@code null}, in which case
 *                   a {@code null} resolved mapping is handed to the builder
 * @param nodes      known nodes keyed by node id (checked with {@code containsKey})
 * @param shards     shard groups, each a list of replica descriptions
 * @param log        logger used for the red-status warning
 * @return the list of partitions
 * @throws IllegalStateException when shard information is missing and red status is
 *         not allowed
 */
static List findShardPartitions (Settings settings, MappingSet mappingSet, Map nodes,
List >> shards, Log log) {
Mapping resolvedMapping = mappingSet == null ? null : mappingSet.getResolvedView();
List partitions = new ArrayList(shards.size());
PartitionDefinition.PartitionDefinitionBuilder partitionBuilder = PartitionDefinition.builder(settings, resolvedMapping);
for (List> group : shards) {
// Sentinels: remain null / -1 when the group has no replica entries.
String index = null ;
int shardId = -1 ;
List locationList = new ArrayList ();
for (Map replica : group) {
ShardInfo shard = new ShardInfo(replica);
index = shard.getIndex();
shardId = shard.getName();
// NOTE(review): the body of this check is not visible - presumably it records the
// node's address in locationList; confirm against upstream.
if (nodes.containsKey(shard.getNode())) {
if (index == null ) {
if (settings.getIndexReadAllowRedStatus()) {
log.warn("Shard information is missing from an index and will not be reached during job execution. " +
"Assuming shard is unavailable and cluster is red! Continuing with read operation by " +
"skipping this shard! This may result in incomplete data retrieval!" );
} else {
throw new IllegalStateException("Could not locate shard information for one of the read indices. " +
"Check your cluster status to see if it is unstable!" );
} else {
// NOTE(review): the right-hand side is truncated (text before ", shardId" is
// missing) and no statement adding the partition to the list is visible - TODO restore.
PartitionDefinition partition =, shardId, locationList.toArray(new String[0 ]));
return partitions;
/**
 * Builds partitions by splitting every shard into slices based on its document count.
 *
 * For each shard group the index name and shard id are taken from the replicas, the
 * number of matching documents is counted through {@code client}, and
 * {@code max(1, numDocs / maxDocsPerPartition)} slices are created for that shard.
 *
 * @param client     REST client used for the per-shard counts
 * @param settings   job settings; must have a maximum documents per partition configured
 * @param mappingSet mappings for the read resource; may be {@code null}
 * @param nodes      known nodes keyed by node id (checked with {@code containsKey})
 * @param shards     shard groups, each a list of replica descriptions
 * @param log        logger used for the red-status warning
 * @return the list of slice partitions
 * @throws IllegalStateException when shard information is missing and red status is
 *         not allowed
 */
static List findSlicePartitions (RestClient client, Settings settings, MappingSet mappingSet,
Map nodes, List >> shards, Log log) {
QueryBuilder query = QueryUtils.parseQueryAndFilters(settings);
Integer maxDocsPerPartition = settings.getMaxDocsPerPartition();
Assert.notNull(maxDocsPerPartition, "Attempting to find slice partitions but maximum documents per partition is not set." );
Resource readResource = new Resource(settings, true );
Mapping resolvedMapping = mappingSet == null ? null : mappingSet.getResolvedView();
PartitionDefinition.PartitionDefinitionBuilder partitionBuilder = PartitionDefinition.builder(settings, resolvedMapping);
List partitions = new ArrayList(shards.size());
for (List> group : shards) {
// Sentinels: remain null / -1 when the group has no replica entries.
String index = null ;
int shardId = -1 ;
List locationList = new ArrayList ();
for (Map replica : group) {
ShardInfo shard = new ShardInfo(replica);
index = shard.getIndex();
shardId = shard.getName();
// NOTE(review): the body of this check is not visible - presumably it records the
// node's address in locationList; confirm against upstream.
if (nodes.containsKey(shard.getNode())) {
String[] locations = locationList.toArray(new String[0 ]);
if (index == null ) {
if (settings.getIndexReadAllowRedStatus()) {
log.warn("Shard information is missing from an index and will not be reached during job execution. " +
"Assuming shard is unavailable and cluster is red! Continuing with read operation by " +
"skipping this shard! This may result in incomplete data retrieval!" );
} else {
throw new IllegalStateException("Could not locate shard information for one of the read indices. " +
"Check your cluster status to see if it is unstable!" );
} else {
long numDocs;
// Typed resources are counted per index/type/shard, untyped ones per index/shard.
if (readResource.isTyped()) {
numDocs = client.count(index, readResource.type(), Integer.toString(shardId), query);
} else {
numDocs = client.countIndexShard(index, Integer.toString(shardId), query);
// Integer division rounds down, and at least one slice is always created.
// NOTE(review): because of the rounding a slice can end up covering more than
// maxDocsPerPartition documents - confirm this is intended.
int numPartitions = (int ) Math.max(1 , numDocs / maxDocsPerPartition);
for (int i = 0 ; i < numPartitions; i++) {
PartitionDefinition.Slice slice = new PartitionDefinition.Slice(i, numPartitions);
// NOTE(review): the argument list is truncated (text before ", shardId" is
// missing) - TODO restore.
partitions.add(, shardId, slice, locations));
return partitions;
/**
 * Looks for a location whose IP address matches one of this machine's interfaces as
 * reported by {@code NetworkUtils.getGlobalInterfaces()}.
 *
 * @param locations candidate addresses, each parsed with
 *                  {@code StringUtils.parseIpAddress}
 * @param log       logger used for debug output when lookups fail
 * @return the first matching entry of {@code locations} (returned as given), or
 *         {@code null} when none matches or a lookup throws
 */
static String checkLocality (String[] locations, Log log) {
try {
InetAddress[] candidates = NetworkUtils.getGlobalInterfaces();
for (String address : locations) {
StringUtils.IpAndPort ipAndPort = StringUtils.parseIpAddress(address);
// Only the IP part is resolved; the port is ignored for the comparison.
InetAddress addr = InetAddress.getByName(ipAndPort.ip);
for (InetAddress candidate : candidates) {
if (addr.equals(candidate)) {
return address;
// Both failures are logged at debug level only and fall through to the null result.
} catch (SocketException e) {
if (log.isDebugEnabled()) {
log.debug("Unable to retrieve the global interfaces of the system" , e);
} catch (UnknownHostException e) {
if (log.isDebugEnabled()) {
log.debug("Unable to retrieve IP address" , e);
return null ;
/**
 * Creates a {@link PartitionReader} for one partition.
 *
 * Visible steps: pin the settings to a local node when no node is pinned yet and one of
 * the partition's locations matches a local interface; discover the cluster info;
 * instantiate the configured value reader; deserialize the partition's mapping (when
 * present); build the {@link ScrollReader}; pin to the current client node when
 * client-only routing is enabled; build the search request (with slice and alias
 * metadata applied where applicable).
 *
 * @param settings  job settings; may be mutated through {@code SettingsUtils.pinNode}
 * @param partition the partition to read
 * @param log       logger used for debug/warn output
 * @return a reader wrapping the scroll reader, a new repository and the request builder
 */
public static PartitionReader createReader (Settings settings, PartitionDefinition partition, Log log) {
if (!SettingsUtils.hasPinnedNode(settings) && partition.getLocations().length > 0 ) {
String pinAddress = checkLocality(partition.getLocations(), log);
if (pinAddress != null ) {
if (log.isDebugEnabled()) {
log.debug(String.format("Partition reader instance [%s] assigned to [%s]" , partition, pinAddress));
SettingsUtils.pinNode(settings, pinAddress);
ClusterInfo clusterInfo = InitializationUtils.discoverClusterInfo(settings, log);
ValueReader reader = ObjectUtils.instantiate(settings.getSerializerValueReaderClassName(), settings);
RestRepository repository = new RestRepository(settings);
// Stays null when the partition carries no serialized mapping.
Mapping fieldMapping = null ;
if (StringUtils.hasText(partition.getSerializedMapping())) {
fieldMapping = IOUtils.deserializeFromJsonString(partition.getSerializedMapping(), Mapping.class);
else {
log.warn(String.format("No mapping found for [%s] - either no index exists or the partition configuration has been corrupted" , partition));
ScrollReader scrollReader = new ScrollReader(ScrollReaderConfigBuilder.builder(reader, fieldMapping, settings));
if (settings.getNodesClientOnly()) {
String clientNode = repository.getRestClient().getCurrentNode();
if (log.isDebugEnabled()) {
log.debug(String.format("Client-node routing detected; partition reader instance [%s] assigned to [%s]" ,
partition, clientNode));
SettingsUtils.pinNode(settings, clientNode);
// Version is requested only when metadata reading and its version flag are both on.
boolean includeVersion = settings.getReadMetadata() && settings.getReadMetadataVersion();
Resource read = new Resource(settings, true );
// NOTE(review): this builder chain looks truncated - it ends after .local(true)
// without a terminating semicolon, so further calls are probably missing.
SearchRequestBuilder requestBuilder =
new SearchRequestBuilder(clusterInfo.getMajorVersion(), includeVersion)
.local(true )
// A slice is only applied when the partition is one of several slices.
if (partition.getSlice() != null && partition.getSlice().max > 1 ) {
requestBuilder.slice(partition.getSlice().id, partition.getSlice().max);
String[] indices = read.index().split("," );
// Alias metadata is looked up only when the partition's index was not named
// explicitly in the read resource.
if (QueryUtils.isExplicitlyRequested(partition.getIndex(), indices) == false ) {
// NOTE(review): the request chain below is truncated - the calls that execute it
// and extract the IndicesAliases are not visible.
IndicesAliases indicesAliases =
new GetAliasesRequestBuilder(repository.getRestClient())
Map aliases = indicesAliases.getAliases(partition.getIndex());
if (aliases != null && aliases.size() > 0 ) {
requestBuilder = applyAliasMetadata(clusterInfo.getMajorVersion(), aliases, requestBuilder, partition.getIndex(), indices);
return new PartitionReader(scrollReader, repository, requestBuilder);
/**
 * Applies alias filters and search routing to a search request for an index that was
 * reached through one or more aliases.
 *
 * The request is returned untouched when {@code index} itself was explicitly
 * requested. Otherwise only aliases that were explicitly requested are considered:
 * their search-routing values and filters are collected, multiple filters are combined
 * as {@code should} clauses of a bool query, and collected routing values are set on
 * the request as a comma-separated string.
 *
 * NOTE(review): several branch and loop bodies are missing from this listing, so the
 * way the alias query is merged into the existing query cannot be fully confirmed here.
 *
 * @param version              major version of the target cluster
 * @param aliases              aliases of {@code index}; its values are iterated
 * @param searchRequestBuilder the request to decorate
 * @param index                concrete index the partition belongs to
 * @param indicesOrAliases     names that were explicitly requested by the user
 * @return {@code searchRequestBuilder}
 * @throws EsHadoopIllegalArgumentException when an alias filter cannot be parsed
 */
static SearchRequestBuilder applyAliasMetadata (EsMajorVersion version,
Map aliases,
SearchRequestBuilder searchRequestBuilder,
String index, String... indicesOrAliases) {
if (QueryUtils.isExplicitlyRequested(index, indicesOrAliases)) {
return searchRequestBuilder;
Set routing = new HashSet();
List aliasFilters = new ArrayList();
for (IndicesAliases.Alias alias : aliases.values()) {
if (QueryUtils.isExplicitlyRequested(alias.getName(), indicesOrAliases)) {
if (StringUtils.hasLength(alias.getSearchRouting())) {
// NOTE(review): loop body not visible - presumably adds each value to routing.
for (String value : alias.getSearchRouting().split("," )) {
if (alias.getFilter() != null ) {
try {
aliasFilters.add(new RawQueryBuilder(alias.getFilter(), false ));
} catch (IOException e) {
// NOTE(review): the IOException is not passed on as the cause here.
throw new EsHadoopIllegalArgumentException("Failed to parse alias filter: [" + alias.getFilter() + "]" );
if (aliasFilters.size() > 0 ) {
QueryBuilder aliasQuery;
// A single filter is used as is; several are OR-ed through a bool query.
if (aliasFilters.size() == 1 ) {
aliasQuery = aliasFilters.get(0 );
} else {
aliasQuery = new BoolQueryBuilder();
for (QueryBuilder filter : aliasFilters) {
((BoolQueryBuilder) aliasQuery).should(filter);
// NOTE(review): the body of the "no existing query" branch is not visible.
if (searchRequestBuilder.query() == null ) {
} else {
BoolQueryBuilder mainQuery = new BoolQueryBuilder();
// NOTE(review): the body for versions after 1.x is not visible; the 1.x path
// wraps the alias query in a constant-score query with boost 0.
if (version.after(EsMajorVersion.V_1_X)) {
} else {
mainQuery.must(new ConstantScoreQueryBuilder().filter(aliasQuery).boost(0.0f ));
if (routing.size() > 0 ) {
searchRequestBuilder.routing(StringUtils.concatenate(routing, "," ));
return searchRequestBuilder;
public static List assignPartitions (List partitions, int currentTask, int totalTasks) {
int esPartitions = partitions.size();
if (totalTasks >= esPartitions) {
return (currentTask >= esPartitions ? Collections.emptyList() : Collections.singletonList(partitions.get(currentTask)));
} else {
int partitionsPerTask = esPartitions / totalTasks;
int remainder = esPartitions % totalTasks;
int partitionsPerCurrentTask = partitionsPerTask;
if (currentTask < remainder) {
int offset = partitionsPerTask * currentTask;
if (currentTask != 0 ) {
offset += (remainder > currentTask ? 1 : remainder);
if (partitionsPerCurrentTask == 1 ) {
return Collections.singletonList(partitions.get(offset));
List pa = new ArrayList(partitionsPerCurrentTask);
for (int index = offset; index < offset + partitionsPerCurrentTask; index++) {
return pa;
/**
 * Creates an iterator that reads the given partition definitions one after another.
 *
 * @param settings    settings handed to the iterator for creating readers
 * @param definitions partition definitions to read
 * @param log         logger handed to the iterator
 * @return a new {@link MultiReaderIterator}
 */
public static MultiReaderIterator multiReader (Settings settings, List definitions, Log log) {
return new MultiReaderIterator(definitions, settings, log);
/**
 * Creates a {@link PartitionWriter} for one write split.
 *
 * Visible steps: discover/validate the cluster and nodes and apply the node filters;
 * pin the settings to one of the discovered or declared nodes; resolve the write
 * resource; then pick the repository initialisation depending on whether the resource
 * is an index pattern, an alias, or a single index.
 *
 * @param settings     job settings; mutated through {@code SettingsUtils.pinNode}
 * @param currentSplit index of this split; a negative value selects a random node
 * @param totalSplits  total number of splits, stored on the returned writer
 * @param log          logger used for debug output
 * @return the writer for this split
 * @throws EsHadoopIllegalArgumentException when the resource is not a pattern and its
 *         index name fails {@code StringUtils.isValidSingularIndexName}
 */
public static PartitionWriter createWriter (Settings settings, long currentSplit, int totalSplits, Log log) {
InitializationUtils.discoverAndValidateClusterInfo(settings, log);
InitializationUtils.discoverNodesIfNeeded(settings, log);
InitializationUtils.filterNonClientNodesIfNeeded(settings, log);
InitializationUtils.filterNonDataNodesIfNeeded(settings, log);
InitializationUtils.filterNonIngestNodesIfNeeded(settings, log);
List nodes = SettingsUtils.discoveredOrDeclaredNodes(settings);
// Spread splits over the nodes round-robin; unknown (negative) splits pick at random.
int selectedNode = (currentSplit < 0 ) ? new Random().nextInt(nodes.size()) : (int )(currentSplit % nodes.size());
SettingsUtils.pinNode(settings, nodes.get(selectedNode));
// NOTE(review): the logging call that followed the Resource creation on the next line
// is truncated - TODO restore.
Resource resource = new Resource(settings, false );"Writing to [%s]" , resource));
IndexExtractor iformat = ObjectUtils.instantiate(settings.getMappingIndexExtractorClassName(), settings);
RestRepository repository;
if (iformat.hasPattern()) {
repository = initMultiIndices(settings, currentSplit, resource, log);
} else {
if (!StringUtils.isValidSingularIndexName(resource.index())) {
throw new EsHadoopIllegalArgumentException("Illegal write index name [" + resource.index() + "]. Write resources must " +
"be lowercase singular index names, with no illegal pattern characters except for multi-resource writes." );
// A throw-away client is used only to find out whether the name is an alias.
RestClient bootstrap = new RestClient(settings);
GetAliasesRequestBuilder.Response response = null ;
try {
response = new GetAliasesRequestBuilder(bootstrap).aliases(resource.index()).execute();
} catch (EsHadoopInvalidRequest remoteException) {
// A rejected alias lookup is treated as "not an alias"; response stays null.
if (log.isDebugEnabled()) {
log.debug(String.format("Provided index name [%s] is not an alias. Reason: [%s]" ,
resource.index(), remoteException.getMessage()));
// NOTE(review): the body of this finally block is not visible - presumably it closes
// the bootstrap client; confirm against upstream.
} finally {
if (response != null && response.hasAliases()) {
repository = initAliasWrite(response, settings, currentSplit, resource, log);
} else {
repository = initSingleIndex(settings, currentSplit, resource, log);
return new PartitionWriter(settings, currentSplit, totalSplits, repository);
/**
 * Prepares a repository for writing to a single concrete index.
 *
 * The index is touched and, if that reports true, the code waits for yellow health.
 * In WAN-only or client-only mode the initial repository is returned as is. Otherwise
 * the primary write shards are looked up, one is chosen from {@code currentInstance},
 * the settings are pinned to that shard's node and a new repository is created.
 *
 * @param settings        job settings; mutated through {@code SettingsUtils.pinNode}
 * @param currentInstance index of the writer instance; values {@code <= 0} are replaced
 *                        by a random value in {@code [1, targetShards.size()]}
 * @param resource        the write resource, used in log messages
 * @param log             logger used for debug/trace/warn output
 * @return the repository to write through
 */
private static RestRepository initSingleIndex (Settings settings, long currentInstance, Resource resource, Log log) {
if (log.isDebugEnabled()) {
log.debug(String.format("Resource [%s] resolves as a single index" , resource));
RestRepository repository = new RestRepository(settings);
if (repository.touch()) {
// The warning below is issued when waitForYellow() returns true.
if (repository.waitForYellow()) {
log.warn(String.format("Timed out waiting for index [%s] to reach yellow health" , resource));
// WAN-only: keep the node that is already pinned and skip shard discovery.
if (settings.getNodesWANOnly()) {
String node = SettingsUtils.getPinnedNode(settings);
if (log.isDebugEnabled()) {
log.debug(String.format("Partition writer instance [%s] assigned to [%s]" , currentInstance, node));
return repository;
// Client-only: keep whatever node the client currently talks to.
if (settings.getNodesClientOnly()) {
String clientNode = repository.getRestClient().getCurrentNode();
if (log.isDebugEnabled()) {
log.debug(String.format("Client-node routing detected; partition writer instance [%s] assigned to [%s]" ,
currentInstance, clientNode));
return repository;
Map targetShards = repository.getWriteTargetPrimaryShards(settings.getNodesClientOnly());
// NOTE(review): the next line is a dangling String.format - the call it was an
// argument of (presumably a check that targetShards is usable) is truncated.
String.format("Cannot determine write shards for [%s]; likely its format is incorrect (maybe it contains illegal characters? or all shards failed?)" , resource));
List orderedShards = new ArrayList(targetShards.keySet());
if (log.isTraceEnabled()) {
log.trace(String.format("Partition writer instance [%s] discovered [%s] primary shards %s" , currentInstance, orderedShards.size(), orderedShards));
if (currentInstance <= 0 ) {
currentInstance = new Random().nextInt(targetShards.size()) + 1 ;
// Map the instance number onto one of the primary shards.
int bucket = (int )(currentInstance % targetShards.size());
ShardInfo chosenShard = orderedShards.get(bucket);
NodeInfo targetNode = targetShards.get(chosenShard);
SettingsUtils.pinNode(settings, targetNode.getPublishAddress());
String node = SettingsUtils.getPinnedNode(settings);
// A fresh repository is created after the settings were pinned to the chosen node.
repository = new RestRepository(settings);
if (log.isDebugEnabled()) {
log.debug(String.format("Partition writer instance [%s] assigned to primary shard [%s] at address [%s]" ,
currentInstance, chosenShard.getName(), node));
return repository;
/**
 * Prepares a repository for a write resource that is an index pattern. No shard lookup
 * is done; the repository is created from the settings as they are, and the currently
 * pinned node is only read for logging.
 *
 * @param settings        job settings used to create the repository
 * @param currentInstance index of the writer instance, used in log messages
 * @param resource        the write resource, used in log messages
 * @param log             logger used for debug output
 * @return a new repository
 */
private static RestRepository initMultiIndices (Settings settings, long currentInstance, Resource resource, Log log) {
if (log.isDebugEnabled()) {
log.debug(String.format("Resource [%s] resolves as an index pattern" , resource));
String node = SettingsUtils.getPinnedNode(settings);
if (log.isDebugEnabled()) {
log.debug(String.format("Partition writer instance [%s] assigned to [%s]" , currentInstance, node));
return new RestRepository(settings);
/**
 * Prepares a repository for a write resource that is an index alias.
 *
 * The alias must resolve to at least one index. When it resolves to several, one of
 * them must be flagged as the write index; with exactly one index no flag is needed.
 * The repository itself is created from the unchanged settings - the checks here only
 * validate and log.
 *
 * @param response        result of the alias lookup for the resource
 * @param settings        job settings used to create the repository
 * @param currentInstance index of the writer instance, used in log messages
 * @param resource        the write resource (the alias name is {@code resource.index()})
 * @param log             logger used for debug output
 * @return a new repository
 * @throws EsHadoopIllegalArgumentException when the alias has no entries, or has
 *         several indices none of which is the write index
 */
private static RestRepository initAliasWrite (GetAliasesRequestBuilder.Response response, Settings settings, long currentInstance,
Resource resource, Log log) {
if (log.isDebugEnabled()) {
log.debug(String.format("Resource [%s] resolves as an index alias" , resource));
// NOTE(review): the generic type of this declaration was mangled by extraction.
Map> indexAliasTable = response.getIndices().getAll();
if (indexAliasTable.size() < 1 ) {
throw new EsHadoopIllegalArgumentException("Cannot initialize alias write resource [" + resource.index() +
"] if it does not have any alias entries." );
} else if (indexAliasTable.size() > 1 ) {
// Several indices behind the alias: look for the first one flagged as write index.
String currentWriteIndex = null ;
for (Map.Entry> indexRow : indexAliasTable.entrySet()) {
String indexName = indexRow.getKey();
Map aliases = indexRow.getValue();
// NOTE(review): assumes every row contains an entry for this alias; a missing
// entry would make aliasInfo null - verify against the response contract.
IndicesAliases.Alias aliasInfo = aliases.get(resource.index());
if (aliasInfo.isWriteIndex()) {
currentWriteIndex = indexName;
break ;
if (currentWriteIndex == null ) {
throw new EsHadoopIllegalArgumentException("Attempting to write to alias [" + resource.index() + "], " +
"but detected multiple indices [" + indexAliasTable.size() + "] with no write index selected. " +
"Bailing out..." );
} else {
if (log.isDebugEnabled()) {
log.debug(String.format("Writing to currently configured write-index [%s]" , currentWriteIndex));
} else {
// Exactly one index behind the alias.
if (log.isDebugEnabled()) {
log.debug(String.format("Writing to the alias's single configured index [%s]" , indexAliasTable.keySet().iterator().next()));
String node = SettingsUtils.getPinnedNode(settings);
if (log.isDebugEnabled()) {
log.debug(String.format("Partition writer instance [%s] assigned to [%s]" , currentInstance, node));
return new RestRepository(settings);