org.elasticsearch.xpack.esql.enrich.EnrichPolicyResolver Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of x-pack-esql Show documentation
Show all versions of x-pack-esql Show documentation
The plugin that powers ESQL for Elasticsearch
The newest version!
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0; you may not use this file except in compliance with the Elastic License
* 2.0.
*/
package org.elasticsearch.xpack.esql.enrich;
import org.elasticsearch.action.ActionListener;
import org.elasticsearch.action.ActionListenerResponseHandler;
import org.elasticsearch.action.search.SearchRequest;
import org.elasticsearch.action.support.ChannelActionListener;
import org.elasticsearch.action.support.ContextPreservingActionListener;
import org.elasticsearch.action.support.RefCountingListener;
import org.elasticsearch.cluster.service.ClusterService;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.util.CollectionUtils;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.concurrent.ConcurrentCollections;
import org.elasticsearch.common.util.concurrent.ThreadContext;
import org.elasticsearch.common.util.iterable.Iterables;
import org.elasticsearch.core.Tuple;
import org.elasticsearch.tasks.Task;
import org.elasticsearch.threadpool.ThreadPool;
import org.elasticsearch.transport.RemoteClusterAware;
import org.elasticsearch.transport.Transport;
import org.elasticsearch.transport.TransportChannel;
import org.elasticsearch.transport.TransportRequest;
import org.elasticsearch.transport.TransportRequestHandler;
import org.elasticsearch.transport.TransportRequestOptions;
import org.elasticsearch.transport.TransportResponse;
import org.elasticsearch.transport.TransportService;
import org.elasticsearch.xpack.core.ClientHelper;
import org.elasticsearch.xpack.core.enrich.EnrichMetadata;
import org.elasticsearch.xpack.core.enrich.EnrichPolicy;
import org.elasticsearch.xpack.esql.analysis.EnrichResolution;
import org.elasticsearch.xpack.esql.core.index.EsIndex;
import org.elasticsearch.xpack.esql.core.type.EsField;
import org.elasticsearch.xpack.esql.core.util.StringUtils;
import org.elasticsearch.xpack.esql.plan.logical.Enrich;
import org.elasticsearch.xpack.esql.session.IndexResolver;
import org.elasticsearch.xpack.esql.type.EsqlDataTypes;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.stream.Collectors;
/**
* Resolves enrich policies across clusters in several steps:
* 1. Calculates the policies that need to be resolved for each cluster, see {@link #lookupPolicies}.
* 2. Sends out {@link LookupRequest} to each cluster to resolve policies. Internally, a remote cluster handles the lookup in two steps:
* - 2.1 Ensures the caller has permission to access the enrich policies.
* - 2.2 For each found enrich policy, uses {@link IndexResolver} to resolve the mappings of the concrete enrich index.
* 3. For each unresolved policy, combines the lookup results to compute the actual enrich policy and mappings depending on the enrich mode.
* This approach requires at most one cross-cluster call for each cluster.
*/
public class EnrichPolicyResolver {
// Transport action name under which the policy lookup request handler is registered in the constructor.
private static final String RESOLVE_ACTION_NAME = "cluster:monitor/xpack/enrich/esql/resolve_policy";
private final ClusterService clusterService;
// Per the class Javadoc, used to resolve the mappings of the concrete enrich indices.
private final IndexResolver indexResolver;
// Transport service on which the lookup request handler is registered.
private final TransportService transportService;
// Thread pool obtained from the transport service in the constructor.
private final ThreadPool threadPool;
public EnrichPolicyResolver(ClusterService clusterService, TransportService transportService, IndexResolver indexResolver) {
this.clusterService = clusterService;
this.transportService = transportService;
this.indexResolver = indexResolver;
this.threadPool = transportService.getThreadPool();
transportService.registerRequestHandler(
RESOLVE_ACTION_NAME,
threadPool.executor(ThreadPool.Names.SEARCH),
LookupRequest::new,
new RequestHandler()
);
}
public record UnresolvedPolicy(String name, Enrich.Mode mode) {
}
/**
 * Resolves a set of enrich policies against the given target clusters.
 * <p>
 * When there is nothing to resolve (no policies or no target clusters) the listener is completed
 * immediately with an empty {@link EnrichResolution}. Otherwise the policies are looked up on the
 * clusters and, for every unresolved policy, the per-cluster lookup results are merged into either
 * a resolved policy or an error that is recorded in the resulting {@link EnrichResolution}.
 *
 * @param targetClusters     the target clusters; the local cluster is identified by
 *                           {@link RemoteClusterAware#LOCAL_CLUSTER_GROUP_KEY}
 * @param unresolvedPolicies the unresolved policies
 * @param listener           notified with the enrich resolution
 */
public void resolvePolicies(
    Collection<String> targetClusters,
    Collection<UnresolvedPolicy> unresolvedPolicies,
    ActionListener<EnrichResolution> listener
) {
    if (unresolvedPolicies.isEmpty() || targetClusters.isEmpty()) {
        listener.onResponse(new EnrichResolution());
        return;
    }
    final Set<String> remoteClusters = new HashSet<>(targetClusters);
    // Removing the local key both tells us whether the local cluster is targeted and leaves only the remote clusters.
    final boolean includeLocal = remoteClusters.remove(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY);
    // The lambda parameter is typed explicitly so the merge step below is type-checked regardless of how
    // the listener parameter of lookupPolicies is declared.
    lookupPolicies(remoteClusters, includeLocal, unresolvedPolicies, listener.map((Map<String, LookupResponse> lookupResponses) -> {
        final EnrichResolution enrichResolution = new EnrichResolution();
        for (UnresolvedPolicy unresolved : unresolvedPolicies) {
            Tuple<ResolvedEnrichPolicy, String> resolved = mergeLookupResults(
                unresolved,
                calculateTargetClusters(unresolved.mode(), includeLocal, remoteClusters),
                lookupResponses
            );
            if (resolved.v1() != null) {
                enrichResolution.addResolvedPolicy(unresolved.name(), unresolved.mode(), resolved.v1());
            } else {
                // mergeLookupResults yields either a policy or an error message, never neither.
                assert resolved.v2() != null;
                enrichResolution.addError(unresolved.name(), unresolved.mode(), resolved.v2());
            }
        }
        return enrichResolution;
    }));
}
/**
 * Computes the clusters on which a policy with the given mode must be found.
 * <ul>
 *   <li>{@code ANY}: all remote clusters plus the local cluster.</li>
 *   <li>{@code COORDINATOR}: the local cluster only.</li>
 *   <li>{@code REMOTE}: the remote clusters, plus the local cluster only when it is itself a target.</li>
 * </ul>
 *
 * @param mode           the enrich mode of the policy
 * @param includeLocal   whether the local cluster is one of the target clusters
 * @param remoteClusters the remote target clusters (without the local cluster key)
 * @return the clusters whose lookup results have to be merged for the policy
 */
private Collection<String> calculateTargetClusters(Enrich.Mode mode, boolean includeLocal, Set<String> remoteClusters) {
    return switch (mode) {
        case ANY -> CollectionUtils.appendToCopy(remoteClusters, RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY);
        case COORDINATOR -> List.of(RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY);
        case REMOTE -> includeLocal
            ? CollectionUtils.appendToCopy(remoteClusters, RemoteClusterAware.LOCAL_CLUSTER_GROUP_KEY)
            : remoteClusters;
    };
}
/**
 * Resolves an enrich policy by merging the lookup responses from the target clusters.
 * <p>
 * The policy has to be present in the response of every target cluster, and all clusters have to
 * agree on the match field, the match type, the enrich fields and the data type of every mapped
 * field. The mappings and concrete indices of all clusters are combined into the resolved policy.
 *
 * @param unresolved     the policy to resolve
 * @param targetClusters the clusters on which the policy must be found; must not be empty
 * @param lookupResults  the lookup responses, keyed by cluster
 * @return a tuple holding either the resolved enrich policy (v1) or an error message (v2)
 */
private Tuple<ResolvedEnrichPolicy, String> mergeLookupResults(
    UnresolvedPolicy unresolved,
    Collection<String> targetClusters,
    Map<String, LookupResponse> lookupResults
) {
    assert targetClusters.isEmpty() == false;
    final String policyName = unresolved.name();
    final Map<String, ResolvedEnrichPolicy> policies = new HashMap<>();
    final List<String> failures = new ArrayList<>();
    for (String cluster : targetClusters) {
        LookupResponse lookupResult = lookupResults.get(cluster);
        if (lookupResult != null) {
            ResolvedEnrichPolicy policy = lookupResult.policies.get(policyName);
            if (policy != null) {
                policies.put(cluster, policy);
            } else {
                final String failure = lookupResult.failures.get(policyName);
                if (failure != null) {
                    failures.add(failure);
                }
            }
        }
    }
    // The policy must have been resolved on every target cluster.
    if (targetClusters.size() != policies.size()) {
        final String reason;
        if (failures.isEmpty()) {
            List<String> missingClusters = targetClusters.stream().filter(c -> policies.containsKey(c) == false).sorted().toList();
            reason = missingPolicyError(policyName, targetClusters, missingClusters);
        } else {
            reason = "failed to resolve enrich policy [" + policyName + "]; reason " + failures;
        }
        return Tuple.tuple(null, reason);
    }
    Map<String, EsField> mappings = new HashMap<>();
    Map<String, String> concreteIndices = new HashMap<>();
    ResolvedEnrichPolicy last = null;
    for (Map.Entry<String, ResolvedEnrichPolicy> e : policies.entrySet()) {
        ResolvedEnrichPolicy curr = e.getValue();
        if (last != null && last.matchField().equals(curr.matchField()) == false) {
            String error = "enrich policy [" + policyName + "] has different match fields ";
            error += "[" + last.matchField() + ", " + curr.matchField() + "] across clusters";
            return Tuple.tuple(null, error);
        }
        if (last != null && last.matchType().equals(curr.matchType()) == false) {
            String error = "enrich policy [" + policyName + "] has different match types ";
            error += "[" + last.matchType() + ", " + curr.matchType() + "] across clusters";
            return Tuple.tuple(null, error);
        }
        // merge mappings
        for (Map.Entry<String, EsField> m : curr.mapping().entrySet()) {
            EsField field = m.getValue();
            // Rebuild the field with its data type looked up again by type name through EsqlDataTypes.
            field = new EsField(
                field.getName(),
                EsqlDataTypes.fromTypeName(field.getDataType().typeName()),
                field.getProperties(),
                field.isAggregatable(),
                field.isAlias()
            );
            EsField old = mappings.putIfAbsent(m.getKey(), field);
            if (old != null && old.getDataType().equals(field.getDataType()) == false) {
                String error = "field [" + m.getKey() + "] of enrich policy [" + policyName + "] has different data types ";
                error += "[" + old.getDataType() + ", " + field.getDataType() + "] across clusters";
                return Tuple.tuple(null, error);
            }
        }
        if (last != null) {
            // A field seen in only one of the two policies ends up with a count below 2.
            Map<String, Integer> counts = Maps.newMapWithExpectedSize(last.enrichFields().size());
            last.enrichFields().forEach(f -> counts.put(f, 1));
            curr.enrichFields().forEach(f -> counts.merge(f, 1, Integer::sum));
            // should be sorted-then-limit, but this sorted is for testing only
            var diff = counts.entrySet().stream().filter(f -> f.getValue() < 2).map(Map.Entry::getKey).limit(20).sorted().toList();
            if (diff.isEmpty() == false) {
                String detailed = "these fields are missing in some policies: " + diff;
                return Tuple.tuple(null, "enrich policy [" + policyName + "] has different enrich fields across clusters; " + detailed);
            }
        }
        // merge concrete indices
        concreteIndices.putAll(curr.concreteIndices());
        last = curr;
    }
    assert last != null;
    var resolved = new ResolvedEnrichPolicy(last.matchField(), last.matchType(), last.enrichFields(), concreteIndices, mappings);
    return Tuple.tuple(resolved, null);
}
/**
 * Builds the error message for a policy that could not be found on some of its target clusters.
 * <p>
 * When the local cluster is the only target, the message suggests similarly named policies, if any.
 * Otherwise it lists the clusters on which the policy is missing, with the local cluster shown
 * as {@code _local}.
 *
 * @param policyName      the name of the policy that could not be found
 * @param targetClusters  the clusters on which the policy was expected
 * @param missingClusters the clusters on which the policy was not found; the callers pass at least one
 * @return the error message
 */
private String missingPolicyError(String policyName, Collection<String> targetClusters, List<String> missingClusters) {
    String reason = "cannot find enrich policy [" + policyName + "]";
    // local cluster only: the single target is missing and its key is the empty string
    if (targetClusters.size() == 1 && Iterables.get(missingClusters, 0).isEmpty()) {
        // accessing the policy names directly after we have checked the permission.
        List<String> potentialMatches = StringUtils.findSimilar(policyName, availablePolicies().keySet());
        if (potentialMatches.isEmpty() == false) {
            var suggestion = potentialMatches.size() == 1 ? "[" + potentialMatches.get(0) + "]" : "any of " + potentialMatches;
            reason += ", did you mean " + suggestion + "?";
        }
        return reason;
    }
    String detailed = missingClusters.stream().sorted().map(c -> c.isEmpty() ? "_local" : c).collect(Collectors.joining(", "));
    return reason + " on clusters [" + detailed + "]";
}
private void lookupPolicies(
Collection remoteClusters,
boolean includeLocal,
Collection unresolvedPolicies,
ActionListener