
org.elasticsearch.ingest.IngestDocument Maven / Gradle / Ivy
Show all versions of elasticsearch Show documentation
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/
package org.elasticsearch.ingest;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.util.Maps;
import org.elasticsearch.common.util.set.Sets;
import org.elasticsearch.index.VersionType;
import org.elasticsearch.index.mapper.IdFieldMapper;
import org.elasticsearch.index.mapper.IndexFieldMapper;
import org.elasticsearch.index.mapper.RoutingFieldMapper;
import org.elasticsearch.index.mapper.SourceFieldMapper;
import org.elasticsearch.index.mapper.VersionFieldMapper;
import org.elasticsearch.script.CtxMap;
import org.elasticsearch.script.TemplateScript;
import java.time.ZoneOffset;
import java.time.ZonedDateTime;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Base64;
import java.util.Collection;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.function.BiConsumer;
import java.util.stream.Collectors;
/**
* Represents a single document being captured before indexing and holds the source and metadata (like id, type and index).
*/
public final class IngestDocument {
// Key under which the ingest metadata map is exposed in the template model (see initializeTemplateModel()).
public static final String INGEST_KEY = "_ingest";
// Key under which the ctxMap is exposed in the template model (see initializeTemplateModel()).
public static final String SOURCE_KEY = SourceFieldMapper.NAME; // "_source"
// Dotted forms of the two keys above ("_ingest." and SOURCE_KEY + ".").
// NOTE(review): their use is outside this section — presumably they are matched against the start of a field path; confirm.
private static final String INGEST_KEY_PREFIX = INGEST_KEY + ".";
private static final String SOURCE_PREFIX = SOURCE_KEY + ".";
// Message prefix for pipeline cycle errors; the code that appends to it is outside this section.
public static final String PIPELINE_CYCLE_ERROR_MESSAGE = "Cycle detected for pipeline: ";
// Key of the timestamp entry that the primary constructor stores in the ingest metadata.
static final String TIMESTAMP = "timestamp";
// Holds the document source together with its metadata (both are reachable via getSource() / getMetadata()).
private final IngestCtxMap ctxMap;
// Ingest-level metadata, exposed to templates under INGEST_KEY.
// NOTE(review): raw Map — the type arguments appear to have been lost; presumably Map<String, Object>, confirm.
private final Map ingestMetadata;
/**
* Shallowly read-only, very limited, map-like view of the ctxMap and ingestMetadata,
* for providing as a model to TemplateScript and ValueSource instances. This avoids the cost of
* constructing a purpose-built map on each template evaluation.
*/
private final DelegatingMapView templateModel;
// Contains all pipelines that have been executed for this document
// NOTE(review): raw Set — the element type appears to have been lost; confirm against the code that adds to it.
private final Set executedPipelines = new LinkedHashSet<>();
/**
* An ordered set of the values of the _index that have been used for this document.
*
* IMPORTANT: This is only updated after a top-level pipeline has run (see {@code IngestService#executePipelines(...)}).
*
* For example, if a processor changes the _index for a document from 'foo' to 'bar',
* and then another processor changes the value back to 'foo', then the overall effect
* of the pipeline was that the _index value did not change and so only 'foo' would appear
* in the index history.
*/
private Set indexHistory = new LinkedHashSet<>();
// Both flags default to false; the code that reads and writes them is outside this section.
private boolean doNoSelfReferencesCheck = false;
private boolean reroute = false;
public IngestDocument(String index, String id, long version, String routing, VersionType versionType, Map source) {
this.ctxMap = new IngestCtxMap(index, id, version, routing, versionType, ZonedDateTime.now(ZoneOffset.UTC), source);
this.ingestMetadata = new HashMap<>();
this.ingestMetadata.put(TIMESTAMP, ctxMap.getMetadata().getNow());
this.templateModel = initializeTemplateModel();
// initialize the index history by putting the current index into it
this.indexHistory.add(index);
}
// note: the rest of these constructors deal with the data-centric view of the IngestDocument, not the execution-centric view.
// For example, the copy constructor doesn't populate the `executedPipelines` or `indexHistory` (as well as some other fields),
// because those fields are execution-centric.
/**
* Copy constructor that creates a new {@link IngestDocument} which has exactly the same properties as the one provided.
*/
public IngestDocument(IngestDocument other) {
// Delegates to the (IngestCtxMap, Map) constructor, which assigns only ctxMap, ingestMetadata and templateModel;
// executedPipelines and indexHistory therefore keep their freshly initialized (empty) values.
this(
// the source is copied with deepCopyMap and the metadata with clone()
new IngestCtxMap(deepCopyMap(other.ctxMap.getSource()), other.ctxMap.getMetadata().clone()),
// the ingest metadata is copied with deepCopyMap as well
deepCopyMap(other.ingestMetadata)
);
}
/**
* Constructor to create an IngestDocument from its constituent maps. The maps are shallow copied.
*/
public IngestDocument(Map sourceAndMetadata, Map ingestMetadata) {
    final Map source;
    final Map metadata;
    if (sourceAndMetadata instanceof IngestCtxMap existing) {
        // already split into source and metadata: shallow-copy each part
        source = new HashMap<>(existing.getSource());
        metadata = new HashMap<>(existing.getMetadata().getMap());
    } else {
        // a single flat map: shallow-copy it, then move every known metadata key out of the copy
        metadata = Maps.newHashMapWithExpectedSize(Metadata.METADATA_NAMES.size());
        source = new HashMap<>(sourceAndMetadata);
        for (String metadataName : Metadata.METADATA_NAMES) {
            if (sourceAndMetadata.containsKey(metadataName) == false) {
                continue;
            }
            metadata.put(metadataName, source.remove(metadataName));
        }
    }
    // the timestamp for the document metadata is looked up in the supplied ingest metadata
    final IngestDocMetadata docMetadata = new IngestDocMetadata(metadata, IngestCtxMap.getTimestamp(ingestMetadata));
    this.ctxMap = new IngestCtxMap(source, docMetadata);
    this.ingestMetadata = new HashMap<>(ingestMetadata);
    this.templateModel = initializeTemplateModel();
}
/**
* Constructor to create an IngestDocument from its constituent maps.
*/
IngestDocument(IngestCtxMap ctxMap, Map ingestMetadata) {
    // both arguments are adopted as-is (no copy); reject nulls up front, checking ctxMap first
    Objects.requireNonNull(ctxMap);
    Objects.requireNonNull(ingestMetadata);
    this.ctxMap = ctxMap;
    this.ingestMetadata = ingestMetadata;
    // built last because it reads the two fields assigned above
    this.templateModel = initializeTemplateModel();
}
/**
* Builds the map-like view that is handed to templates. The view is constructed from the ctxMap plus an immutable
* two-entry map that binds {@link #SOURCE_KEY} to the ctxMap itself and {@link #INGEST_KEY} to the ingest metadata.
* Must only be called after both {@code ctxMap} and {@code ingestMetadata} have been assigned, because
* {@code Map.of} rejects null values.
*/
private DelegatingMapView initializeTemplateModel() {
return new DelegatingMapView(ctxMap, Map.of(SOURCE_KEY, ctxMap, INGEST_KEY, ingestMetadata));
}
/**
* Returns the value contained in the document for the provided path
* @param path The path within the document in dot-notation
* @param clazz The expected class of the field value
* @return the value for the provided path if existing
* @throws IllegalArgumentException if the path is null, empty, invalid, if the field doesn't exist
* or if the field that is found at the provided path is not of the expected type.
*/
public <T> T getFieldValue(String path, Class<T> clazz) {
    // The type parameter declaration and the Class<T> argument were missing, leaving T undeclared.
    // Strict lookup: delegate with ignoreMissing disabled, so an unresolvable path raises IllegalArgumentException.
    return getFieldValue(path, clazz, false);
}
/**
* Returns the value contained in the document for the provided path
*
* @param path The path within the document in dot-notation
* @param clazz The expected class of the field value
* @param ignoreMissing The flag to determine whether to throw an exception when `path` is not found in the document.
* @return the value for the provided path if existing, null otherwise.
* @throws IllegalArgumentException only if ignoreMissing is false and the path is null, empty, invalid, if the field doesn't exist
* or if the field that is found at the provided path is not of the expected type.
*/
public <T> T getFieldValue(String path, Class<T> clazz, boolean ignoreMissing) {
    // The type parameter declaration and the Class<T> argument were missing, leaving T undeclared.
    final FieldPath fieldPath = new FieldPath(path);
    Object context = fieldPath.initialContext;
    // descend one path element at a time, starting from the path's initial context
    for (String pathElement : fieldPath.pathElements) {
        final ResolveResult result = resolve(pathElement, path, context);
        if (result.wasSuccessful == false) {
            // a failed step is only forgiven when the caller asked for it AND the field really is absent;
            // any other failure is reported with the resolver's own message
            if (ignoreMissing && hasField(path) == false) {
                return null;
            }
            throw new IllegalArgumentException(result.errorMessage);
        }
        context = result.resolvedObject;
    }
    // the fully resolved value still has to be of the requested type
    return cast(path, context, clazz);
}
/**
* Returns the value contained in the document with the provided templated path
* @param pathTemplate The path within the document in dot-notation
* @param clazz The expected class of the field value
* @return the value for the provided path if existing
* @throws IllegalArgumentException if the pathTemplate is null, empty, invalid, if the field doesn't exist,
* or if the field that is found at the provided path is not of the expected type.
*/
public <T> T getFieldValue(TemplateScript.Factory pathTemplate, Class<T> clazz) {
    // The type parameter declaration and the Class<T> argument were missing, leaving T undeclared.
    // Render the template into a concrete path first, then perform the strict (throwing) lookup on it.
    final String path = renderTemplate(pathTemplate);
    return getFieldValue(path, clazz);
}
/**
* Returns the value contained in the document for the provided path as a byte array.
* If the path value is a string, a base64 decode operation will happen.
* If the path value is a byte array, it is just returned
* @param path The path within the document in dot-notation
* @return the byte array for the provided path if existing
* @throws IllegalArgumentException if the path is null, empty, invalid, if the field doesn't exist
* or if the field that is found at the provided path is not of the expected type.
*/
public byte[] getFieldValueAsBytes(String path) {
    // strict variant: a missing field is not ignored
    final boolean ignoreMissing = false;
    return getFieldValueAsBytes(path, ignoreMissing);
}
/**
* Returns the value contained in the document for the provided path as a byte array.
* If the path value is a string, a base64 decode operation will happen.
* If the path value is a byte array, it is just returned
* @param path The path within the document in dot-notation
* @param ignoreMissing The flag to determine whether to throw an exception when `path` is not found in the document.
* @return the byte array for the provided path if existing, or null if the value is null or the field is missing and ignoreMissing is true
* @throws IllegalArgumentException if the path is null, empty, invalid, if the field doesn't exist
* or if the field that is found at the provided path is not of the expected type.
*/
public byte[] getFieldValueAsBytes(String path, boolean ignoreMissing) {
    final Object raw = getFieldValue(path, Object.class, ignoreMissing);
    // a byte array is handed back untouched
    if (raw instanceof byte[] alreadyBytes) {
        return alreadyBytes;
    }
    // a string is treated as base64 and decoded
    if (raw instanceof String encoded) {
        return Base64.getDecoder().decode(encoded);
    }
    // nothing there (null value, or missing field with ignoreMissing set)
    if (raw == null) {
        return null;
    }
    throw new IllegalArgumentException(
        "Content field [" + path + "] of unknown type [" + raw.getClass().getName() + "], must be string or byte array"
    );
}
/**
* Checks whether the document contains a value for the provided templated path
* @param fieldPathTemplate the template for the path within the document in dot-notation
* @return true if the document contains a value for the field, false otherwise
* @throws IllegalArgumentException if the path is null, empty or invalid
*/
public boolean hasField(TemplateScript.Factory fieldPathTemplate) {
    // turn the template into a concrete path, then run the plain path check
    final String path = renderTemplate(fieldPathTemplate);
    return hasField(path);
}
/**
* Checks whether the document contains a value for the provided path
* @param path The path within the document in dot-notation
* @return true if the document contains a value for the field, false otherwise
* @throws IllegalArgumentException if the path is null, empty or invalid.
*/
public boolean hasField(String path) {
    // lenient variant: an out-of-range list index simply means "no such field"
    final boolean failOutOfRange = false;
    return hasField(path, failOutOfRange);
}
/**
* Checks whether the document contains a value for the provided path
* @param path The path within the document in dot-notation
* @param failOutOfRange Whether to throw an IllegalArgumentException if array is accessed outside of its range
* @return true if the document contains a value for the field, false otherwise
* @throws IllegalArgumentException if the path is null, empty or invalid.
*/
public boolean hasField(String path, boolean failOutOfRange) {
FieldPath fieldPath = new FieldPath(path);
Object context = fieldPath.initialContext;
for (int i = 0; i < fieldPath.pathElements.length - 1; i++) {
String pathElement = fieldPath.pathElements[i];
if (context == null) {
return false;
}
if (context instanceof Map, ?> map) {
context = map.get(pathElement);
} else if (context instanceof List> list) {
try {
int index = Integer.parseInt(pathElement);
if (index < 0 || index >= list.size()) {
if (failOutOfRange) {
throw new IllegalArgumentException(
"["
+ index
+ "] is out of bounds for array with length ["
+ list.size()
+ "] as part of path ["
+ path
+ "]"
);
} else {
return false;
}
}
context = list.get(index);
} catch (NumberFormatException e) {
return false;
}
} else {
return false;
}
}
String leafKey = fieldPath.pathElements[fieldPath.pathElements.length - 1];
if (context instanceof Map, ?> map) {
return map.containsKey(leafKey);
}
if (context instanceof List> list) {
try {
int index = Integer.parseInt(leafKey);
if (index >= 0 && index < list.size()) {
return true;
} else {
if (failOutOfRange) {
throw new IllegalArgumentException(
"[" + index + "] is out of bounds for array with length [" + list.size() + "] as part of path [" + path + "]"
);
} else {
return false;
}
}
} catch (NumberFormatException e) {
return false;
}
}
return false;
}
/**
* Removes the field identified by the provided path.
* @param fieldPathTemplate Resolves to the path with dot-notation within the document
* @throws IllegalArgumentException if the path is null, empty, invalid or if the field doesn't exist.
*/
public void removeField(TemplateScript.Factory fieldPathTemplate) {
    // turn the template into a concrete path, then remove by path
    final String path = renderTemplate(fieldPathTemplate);
    removeField(path);
}
/**
* Removes the field identified by the provided path.
* @param path the path of the field to be removed
* @throws IllegalArgumentException if the path is null, empty, invalid or if the field doesn't exist.
*/
public void removeField(String path) {
FieldPath fieldPath = new FieldPath(path);
Object context = fieldPath.initialContext;
for (int i = 0; i < fieldPath.pathElements.length - 1; i++) {
ResolveResult result = resolve(fieldPath.pathElements[i], path, context);
if (result.wasSuccessful) {
context = result.resolvedObject;
} else {
throw new IllegalArgumentException(result.errorMessage);
}
}
String leafKey = fieldPath.pathElements[fieldPath.pathElements.length - 1];
if (context instanceof Map, ?> map) {
if (map.containsKey(leafKey)) {
map.remove(leafKey);
return;
}
throw new IllegalArgumentException("field [" + leafKey + "] not present as part of path [" + path + "]");
}
if (context instanceof List> list) {
int index;
try {
index = Integer.parseInt(leafKey);
} catch (NumberFormatException e) {
throw new IllegalArgumentException(
"[" + leafKey + "] is not an integer, cannot be used as an index as part of path [" + path + "]",
e
);
}
if (index < 0 || index >= list.size()) {
throw new IllegalArgumentException(
"[" + index + "] is out of bounds for array with length [" + list.size() + "] as part of path [" + path + "]"
);
}
list.remove(index);
return;
}
if (context == null) {
throw new IllegalArgumentException("cannot remove [" + leafKey + "] from null as part of path [" + path + "]");
}
throw new IllegalArgumentException(
"cannot remove [" + leafKey + "] from object of type [" + context.getClass().getName() + "] as part of path [" + path + "]"
);
}
private static ResolveResult resolve(String pathElement, String fullPath, Object context) {
if (context == null) {
return ResolveResult.error("cannot resolve [" + pathElement + "] from null as part of path [" + fullPath + "]");
}
if (context instanceof Map, ?> map) {
if (map.containsKey(pathElement)) {
return ResolveResult.success(map.get(pathElement));
}
return ResolveResult.error("field [" + pathElement + "] not present as part of path [" + fullPath + "]");
}
if (context instanceof List> list) {
int index;
try {
index = Integer.parseInt(pathElement);
} catch (NumberFormatException e) {
return ResolveResult.error(
"[" + pathElement + "] is not an integer, cannot be used as an index as part of path [" + fullPath + "]"
);
}
if (index < 0 || index >= list.size()) {
return ResolveResult.error(
"[" + index + "] is out of bounds for array with length [" + list.size() + "] as part of path [" + fullPath + "]"
);
}
return ResolveResult.success(list.get(index));
}
return ResolveResult.error(
"cannot resolve ["
+ pathElement
+ "] from object of type ["
+ context.getClass().getName()
+ "] as part of path ["
+ fullPath
+ "]"
);
}
/**
* Appends the provided value to the provided path in the document.
* Any non existing path element will be created.
* If the path identifies a list, the value will be appended to the existing list.
* If the path identifies a scalar, the scalar will be converted to a list and
* the provided value will be added to the newly created list.
* Supports multiple values too provided in forms of list, in that case all the values will be appended to the
* existing (or newly created) list.
* @param path The path within the document in dot-notation
* @param value The value or values to append to the existing ones
* @throws IllegalArgumentException if the path is null, empty or invalid.
*/
public void appendFieldValue(String path, Object value) {
    // this overload always allows duplicates
    final boolean allowDuplicates = true;
    appendFieldValue(path, value, allowDuplicates);
}
/**
* Appends the provided value to the provided path in the document.
* Any non existing path element will be created.
* If the path identifies a list, the value will be appended to the existing list.
* If the path identifies a scalar, the scalar will be converted to a list and
* the provided value will be added to the newly created list.
* Supports multiple values too provided in forms of list, in that case all the values will be appended to the
* existing (or newly created) list.
* @param path The path within the document in dot-notation
* @param value The value or values to append to the existing ones
* @param allowDuplicates When false, any values that already exist in the field will not be added
* @throws IllegalArgumentException if the path is null, empty or invalid.
*/
public void appendFieldValue(String path, Object value, boolean allowDuplicates) {
    // appending is the shared private setter invoked in append mode
    final boolean append = true;
    setFieldValue(path, value, append, allowDuplicates);
}
/**
* Appends the provided value to the provided path in the document.
* Any non existing path element will be created.
* If the path identifies a list, the value will be appended to the existing list.
* If the path identifies a scalar, the scalar will be converted to a list and
* the provided value will be added to the newly created list.
* Supports multiple values too provided in forms of list, in that case all the values will be appended to the
* existing (or newly created) list.
* @param fieldPathTemplate Resolves to the path with dot-notation within the document
* @param valueSource The value source that will produce the value or values to append to the existing ones
* @param allowDuplicates When false, any values that already exist in the field will not be added
* @throws IllegalArgumentException if the path is null, empty or invalid.
*/
public void appendFieldValue(TemplateScript.Factory fieldPathTemplate, ValueSource valueSource, boolean allowDuplicates) {
    // evaluate the path template first and the value source second, both against the template model
    final String path = fieldPathTemplate.newInstance(templateModel).execute();
    final Object resolvedValue = valueSource.copyAndResolve(templateModel);
    appendFieldValue(path, resolvedValue, allowDuplicates);
}
/**
* Sets the provided value to the provided path in the document.
* Any non existing path element will be created.
* If the last item in the path is a list, the value will replace the existing list as a whole.
* Use {@link #appendFieldValue(String, Object)} to append values to lists instead.
* @param path The path within the document in dot-notation
* @param value The value to put in for the path key
* @throws IllegalArgumentException if the path is null, empty, invalid or if the value cannot be set to the
* item identified by the provided path.
*/
public void setFieldValue(String path, Object value) {
    // plain set: not in append mode; the duplicates flag is passed as true
    final boolean append = false;
    final boolean allowDuplicates = true;
    setFieldValue(path, value, append, allowDuplicates);
}
/**
* Sets the provided value to the provided path in the document.
* Any non existing path element will be created. If the last element is a list,
* the value will replace the existing list.
* @param fieldPathTemplate Resolves to the path with dot-notation within the document
* @param valueSource The value source that will produce the value to put in for the path key
* @throws IllegalArgumentException if the path is null, empty, invalid or if the value cannot be set to the
* item identified by the provided path.
*/
public void setFieldValue(TemplateScript.Factory fieldPathTemplate, ValueSource valueSource) {
    // evaluate the path template first and the value source second, both against the template model
    final String path = fieldPathTemplate.newInstance(templateModel).execute();
    final Object resolvedValue = valueSource.copyAndResolve(templateModel);
    setFieldValue(path, resolvedValue);
}
/**
* Sets the provided value to the provided path in the document.
* Any non existing path element will be created. If the last element is a list,
* the value will replace the existing list.
* @param fieldPathTemplate Resolves to the path with dot-notation within the document
* @param valueSource The value source that will produce the value to put in for the path key
* @param ignoreEmptyValue The flag to determine whether to exit quietly when the value produced by TemplatedValue is null or empty
* @throws IllegalArgumentException if the path is null, empty, invalid or if the value cannot be set to the
* item identified by the provided path.
*/
public void setFieldValue(TemplateScript.Factory fieldPathTemplate, ValueSource valueSource, boolean ignoreEmptyValue) {
    final Object value = valueSource.copyAndResolve(templateModel);
    // the quiet exit only applies to templated value sources; their resolved value is cast to String here
    final boolean mayExitQuietly = ignoreEmptyValue && valueSource instanceof ValueSource.TemplatedValue;
    if (mayExitQuietly && (value == null || ((String) value).isEmpty())) {
        return;
    }
    // the path template is only evaluated once it is known that the value will actually be written
    final String path = fieldPathTemplate.newInstance(templateModel).execute();
    setFieldValue(path, value);
}
/**
* Sets the provided value to the provided path in the document.
* Any non existing path element will be created. If the last element is a list,
* the value will replace the existing list.
* @param fieldPathTemplate Resolves to the path with dot-notation within the document
* @param value The value to put in for the path key
* @param ignoreEmptyValue The flag to determine whether to exit quietly when the value produced by TemplatedValue is null or empty
* @throws IllegalArgumentException if the path is null, empty, invalid or if the value cannot be set to the
* item identified by the provided path.
*/
public void setFieldValue(TemplateScript.Factory fieldPathTemplate, Object value, boolean ignoreEmptyValue) {
    if (ignoreEmptyValue) {
        // null and the empty string are the only values treated as "empty"
        final boolean emptyString = value instanceof String string && string.isEmpty();
        if (value == null || emptyString) {
            return;
        }
    }
    // the path template is only evaluated once it is known that the value will actually be written
    final String path = fieldPathTemplate.newInstance(templateModel).execute();
    setFieldValue(path, value);
}
private void setFieldValue(String path, Object value, boolean append, boolean allowDuplicates) {
FieldPath fieldPath = new FieldPath(path);
Object context = fieldPath.initialContext;
for (int i = 0; i < fieldPath.pathElements.length - 1; i++) {
String pathElement = fieldPath.pathElements[i];
if (context == null) {
throw new IllegalArgumentException("cannot resolve [" + pathElement + "] from null as part of path [" + path + "]");
}
if (context instanceof Map) {
@SuppressWarnings("unchecked")
Map map = (Map) context;
if (map.containsKey(pathElement)) {
context = map.get(pathElement);
} else {
HashMap