org.elasticsearch.search.aggregations.bucket.terms.support.IncludeExclude Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of elasticsearch Show documentation
Show all versions of elasticsearch Show documentation
Elasticsearch subproject :server
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.terms.support;
import com.carrotsearch.hppc.LongHashSet;
import com.carrotsearch.hppc.LongSet;
import org.apache.lucene.index.RandomAccessOrds;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
import org.apache.lucene.util.automaton.CompiledAutomaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.RegExp;
import org.elasticsearch.ElasticsearchParseException;
import org.elasticsearch.common.ParseField;
import org.elasticsearch.common.ParseFieldMatcher;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.io.stream.Writeable;
import org.elasticsearch.common.xcontent.ToXContent;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.common.xcontent.XContentParser;
import org.elasticsearch.search.DocValueFormat;
import java.io.IOException;
import java.util.HashSet;
import java.util.Map;
import java.util.Objects;
import java.util.Set;
import java.util.SortedSet;
import java.util.TreeSet;
/**
* Defines the include/exclude regular expression filtering for string terms aggregation. In this filtering logic,
* exclusion has precedence, where the {@code include} is evaluated first and then the {@code exclude}.
*/
public class IncludeExclude implements Writeable, ToXContent {
// Field names recognised by Parser.token(): the two clause names accepted in the request body.
private static final ParseField INCLUDE_FIELD = new ParseField("include");
private static final ParseField EXCLUDE_FIELD = new ParseField("exclude");
// Key looked up inside the object form of an include/exclude clause; its string value is stored as the regex.
private static final ParseField PATTERN_FIELD = new ParseField("pattern");
// The includeValues and excludeValues BytesRefs which are the result of the parsing
// process are converted into a LongFilter when used on numeric fields
// in the index.
public static class LongFilter {
    // A null set means "no constraint of that kind": it is only allocated when a positive size is requested.
    private LongSet valids;
    private LongSet invalids;

    /**
     * @param numValids   expected number of accepted values; the accept-set is allocated only if positive
     * @param numInvalids expected number of rejected values; the reject-set is allocated only if positive
     */
    private LongFilter(int numValids, int numInvalids) {
        valids = numValids > 0 ? new LongHashSet(numValids) : null;
        invalids = numInvalids > 0 ? new LongHashSet(numInvalids) : null;
    }

    /**
     * Returns {@code true} when {@code value} is not in the reject-set (if any) and is in the
     * accept-set (if any).
     */
    public boolean accept(long value) {
        if (invalids != null && invalids.contains(value)) {
            return false;
        }
        return valids == null || valids.contains(value);
    }

    // Callers must have requested a positive numValids, otherwise the set was never allocated.
    private void addAccept(long val) {
        valids.add(val);
    }

    // Callers must have requested a positive numInvalids, otherwise the set was never allocated.
    private void addReject(long val) {
        invalids.add(val);
    }
}
// Only used for the 'map' execution mode (i.e. scripts)
/**
 * Predicate over individual term bytes. Concrete variants in this file are backed either by an
 * automaton or by explicit include/exclude term sets.
 */
public abstract static class StringFilter {
/**
 * @param value the term to test
 * @return {@code true} if the term passes the filter
 */
public abstract boolean accept(BytesRef value);
}
static class AutomatonBackedStringFilter extends StringFilter {

    /** Byte-level matcher built once from the automaton handed to the constructor. */
    private final ByteRunAutomaton matcher;

    private AutomatonBackedStringFilter(Automaton automaton) {
        matcher = new ByteRunAutomaton(automaton);
    }

    /**
     * Runs the automaton over the slice of bytes described by {@code value}
     * (its backing array, offset and length) and reports whether it matches.
     */
    @Override
    public boolean accept(BytesRef value) {
        final boolean matched = matcher.run(value.bytes, value.offset, value.length);
        return matched;
    }
}
static class TermListBackedStringFilter extends StringFilter {

    // Either set may be null, meaning "no constraint of that kind".
    private final Set<BytesRef> valids;
    private final Set<BytesRef> invalids;

    /**
     * @param includeValues terms to accept, or {@code null} to accept every term
     * @param excludeValues terms to reject, or {@code null} to reject nothing
     */
    public TermListBackedStringFilter(Set<BytesRef> includeValues, Set<BytesRef> excludeValues) {
        this.valids = includeValues;
        this.invalids = excludeValues;
    }

    /**
     * Returns whether the given value is accepted based on the
     * {@code include} and {@code exclude} sets: it must be in the include set (when one is
     * present) and must not be in the exclude set (when one is present).
     */
    @Override
    public boolean accept(BytesRef value) {
        return ((valids == null) || (valids.contains(value))) && ((invalids == null) || (!invalids.contains(value)));
    }
}
/**
 * Filter that is resolved up front against a set of global ordinals rather than tested term by term.
 */
public abstract static class OrdinalsFilter {
/**
 * @param globalOrdinals the ordinals to evaluate the filter against
 * @return a bit set in which the bit for each accepted ordinal is set
 */
public abstract LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException;
}
static class AutomatonBackedOrdinalsFilter extends OrdinalsFilter {

    private final CompiledAutomaton compiled;

    private AutomatonBackedOrdinalsFilter(Automaton automaton) {
        compiled = new CompiledAutomaton(automaton);
    }

    /**
     * Walks the terms enum that the compiled automaton produces over the doc-values term
     * dictionary and marks the ordinal of every term it yields.
     */
    @Override
    public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals)
            throws IOException {
        final LongBitSet accepted = new LongBitSet(globalOrdinals.getValueCount());
        final Terms dictionary = new DocValuesTerms(globalOrdinals);
        // TODO: specialize based on compiled.type: for ALL and prefixes (sinkState >= 0 ) we can avoid i/o and just set bits.
        final TermsEnum matching = compiled.getTermsEnum(dictionary);
        while (matching.next() != null) {
            accepted.set(matching.ord());
        }
        return accepted;
    }
}
static class TermListBackedOrdinalsFilter extends OrdinalsFilter {

    // Typed as SortedSet<BytesRef> so the for-each loops below can iterate BytesRef elements directly.
    private final SortedSet<BytesRef> includeValues;
    private final SortedSet<BytesRef> excludeValues;

    /**
     * @param includeValues terms whose ordinals are accepted, or {@code null} to start from "all accepted"
     * @param excludeValues terms whose ordinals are cleared afterwards, or {@code null} for none
     */
    public TermListBackedOrdinalsFilter(SortedSet<BytesRef> includeValues, SortedSet<BytesRef> excludeValues) {
        this.includeValues = includeValues;
        this.excludeValues = excludeValues;
    }

    /**
     * Builds the accepted-ordinal bit set: first the include terms (or every ordinal when there is
     * no include set), then the exclude terms are cleared, so exclusion wins over inclusion.
     */
    @Override
    public LongBitSet acceptedGlobalOrdinals(RandomAccessOrds globalOrdinals) throws IOException {
        LongBitSet acceptedGlobalOrdinals = new LongBitSet(globalOrdinals.getValueCount());
        if (includeValues != null) {
            for (BytesRef term : includeValues) {
                long ord = globalOrdinals.lookupTerm(term);
                // A negative result is treated as "term not present" and skipped.
                if (ord >= 0) {
                    acceptedGlobalOrdinals.set(ord);
                }
            }
        } else if (acceptedGlobalOrdinals.length() > 0) {
            // default to all terms being acceptable
            acceptedGlobalOrdinals.set(0, acceptedGlobalOrdinals.length());
        }
        if (excludeValues != null) {
            for (BytesRef term : excludeValues) {
                long ord = globalOrdinals.lookupTerm(term);
                if (ord >= 0) {
                    acceptedGlobalOrdinals.clear(ord);
                }
            }
        }
        return acceptedGlobalOrdinals;
    }
}
// Regex form: at most these two are set; both stay null when the instance is term-list based.
private final RegExp include, exclude;
// Term-list form: element type is BytesRef so the sets can be iterated and serialised as terms.
private final SortedSet<BytesRef> includeValues, excludeValues;
/**
 * Creates a regex-based filter. The term-list fields are left {@code null}.
 *
 * @param include The regular expression pattern for the terms to be included
 * @param exclude The regular expression pattern for the terms to be excluded
 * @throws IllegalArgumentException if both patterns are {@code null}
 */
public IncludeExclude(RegExp include, RegExp exclude) {
    if (include == null && exclude == null) {
        throw new IllegalArgumentException("include and exclude patterns cannot both be null");
    }
    this.include = include;
    this.exclude = exclude;
    this.includeValues = null;
    this.excludeValues = null;
}
/**
 * Convenience form of the regex constructor: each non-null string is compiled into a
 * {@link RegExp}; a null string stays null.
 */
public IncludeExclude(String include, String exclude) {
    this(
        include != null ? new RegExp(include) : null,
        exclude != null ? new RegExp(exclude) : null
    );
}
/**
 * Creates a term-list-based filter. The regex fields are left {@code null}.
 *
 * @param includeValues The terms to be included
 * @param excludeValues The terms to be excluded
 * @throws IllegalArgumentException if both sets are {@code null}
 */
public IncludeExclude(SortedSet<BytesRef> includeValues, SortedSet<BytesRef> excludeValues) {
    if (includeValues == null && excludeValues == null) {
        throw new IllegalArgumentException("includeValues and excludeValues cannot both be null");
    }
    this.include = null;
    this.exclude = null;
    this.includeValues = includeValues;
    this.excludeValues = excludeValues;
}
/**
 * Term-list constructor taking string terms; each array is converted to a sorted set of
 * {@code BytesRef} (a null array stays null) before delegating.
 */
public IncludeExclude(String[] includeValues, String[] excludeValues) {
this(convertToBytesRefSet(includeValues), convertToBytesRefSet(excludeValues));
}
/**
 * Term-list constructor taking double values; each value is stored as the {@code BytesRef} of
 * its {@code String.valueOf} rendering (a null array stays null) before delegating.
 */
public IncludeExclude(double[] includeValues, double[] excludeValues) {
this(convertToBytesRefSet(includeValues), convertToBytesRefSet(excludeValues));
}
/**
 * Term-list constructor taking long values; each value is stored as the {@code BytesRef} of
 * its {@code String.valueOf} rendering (a null array stays null) before delegating.
 */
public IncludeExclude(long[] includeValues, long[] excludeValues) {
this(convertToBytesRefSet(includeValues), convertToBytesRefSet(excludeValues));
}
/**
 * Read from a stream.
 * <p>
 * A leading boolean selects the form: {@code true} is followed by two optional pattern strings
 * (include, then exclude); {@code false} is followed by two optional term sets (include, then
 * exclude), each written as a presence flag, a vInt count and that many terms.
 */
public IncludeExclude(StreamInput in) throws IOException {
    if (in.readBoolean()) {
        final String includePattern = in.readOptionalString();
        include = includePattern != null ? new RegExp(includePattern) : null;
        final String excludePattern = in.readOptionalString();
        exclude = excludePattern != null ? new RegExp(excludePattern) : null;
        includeValues = null;
        excludeValues = null;
    } else {
        include = null;
        exclude = null;
        includeValues = readOptionalTerms(in);
        excludeValues = readOptionalTerms(in);
    }
}

/** Reads one optional term set: a presence flag, then a vInt count followed by that many terms. */
private static SortedSet<BytesRef> readOptionalTerms(StreamInput in) throws IOException {
    if (!in.readBoolean()) {
        return null;
    }
    final int count = in.readVInt();
    final SortedSet<BytesRef> terms = new TreeSet<>();
    for (int remaining = count; remaining > 0; remaining--) {
        terms.add(in.readBytesRef());
    }
    return terms;
}
/**
 * Writes this instance to the stream: a boolean telling whether it is regex based, followed by
 * either the two optional pattern strings or the two optional term sets (include first).
 */
@Override
public void writeTo(StreamOutput out) throws IOException {
    if (isRegexBased()) {
        out.writeBoolean(true);
        out.writeOptionalString(include != null ? include.getOriginalString() : null);
        out.writeOptionalString(exclude != null ? exclude.getOriginalString() : null);
        return;
    }
    out.writeBoolean(false);
    writeOptionalTerms(out, includeValues);
    writeOptionalTerms(out, excludeValues);
}

/** Writes one optional term set: a presence flag, then the vInt size followed by each term. */
private static void writeOptionalTerms(StreamOutput out, SortedSet<BytesRef> terms) throws IOException {
    if (terms == null) {
        out.writeBoolean(false);
        return;
    }
    out.writeBoolean(true);
    out.writeVInt(terms.size());
    for (BytesRef term : terms) {
        out.writeBytesRef(term);
    }
}
/** Converts string terms to a sorted set of {@code BytesRef}; a null array yields null. */
private static SortedSet convertToBytesRefSet(String[] values) {
    if (values == null) {
        return null;
    }
    final SortedSet<BytesRef> terms = new TreeSet<>();
    for (int i = 0; i < values.length; i++) {
        terms.add(new BytesRef(values[i]));
    }
    return terms;
}
/**
 * Converts double values to a sorted set holding the {@code BytesRef} of each value's
 * {@code String.valueOf} text; a null array yields null.
 */
private static SortedSet convertToBytesRefSet(double[] values) {
    if (values == null) {
        return null;
    }
    final SortedSet<BytesRef> terms = new TreeSet<>();
    for (int i = 0; i < values.length; i++) {
        final String text = String.valueOf(values[i]);
        terms.add(new BytesRef(text));
    }
    return terms;
}
/**
 * Converts long values to a sorted set holding the {@code BytesRef} of each value's
 * {@code String.valueOf} text; a null array yields null.
 */
private static SortedSet convertToBytesRefSet(long[] values) {
    if (values == null) {
        return null;
    }
    final SortedSet<BytesRef> terms = new TreeSet<>();
    for (int i = 0; i < values.length; i++) {
        final String text = String.valueOf(values[i]);
        terms.add(new BytesRef(text));
    }
    return terms;
}
/**
 * Terms adapter around doc values.
 * <p>
 * Lets a {@link SortedSetDocValues} term dictionary be handed to APIs that expect a
 * {@link Terms}; in this file it is passed to {@code CompiledAutomaton.getTermsEnum}.
 * Only {@link #iterator()} delegates to the wrapped values; every statistic reports -1
 * (presumably Lucene's "not available" convention, to be confirmed against the Terms API)
 * and every capability flag reports false.
 */
private static class DocValuesTerms extends Terms {
private final SortedSetDocValues values;
DocValuesTerms(SortedSetDocValues values) {
this.values = values;
}
// The only method with real behaviour: exposes the doc values' own terms enum.
@Override
public TermsEnum iterator() throws IOException {
return values.termsEnum();
}
// Statistics below are not computed by this adapter.
@Override
public long size() throws IOException {
return -1;
}
@Override
public long getSumTotalTermFreq() throws IOException {
return -1;
}
@Override
public long getSumDocFreq() throws IOException {
return -1;
}
@Override
public int getDocCount() throws IOException {
return -1;
}
// No frequencies, offsets, positions or payloads are reported by this adapter.
@Override
public boolean hasFreqs() {
return false;
}
@Override
public boolean hasOffsets() {
return false;
}
@Override
public boolean hasPositions() {
return false;
}
@Override
public boolean hasPayloads() {
return false;
}
}
public static class Parser {
/**
 * Offers one token to this parser and, if it is an include/exclude clause, records its value
 * in {@code otherOptions} under {@code INCLUDE_FIELD} or {@code EXCLUDE_FIELD}.
 * <p>
 * Three shapes are handled: a plain string (stored as is), an array (stored as a sorted set of
 * terms) and an object (the string value of its {@code pattern} key is stored; other content
 * inside the object is skipped).
 *
 * @return {@code true} if the token was one of those shapes and the field name matched
 *         include or exclude; {@code false} otherwise
 */
public boolean token(String currentFieldName, XContentParser.Token token, XContentParser parser,
        ParseFieldMatcher parseFieldMatcher, Map otherOptions) throws IOException {
    final boolean stringForm = token == XContentParser.Token.VALUE_STRING;
    final boolean arrayForm = token == XContentParser.Token.START_ARRAY;
    final boolean objectForm = token == XContentParser.Token.START_OBJECT;
    if (!(stringForm || arrayForm || objectForm)) {
        return false;
    }

    // Resolve which clause the field names; include is tried before exclude.
    final ParseField clause;
    if (parseFieldMatcher.match(currentFieldName, INCLUDE_FIELD)) {
        clause = INCLUDE_FIELD;
    } else if (parseFieldMatcher.match(currentFieldName, EXCLUDE_FIELD)) {
        clause = EXCLUDE_FIELD;
    } else {
        return false;
    }

    if (stringForm) {
        otherOptions.put(clause, parser.text());
    } else if (arrayForm) {
        otherOptions.put(clause, new TreeSet<>(parseArrayToSet(parser)));
    } else {
        // Object form: scan up to the next END_OBJECT, tracking the most recent field name.
        String innerName = currentFieldName;
        XContentParser.Token inner;
        while ((inner = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
            if (inner == XContentParser.Token.FIELD_NAME) {
                innerName = parser.currentName();
            } else if (inner == XContentParser.Token.VALUE_STRING
                    && parseFieldMatcher.match(innerName, PATTERN_FIELD)) {
                otherOptions.put(clause, parser.text());
            }
        }
    }
    return true;
}
/**
 * Collects the elements of the array the parser is currently positioned on into a set of
 * {@code BytesRef}, using each element's text.
 *
 * @throws ElasticsearchParseException if the parser is not on an array start, or an element
 *         is not a value token
 */
private Set parseArrayToSet(XContentParser parser) throws IOException {
    if (parser.currentToken() != XContentParser.Token.START_ARRAY) {
        throw new ElasticsearchParseException("Missing start of array in include/exclude clause");
    }
    final Set<BytesRef> terms = new HashSet<>();
    while (parser.nextToken() != XContentParser.Token.END_ARRAY) {
        final boolean isValue = parser.currentToken().isValue();
        if (isValue == false) {
            throw new ElasticsearchParseException("Array elements in include/exclude clauses should be string values");
        }
        terms.add(new BytesRef(parser.text()));
    }
    return terms;
}
/**
 * Builds an {@link IncludeExclude} from the values stored under {@code INCLUDE_FIELD} and
 * {@code EXCLUDE_FIELD} in {@code otherOptions}. A {@code String} value is treated as a regular
 * expression, a {@code SortedSet} value as a term list; any other value is ignored.
 *
 * @return the filter, or {@code null} when neither clause yielded a usable value
 * @throws IllegalArgumentException if a regular expression and a term list are combined
 */
public IncludeExclude createIncludeExclude(Map otherOptions) {
    final Object includeOption = otherOptions.get(INCLUDE_FIELD);
    final Object excludeOption = otherOptions.get(EXCLUDE_FIELD);

    // instanceof is false for null, so an absent option simply produces null below.
    final SortedSet includeTerms = includeOption instanceof SortedSet ? (SortedSet) includeOption : null;
    final SortedSet excludeTerms = excludeOption instanceof SortedSet ? (SortedSet) excludeOption : null;
    final RegExp includeRegex = includeOption instanceof String ? new RegExp((String) includeOption) : null;
    final RegExp excludeRegex = excludeOption instanceof String ? new RegExp((String) excludeOption) : null;

    final boolean hasRegex = includeRegex != null || excludeRegex != null;
    final boolean hasTerms = includeTerms != null || excludeTerms != null;
    if (hasRegex && hasTerms) {
        throw new IllegalArgumentException("Can only use regular expression include/exclude or a set of values, not both");
    }
    if (hasRegex) {
        return new IncludeExclude(includeRegex, excludeRegex);
    }
    if (hasTerms) {
        return new IncludeExclude(includeTerms, excludeTerms);
    }
    return null;
}
}
/** Returns {@code true} when at least one of the include/exclude regular expressions is set. */
public boolean isRegexBased() {
    final boolean noPatterns = include == null && exclude == null;
    return !noPatterns;
}
/**
 * Builds the automaton of accepted terms: the include side (regex, else term union, else any
 * string) minus the exclude side (regex, else term union) when an exclude side exists.
 */
private Automaton toAutomaton() {
    final Automaton accepted;
    if (include != null) {
        accepted = include.toAutomaton();
    } else if (includeValues != null) {
        accepted = Automata.makeStringUnion(includeValues);
    } else {
        accepted = Automata.makeAnyString();
    }

    final Automaton rejected;
    if (exclude != null) {
        rejected = exclude.toAutomaton();
    } else if (excludeValues != null) {
        rejected = Automata.makeStringUnion(excludeValues);
    } else {
        // Nothing to subtract.
        return accepted;
    }
    return Operations.minus(accepted, rejected, Operations.DEFAULT_MAX_DETERMINIZED_STATES);
}
/**
 * Creates the per-term filter: automaton backed when regex based, otherwise backed by the
 * include/exclude term sets after they have been passed through {@code parseForDocValues}.
 */
public StringFilter convertToStringFilter(DocValueFormat format) {
    if (!isRegexBased()) {
        final SortedSet accepted = parseForDocValues(includeValues, format);
        final SortedSet rejected = parseForDocValues(excludeValues, format);
        return new TermListBackedStringFilter(accepted, rejected);
    }
    return new AutomatonBackedStringFilter(toAutomaton());
}
/**
 * Converts user-formatted terms into the form produced by {@code format.parseBytesRef}.
 *
 * @param endUserFormattedValues terms as supplied by the user, may be {@code null}
 * @param format                 the format used to parse each term's UTF-8 text
 * @return the input set itself when it is {@code null} or the format is {@code DocValueFormat.RAW};
 *         otherwise a new sorted set of the parsed terms
 */
private static SortedSet<BytesRef> parseForDocValues(SortedSet<BytesRef> endUserFormattedValues, DocValueFormat format) {
    // RAW needs no translation, so the caller's set is reused unchanged.
    if (endUserFormattedValues == null || format == DocValueFormat.RAW) {
        return endUserFormattedValues;
    }
    final SortedSet<BytesRef> result = new TreeSet<>();
    for (BytesRef formattedVal : endUserFormattedValues) {
        result.add(format.parseBytesRef(formattedVal.utf8ToString()));
    }
    return result;
}
/**
 * Creates the ordinal-based filter: automaton backed when regex based, otherwise backed by the
 * include/exclude term sets after they have been passed through {@code parseForDocValues}.
 */
public OrdinalsFilter convertToOrdinalsFilter(DocValueFormat format) {
    if (!isRegexBased()) {
        final SortedSet accepted = parseForDocValues(includeValues, format);
        final SortedSet rejected = parseForDocValues(excludeValues, format);
        return new TermListBackedOrdinalsFilter(accepted, rejected);
    }
    return new AutomatonBackedOrdinalsFilter(toAutomaton());
}
/**
 * Builds a {@link LongFilter} from the term lists, parsing each term's UTF-8 text with
 * {@code format.parseLong}. Only the term lists are consulted; the regex fields are not read.
 */
public LongFilter convertToLongFilter(DocValueFormat format) {
    final boolean hasIncludes = includeValues != null;
    final boolean hasExcludes = excludeValues != null;
    final LongFilter filter = new LongFilter(
        hasIncludes ? includeValues.size() : 0,
        hasExcludes ? excludeValues.size() : 0);
    if (hasIncludes) {
        for (BytesRef term : includeValues) {
            final long parsed = format.parseLong(term.utf8ToString(), false, null);
            filter.addAccept(parsed);
        }
    }
    if (hasExcludes) {
        for (BytesRef term : excludeValues) {
            final long parsed = format.parseLong(term.utf8ToString(), false, null);
            filter.addReject(parsed);
        }
    }
    return filter;
}
/**
 * Builds a {@link LongFilter} for double fields: each term's UTF-8 text is parsed with
 * {@code Double.parseDouble} and stored as {@code NumericUtils.doubleToSortableLong} of that value.
 * Only the term lists are consulted; the regex fields are not read.
 */
public LongFilter convertToDoubleFilter() {
    final boolean hasIncludes = includeValues != null;
    final boolean hasExcludes = excludeValues != null;
    final LongFilter filter = new LongFilter(
        hasIncludes ? includeValues.size() : 0,
        hasExcludes ? excludeValues.size() : 0);
    if (hasIncludes) {
        for (BytesRef term : includeValues) {
            final double parsed = Double.parseDouble(term.utf8ToString());
            filter.addAccept(NumericUtils.doubleToSortableLong(parsed));
        }
    }
    if (hasExcludes) {
        for (BytesRef term : excludeValues) {
            final double parsed = Double.parseDouble(term.utf8ToString());
            filter.addReject(NumericUtils.doubleToSortableLong(parsed));
        }
    }
    return filter;
}
/**
 * Emits whichever of the four parts are set, in this order: include pattern, include terms,
 * exclude pattern, exclude terms. Patterns are written as string fields, term lists as arrays
 * of their UTF-8 text.
 */
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
    final String includeName = INCLUDE_FIELD.getPreferredName();
    final String excludeName = EXCLUDE_FIELD.getPreferredName();

    if (include != null) {
        builder.field(includeName, include.getOriginalString());
    }
    emitTerms(builder, includeName, includeValues);
    if (exclude != null) {
        builder.field(excludeName, exclude.getOriginalString());
    }
    emitTerms(builder, excludeName, excludeValues);
    return builder;
}

/** Writes {@code terms} as an array field named {@code name}; does nothing when {@code terms} is null. */
private static void emitTerms(XContentBuilder builder, String name, SortedSet<BytesRef> terms) throws IOException {
    if (terms == null) {
        return;
    }
    builder.startArray(name);
    for (BytesRef term : terms) {
        builder.value(term.utf8ToString());
    }
    builder.endArray();
}
/** Hashes the original pattern strings (or null) together with both term sets. */
@Override
public int hashCode() {
    final String includePattern = include != null ? include.getOriginalString() : null;
    final String excludePattern = exclude != null ? exclude.getOriginalString() : null;
    return Objects.hash(includePattern, excludePattern, includeValues, excludeValues);
}
/**
 * Two instances are equal when they are of the same class and agree on the original pattern
 * strings (compared as strings, null when unset) and on both term sets.
 */
@Override
public boolean equals(Object obj) {
    if (obj == null || getClass() != obj.getClass()) {
        return false;
    }
    final IncludeExclude that = (IncludeExclude) obj;

    final String myInclude = include != null ? include.getOriginalString() : null;
    final String theirInclude = that.include != null ? that.include.getOriginalString() : null;
    final String myExclude = exclude != null ? exclude.getOriginalString() : null;
    final String theirExclude = that.exclude != null ? that.exclude.getOriginalString() : null;

    return Objects.equals(myInclude, theirInclude)
        && Objects.equals(myExclude, theirExclude)
        && Objects.equals(includeValues, that.includeValues)
        && Objects.equals(excludeValues, that.excludeValues);
}
}