Many resources are needed to download a project. Please understand that we have to compensate our server costs. Thank you in advance. Project price only 1 $
You can buy this project and download/modify it how often you want.
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
/*
* Modifications Copyright OpenSearch Contributors. See
* GitHub history for details.
*/
package org.opensearch.search.aggregations.bucket.terms;
import org.opensearch.common.ParseField;
import org.opensearch.common.io.stream.StreamInput;
import org.opensearch.common.io.stream.StreamOutput;
import org.opensearch.common.xcontent.ObjectParser;
import org.opensearch.common.xcontent.XContentBuilder;
import org.opensearch.common.xcontent.XContentParser;
import org.opensearch.index.query.AbstractQueryBuilder;
import org.opensearch.index.query.QueryBuilder;
import org.opensearch.index.query.QueryShardContext;
import org.opensearch.search.aggregations.AbstractAggregationBuilder;
import org.opensearch.search.aggregations.AggregationBuilder;
import org.opensearch.search.aggregations.AggregationInitializationException;
import org.opensearch.search.aggregations.AggregatorFactories.Builder;
import org.opensearch.search.aggregations.AggregatorFactory;
import org.opensearch.search.aggregations.bucket.terms.TermsAggregator.BucketCountThresholds;
import org.opensearch.search.aggregations.bucket.terms.heuristic.SignificanceHeuristic;
import java.io.IOException;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Objects;
public class SignificantTextAggregationBuilder extends AbstractAggregationBuilder {
public static final String NAME = "significant_text";
static final ParseField FIELD_NAME = new ParseField("field");
static final ParseField SOURCE_FIELDS_NAME = new ParseField("source_fields");
static final ParseField FILTER_DUPLICATE_TEXT_FIELD_NAME = new ParseField("filter_duplicate_text");
static final TermsAggregator.BucketCountThresholds DEFAULT_BUCKET_COUNT_THRESHOLDS =
SignificantTermsAggregationBuilder.DEFAULT_BUCKET_COUNT_THRESHOLDS;
static final SignificanceHeuristic DEFAULT_SIGNIFICANCE_HEURISTIC = SignificantTermsAggregationBuilder.DEFAULT_SIGNIFICANCE_HEURISTIC;
private String fieldName = null;
private String[] sourceFieldNames = null;
private boolean filterDuplicateText = false;
private IncludeExclude includeExclude = null;
private QueryBuilder filterBuilder = null;
private TermsAggregator.BucketCountThresholds bucketCountThresholds = new BucketCountThresholds(DEFAULT_BUCKET_COUNT_THRESHOLDS);
private SignificanceHeuristic significanceHeuristic = DEFAULT_SIGNIFICANCE_HEURISTIC;
private static final ObjectParser PARSER = new ObjectParser<>(
SignificantTextAggregationBuilder.NAME,
SignificanceHeuristic.class,
SignificantTextAggregationBuilder::significanceHeuristic,
null
);
static {
PARSER.declareInt(SignificantTextAggregationBuilder::shardSize, TermsAggregationBuilder.SHARD_SIZE_FIELD_NAME);
PARSER.declareLong(SignificantTextAggregationBuilder::minDocCount, TermsAggregationBuilder.MIN_DOC_COUNT_FIELD_NAME);
PARSER.declareLong(SignificantTextAggregationBuilder::shardMinDocCount, TermsAggregationBuilder.SHARD_MIN_DOC_COUNT_FIELD_NAME);
PARSER.declareInt(SignificantTextAggregationBuilder::size, TermsAggregationBuilder.REQUIRED_SIZE_FIELD_NAME);
PARSER.declareString(SignificantTextAggregationBuilder::fieldName, FIELD_NAME);
PARSER.declareStringArray(SignificantTextAggregationBuilder::sourceFieldNames, SOURCE_FIELDS_NAME);
PARSER.declareBoolean(SignificantTextAggregationBuilder::filterDuplicateText, FILTER_DUPLICATE_TEXT_FIELD_NAME);
PARSER.declareObject(
SignificantTextAggregationBuilder::backgroundFilter,
(p, context) -> AbstractQueryBuilder.parseInnerQueryBuilder(p),
SignificantTermsAggregationBuilder.BACKGROUND_FILTER
);
PARSER.declareField(
(b, v) -> b.includeExclude(IncludeExclude.merge(v, b.includeExclude())),
IncludeExclude::parseInclude,
IncludeExclude.INCLUDE_FIELD,
ObjectParser.ValueType.OBJECT_ARRAY_OR_STRING
);
PARSER.declareField(
(b, v) -> b.includeExclude(IncludeExclude.merge(b.includeExclude(), v)),
IncludeExclude::parseExclude,
IncludeExclude.EXCLUDE_FIELD,
ObjectParser.ValueType.STRING_ARRAY
);
}
public static SignificantTextAggregationBuilder parse(String aggregationName, XContentParser parser) throws IOException {
return PARSER.parse(parser, new SignificantTextAggregationBuilder(aggregationName, null), null);
}
protected SignificantTextAggregationBuilder(
SignificantTextAggregationBuilder clone,
Builder factoriesBuilder,
Map metadata
) {
super(clone, factoriesBuilder, metadata);
this.bucketCountThresholds = new BucketCountThresholds(clone.bucketCountThresholds);
this.fieldName = clone.fieldName;
this.filterBuilder = clone.filterBuilder;
this.filterDuplicateText = clone.filterDuplicateText;
this.includeExclude = clone.includeExclude;
this.significanceHeuristic = clone.significanceHeuristic;
this.sourceFieldNames = clone.sourceFieldNames;
}
@Override
protected AggregationBuilder shallowCopy(Builder factoriesBuilder, Map metadata) {
return new SignificantTextAggregationBuilder(this, factoriesBuilder, metadata);
}
protected TermsAggregator.BucketCountThresholds getBucketCountThresholds() {
return new TermsAggregator.BucketCountThresholds(bucketCountThresholds);
}
public TermsAggregator.BucketCountThresholds bucketCountThresholds() {
return bucketCountThresholds;
}
@Override
public SignificantTextAggregationBuilder subAggregations(Builder subFactories) {
throw new AggregationInitializationException(
"Aggregator [" + name + "] of type [" + getType() + "] cannot accept sub-aggregations"
);
}
@Override
public SignificantTextAggregationBuilder subAggregation(AggregationBuilder aggregation) {
throw new AggregationInitializationException(
"Aggregator [" + name + "] of type [" + getType() + "] cannot accept sub-aggregations"
);
}
public SignificantTextAggregationBuilder bucketCountThresholds(TermsAggregator.BucketCountThresholds bucketCountThresholds) {
if (bucketCountThresholds == null) {
throw new IllegalArgumentException("[bucketCountThresholds] must not be null: [" + name + "]");
}
this.bucketCountThresholds = bucketCountThresholds;
return this;
}
/**
* Sets the size - indicating how many term buckets should be returned
* (defaults to 10)
*/
public SignificantTextAggregationBuilder size(int size) {
if (size <= 0) {
throw new IllegalArgumentException("[size] must be greater than 0. Found [" + size + "] in [" + name + "]");
}
bucketCountThresholds.setRequiredSize(size);
return this;
}
/**
* Sets the shard_size - indicating the number of term buckets each shard
* will return to the coordinating node (the node that coordinates the
* search execution). The higher the shard size is, the more accurate the
* results are.
*/
public SignificantTextAggregationBuilder shardSize(int shardSize) {
if (shardSize <= 0) {
throw new IllegalArgumentException("[shardSize] must be greater than 0. Found [" + shardSize + "] in [" + name + "]");
}
bucketCountThresholds.setShardSize(shardSize);
return this;
}
/**
* Sets the name of the text field that will be the subject of this
* aggregation.
*/
public SignificantTextAggregationBuilder fieldName(String fieldName) {
this.fieldName = fieldName;
return this;
}
/**
* Selects the fields to load from _source JSON and analyze.
* If none are specified, the indexed "fieldName" value is assumed
* to also be the name of the JSON field holding the value
*/
public SignificantTextAggregationBuilder sourceFieldNames(List names) {
this.sourceFieldNames = names.toArray(new String[names.size()]);
return this;
}
/**
* Control if duplicate paragraphs of text should try be filtered from the
* statistical text analysis. Can improve results but slows down analysis.
* Default is false.
*/
public SignificantTextAggregationBuilder filterDuplicateText(boolean filterDuplicateText) {
this.filterDuplicateText = filterDuplicateText;
return this;
}
/**
* Set the minimum document count terms should have in order to appear in
* the response.
*/
public SignificantTextAggregationBuilder minDocCount(long minDocCount) {
if (minDocCount < 0) {
throw new IllegalArgumentException(
"[minDocCount] must be greater than or equal to 0. Found [" + minDocCount + "] in [" + name + "]"
);
}
bucketCountThresholds.setMinDocCount(minDocCount);
return this;
}
/**
* Set the minimum document count terms should have on the shard in order to
* appear in the response.
*/
public SignificantTextAggregationBuilder shardMinDocCount(long shardMinDocCount) {
if (shardMinDocCount < 0) {
throw new IllegalArgumentException(
"[shardMinDocCount] must be greater than or equal to 0. Found [" + shardMinDocCount + "] in [" + name + "]"
);
}
bucketCountThresholds.setShardMinDocCount(shardMinDocCount);
return this;
}
public SignificantTextAggregationBuilder backgroundFilter(QueryBuilder backgroundFilter) {
if (backgroundFilter == null) {
throw new IllegalArgumentException("[backgroundFilter] must not be null: [" + name + "]");
}
this.filterBuilder = backgroundFilter;
return this;
}
public QueryBuilder backgroundFilter() {
return filterBuilder;
}
/**
* Set terms to include and exclude from the aggregation results
*/
public SignificantTextAggregationBuilder includeExclude(IncludeExclude includeExclude) {
this.includeExclude = includeExclude;
return this;
}
/**
* Get terms to include and exclude from the aggregation results
*/
public IncludeExclude includeExclude() {
return includeExclude;
}
public SignificantTextAggregationBuilder significanceHeuristic(SignificanceHeuristic significanceHeuristic) {
if (significanceHeuristic == null) {
throw new IllegalArgumentException("[significanceHeuristic] must not be null: [" + name + "]");
}
this.significanceHeuristic = significanceHeuristic;
return this;
}
public SignificanceHeuristic significanceHeuristic() {
return significanceHeuristic;
}
/**
* @param name
* the name of this aggregation
* @param fieldName
* the name of the text field that will be the subject of this
* aggregation
*
*/
public SignificantTextAggregationBuilder(String name, String fieldName) {
super(name);
this.fieldName = fieldName;
}
/**
* Read from a stream.
*/
public SignificantTextAggregationBuilder(StreamInput in) throws IOException {
super(in);
fieldName = in.readString();
filterDuplicateText = in.readBoolean();
bucketCountThresholds = new BucketCountThresholds(in);
filterBuilder = in.readOptionalNamedWriteable(QueryBuilder.class);
includeExclude = in.readOptionalWriteable(IncludeExclude::new);
significanceHeuristic = in.readNamedWriteable(SignificanceHeuristic.class);
sourceFieldNames = in.readOptionalStringArray();
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
out.writeString(fieldName);
out.writeBoolean(filterDuplicateText);
bucketCountThresholds.writeTo(out);
out.writeOptionalNamedWriteable(filterBuilder);
out.writeOptionalWriteable(includeExclude);
out.writeNamedWriteable(significanceHeuristic);
out.writeOptionalStringArray(sourceFieldNames);
}
@Override
public BucketCardinality bucketCardinality() {
return BucketCardinality.MANY;
}
@Override
protected AggregatorFactory doBuild(QueryShardContext queryShardContext, AggregatorFactory parent, Builder subFactoriesBuilder)
throws IOException {
SignificanceHeuristic executionHeuristic = this.significanceHeuristic.rewrite(queryShardContext);
return new SignificantTextAggregatorFactory(
name,
includeExclude,
filterBuilder,
bucketCountThresholds,
executionHeuristic,
queryShardContext,
parent,
subFactoriesBuilder,
fieldName,
sourceFieldNames,
filterDuplicateText,
metadata
);
}
@Override
protected XContentBuilder internalXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
bucketCountThresholds.toXContent(builder, params);
if (fieldName != null) {
builder.field(FIELD_NAME.getPreferredName(), fieldName);
}
if (sourceFieldNames != null) {
builder.array(SOURCE_FIELDS_NAME.getPreferredName(), sourceFieldNames);
}
if (filterDuplicateText) {
builder.field(FILTER_DUPLICATE_TEXT_FIELD_NAME.getPreferredName(), filterDuplicateText);
}
if (filterBuilder != null) {
builder.field(SignificantTermsAggregationBuilder.BACKGROUND_FILTER.getPreferredName(), filterBuilder);
}
if (includeExclude != null) {
includeExclude.toXContent(builder, params);
}
significanceHeuristic.toXContent(builder, params);
builder.endObject();
return builder;
}
@Override
public int hashCode() {
return Objects.hash(
super.hashCode(),
bucketCountThresholds,
fieldName,
filterDuplicateText,
filterBuilder,
includeExclude,
significanceHeuristic,
Arrays.hashCode(sourceFieldNames)
);
}
@Override
public boolean equals(Object obj) {
if (this == obj) return true;
if (obj == null || getClass() != obj.getClass()) return false;
if (super.equals(obj) == false) return false;
SignificantTextAggregationBuilder other = (SignificantTextAggregationBuilder) obj;
return Objects.equals(bucketCountThresholds, other.bucketCountThresholds)
&& Objects.equals(fieldName, other.fieldName)
&& Arrays.equals(sourceFieldNames, other.sourceFieldNames)
&& filterDuplicateText == other.filterDuplicateText
&& Objects.equals(filterBuilder, other.filterBuilder)
&& Objects.equals(includeExclude, other.includeExclude)
&& Objects.equals(significanceHeuristic, other.significanceHeuristic);
}
@Override
public String getType() {
return NAME;
}
}