/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.aggregations.bucket.significant;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.search.aggregations.AggregationStreams;
import org.elasticsearch.search.aggregations.InternalAggregation;
import org.elasticsearch.search.aggregations.InternalAggregations;
import org.elasticsearch.search.aggregations.bucket.BucketStreamContext;
import org.elasticsearch.search.aggregations.bucket.BucketStreams;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristic;
import org.elasticsearch.search.aggregations.bucket.significant.heuristics.SignificanceHeuristicStreams;
import org.elasticsearch.search.aggregations.pipeline.PipelineAggregator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
/**
 * Result of the {@code significant_terms} aggregation when the field being
 * aggregated is a string.
 */
public class SignificantStringTerms extends InternalSignificantTerms<SignificantStringTerms, SignificantStringTerms.Bucket> {
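// "significant_terms" is the aggregation type name exposed in the search API;
// "sigsterms" is the shorter stream name under which the readers below are registered.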
public static final InternalAggregation.Type TYPE = new Type("significant_terms", "sigsterms");
public static final AggregationStreams.Stream STREAM = new AggregationStreams.Stream() {
@Override
public SignificantStringTerms readResult(StreamInput in) throws IOException {
SignificantStringTerms buckets = new SignificantStringTerms();
buckets.readFrom(in);
return buckets;
}
};
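// Buckets do not serialize subsetSize/supersetSize themselves (see Bucket.readFrom
// and Bucket.writeTo below), so the bucket stream carries the two sizes out of band
// through the BucketStreamContext attributes map.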
private final static BucketStreams.Stream<Bucket> BUCKET_STREAM = new BucketStreams.Stream<Bucket>() {
@Override
public Bucket readResult(StreamInput in, BucketStreamContext context) throws IOException {
Bucket buckets = new Bucket((long) context.attributes().get("subsetSize"), (long) context.attributes().get("supersetSize"));
buckets.readFrom(in);
return buckets;
}
@Override
public BucketStreamContext getBucketStreamContext(Bucket bucket) {
BucketStreamContext context = new BucketStreamContext();
Map<String, Object> attributes = new HashMap<>();
attributes.put("subsetSize", bucket.subsetSize);
attributes.put("supersetSize", bucket.supersetSize);
context.attributes(attributes);
return context;
}
};
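// Registers both readers under the "sigsterms" stream name. This is presumably
// invoked once at node startup, before any shard responses of this type are
// deserialized.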
public static void registerStreams() {
AggregationStreams.registerStream(STREAM, TYPE.stream());
BucketStreams.registerStream(BUCKET_STREAM, TYPE.stream());
}
public static class Bucket extends InternalSignificantTerms.Bucket {
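/** Raw bytes of the term; decoded as a UTF-8 string when the key is requested. */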
BytesRef termBytes;
public Bucket(long subsetSize, long supersetSize) {
// for serialization
super(subsetSize, supersetSize);
}
public Bucket(BytesRef term, long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations) {
super(subsetDf, subsetSize, supersetDf, supersetSize, aggregations);
this.termBytes = term;
}
public Bucket(BytesRef term, long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations, double score) {
this(term, subsetDf, subsetSize, supersetDf, supersetSize, aggregations);
this.score = score;
}
@Override
public Number getKeyAsNumber() {
// this method is needed for scripted numeric aggregations
return Double.parseDouble(termBytes.utf8ToString());
}
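// UTF-8 byte order agrees with Unicode code point order, so comparing the raw
// bytes sorts terms the same way their decoded strings would sort.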
@Override
int compareTerm(SignificantTerms.Bucket other) {
return BytesRef.getUTF8SortedAsUnicodeComparator().compare(termBytes, ((Bucket) other).termBytes);
}
@Override
public String getKeyAsString() {
return termBytes.utf8ToString();
}
@Override
public String getKey() {
return getKeyAsString();
}
@Override
Bucket newBucket(long subsetDf, long subsetSize, long supersetDf, long supersetSize, InternalAggregations aggregations) {
return new Bucket(termBytes, subsetDf, subsetSize, supersetDf, supersetSize, aggregations);
}
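// Note: subsetSize and supersetSize are not part of the wire format here; they
// are injected through the constructor from the BucketStreamContext (see
// BUCKET_STREAM above) or from the enclosing aggregation.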
@Override
public void readFrom(StreamInput in) throws IOException {
termBytes = in.readBytesRef();
subsetDf = in.readVLong();
supersetDf = in.readVLong();
score = in.readDouble();
aggregations = InternalAggregations.readAggregations(in);
}
@Override
public void writeTo(StreamOutput out) throws IOException {
out.writeBytesRef(termBytes);
out.writeVLong(subsetDf);
out.writeVLong(supersetDf);
out.writeDouble(getSignificanceScore());
aggregations.writeTo(out);
}
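// Illustrative response fragment produced by toXContent below (the values are
// made up for the example; sub-aggregations, if any, follow "bg_count"):
//
// {
//   "key": "h5n1",
//   "doc_count": 5,
//   "score": 0.84,
//   "bg_count": 7
// }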
@Override
public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException {
builder.startObject();
builder.utf8Field(CommonFields.KEY, termBytes);
builder.field(CommonFields.DOC_COUNT, getDocCount());
builder.field("score", score);
builder.field("bg_count", supersetDf);
aggregations.toXContentInternal(builder, params);
builder.endObject();
return builder;
}
}
SignificantStringTerms() {} // for serialization
public SignificantStringTerms(long subsetSize, long supersetSize, String name, int requiredSize, long minDocCount,
SignificanceHeuristic significanceHeuristic, List<? extends InternalSignificantTerms.Bucket> buckets,
List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) {
super(subsetSize, supersetSize, name, requiredSize, minDocCount, significanceHeuristic, buckets, pipelineAggregators, metaData);
}
@Override
public Type type() {
return TYPE;
}
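// The factory methods below are called during the reduce phase to build
// modified copies of this aggregation and its buckets (e.g. with reduced
// sub-aggregations) without mutating the originals.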
@Override
public SignificantStringTerms create(List<SignificantStringTerms.Bucket> buckets) {
return new SignificantStringTerms(this.subsetSize, this.supersetSize, this.name, this.requiredSize, this.minDocCount,
this.significanceHeuristic, buckets, this.pipelineAggregators(), this.metaData);
}
@Override
public Bucket createBucket(InternalAggregations aggregations, SignificantStringTerms.Bucket prototype) {
return new Bucket(prototype.termBytes, prototype.subsetDf, prototype.subsetSize, prototype.supersetDf, prototype.supersetSize,
aggregations);
}
@Override
protected SignificantStringTerms create(long subsetSize, long supersetSize, List<InternalSignificantTerms.Bucket> buckets,
InternalSignificantTerms prototype) {
return new SignificantStringTerms(subsetSize, supersetSize, prototype.getName(), prototype.requiredSize, prototype.minDocCount,
prototype.significanceHeuristic, buckets, prototype.pipelineAggregators(), prototype.getMetaData());
}
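// The read order below must mirror doWriteTo exactly: requiredSize, minDocCount,
// subsetSize, supersetSize, the significance heuristic, then the bucket count
// followed by the buckets themselves.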
@Override
protected void doReadFrom(StreamInput in) throws IOException {
this.requiredSize = readSize(in);
this.minDocCount = in.readVLong();
this.subsetSize = in.readVLong();
this.supersetSize = in.readVLong();
significanceHeuristic = SignificanceHeuristicStreams.read(in);
int size = in.readVInt();
List<InternalSignificantTerms.Bucket> buckets = new ArrayList<>(size);
for (int i = 0; i < size; i++) {
Bucket bucket = new Bucket(subsetSize, supersetSize);
bucket.readFrom(in);
buckets.add(bucket);
}
this.buckets = buckets;
this.bucketMap = null;
}
@Override
protected void doWriteTo(StreamOutput out) throws IOException {
writeSize(requiredSize, out);
out.writeVLong(minDocCount);
out.writeVLong(subsetSize);
out.writeVLong(supersetSize);
significanceHeuristic.writeTo(out);
out.writeVInt(buckets.size());
for (InternalSignificantTerms.Bucket bucket : buckets) {
bucket.writeTo(out);
}
}
@Override
public XContentBuilder doXContentBody(XContentBuilder builder, Params params) throws IOException {
builder.field("doc_count", subsetSize);
builder.startArray(CommonFields.BUCKETS);
for (InternalSignificantTerms.Bucket bucket : buckets) {
// There is a condition (presumably when only one shard has a bucket?) where reduce is not called
// and buckets that contravene the user's min_doc_count criteria can survive, so filter them here.
if (bucket.subsetDf >= minDocCount) {
bucket.toXContent(builder, params);
}
}
builder.endArray();
return builder;
}
}
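// Illustrative sketch of the aggregation this class backs. A search request such
// as the following (the JSON body and field names are hypothetical) produces a
// "sigsterms" aggregation whose buckets are rendered by Bucket.toXContent above,
// each carrying key, doc_count, score and bg_count:
//
//   { "aggs": { "suspicious_terms": {
//       "significant_terms": { "field": "description" } } } }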