All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.elasticsearch.protocol.xpack.graph.GraphExploreRequest Maven / Gradle / Ivy

/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */
package org.elasticsearch.protocol.xpack.graph;

import org.elasticsearch.action.ActionRequest;
import org.elasticsearch.action.ActionRequestValidationException;
import org.elasticsearch.action.IndicesRequest;
import org.elasticsearch.action.ValidateActions;
import org.elasticsearch.action.support.IndicesOptions;
import org.elasticsearch.common.Strings;
import org.elasticsearch.common.io.stream.StreamInput;
import org.elasticsearch.common.io.stream.StreamOutput;
import org.elasticsearch.common.unit.TimeValue;
import org.elasticsearch.common.xcontent.ToXContentObject;
import org.elasticsearch.common.xcontent.XContentBuilder;
import org.elasticsearch.index.query.QueryBuilder;
import org.elasticsearch.search.aggregations.bucket.sampler.SamplerAggregationBuilder;
import org.elasticsearch.search.aggregations.bucket.terms.SignificantTerms;
import org.elasticsearch.search.aggregations.bucket.terms.TermsAggregator;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

/**
 * Holds the criteria required to guide the exploration of connected terms which
 * can be returned as a graph.
 */
public class GraphExploreRequest extends ActionRequest implements IndicesRequest.Replaceable, ToXContentObject {

    public static final String NO_HOPS_ERROR_MESSAGE = "Graph explore request must have at least one hop";
    public static final String NO_VERTICES_ERROR_MESSAGE = "Graph explore hop must have at least one VertexRequest";
    private String[] indices = Strings.EMPTY_ARRAY;
    private IndicesOptions indicesOptions = IndicesOptions.fromOptions(false, false, true, false);
    private String[] types = Strings.EMPTY_ARRAY;
    private String routing;
    private TimeValue timeout;

    private int sampleSize = SamplerAggregationBuilder.DEFAULT_SHARD_SAMPLE_SIZE;
    private String sampleDiversityField;
    private int maxDocsPerDiversityValue;
    private boolean useSignificance = true;
    private boolean returnDetailedInfo;

    private List hops = new ArrayList<>();

    public GraphExploreRequest() {
    }

    /**
     * Constructs a new graph request to run against the provided indices. No
     * indices means it will run against all indices.
     */
    public GraphExploreRequest(String... indices) {
        this.indices = indices;
    }

    @Override
    public ActionRequestValidationException validate() {
        ActionRequestValidationException validationException = null;
        if (hops.size() == 0) {
            validationException = ValidateActions.addValidationError(NO_HOPS_ERROR_MESSAGE, validationException);
        }
        for (Hop hop : hops) {
            validationException = hop.validate(validationException);
        }
        return validationException;
    }

    @Override
    public String[] indices() {
        return this.indices;
    }

    @Override
    public GraphExploreRequest indices(String... indices) {
        this.indices = indices;
        return this;
    }

    @Override
    public IndicesOptions indicesOptions() {
        return indicesOptions;
    }

    public GraphExploreRequest indicesOptions(IndicesOptions indicesOptions) {
        if (indicesOptions == null) {
            throw new IllegalArgumentException("IndicesOptions must not be null");
        }
        this.indicesOptions = indicesOptions;
        return this;
    }

    /**
     * The document types to execute the explore against. Defaults to be executed against
     * all types.
     *
     * @deprecated Types are in the process of being removed. Instead of using a type, prefer to
     * filter on a field on the document.
     */
    @Deprecated
    public String[] types() {
        return this.types;
    }

    /**
     * The document types to execute the explore request against. Defaults to be executed against
     * all types.
     *
     * @deprecated Types are in the process of being removed. Instead of using a type, prefer to
     * filter on a field on the document.
     */
    @Deprecated
    public GraphExploreRequest types(String... types) {
        this.types = types;
        return this;
    }

    public GraphExploreRequest(StreamInput in) throws IOException {
        super(in);

        indices = in.readStringArray();
        indicesOptions = IndicesOptions.readIndicesOptions(in);
        types = in.readStringArray();
        routing = in.readOptionalString();
        timeout = in.readOptionalTimeValue();
        sampleSize = in.readInt();
        sampleDiversityField = in.readOptionalString();
        maxDocsPerDiversityValue = in.readInt();

        useSignificance = in.readBoolean();
        returnDetailedInfo = in.readBoolean();

        int numHops = in.readInt();
        Hop parentHop = null;
        for (int i = 0; i < numHops; i++) {
            Hop hop = new Hop(parentHop);
            hop.readFrom(in);
            hops.add(hop);
            parentHop = hop;
        }
    }

    public String routing() {
        return this.routing;
    }

    public GraphExploreRequest routing(String routing) {
        this.routing = routing;
        return this;
    }

    public GraphExploreRequest routing(String... routings) {
        this.routing = Strings.arrayToCommaDelimitedString(routings);
        return this;
    }

    public TimeValue timeout() {
        return timeout;
    }

    /**
     * Graph exploration can be set to timeout after the given period. Search
     * operations involved in each hop are limited to the remaining time
     * available but can still overrun due to the nature of their "best efforts"
     * timeout support. When a timeout occurs partial results are returned.
     *
     * @param timeout
     *            a {@link TimeValue} object which determines the maximum length
     *            of time to spend exploring
     */
    public GraphExploreRequest timeout(TimeValue timeout) {
        if (timeout == null) {
            throw new IllegalArgumentException("timeout must not be null");
        }
        this.timeout = timeout;
        return this;
    }

    public GraphExploreRequest timeout(String timeout) {
        timeout(TimeValue.parseTimeValue(timeout, null, getClass().getSimpleName() + ".timeout"));
        return this;
    }

    @Override
    public void writeTo(StreamOutput out) throws IOException {
        super.writeTo(out);
        out.writeStringArray(indices);
        indicesOptions.writeIndicesOptions(out);
        out.writeStringArray(types);
        out.writeOptionalString(routing);
        out.writeOptionalTimeValue(timeout);

        out.writeInt(sampleSize);
        out.writeOptionalString(sampleDiversityField);
        out.writeInt(maxDocsPerDiversityValue);

        out.writeBoolean(useSignificance);
        out.writeBoolean(returnDetailedInfo);
        out.writeInt(hops.size());
        for (Iterator iterator = hops.iterator(); iterator.hasNext();) {
            Hop hop = iterator.next();
            hop.writeTo(out);
        }
    }

    @Override
    public String toString() {
        return "graph explore [" + Arrays.toString(indices) + "][" + Arrays.toString(types) + "]";
    }

    /**
     * The number of top-matching documents that are considered during each hop
     * (default is {@link SamplerAggregationBuilder#DEFAULT_SHARD_SAMPLE_SIZE}
     * Very small values (less than 50) may not provide sufficient
     * weight-of-evidence to identify significant connections between terms.
     * 

* Very large values (many thousands) are not recommended with loosely * defined queries (fuzzy queries or those with many OR clauses). This is * because any useful signals in the best documents are diluted with * irrelevant noise from low-quality matches. Performance is also typically * better with smaller samples as there are less look-ups required for * background frequencies of terms found in the documents *

* * @param maxNumberOfDocsPerHop * shard-level sample size in documents */ public void sampleSize(int maxNumberOfDocsPerHop) { sampleSize = maxNumberOfDocsPerHop; } public int sampleSize() { return sampleSize; } /** * Optional choice of single-value field on which to diversify sampled * search results */ public void sampleDiversityField(String name) { sampleDiversityField = name; } public String sampleDiversityField() { return sampleDiversityField; } /** * Optional number of permitted docs with same value in sampled search * results. Must also declare which field using sampleDiversityField */ public void maxDocsPerDiversityValue(int maxDocs) { this.maxDocsPerDiversityValue = maxDocs; } public int maxDocsPerDiversityValue() { return maxDocsPerDiversityValue; } /** * Controls the choice of algorithm used to select interesting terms. The * default value is true which means terms are selected based on * significance (see the {@link SignificantTerms} aggregation) rather than * popularity (using the {@link TermsAggregator}). * * @param value * true if the significant_terms algorithm should be used. */ public void useSignificance(boolean value) { this.useSignificance = value; } public boolean useSignificance() { return useSignificance; } /** * Return detailed information about vertex frequencies as part of JSON * results - defaults to false * * @param value * true if detailed information is required in JSON responses */ public void returnDetailedInfo(boolean value) { this.returnDetailedInfo = value; } public boolean returnDetailedInfo() { return returnDetailedInfo; } /** * Add a stage in the graph exploration. Each hop represents a stage of * querying elasticsearch to identify terms which can then be connnected to * other terms in a subsequent hop. * * @param guidingQuery * optional choice of query which influences which documents are * considered in this stage * @return a {@link Hop} object that holds settings for a stage in the graph * exploration */ public Hop createNextHop(QueryBuilder guidingQuery) { Hop parent = null; if (hops.size() > 0) { parent = hops.get(hops.size() - 1); } Hop newHop = new Hop(parent); newHop.guidingQuery = guidingQuery; hops.add(newHop); return newHop; } public int getHopNumbers() { return hops.size(); } public Hop getHop(int hopNumber) { return hops.get(hopNumber); } public static class TermBoost { String term; float boost; public TermBoost(String term, float boost) { super(); this.term = term; if (boost <= 0) { throw new IllegalArgumentException("Boosts must be a positive non-zero number"); } this.boost = boost; } TermBoost() { } public String getTerm() { return term; } public float getBoost() { return boost; } void readFrom(StreamInput in) throws IOException { this.term = in.readString(); this.boost = in.readFloat(); } void writeTo(StreamOutput out) throws IOException { out.writeString(term); out.writeFloat(boost); } } @Override public XContentBuilder toXContent(XContentBuilder builder, Params params) throws IOException { builder.startObject(); builder.startObject("controls"); { if (sampleSize != SamplerAggregationBuilder.DEFAULT_SHARD_SAMPLE_SIZE) { builder.field("sample_size", sampleSize); } if (sampleDiversityField != null) { builder.startObject("sample_diversity"); builder.field("field", sampleDiversityField); builder.field("max_docs_per_value", maxDocsPerDiversityValue); builder.endObject(); } builder.field("use_significance", useSignificance); if (returnDetailedInfo) { builder.field("return_detailed_stats", returnDetailedInfo); } } builder.endObject(); for (Hop hop : hops) { if (hop.parentHop != null) { builder.startObject("connections"); } hop.toXContent(builder, params); } for (Hop hop : hops) { if (hop.parentHop != null) { builder.endObject(); } } builder.endObject(); return builder; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy