All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.schema.processing.PredicateProcessor Maven / Gradle / Ivy

There is a newer version: 8.458.13
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.processing;

import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.schema.RankProfileRegistry;
import com.yahoo.document.DataType;
import com.yahoo.document.datatypes.IntegerFieldValue;
import com.yahoo.document.datatypes.LongFieldValue;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.BooleanIndexDefinition;
import com.yahoo.schema.document.SDField;
import com.yahoo.vespa.documentmodel.DocumentSummary;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.expressions.ConstantExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.OptimizePredicateExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.SetVarExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.model.container.search.QueryProfiles;

import static com.yahoo.prelude.fastsearch.VespaBackend.SORTABLE_ATTRIBUTES_SUMMARY_CLASS;

import java.util.ArrayList;
import java.util.List;

/**
 * Validates the predicate fields.
 *
 * @author Lester Solbakken
 */
public class PredicateProcessor extends Processor {

    public PredicateProcessor(Schema schema, DeployLogger deployLogger, RankProfileRegistry rankProfileRegistry, QueryProfiles queryProfiles) {
        super(schema, deployLogger, rankProfileRegistry, queryProfiles);
    }

    @Override
    public void process(boolean validate, boolean documentsOnly) {
        for (SDField field : schema.allConcreteFields()) {
            if (field.getDataType() == DataType.PREDICATE) {
                if (validate && field.doesIndexing()) {
                    fail(schema, field, "Use 'attribute' instead of 'index'.");
                }
                if (field.doesAttributing()) {
                    Attribute attribute = field.getAttributes().get(field.getName());
                    for (Index index : field.getIndices().values()) {
                        BooleanIndexDefinition booleanDefinition = index.getBooleanIndexDefiniton();
                        if (validate && (booleanDefinition == null ||  ! booleanDefinition.hasArity())) {
                            fail(schema, field, "Missing arity value in predicate field.");
                        }
                        if (validate && (booleanDefinition.getArity() < 2)) {
                            fail(schema, field, "Invalid arity value in predicate field, must be greater than 1.");
                        }
                        double threshold = booleanDefinition.getDensePostingListThreshold();
                        if (validate && (threshold <= 0 || threshold > 1)) {
                            fail(schema, field, "Invalid dense-posting-list-threshold value in predicate field. " +
                                                "Value must be in range (0..1].");
                        }

                        attribute.setArity(booleanDefinition.getArity());
                        attribute.setLowerBound(booleanDefinition.getLowerBound());
                        attribute.setUpperBound(booleanDefinition.getUpperBound());

                        attribute.setDensePostingListThreshold(threshold);
                        addPredicateOptimizationIlScript(field, booleanDefinition);
                    }
                    DocumentSummary summary = schema.getSummariesInThis().get(SORTABLE_ATTRIBUTES_SUMMARY_CLASS);
                    if (summary != null) {
                        summary.remove(attribute.getName());
                    }
                    for (SummaryField summaryField : schema.getSummaryFields(field)) {
                        summaryField.setTransform(SummaryTransform.NONE);
                    }
                }
            } else if (validate && field.getDataType().getPrimitiveType() == DataType.PREDICATE) {
                fail(schema, field, "Collections of predicates are not allowed.");
            } else if (validate && field.getDataType() == DataType.RAW && field.doesIndexing()) {
                fail(schema, field, "Indexing of RAW fields is not supported.");
            } else if (validate) {
                // if field is not a predicate, disallow predicate-related index parameters
                for (Index index : field.getIndices().values()) {
                    if (index.getBooleanIndexDefiniton() != null) {
                        BooleanIndexDefinition def = index.getBooleanIndexDefiniton();
                        if (def.hasArity()) {
                            fail(schema, field, "Arity parameter is used only for predicate type fields.");
                        } else if (def.hasLowerBound() || def.hasUpperBound()) {
                            fail(schema, field, "Parameters lower-bound and upper-bound are used only for predicate type fields.");
                        } else if (def.hasDensePostingListThreshold()) {
                            fail(schema, field, "Parameter dense-posting-list-threshold is used only for predicate type fields.");
                        }
                    }
                }
            }
        }
    }

    private void addPredicateOptimizationIlScript(SDField field, BooleanIndexDefinition booleanIndexDefiniton) {
        Expression script = field.getIndexingScript();
        if (script == null) return;

        script = new StatementExpression(makeSetPredicateVariablesScript(booleanIndexDefiniton), script);

        ExpressionConverter converter = new PredicateOutputTransformer(schema);
        field.setIndexingScript(schema.getName(), new ScriptExpression((StatementExpression)converter.convert(script)));
    }

    private Expression makeSetPredicateVariablesScript(BooleanIndexDefinition options) {
        List expressions = new ArrayList<>();
        expressions.add(new ConstantExpression(new IntegerFieldValue(options.getArity())));
        expressions.add(new SetVarExpression("arity"));
        if (options.hasLowerBound()) {
            expressions.add(new ConstantExpression(new LongFieldValue(options.getLowerBound())));
            expressions.add(new SetVarExpression("lower_bound"));
        }
        if (options.hasUpperBound()) {
            expressions.add(new ConstantExpression(new LongFieldValue(options.getUpperBound())));
            expressions.add(new SetVarExpression("upper_bound"));
        }
        return new StatementExpression(expressions);
    }

    private static class PredicateOutputTransformer extends TypedTransformProvider {

        PredicateOutputTransformer(Schema schema) {
            super(OptimizePredicateExpression.class, schema);
        }

        @Override
        protected boolean requiresTransform(Expression exp, DataType fieldType) {
            return exp instanceof OutputExpression && fieldType == DataType.PREDICATE;
        }

        @Override
        protected Expression newTransform(DataType fieldType) {
            return new OptimizePredicateExpression();
        }

    }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy