All downloads are free. Search and download functionality uses the official Maven repository.

com.yahoo.schema.derived.IndexingScript Maven / Gradle / Ivy

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.derived;

import com.yahoo.schema.Schema;
import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.document.ImmutableSDField;
import com.yahoo.vespa.configdefinition.IlscriptsConfig;
import com.yahoo.vespa.configdefinition.IlscriptsConfig.Ilscript.Builder;
import com.yahoo.vespa.indexinglanguage.ExpressionConverter;
import com.yahoo.vespa.indexinglanguage.ExpressionVisitor;
import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ClearStateExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.GuardExpression;
import com.yahoo.vespa.indexinglanguage.expressions.InputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.OutputExpression;
import com.yahoo.vespa.indexinglanguage.expressions.PassthroughExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.SetLanguageExpression;
import com.yahoo.vespa.indexinglanguage.expressions.StatementExpression;
import com.yahoo.vespa.indexinglanguage.expressions.TokenizeExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ZCurveExpression;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;

/**
 * An indexing language script derived from a schema. An indexing script contains a set of indexing
 * statements, organized in a composite structure of indexing code snippets.
 *
 * @author bratseth
 */
public final class IndexingScript extends Derived {

    /** Names of the fields which have full indexing docproc rights. */
    private final List<String> docFields = new ArrayList<>();

    /** The guarded indexing statements collected while deriving the schema. */
    private final List<Expression> expressions = new ArrayList<>();

    /** The non-imported fields whose indexing script contains a {@link SetLanguageExpression}. */
    private List<ImmutableSDField> fieldsSettingLanguage;

    /** Whether tokenize and zcurve expressions are dropped when the config content is produced. */
    private final boolean isStreaming;

    /**
     * Creates the indexing script of the given schema by deriving it immediately.
     *
     * @param schema the schema to derive the script from
     * @param isStreaming whether the script is produced for streaming; if so, tokenize and zcurve
     *                    expressions are dropped from the generated content
     */
    public IndexingScript(Schema schema, boolean isStreaming) {
        this.isStreaming = isStreaming;
        derive(schema);
    }

    /**
     * Finds the fields which set language, and if there is exactly one, adds its script first
     * before deriving the rest of the schema.
     */
    @Override
    protected void derive(Schema schema) {
        fieldsSettingLanguage = fieldsSettingLanguage(schema);
        if (fieldsSettingLanguage.size() == 1) // Assume this language should be used for all fields
            addExpression(fieldsSettingLanguage.get(0).getIndexingScript());
        super.derive(schema);
    }

    /**
     * Registers the field as a document field if it has full indexing docproc rights, and adds its
     * indexing script unless the field is imported, unsupported, or has already been added.
     */
    @Override
    protected void derive(ImmutableSDField field, Schema schema) {
        if (field.isImportedField()) return;

        if (field.hasFullIndexingDocprocRights())
            docFields.add(field.getName());

        if (field.usesStructOrMap() && ! GeoPos.isAnyPos(field)) return; // unsupported
        if (fieldsSettingLanguage.size() == 1 && fieldsSettingLanguage.get(0).equals(field)) return; // Already added

        addExpression(field.getIndexingScript());
    }

    /** Adds the given script, wrapped in a clear-state + guard statement, unless it is empty. */
    private void addExpression(ScriptExpression expression) {
        if (expression.isEmpty()) return;
        expressions.add(new StatementExpression(new ClearStateExpression(), new GuardExpression(expression)));
    }

    /** Returns the non-imported fields of the schema which contain a {@link SetLanguageExpression}. */
    private List<ImmutableSDField> fieldsSettingLanguage(Schema schema) {
        return schema.allFieldsList().stream()
                     .filter(field -> ! field.isImportedField())
                     .filter(field -> field.containsExpression(SetLanguageExpression.class))
                     .toList();
    }

    /** Returns an unmodifiable view of the expressions of this script. */
    public Iterable<Expression> expressions() {
        return Collections.unmodifiableCollection(expressions);
    }

    @Override
    public String getDerivedName() {
        return "ilscripts";
    }

    /**
     * Appends one ilscript entry for this document type to the given config builder.
     *
     * @param configBuilder the builder to append this script to
     */
    public void getConfig(IlscriptsConfig.Builder configBuilder) {
        // Append
        IlscriptsConfig.Ilscript.Builder ilscriptBuilder = new IlscriptsConfig.Ilscript.Builder();
        ilscriptBuilder.doctype(getName());
        ilscriptBuilder.docfield(docFields);
        addContentInOrder(ilscriptBuilder);
        configBuilder.ilscript(ilscriptBuilder);
    }

    /**
     * Builds the ilscripts config for this script and exports it.
     *
     * @param toDirectory the directory argument passed on to the inherited export method
     * @throws IOException if the inherited export fails
     */
    public void export(String toDirectory) throws IOException {
        var builder = new IlscriptsConfig.Builder();
        getConfig(builder);
        export(toDirectory, builder.build());
    }

    /** Converter which replaces every {@link TokenizeExpression} by null (presumably removing it). */
    private static class DropTokenize extends ExpressionConverter {
        @Override
        protected boolean shouldConvert(Expression exp) {
            return exp instanceof TokenizeExpression;
        }

        @Override
        protected Expression doConvert(Expression exp) {
            return null;
        }
    }

    // for streaming, drop zcurve conversion to attribute with suffix
    private static class DropZcurve extends ExpressionConverter {
        private static final String zSuffix = "_zcurve";
        private static final int zSuffixLen = zSuffix.length();
        /** Set once a zcurve expression has been encountered by this converter instance. */
        private boolean seenZcurve = false;

        /**
         * Selects zcurve expressions, and - once a zcurve expression has been seen - attribute
         * expressions whose field name ends with the zcurve suffix.
         */
        @Override
        protected boolean shouldConvert(Expression exp) {
            if (exp instanceof ZCurveExpression) {
                seenZcurve = true;
                return true;
            }
            if (seenZcurve && exp instanceof AttributeExpression attrExp) {
                return attrExp.getFieldName().endsWith(zSuffix);
            }
            return false;
        }

        /**
         * Replaces zcurve expressions by null, and attribute expressions named "x_zcurve" by an
         * attribute expression named "x". Anything else is returned unchanged.
         */
        @Override
        protected Expression doConvert(Expression exp) {
            if (exp instanceof ZCurveExpression) {
                return null;
            }
            if (exp instanceof AttributeExpression attrExp) {
                String orig = attrExp.getFieldName();
                int len = orig.length();
                // Only strip the suffix when something remains in front of it
                if (len > zSuffixLen && orig.endsWith(zSuffix)) {
                    return new AttributeExpression(orig.substring(0, len - zSuffixLen));
                }
            }
            return exp;
        }
    }

    /**
     * Adds the content of all expressions to the builder: statements which write to the field they
     * read from (and do not set language) are added after all other statements, and finally
     * synthetic statements are added for document fields not touched by any statement.
     */
    private void addContentInOrder(IlscriptsConfig.Ilscript.Builder ilscriptBuilder) {
        List<Expression> later = new ArrayList<>();
        Set<String> touchedFields = new HashSet<>();
        for (Expression expression : expressions) {
            if (isStreaming) {
                expression = expression.convertChildren(new DropTokenize());
                expression = expression.convertChildren(new DropZcurve());
            }
            if (modifiesSelf(expression) && ! setsLanguage(expression)) {
                later.add(expression);
            } else {
                ilscriptBuilder.content(expression.toString());
            }

            // A fresh visitor per expression: touchedFields() may only be read once per visitor
            FieldScanVisitor fieldFetcher = new FieldScanVisitor();
            fieldFetcher.visit(expression);
            touchedFields.addAll(fieldFetcher.touchedFields());
        }
        for (Expression exp : later) {
            ilscriptBuilder.content(exp.toString());
        }
        generateSyntheticStatementsForUntouchedFields(ilscriptBuilder, touchedFields);
    }

    /**
     * Adds an "input - passthrough" statement, in sorted field name order, for each document field
     * which is not among the given touched fields.
     */
    private void generateSyntheticStatementsForUntouchedFields(Builder ilscriptBuilder, Set<String> touchedFields) {
        Set<String> fieldsWithSyntheticStatements = new HashSet<>(docFields);
        fieldsWithSyntheticStatements.removeAll(touchedFields);
        List<String> orderedFields = new ArrayList<>(fieldsWithSyntheticStatements);
        Collections.sort(orderedFields); // sorted so the generated content has a stable order
        for (String fieldName : orderedFields) {
            StatementExpression copyField = new StatementExpression(new InputExpression(fieldName),
                                                                    new PassthroughExpression(fieldName));
            ilscriptBuilder.content(copyField.toString());
        }
    }

    /** Returns whether the given expression contains a {@link SetLanguageExpression}. */
    private boolean setsLanguage(Expression expression) {
        SetsLanguageVisitor visitor = new SetsLanguageVisitor();
        visitor.visit(expression);
        return visitor.setsLanguage;
    }

    /** Returns whether the given expression outputs to the same field name it takes as input. */
    private boolean modifiesSelf(Expression expression) {
        ModifiesSelfVisitor visitor = new ModifiesSelfVisitor();
        visitor.visit(expression);
        return visitor.modifiesSelf();
    }

    /** Tracks the most recently visited input and output field names until they are equal. */
    private static class ModifiesSelfVisitor extends ExpressionVisitor {

        private String inputField = null;
        private String outputField = null;

        public boolean modifiesSelf() { return outputField != null && outputField.equals(inputField); }

        @Override
        protected void doVisit(Expression expression) {
            if (modifiesSelf()) return; // already decided: do not let later expressions overwrite the match

            if (expression instanceof InputExpression input) {
                inputField = input.getFieldName();
            }
            if (expression instanceof OutputExpression output) {
                outputField = output.getFieldName();
            }
        }
    }

    /** Records whether any visited expression is a {@link SetLanguageExpression}. */
    private static class SetsLanguageVisitor extends ExpressionVisitor {

        boolean setsLanguage = false;

        @Override
        protected void doVisit(Expression expression) {
            if (expression instanceof SetLanguageExpression)
                setsLanguage = true;
        }

    }

    /**
     * Collects the names of the fields touched by an expression: all output fields, and - if a
     * zcurve expression is visited - all input fields visited before it.
     */
    private static class FieldScanVisitor extends ExpressionVisitor {
        private List<String> touchedFields = new ArrayList<>();
        private final List<String> candidates = new ArrayList<>();

        @Override
        protected void doVisit(Expression exp) {
            if (exp instanceof OutputExpression output) {
                touchedFields.add(output.getFieldName());
            }
            if (exp instanceof InputExpression input) {
                candidates.add(input.getFieldName());
            }
            if (exp instanceof ZCurveExpression) {
                touchedFields.addAll(candidates);
            }
        }

        /** Returns the touched fields. This visitor cannot be used again after this is called. */
        Collection<String> touchedFields() {
            Collection<String> output = touchedFields;
            touchedFields = null; // deny re-use to try and avoid obvious bugs
            return output;
        }
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy