All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.schema.document.SDField Maven / Gradle / Ivy

// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.document;

import com.yahoo.document.CollectionDataType;
import com.yahoo.document.DataType;
import com.yahoo.document.DocumentType;
import com.yahoo.document.Field;
import com.yahoo.document.MapDataType;
import com.yahoo.document.StructDataType;
import com.yahoo.document.TensorDataType;
import com.yahoo.document.WeightedSetDataType;
import com.yahoo.documentmodel.OwnedTemporaryType;
import com.yahoo.documentmodel.TemporaryUnknownType;
import com.yahoo.language.Linguistics;
import com.yahoo.language.process.Embedder;
import com.yahoo.language.simple.SimpleLinguistics;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
import com.yahoo.tensor.TensorType;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.indexinglanguage.ExpressionSearcher;
import com.yahoo.vespa.indexinglanguage.ExpressionVisitor;
import com.yahoo.vespa.indexinglanguage.ScriptParserContext;
import com.yahoo.vespa.indexinglanguage.expressions.AttributeExpression;
import com.yahoo.vespa.indexinglanguage.expressions.Expression;
import com.yahoo.vespa.indexinglanguage.expressions.IndexExpression;
import com.yahoo.vespa.indexinglanguage.expressions.LowerCaseExpression;
import com.yahoo.vespa.indexinglanguage.expressions.ScriptExpression;
import com.yahoo.vespa.indexinglanguage.expressions.SummaryExpression;
import com.yahoo.vespa.indexinglanguage.parser.IndexingInput;
import com.yahoo.vespa.indexinglanguage.parser.ParseException;

import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.TreeMap;


/**
 * The field class represents a document field. It is used in
 * the Document class to get and set fields. Each SDField has a name, a numeric ID,
 * and a data type. The numeric ID is used when the fields are stored
 * in serialized form.
 *
 * @author bratseth
 */
public class SDField extends Field implements ImmutableSDField {

    /** Use this field for modifying index-structure, even if it doesn't have any indexing code */
    private boolean indexStructureField = false;

    /** The indexing statements to be applied to this value during indexing */
    private ScriptExpression indexingScript = new ScriptExpression();

    /** The default rank type for indices of this field */
    private RankType rankType = RankType.DEFAULT;

    /** Rank settings in a "rank" block for the field. */
    private final Ranking ranking = new Ranking();

    /**
     * The literal boost of this field. This boost is added to a rank score
     * when a query term matched as query term exactly (unnormalized and unstemmed).
     * Non-positive boosts cause no boosting: 0 allows boosts
     * to be specified in other rank profiles, while negative values
     * turn the capability off.
     */
    private int literalBoost = -1;

    /**
     * The weight of this field. This is a percentage,
     * so 100 is default to provide the identity transform.
     */
    private int weight = 100;

    /** Indicates what kind of matching should be done on this field */
    private Matching matching = new Matching();

    /** Dictionary settings, or null until created on demand by getOrSetDictionary() */
    private Dictionary dictionary = null;

    /** Attribute settings by attribute name. Never null; empty if there are none. */
    private final Map<String, Attribute> attributes = new TreeMap<>();

    /**
     * The stemming setting of this field, or null to use the default.
     * Default is determined by the owning search definition.
     */
    private Stemming stemming = null;

    /** How content of this field should be accent normalized etc. */
    private NormalizeLevel normalizing = new NormalizeLevel();

    /** Extra query commands of this field */
    private final List<String> queryCommands = new java.util.ArrayList<>(0);

    /** Summary fields defined in this field, by summary field name, in insertion order */
    private final Map<String, SummaryField> summaryFields = new java.util.LinkedHashMap<>(0);

    /** The explicit index settings on this field, by index name, in insertion order */
    private final Map<String, Index> indices = new java.util.LinkedHashMap<>();

    /** True once an id has been set through setId; setDataType then leaves the id untouched */
    private boolean idOverride = false;

    /** Struct fields defined in this field, by their name relative to this field */
    private final Map<String, SDField> structFields = new java.util.LinkedHashMap<>(0);

    /** The document that this field was declared in, or null */
    private final SDDocumentType repoDocType;

    /** The aliases declared for this field. May pertain to indexes or attributes */
    private final Map<String, String> aliasToName = new HashMap<>();

    /** Whether this is an extra field, i.e. not a document field */
    private boolean isExtraField = false;

    /** Latched to true by setIndexingScript once a script containing an attribute expression has been set */
    private boolean wasConfiguredToDoAttributing = false;
    /** Latched to true by setIndexingScript once a script containing an index expression has been set */
    private boolean wasConfiguredToDoIndexing = false;

    /**
     * Creates a new field with an explicit numeric id. This method is only used to create reserved fields.
     *
     * @param repo the document type recorded as the one this field was declared in, may be null
     * @param name the name of the field
     * @param id the numeric id of the field, passed on to the superclass constructor
     * @param dataType the datatype of the field
     */
    public SDField(SDDocumentType repo, String name, int id, DataType dataType) {
        super(name, id, dataType);
        this.repoDocType = repo;
        populate(name, dataType);
    }

    /**
     * Creates a new field which is not associated with any declaring document type (the repo is null).
     *
     * @param name the name of the field
     * @param dataType the datatype of the field
     */
    public SDField(String name, DataType dataType) {
        this(null, name, dataType);
    }

    /**
     * Creates a new field without an owner.
     *
     * @param repo the document type recorded as the one this field was declared in, may be null
     * @param name the name of the field
     * @param dataType the datatype of the field
     */
    public SDField(SDDocumentType repo, String name, DataType dataType) {
        this(repo, name, dataType, null);
    }

    /**
     * Creates a new field with the default matching settings and a struct field depth of 0.
     *
     * @param repo the document type recorded as the one this field was declared in, may be null
     * @param name the name of the field
     * @param dataType the datatype of the field
     * @param owner the owning document, may be null
     */
    protected SDField(SDDocumentType repo, String name, DataType dataType, SDDocumentType owner) {
        this(repo, name, dataType, owner, null, 0);
    }

    /**
     * Creates a new field
     *
     * @param repo the document type recorded as the one this field was declared in, may be null
     * @param name the name of the field
     * @param dataType the datatype of the field
     * @param owner the owning document (used to check for id collisions), may be null
     * @param fieldMatching the matching object to set for the field, or null to keep the default matching
     * @param recursion the struct field nesting depth of this field; sub-fields are created with this value + 1
     */
    protected SDField(SDDocumentType repo,
                      String name,
                      DataType dataType,
                      SDDocumentType owner,
                      Matching fieldMatching,
                      int recursion)
    {
        // Only the document type of the owner is handed to the superclass; no owner means null
        super(name, dataType, owner == null ? null : owner.getDocumentType());
        this.repoDocType = repo;
        this.structFieldDepth = recursion;
        if (fieldMatching != null)
            this.setMatching(fieldMatching);
        populate(name, dataType);
    }

    /** The struct nesting depth of this field: the recursion value given at construction, 0 by default */
    private int structFieldDepth = 0;

    /**
     * Adds the "type ..." query command describing the data type of this field.
     * Tensor types are rendered from their tensor type, weighted sets as
     * "WeightedSet&lt;nested type name&gt;", and all other types by their name.
     *
     * @param name the name of the field, only used in the error message
     * @param dataType the datatype of the field
     * @throws IllegalArgumentException if a tensor type has an indexed dimension without a size
     */
    private void populate(String name, DataType dataType) {
        if (dataType instanceof TensorDataType tensorDataType) {
            TensorType tensorType = tensorDataType.getTensorType();
            boolean hasUnsizedIndexedDimension =
                    tensorType.dimensions().stream().anyMatch(dim -> dim.isIndexed() && dim.size().isEmpty());
            if (hasUnsizedIndexedDimension) {
                throw new IllegalArgumentException("Illegal type in field " + name + " type " + tensorType +
                                                   ": Dense tensor dimensions must have a size");
            }
            addQueryCommand("type " + tensorType);
            return;
        }
        if (dataType instanceof WeightedSetDataType weightedSetType) {
            var nestedTypeName = weightedSetType.getNestedType().getName();
            addQueryCommand("type WeightedSet<" + nestedTypeName + ">");
            return;
        }
        addQueryCommand("type " + dataType.getName());
    }

    /** Sets whether this is an extra field, i.e. not a document field (see isDocumentField) */
    public void setIsExtraField(boolean isExtra) {
        isExtraField = isExtra;
    }

    /** Returns whether this has been marked as an extra field by setIsExtraField. Default is false. */
    @Override
    public boolean isExtraField() {
        return isExtraField;
    }

    /** Returns true if this is not an extra field */
    public boolean isDocumentField() { return ! isExtraField; }

    /** Always returns false for this class */
    @Override
    public boolean isImportedField() {
        return false;
    }

    /** Returns whether an AttributeExpression is found by containsExpression */
    @Override
    public boolean doesAttributing() {
        return containsExpression(AttributeExpression.class);
    }

    /** Returns whether an IndexExpression is found by containsExpression */
    @Override
    public boolean doesIndexing() {
        return containsExpression(IndexExpression.class);
    }

    /**
     * Returns whether this field is summarized: true if this uses a struct and any of its
     * struct fields does summarying, otherwise whether a SummaryExpression is found by containsExpression.
     */
    public boolean doesSummarying() {
        if ( ! usesStruct()) return containsExpression(SummaryExpression.class);

        for (SDField subField : getStructFields()) {
            if (subField.doesSummarying()) return true;
        }
        return containsExpression(SummaryExpression.class);
    }

    /** Returns whether a LowerCaseExpression is found by containsExpression */
    @Override
    public boolean doesLowerCasing() {
        return containsExpression(LowerCaseExpression.class);
    }

    /**
     * Returns whether the indexing script of this field contains an expression of the given class.
     *
     * @param searchFor the expression class to look for
     * @return true if a matching expression is found
     */
    @Override
    public <T extends Expression> boolean containsExpression(Class<T> searchFor) {
        return findExpression(searchFor) != null;
    }

    /**
     * Searches the indexing script of this field for an expression of the given class.
     *
     * @param searchFor the expression class to look for
     * @return the result of the expression search; containsExpression treats null as "not found"
     */
    private <T extends Expression> T findExpression(Class<T> searchFor) {
        return new ExpressionSearcher<>(searchFor).searchIn(indexingScript);
    }

    /**
     * Adds the sources this field contributes to the given summary field.
     * A field not using a struct adds its own name as a source if it does summarying.
     *
     * How this works for structs: When at least one sub-field in a struct is to
     * be used for summary, that whole struct field is included in summary.cfg. Then,
     * vsmsummary.cfg specifies the sub-fields used for each struct field.
     * So we recurse into each struct, adding the destination classes set for each sub-field
     * to the main summary-field for the struct field.
     *
     * @param summaryField the summary field to add sources and destinations to
     */
    public void addSummaryFieldSources(SummaryField summaryField) {
        if ( ! usesStruct()) {
            if (doesSummarying()) {
                summaryField.addSource(getName());
            }
            return;
        }

        for (SDField subField : getStructFields()) {
            for (SummaryField subSummary : subField.getSummaryFields().values()) {
                for (String destination : subSummary.getDestinations()) {
                    summaryField.addDestination(destination);
                }
            }
            subField.addSummaryFieldSources(summaryField);
        }
    }

    /** Set to true when actuallyMakeStructFields has run to completion, making later calls return immediately */
    private boolean doneStructFields = false;

    /**
     * Lazily creates the struct fields of this field. This does the work at most once per
     * successful completion: later calls return immediately.
     *
     * A map type gets the two sub-fields "key" and "value". Otherwise the (collection-unwrapped)
     * type supplies one sub-field per field of the struct, where each sub-field is named
     * "&lt;name of this&gt;.&lt;sub-field name&gt;" and starts out with a copy of the matching settings of this,
     * merged with the matching settings of the corresponding field of the struct's document type.
     *
     * @throws IllegalArgumentException if struct fields were created but no matching type is found
     *         in the repo document type, or if the matching type has a field which is not an SDField
     */
    private void actuallyMakeStructFields() {
        if (doneStructFields) return;
        if (getFirstStructOrMapRecursive() == null) {
            doneStructFields = true;
            return;
        }
        var sdoc = repoDocType;
        var dataType = getDataType();

        // Creates and registers the sub-field with the given relative name, unless one is already registered
        java.util.function.BiConsumer<String, DataType> supplyStructField = (fieldName, fieldType) -> {
            if (structFields.containsKey(fieldName)) return;
            Matching subFieldMatching = new Matching();
            subFieldMatching.merge(this.matching);
            String subName = getName().concat(".").concat(fieldName);
            var subField = new SDField(sdoc, subName, fieldType, null,
                                       subFieldMatching, structFieldDepth + 1);
            structFields.put(fieldName, subField);
        };

        if (dataType instanceof MapDataType mdt) {
            supplyStructField.accept("key", mdt.getKeyType());
            supplyStructField.accept("value", mdt.getValueType());
        } else {
            if (structFieldDepth >= 10) {
                // too risky, infinite recursion
                doneStructFields = true;
                return;
            }
            if (dataType instanceof CollectionDataType) {
                dataType = ((CollectionDataType)dataType).getNestedType();
            }
            if ((dataType instanceof MapDataType) || (dataType instanceof CollectionDataType)) {
                // "array of map" or "array of array" will not have any struct fields
                // TODO: consider what this would mean
                doneStructFields = true;
                return;
            }
            SDDocumentType subType = sdoc != null ? sdoc.getType(dataType.getName()) : null;
            boolean isTemporaryType = (dataType instanceof TemporaryUnknownType)
                                      || (dataType instanceof OwnedTemporaryType);
            if (isTemporaryType && subType != null) {
                // Temporary types take their fields from the type resolved in the repo document type
                for (Field field : subType.fieldSet()) {
                    supplyStructField.accept(field.getName(), field.getDataType());
                }
            } else if (dataType instanceof StructDataType sdt) {
                for (Field field : sdt.getFields()) {
                    supplyStructField.accept(field.getName(), field.getDataType());
                }
            }
            if ((subType == null) && (!structFields.isEmpty())) {
                throw new IllegalArgumentException("Cannot find matching (repo=" + sdoc + ") for subfields in "
                                                   + this + " [" + getDataType() + getDataType().getClass() +
                                                   "] with " + structFields.size() + " struct fields");
            }
            // populate struct fields with matching
            if (subType != null) {
                for (Field f : subType.fieldSet()) {
                    if (f instanceof SDField sdField) {
                        SDField subField = structFields.get(sdField.getName());
                        if (subField != null) {
                            // we just made this with a copy of our matching (see above)
                            Matching subFieldMatching = subField.getMatching();
                            subFieldMatching.merge(sdField.getMatching());
                            subField.setMatching(subFieldMatching);
                        }
                    } else {
                        throw new IllegalArgumentException("Field in struct is not SDField " + f.getName());
                    }
                }
            }
            // else ("missing subtype for struct fields in: " + this + " type " + getDataType() + " [" + getDataType().getClass().getSimpleName() + "]");
        }
        doneStructFields = true;
    }

    /**
     * Sets the id of this field through the superclass and records that the id was set explicitly,
     * such that setDataType will no longer recalculate it.
     */
    public void setId(int fieldId, DocumentType owner) {
        super.setId(fieldId, owner);
        idOverride = true;
    }

    /**
     * Unwraps the data type of this field through any collection types (to their nested type)
     * and map types (to their value type), and returns what remains if it is a struct.
     *
     * @return the innermost struct type, or null if the innermost type is not a struct
     */
    public StructDataType getFirstStructRecursive() {
        DataType current = getDataType();
        // Currently no nesting of collections
        while (current instanceof CollectionDataType || current instanceof MapDataType) {
            if (current instanceof CollectionDataType collection) {
                current = collection.getNestedType();
            } else {
                current = ((MapDataType) current).getValueType();
            }
        }
        if (current instanceof StructDataType struct) {
            return struct;
        }
        return null;
    }

    /**
     * Unwraps the data type of this field through any collection types and returns what remains
     * if it is a struct or a map.
     *
     * @return the innermost struct or map type, or null if the innermost type is neither
     */
    private DataType getFirstStructOrMapRecursive() {
        DataType current = getDataType();
        // Currently no nesting of collections
        while (current instanceof CollectionDataType collection) {
            current = collection.getNestedType();
        }
        if (current instanceof StructDataType || current instanceof MapDataType) {
            return current;
        }
        return null;
    }

    /** Returns whether getFirstStructRecursive finds a struct type for this field */
    private boolean usesStruct() {
        return getFirstStructRecursive() != null;
    }

    /** Returns whether the type of this field, with any collections unwrapped, is a struct or a map */
    @Override
    public boolean usesStructOrMap() {
        return getFirstStructOrMapRecursive() != null;
    }

    /** Returns whether some non-empty indexing script set on this field has done attributing */
    @Override
    public boolean wasConfiguredToDoAttributing() {
        return wasConfiguredToDoAttributing;
    }

    /** Returns whether some non-empty indexing script set on this field has done indexing */
    @Override
    public boolean wasConfiguredToDoIndexing() {
        return wasConfiguredToDoIndexing;
    }

    /** Returns true if this field has exactly one attribute, and that attribute has the name of this field */
    @Override
    public boolean hasSingleAttribute() {
        return getAttributes().size() == 1
               && getAttributes().get(getName()) != null;
    }

    /**
     * Parse an indexing expression which will use the simple linguistics implementation suitable for testing,
     * and the Embedder.throwsOnUse embedders.
     *
     * @param schemaName the schema name passed on to setIndexingScript
     * @param script the indexing script text to parse
     */
    public void parseIndexingScript(String schemaName, String script) {
        parseIndexingScript(schemaName, script, new SimpleLinguistics(), Embedder.throwsOnUse.asMap());
    }

    /**
     * Parses the given indexing script and sets the result as the indexing script of this field.
     *
     * @param schemaName the schema name passed on to setIndexingScript
     * @param script the indexing script text to parse
     * @param linguistics the linguistics implementation given to the script parser
     * @param embedders the embedders given to the script parser
     * @throws IllegalArgumentException if the script cannot be parsed; the parse exception is kept as the cause
     */
    public void parseIndexingScript(String schemaName, String script, Linguistics linguistics, Map<String, Embedder> embedders) {
        try {
            ScriptParserContext config = new ScriptParserContext(linguistics, embedders);
            config.setInputStream(new IndexingInput(script));
            setIndexingScript(schemaName, ScriptExpression.newInstance(config));
        } catch (ParseException e) {
            throw new IllegalArgumentException("Failed to parse script '" + script + "'", e);
        }
    }

    /**
     * Sets the indexing script of this field, where null means an empty script.
     *
     * For a non-empty script this also latches the "was configured to do attributing/indexing" flags,
     * adds an attribute for each attribute expression in the script which does not already have one
     * (only when this field does not use a struct or map), and sets the same script on all struct fields.
     *
     * @param schemaName the schema name given to attributes created from the script
     * @param exp the script to set, or null to not use a script
     */
    public void setIndexingScript(String schemaName, ScriptExpression exp) {
        ScriptExpression script = (exp == null) ? new ScriptExpression() : exp;
        indexingScript = script;
        if (script.isEmpty()) {
            return; // TODO: This causes empty expressions not to be propagate to struct fields!! BAD BAD BAD!!
        }

        if (!wasConfiguredToDoAttributing()) {
            wasConfiguredToDoAttributing = doesAttributing();
        }
        if (!wasConfiguredToDoIndexing()) {
            wasConfiguredToDoIndexing = doesIndexing();
        }

        if (!usesStructOrMap()) {
            // Makes sure every attribute expression in the script has an attribute on this field
            ExpressionVisitor attributeAdder = new ExpressionVisitor() {

                @Override
                protected void doVisit(Expression expression) {
                    if (expression instanceof AttributeExpression attributeExpression) {
                        String targetName = attributeExpression.getFieldName();
                        if (targetName == null) {
                            targetName = getName();
                        }
                        if (attributes.get(targetName) == null) {
                            addAttribute(new Attribute(schemaName, targetName, targetName, getDataType()));
                        }
                    }
                }
            };
            attributeAdder.visit(script);
        }

        for (SDField subField : getStructFields()) {
            subField.setIndexingScript(schemaName, script);
        }
    }

    /** Returns the indexing script of this field. setIndexingScript replaces null by an empty script. */
    @Override
    public ScriptExpression getIndexingScript() { return indexingScript; }

    /**
     * Sets the data type of this field. Setting the URI type also changes the normalizing setting
     * (infers lowercasing) and turns stemming off. Unless an id has been set explicitly through setId,
     * the field id is recalculated.
     */
    @SuppressWarnings("deprecation")
    @Override
    public void setDataType(DataType type) {
        if (type.equals(DataType.URI)) { // Different defaults, naturally
            normalizing.inferLowercase();
            stemming = Stemming.NONE;
        }
        this.dataType = type;
        if ( ! idOverride) {
            // No id was set through setId, so recalculate it after the type change
            this.fieldId = calculateIdV7(null);
        }
    }

    /** Returns whether this field is used for modifying index-structure, even if it doesn't have any indexing code */
    @Override
    public boolean isIndexStructureField() {
        return indexStructureField;
    }

    /** Sets whether this field is used for modifying index-structure, even if it doesn't have any indexing code */
    public void setIndexStructureField(boolean indexStructureField) {
        this.indexStructureField = indexStructureField;
    }

    /** Returns true if this field has an indexing script and does indexing */
    @Override
    public boolean hasIndex() {
        return (getIndexingScript() != null) && doesIndexing();
    }

    /** Sets the literal boost of this field. See getLiteralBoost for the meaning of the value. */
    public void setLiteralBoost(int literalBoost) { this.literalBoost=literalBoost; }

    /**
     * Returns the literal boost of this field. This boost is added to a literal score
     * when a query term matched as query term exactly (unnormalized and unstemmed).
     * Default is -1.
     */
    @Override
    public int getLiteralBoost() { return literalBoost; }

    /** Sets the weight of this field, as a percentage */
    public void setWeight(int weight) { this.weight=weight; }

    /** Returns the weight of this field as a percentage, or 100 (the default) if nothing is set */
    @Override
    public int getWeight() { return weight; }

    /**
     * Returns what kind of matching type should be applied.
     * This returns the settings object held by this field, not a copy.
     */
    @Override
    public Matching getMatching() { return matching; }

    /**
     * Sets what kind of matching type should be applied.
     * (Token matching is default, PREFIX, SUBSTRING, SUFFIX are alternatives)
     * This replaces the matching object of this field only; struct fields are not touched.
     */
    public void setMatching(Matching matching) { this.matching=matching; }

    /**
     * Returns Dictionary settings, or null if none have been created yet.
     */
    public Dictionary getDictionary() { return dictionary; }
    /** Returns the Dictionary settings of this field, creating and storing new ones first if there are none */
    public Dictionary getOrSetDictionary() {
        if (dictionary != null) return dictionary;

        dictionary = new Dictionary();
        return dictionary;
    }

    /**
     * Applies the given match type to the matching settings of this field,
     * and recursively to those of all its struct fields.
     */
    // TODO: When this is not the same as getMatching().setthis we have a potential for inconsistency. Find the right
    //       Matching object for struct fields at lookup time instead.
    public void setMatchingType(MatchType type) {
        getMatching().setType(type);
        getStructFields().forEach(subField -> subField.setMatchingType(type));
    }

    /**
     * Applies the given match casing to the matching settings of this field,
     * and recursively to those of all its struct fields.
     */
    // TODO: When this is not the same as getMatching().setthis we have a potential for inconsistency. Find the right
    //       Matching object for struct fields at lookup time instead.
    public void setMatchingCase(Case casing) {
        getMatching().setCase(casing);
        getStructFields().forEach(subField -> subField.setMatchingCase(casing));
    }
    /**
     * Set matching algorithm for this field and all subfields, including struct fields nested
     * at any depth below this.
     *
     * @param algorithm the match algorithm to set
     */
    // TODO: When this is not the same as getMatching().setthis we have a potential for inconsistency. Find the right
    //       Matching object for struct fields at lookup time instead.
    public void setMatchingAlgorithm(MatchAlgorithm algorithm) {
        this.getMatching().setAlgorithm(algorithm);
        for (SDField structField : getStructFields()) {
            // Recurse rather than only touching the direct struct fields, such that deeper levels
            // are updated too, in the same way as setMatchingType and setMatchingCase do it
            structField.setMatchingAlgorithm(algorithm);
        }
    }

    /**
     * Adds an explicit index defined in this field, replacing any index already added with the same name.
     *
     * @return the index given as argument
     */
    public Index addIndex(Index index) {
        indices.put(index.getName(), index);
        return index;
    }

    /**
     * Returns an index, or null if no index with this name has had
     * some explicit settings applied in this field (even if this returns null,
     * the index may be implicitly defined by an indexing statement).
     * Only indexes added through addIndex are found here.
     */
    @Override
    public Index getIndex(String name) {
        return indices.get(name);
    }

    /**
     * Returns whether this field has an index targeting the given name, either explicitly
     * (added to this field) or implicitly (the name is the name of this field, and this does indexing).
     */
    @Override
    public boolean existsIndex(String name) {
        boolean explicitlyDefined = indices.get(name) != null;
        return explicitlyDefined || (name.equals(getName()) && doesIndexing());
    }

    /**
     * Returns the indices explicitly defined on this field, by index name.
     * This is the live internal map, not a copy.
     *
     * @return defined indices on this
     */
    @Override
    public Map<String, Index> getIndices() {
        return indices;
    }

    /**
     * Sets the default rank type of the indices of this field, and also sets this rank type
     * on every index explicitly defined here which has no rank type set.
     * (This complex behavior is due to the fact that we would prefer to have rank types
     * per field, not per index)
     */
    public void setRankType(RankType rankType) {
        this.rankType = rankType;
        for (Index explicitIndex : getIndices().values()) {
            if (explicitIndex.getRankType() != null) continue; // keep a rank type which is already set
            explicitIndex.setRankType(rankType);
        }
    }

    /** Returns the rank settings set in a "rank" block for this field. This is never null. */
    @Override
    public Ranking getRanking() { return ranking; }

    /**
     * Returns the default rank type of indices of this field. This is RankType.DEFAULT
     * unless another value (possibly null) has been passed to setRankType.
     */
    @Override
    public RankType getRankType() { return this.rankType; }

    /**
     * Returns the search-time attribute settings of this field, by attribute name.
     * This is never null, but empty if no attribute is set. The returned map is the live internal map.
     */
     // TODO: Make unmodifiable
    @Override
    public Map<String, Attribute> getAttributes() { return attributes; }

    /** Returns the attribute having the same name as this field, or null if there is none */
    public Attribute getAttribute() {
        return attributes.get(getName());
    }

    /**
     * Adds an attribute to this field, replacing any attribute already present with the same name.
     * An attribute whose name is null or empty is first given the name of this field.
     *
     * @return the attribute given as argument
     */
    public Attribute addAttribute(Attribute attribute) {
        String attributeName = attribute.getName();
        boolean unnamed = (attributeName == null) || attributeName.isEmpty();
        if (unnamed) {
            attribute.setName(getName());
        }
        attributes.put(attribute.getName(), attribute);
        return attribute;
    }

    /**
     * Returns the stemming setting of this field.
     * Default is determined by the owning schema; use getStemming(Schema) to resolve it.
     *
     * @return the stemming setting of this, or null, to use the default
     */
    @Override
    public Stemming getStemming() { return stemming; }

    /**
     * Returns how this field should be stemmed in the given schema:
     * the stemming setting of this field if set, otherwise the stemming setting of the schema.
     */
    @Override
    public Stemming getStemming(Schema schema) {
        return (stemming == null) ? schema.getStemming() : stemming;
    }

    /** Returns this */
    @Override
    public Field asField() {
        return this;
    }

    /**
     * Sets how this field should be stemmed, or set to null to use the default.
     * Note that setDataType overwrites this with Stemming.NONE when the type is URI.
     */
    public void setStemming(Stemming stemming) {
        this.stemming = stemming;
    }

    /** Returns an unmodifiable view of the summary fields defined in this, by summary field name */
    @Override
    public Map<String, SummaryField> getSummaryFields() {
        return Collections.unmodifiableMap(summaryFields);
    }

    /** Removes all summary fields defined in this field */
    public void removeSummaryFields() {
        summaryFields.clear();
    }

    /** Adds a summary field, replacing any summary field already added with the same name */
    public void addSummaryField(SummaryField summaryField) {
        summaryFields.put(summaryField.getName(),summaryField);
    }

    /**
     * Returns the summary field with the given name which has been added to this field.
     * Returns null if there is no such summary field defined.
     */
    @Override
    public SummaryField getSummaryField(String name) {
        return summaryFields.get(name);
    }

    /**
     * Returns the summary field with the given name which has been added to this field, optionally creating it.
     * A created summary field gets the given name and the data type of this field.
     *
     * @param create true to create the summary field and add it to this field before returning if it is missing
     * @return the summary field, or null if not present and create is false
     */
    public SummaryField getSummaryField(String name, boolean create) {
        if (create && summaryFields.get(name) == null) {
            addSummaryField(new SummaryField(name, getDataType()));
        }
        return summaryFields.get(name);
    }

    /**
     * Returns the static struct fields of this field, creating them first if that is not already done.
     * The returned collection is a live view of the internal struct field map.
     */
    @Override
    public Collection<SDField> getStructFields() {
        actuallyMakeStructFields();
        return structFields.values();
    }

    /**
     * Returns a struct field defined in this field. A dotted name is resolved one segment at a time:
     * the part before the first dot names a struct field of this, which is asked for the rest.
     * Returns null if there is no such struct field defined.
     */
    @Override
    public SDField getStructField(String name) {
        actuallyMakeStructFields();
        int firstDot = name.indexOf('.');
        if (firstDot < 0) {
            return structFields.get(name);
        }
        SDField parent = structFields.get(name.substring(0, firstDot));
        return (parent == null) ? null : parent.getStructField(name.substring(firstDot + 1));
    }

    /**
     * Returns how the content of this field should be accent normalized etc.
     * This returns the settings object held by this field, not a copy.
     */
    @Override
    public NormalizeLevel getNormalizing() { return normalizing; }

    /**
     * Change how the content of this field should be accent normalized etc.
     * This replaces the settings object held by this field.
     */
    public void setNormalizing(NormalizeLevel level) { normalizing = level; }

    /** Appends a query command to this field. No check for duplicates is done. */
    public void addQueryCommand(String name) {
       queryCommands.add(name);
    }

    /** Returns whether exactly this query command string has been added to this field */
    public boolean hasQueryCommand(String name) {
        return queryCommands.contains(name);
    }

    /** Returns the query commands of this field, in the order added. This is the live internal list. */
    @Override
    public List<String> getQueryCommands() { return queryCommands; }

    /** Returns true if the other object is an SDField which the superclass considers equal to this */
    @Override
    public boolean equals(Object other) {
        return (other instanceof SDField) && super.equals(other);
    }

    /** Returns the hash code of the name of this field */
    @Override
    public int hashCode() {
        return getName().hashCode();
    }

    /** Returns "field" followed by the name of this field in single quotes */
    @Override
    public String toString() {
        return "field '" + getName() + "'";
    }

    /**
     * Returns the aliases declared for this field, mapping each alias to the name it refers to.
     * This is the live internal map, not a copy.
     */
    @Override
    public Map<String, String> getAliasToName() {
        return aliasToName;
    }

    /**
     * Returns true if this is not an extra field, or if this is an extra field which has
     * a mutable attribute with the same name as the field.
     */
    @Override
    public boolean hasFullIndexingDocprocRights() {
        Attribute ownAttribute = getAttributes().get(getName());
        if ( ! isExtraField()) return true;
        return (ownAttribute != null) && ownAttribute.isMutable();
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy