All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.yahoo.schema.parser.ConvertParsedFields Maven / Gradle / Ivy

There is a newer version: 8.441.21
Show newest version
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.parser;

import com.yahoo.document.DataType;
import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.parser.ConvertParsedTypes.TypeResolver;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.BooleanIndexDefinition;
import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.Dictionary;
import com.yahoo.schema.document.NormalizeLevel;
import com.yahoo.schema.document.RankType;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.document.Sorting;
import com.yahoo.schema.document.annotation.SDAnnotationType;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;

import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;

/**
 * Helper for converting ParsedField etc to SDField with settings
 *
 * @author arnej27959
 **/
public class ConvertParsedFields {

    private final TypeResolver context;
    private final Map structProxies;
    
    ConvertParsedFields(TypeResolver context, Map structProxies) {
        this.context = context;
        this.structProxies = structProxies;
    }

    static void convertMatchSettings(SDField field, ParsedMatchSettings parsed) {
        parsed.getMatchType().ifPresent(matchingType -> field.setMatchingType(matchingType));
        parsed.getMatchCase().ifPresent(casing -> field.setMatchingCase(casing));
        parsed.getGramSize().ifPresent(gramSize -> field.getMatching().setGramSize(gramSize));
        parsed.getMaxLength().ifPresent(maxLength -> field.getMatching().maxLength(maxLength));
        parsed.getMaxTermOccurrences().ifPresent(maxTermOccurrences -> field.getMatching().maxTermOccurrences(maxTermOccurrences));
        parsed.getMaxTokenLength().ifPresent(maxTokenLength -> field.getMatching().maxTokenLength(maxTokenLength));
        parsed.getMatchAlgorithm().ifPresent
            (matchingAlgorithm -> field.setMatchingAlgorithm(matchingAlgorithm));
        parsed.getExactTerminator().ifPresent
            (exactMatchTerminator -> field.getMatching().setExactMatchTerminator(exactMatchTerminator));
    }

    void convertSorting(Schema schema, SDField field, ParsedSorting parsed, String name) {
        Attribute attribute = field.getAttributes().get(name);
        if (attribute == null) {
            attribute = new Attribute(schema.getName(), field.getName(), name, field.getDataType());
            field.addAttribute(attribute);
        }
        Sorting sorting = attribute.getSorting();
        if (parsed.getAscending()) {
            sorting.setAscending();
        } else {
            sorting.setDescending();
        }
        parsed.getFunction().ifPresent(function -> sorting.setFunction(function));
        parsed.getStrength().ifPresent(strength -> sorting.setStrength(strength));
        parsed.getLocale().ifPresent(locale -> sorting.setLocale(locale));
    }

    void convertAttribute(Schema schema, SDField field, ParsedAttribute parsed) {
        String name = parsed.name();
        String fieldName = field.getName();
        Attribute attribute = null;
        if (fieldName.endsWith("." + name)) {
            attribute = field.getAttributes().get(field.getName());
        }
        if (attribute == null) {
            attribute = field.getAttributes().get(name);
            if (attribute == null) {
                attribute = new Attribute(schema.getName(), field.getName(), name, field.getDataType());
                field.addAttribute(attribute);
            }
        }
        attribute.setPaged(parsed.getPaged());
        attribute.setFastSearch(parsed.getFastSearch());
        if (parsed.getFastRank()) {
            attribute.setFastRank(parsed.getFastRank());
        }
        attribute.setFastAccess(parsed.getFastAccess());
        attribute.setMutable(parsed.getMutable());
        attribute.setEnableOnlyBitVector(parsed.getEnableOnlyBitVector());

        // attribute.setTensorType(?)

        for (String alias : parsed.getAliases()) {
            field.getAliasToName().put(alias, parsed.lookupAliasedFrom(alias));
        }
        var distanceMetric = parsed.getDistanceMetric();
        if (distanceMetric.isPresent()) {
            String upper = distanceMetric.get().toUpperCase(Locale.ENGLISH);
            upper = upper.replace('-', '_');
            attribute.setDistanceMetric(Attribute.DistanceMetric.valueOf(upper));
        }
        var sorting = parsed.getSorting();
        if (sorting.isPresent()) {
            convertSorting(schema, field, sorting.get(), name);
        }
    }

    private void convertRankType(SDField field, String indexName, String rankType) {
        RankType type = RankType.fromString(rankType);
        if (indexName == null || indexName.equals("")) {
            field.setRankType(type); // Set default if the index is not specified.
        } else {
            Index index = field.getIndex(indexName);
            if (index == null) {
                index = new Index(indexName);
                field.addIndex(index);
            }
            index.setRankType(type);
        }
    }

    private void convertNormalizing(SDField field, String setting) {
        NormalizeLevel.Level level;
        if ("none".equals(setting)) {
            level = NormalizeLevel.Level.NONE;
        } else if ("codepoint".equals(setting)) {
            level = NormalizeLevel.Level.CODEPOINT;
        } else if ("lowercase".equals(setting)) {
            level = NormalizeLevel.Level.LOWERCASE;
        } else if ("accent".equals(setting)) {
            level = NormalizeLevel.Level.ACCENT;
        } else if ("all".equals(setting)) {
            level = NormalizeLevel.Level.ACCENT;
        } else {
            throw new IllegalArgumentException("invalid normalizing setting: " + setting);
        }
        field.setNormalizing(new NormalizeLevel(level, true));
    }

    // from grammar, things that can be inside struct-field block
    private void convertCommonFieldSettings(Schema schema, SDField field, ParsedField parsed) {
        convertMatchSettings(field, parsed.matchSettings());
        var indexing = parsed.getIndexing();
        if (indexing.isPresent()) {
            field.setIndexingScript(schema.getName(), indexing.get().script());
        }
        if (field.doesAttributing() && !GeoPos.isAnyPos(field.getDataType())) {
            Attribute.validateDataType(schema.getName(), field.getName(), field.getDataType());
        }
        parsed.getWeight().ifPresent(value -> field.setWeight(value));
        parsed.getStemming().ifPresent(value -> field.setStemming(value));
        parsed.getNormalizing().ifPresent(value -> convertNormalizing(field, value));
        for (var attribute : parsed.getAttributes()) {
            convertAttribute(schema, field, attribute);
        }
        for (var summaryField : parsed.getSummaryFields()) {
            var dataType = field.getDataType();
            var otherType = summaryField.getType();
            if (otherType != null && summaryField.getHasExplicitType()) {
                schema.getDeployLogger().log(Level.WARNING, () -> "For schema '" + schema.getName() +
                        "', field '" + field.getName() +
                        "', summary '" + summaryField.name() +
                        "': Specifying the type is deprecated, ignored and will be an error in Vespa 9." +
                        " Remove the type specification to silence this warning.");
                dataType = context.resolveType(otherType);
            }
            convertSummaryField(field, summaryField, dataType);
        }
        for (String command : parsed.getQueryCommands()) {
            field.addQueryCommand(command);
        }
        for (var structField : parsed.getStructFields()) {
            convertStructField(schema, field, structField);
        }
        if (parsed.hasLiteral()) {
            field.getRanking().setLiteral(true);
        }
        if (parsed.hasFilter()) {
            field.getRanking().setFilter(true);
        }
        if (parsed.hasNormal()) {
            field.getRanking().setNormal(true);
        }
    }

    private void convertStructField(Schema schema, SDField field, ParsedField parsed) {
        SDField structField = field.getStructField(parsed.name());
        if (structField == null ) {
            throw new IllegalArgumentException("Struct field '" + parsed.name() + "' has not been defined in struct " +
                                               "for field '" + field.getName() + "'.");
        }
        convertCommonFieldSettings(schema, structField, parsed);
    }

    private void convertExtraFieldSettings(Schema schema, SDField field, ParsedField parsed) {
        String name = parsed.name();
        for (var dictOp : parsed.getDictionaryOptions()) {
            var dictionary = field.getOrSetDictionary();
            switch (dictOp) {
                case HASH -> dictionary.updateType(Dictionary.Type.HASH);
                case BTREE -> dictionary.updateType(Dictionary.Type.BTREE);
                case CASED -> dictionary.updateMatch(Case.CASED);
                case UNCASED -> dictionary.updateMatch(Case.UNCASED);
            }
        }
        for (var index : parsed.getIndexes()) {
            convertIndex(field, index);
        }
        for (var alias : parsed.getAliases()) {
            field.getAliasToName().put(alias, parsed.lookupAliasedFrom(alias));
        }
        parsed.getRankTypes().forEach((indexName, rankType) -> convertRankType(field, indexName, rankType));
        parsed.getSorting().ifPresent(sortInfo -> convertSorting(schema, field, sortInfo, name));
        if (parsed.hasBolding()) {
            // TODO must it be so ugly:
            SummaryField summaryField = field.getSummaryField(name, true);
            summaryField.addSource(name);
            summaryField.addDestination("default");
            summaryField.setTransform(summaryField.getTransform().bold());
        }
    }

    static void convertSummaryFieldSettings(SummaryField summary, ParsedSummaryField parsed) {
        var transform = SummaryTransform.NONE;
        if (parsed.getMatchedElementsOnly()) {
            transform = SummaryTransform.MATCHED_ELEMENTS_FILTER;
        } else if (parsed.getDynamic()) {
            transform = SummaryTransform.DYNAMICTEASER;
        } else if (parsed.getTokens()) {
            transform = SummaryTransform.TOKENS;
        }
        if (parsed.getBolded()) {
            transform = transform.bold();
        }
        summary.setTransform(transform);
        for (String source : parsed.getSources()) {
            summary.addSource(source);
        }
        for (String destination : parsed.getDestinations()) {
            summary.addDestination(destination);
        }
        summary.setImplicit(false);
    }

    private void convertSummaryField(SDField field, ParsedSummaryField parsed, DataType type) {
        var summary = new SummaryField(parsed.name(), type);
        convertSummaryFieldSettings(summary, parsed);
        summary.addDestination("default");
        if (parsed.getSources().isEmpty()) {
            summary.addSource(field.getName());
        }
        field.addSummaryField(summary);
    }

    private void convertIndex(SDField field, ParsedIndex parsed) {
        String indexName = parsed.name();
        Index index = field.getIndex(indexName);
        if (index == null) {
            index = new Index(indexName);
            field.addIndex(index);
        }
        convertIndexSettings(index, parsed);
    }

    private void convertIndexSettings(Index index, ParsedIndex parsed) {
        parsed.getPrefix().ifPresent(prefix -> index.setPrefix(prefix));
        for (String alias : parsed.getAliases()) {
            index.addAlias(alias);
        }
        parsed.getStemming().ifPresent(stemming -> index.setStemming(stemming));
        var arity = parsed.getArity();
        var lowerBound = parsed.getLowerBound();
        var upperBound = parsed.getUpperBound();
        var densePostingListThreshold = parsed.getDensePostingListThreshold();
        if (arity.isPresent() || 
            lowerBound.isPresent() ||
            upperBound.isPresent() ||
            densePostingListThreshold.isPresent())
        {
            var bid = new BooleanIndexDefinition(arity, lowerBound, upperBound, densePostingListThreshold);
            index.setBooleanIndexDefiniton(bid);
        }
        parsed.getEnableBm25().ifPresent(enableBm25 -> index.setInterleavedFeatures(enableBm25));
        parsed.getHnswIndexParams().ifPresent
            (hnswIndexParams -> index.setHnswIndexParams(hnswIndexParams));
    }

    SDField convertDocumentField(Schema schema, SDDocumentType document, ParsedField parsed) {
        String name = parsed.name();
        DataType dataType = context.resolveType(parsed.getType());
        var field = new SDField(document, name, dataType);
        convertCommonFieldSettings(schema, field, parsed);
        convertExtraFieldSettings(schema, field, parsed);
        document.addField(field);
        return field;
    }

    void convertExtraField(Schema schema, ParsedField parsed) {
        String name = parsed.name();
        DataType dataType = context.resolveType(parsed.getType());
        var field = new SDField(schema.getDocument(), name, dataType);
        convertCommonFieldSettings(schema, field, parsed);
        convertExtraFieldSettings(schema, field, parsed);
        schema.addExtraField(field);
    }

    void convertExtraIndex(Schema schema, ParsedIndex parsed) {
        Index index = new Index(parsed.name());
        convertIndexSettings(index, parsed);
        schema.addIndex(index);
    }

    SDDocumentType convertStructDeclaration(Schema schema, SDDocumentType document, ParsedStruct parsed) {
        // TODO - can we cleanup this mess
        var structProxy = new SDDocumentType(parsed.name(), schema);
        for (var parsedField : parsed.getFields()) {
            var fieldType = context.resolveType(parsedField.getType());
            var field = new SDField(document, parsedField.name(), fieldType);
            convertCommonFieldSettings(schema, field, parsedField);
            structProxy.addField(field);
            if (parsedField.hasIdOverride()) {
                structProxy.setFieldId(field, parsedField.idOverride());
            }
        }
        for (var inherit: parsed.getResolvedInherits()) {
            structProxy.inherit(structProxies.get(inherit.getFullName()));
        }
        structProxy.setStruct(context.resolveStruct(parsed));
        structProxies.put(parsed.getFullName(), structProxy);
        return structProxy;
    }

    void convertAnnotation(Schema schema, SDDocumentType document, ParsedAnnotation parsed) {
        SDAnnotationType annType = context.resolveAnnotation(parsed.name());
        var withStruct = parsed.getStruct();
        if (withStruct.isPresent()) {
            ParsedStruct parsedStruct = withStruct.get();
            SDDocumentType structProxy = convertStructDeclaration(schema, document, parsedStruct);
            structProxy.setStruct(context.resolveStruct(parsedStruct));
            annType.setSdDocType(structProxy);
        }
        document.addAnnotation(annType);
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy