// com.yahoo.schema.parser.ConvertParsedFields - Maven / Gradle / Ivy
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema.parser;
import com.yahoo.document.DataType;
import com.yahoo.schema.document.GeoPos;
import com.yahoo.schema.parser.ConvertParsedTypes.TypeResolver;
import com.yahoo.schema.Index;
import com.yahoo.schema.Schema;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.BooleanIndexDefinition;
import com.yahoo.schema.document.Case;
import com.yahoo.schema.document.Dictionary;
import com.yahoo.schema.document.NormalizeLevel;
import com.yahoo.schema.document.RankType;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.document.SDField;
import com.yahoo.schema.document.Sorting;
import com.yahoo.schema.document.annotation.SDAnnotationType;
import com.yahoo.vespa.documentmodel.SummaryField;
import com.yahoo.vespa.documentmodel.SummaryTransform;
import java.util.Locale;
import java.util.Map;
import java.util.logging.Level;
/**
* Helper for converting ParsedField etc. to SDField with settings
*
* @author arnej27959
**/
public class ConvertParsedFields {
/** Resolver used to turn parsed type, struct and annotation references into document-model objects. */
private final TypeResolver context;

/**
 * Struct proxies converted so far, keyed by the struct's full name.
 * Read when wiring up struct inheritance and added to whenever a struct declaration is converted.
 */
private final Map<String, SDDocumentType> structProxies;

/**
 * Creates a converter.
 *
 * @param context       resolver for types, structs and annotations
 * @param structProxies registry of struct proxies keyed by full struct name; the map is stored as-is
 *                      (not copied), so entries added by this converter are visible to the caller
 */
ConvertParsedFields(TypeResolver context, Map<String, SDDocumentType> structProxies) {
    this.context = context;
    this.structProxies = structProxies;
}
/**
 * Copies each match setting that is present in {@code parsed} onto {@code field}.
 * Settings that are absent leave the corresponding field state untouched.
 *
 * @param field  the field to configure
 * @param parsed the parsed match settings to apply
 */
static void convertMatchSettings(SDField field, ParsedMatchSettings parsed) {
    parsed.getMatchType().ifPresent(value -> field.setMatchingType(value));
    parsed.getMatchCase().ifPresent(value -> field.setMatchingCase(value));
    parsed.getGramSize().ifPresent(value -> field.getMatching().setGramSize(value));
    parsed.getMaxLength().ifPresent(value -> field.getMatching().maxLength(value));
    parsed.getMaxTermOccurrences().ifPresent(value -> field.getMatching().maxTermOccurrences(value));
    parsed.getMaxTokenLength().ifPresent(value -> field.getMatching().maxTokenLength(value));
    parsed.getMatchAlgorithm().ifPresent(value -> field.setMatchingAlgorithm(value));
    parsed.getExactTerminator().ifPresent(value -> field.getMatching().setExactMatchTerminator(value));
}
/**
 * Applies parsed sorting settings to the attribute called {@code name} on {@code field}.
 * If the field has no attribute with that name, one is created with the field's data type
 * and added to the field first.
 *
 * @param schema the schema owning the field (its name is used when creating the attribute)
 * @param field  the field whose attribute should be sorted
 * @param parsed the parsed sorting settings
 * @param name   the name of the attribute to configure
 */
void convertSorting(Schema schema, SDField field, ParsedSorting parsed, String name) {
    Attribute sortAttribute = field.getAttributes().get(name);
    if (sortAttribute == null) {
        sortAttribute = new Attribute(schema.getName(), field.getName(), name, field.getDataType());
        field.addAttribute(sortAttribute);
    }

    Sorting target = sortAttribute.getSorting();
    if ( ! parsed.getAscending()) {
        target.setDescending();
    } else {
        target.setAscending();
    }

    // Optional refinements; each is only applied when given in the schema
    parsed.getFunction().ifPresent(target::setFunction);
    parsed.getStrength().ifPresent(target::setStrength);
    parsed.getLocale().ifPresent(target::setLocale);
}
/**
 * Applies a parsed attribute block to the matching attribute of {@code field}, creating the
 * attribute when the field does not have it yet.
 * <p>
 * Lookup order: when the field name ends with {@code "." + attribute name}, the attribute
 * registered under the field's own name is tried first (presumably the struct-field case; confirm);
 * then the attribute registered under the parsed name; finally a new attribute is created.
 *
 * @param schema the schema owning the field
 * @param field  the field the attribute belongs to
 * @param parsed the parsed attribute settings
 */
void convertAttribute(Schema schema, SDField field, ParsedAttribute parsed) {
    String attributeName = parsed.name();
    String fieldName = field.getName();

    Attribute found = fieldName.endsWith("." + attributeName)
                      ? field.getAttributes().get(fieldName)
                      : null;
    if (found == null) {
        found = field.getAttributes().get(attributeName);
    }
    if (found == null) {
        found = new Attribute(schema.getName(), fieldName, attributeName, field.getDataType());
        field.addAttribute(found);
    }
    Attribute attribute = found;

    attribute.setPaged(parsed.getPaged());
    attribute.setFastSearch(parsed.getFastSearch());
    // fast-rank is only ever switched on here, never reset to false
    if (parsed.getFastRank()) {
        attribute.setFastRank(true);
    }
    attribute.setFastAccess(parsed.getFastAccess());
    attribute.setMutable(parsed.getMutable());
    attribute.setEnableOnlyBitVector(parsed.getEnableOnlyBitVector());
    // attribute.setTensorType(?)

    for (String aliasName : parsed.getAliases()) {
        field.getAliasToName().put(aliasName, parsed.lookupAliasedFrom(aliasName));
    }

    // Schema spelling (e.g. lower-case with dashes) is mapped to the enum constant spelling
    parsed.getDistanceMetric()
          .map(metric -> metric.toUpperCase(Locale.ENGLISH).replace('-', '_'))
          .ifPresent(enumName -> attribute.setDistanceMetric(Attribute.DistanceMetric.valueOf(enumName)));

    parsed.getSorting().ifPresent(sortSpec -> convertSorting(schema, field, sortSpec, attributeName));
}
/**
 * Sets a rank type either on the field itself or on one of its indexes.
 *
 * @param field     the field to configure
 * @param indexName the index to set the rank type on; {@code null} or empty means the field default
 * @param rankType  the textual rank type, converted with {@link RankType#fromString}
 */
private void convertRankType(SDField field, String indexName, String rankType) {
    RankType type = RankType.fromString(rankType);

    boolean appliesToField = indexName == null || indexName.isEmpty();
    if (appliesToField) {
        // No index specified: this becomes the field's default rank type.
        field.setRankType(type);
        return;
    }

    Index target = field.getIndex(indexName);
    if (target == null) {
        target = new Index(indexName);
        field.addIndex(target);
    }
    target.setRankType(type);
}
/**
 * Translates a textual normalizing setting into a {@link NormalizeLevel} and sets it on the field.
 * Both {@code "accent"} and {@code "all"} map to {@code ACCENT}.
 *
 * @param field   the field to configure
 * @param setting one of {@code none}, {@code codepoint}, {@code lowercase}, {@code accent}, {@code all}
 * @throws IllegalArgumentException if {@code setting} is none of the above (including {@code null})
 */
private void convertNormalizing(SDField field, String setting) {
    // A null setting is routed to the default branch so it is rejected with the same message as any other bad value
    NormalizeLevel.Level level = switch (setting == null ? "" : setting) {
        case "none" -> NormalizeLevel.Level.NONE;
        case "codepoint" -> NormalizeLevel.Level.CODEPOINT;
        case "lowercase" -> NormalizeLevel.Level.LOWERCASE;
        case "accent", "all" -> NormalizeLevel.Level.ACCENT;
        default -> throw new IllegalArgumentException("invalid normalizing setting: " + setting);
    };
    field.setNormalizing(new NormalizeLevel(level, true));
}
/**
 * Applies the settings that, per the grammar, may also appear inside a struct-field block:
 * match settings, indexing script, weight, stemming, normalizing, attributes, summary fields,
 * query commands, nested struct-fields and the literal/filter/normal ranking flags.
 *
 * @param schema the schema owning the field
 * @param field  the field to configure
 * @param parsed the parsed field definition
 */
private void convertCommonFieldSettings(Schema schema, SDField field, ParsedField parsed) {
    convertMatchSettings(field, parsed.matchSettings());

    parsed.getIndexing().ifPresent(indexingOp -> field.setIndexingScript(schema.getName(), indexingOp.script()));

    // Position types are exempt from the attribute data type validation
    if (field.doesAttributing() && !GeoPos.isAnyPos(field.getDataType())) {
        Attribute.validateDataType(schema.getName(), field.getName(), field.getDataType());
    }

    parsed.getWeight().ifPresent(weight -> field.setWeight(weight));
    parsed.getStemming().ifPresent(stemming -> field.setStemming(stemming));
    parsed.getNormalizing().ifPresent(normalizing -> convertNormalizing(field, normalizing));

    for (var parsedAttribute : parsed.getAttributes()) {
        convertAttribute(schema, field, parsedAttribute);
    }

    for (var parsedSummary : parsed.getSummaryFields()) {
        DataType summaryType = field.getDataType();
        var declaredType = parsedSummary.getType();
        boolean explicitlyTyped = declaredType != null && parsedSummary.getHasExplicitType();
        if (explicitlyTyped) {
            schema.getDeployLogger().log(Level.WARNING, () -> "For schema '" + schema.getName() +
                                                              "', field '" + field.getName() +
                                                              "', summary '" + parsedSummary.name() +
                                                              "': Specifying the type is deprecated, ignored and will be an error in Vespa 9." +
                                                              " Remove the type specification to silence this warning.");
            // NOTE(review): the warning says the type is ignored, yet the declared type is
            // resolved and used for the summary field here - confirm which is intended.
            summaryType = context.resolveType(declaredType);
        }
        convertSummaryField(field, parsedSummary, summaryType);
    }

    for (String queryCommand : parsed.getQueryCommands()) {
        field.addQueryCommand(queryCommand);
    }

    for (var nested : parsed.getStructFields()) {
        convertStructField(schema, field, nested);
    }

    // Ranking flags are only ever switched on here
    if (parsed.hasLiteral()) {
        field.getRanking().setLiteral(true);
    }
    if (parsed.hasFilter()) {
        field.getRanking().setFilter(true);
    }
    if (parsed.hasNormal()) {
        field.getRanking().setNormal(true);
    }
}
/**
 * Applies a parsed struct-field block to the already existing struct field of {@code field}
 * with the same name.
 *
 * @param schema the schema owning the field
 * @param field  the field containing the struct field
 * @param parsed the parsed struct-field settings
 * @throws IllegalArgumentException if {@code field} has no struct field with the parsed name
 */
private void convertStructField(Schema schema, SDField field, ParsedField parsed) {
    SDField target = field.getStructField(parsed.name());
    if (target != null) {
        convertCommonFieldSettings(schema, target, parsed);
        return;
    }
    throw new IllegalArgumentException("Struct field '" + parsed.name() + "' has not been defined in struct " +
                                       "for field '" + field.getName() + "'.");
}
/**
 * Applies the field settings that are not handled by {@code convertCommonFieldSettings}:
 * dictionary options, indexes, aliases, rank types, sorting and bolding.
 *
 * @param schema the schema owning the field
 * @param field  the field to configure
 * @param parsed the parsed field definition
 */
private void convertExtraFieldSettings(Schema schema, SDField field, ParsedField parsed) {
    String fieldName = parsed.name();

    for (var option : parsed.getDictionaryOptions()) {
        // Fetched per option, so no dictionary is requested when there are no options
        var dict = field.getOrSetDictionary();
        switch (option) {
            case HASH -> dict.updateType(Dictionary.Type.HASH);
            case BTREE -> dict.updateType(Dictionary.Type.BTREE);
            case CASED -> dict.updateMatch(Case.CASED);
            case UNCASED -> dict.updateMatch(Case.UNCASED);
        }
    }

    for (var parsedIndex : parsed.getIndexes()) {
        convertIndex(field, parsedIndex);
    }

    for (var aliasName : parsed.getAliases()) {
        field.getAliasToName().put(aliasName, parsed.lookupAliasedFrom(aliasName));
    }

    parsed.getRankTypes().forEach((index, type) -> convertRankType(field, index, type));
    parsed.getSorting().ifPresent(sortSpec -> convertSorting(schema, field, sortSpec, fieldName));

    if (parsed.hasBolding()) {
        // TODO must it be so ugly:
        SummaryField bolded = field.getSummaryField(fieldName, true);
        bolded.addSource(fieldName);
        bolded.addDestination("default");
        bolded.setTransform(bolded.getTransform().bold());
    }
}
/**
 * Transfers transform, sources and destinations from a parsed summary field to {@code summary},
 * and marks the summary field as not implicit.
 * <p>
 * The base transform is chosen by the first of matched-elements-only, dynamic and tokens that is
 * set (otherwise {@code NONE}); bolding is then applied on top of it when requested.
 *
 * @param summary the summary field to configure
 * @param parsed  the parsed summary field settings
 */
static void convertSummaryFieldSettings(SummaryField summary, ParsedSummaryField parsed) {
    SummaryTransform base =
            parsed.getMatchedElementsOnly() ? SummaryTransform.MATCHED_ELEMENTS_FILTER
            : parsed.getDynamic() ? SummaryTransform.DYNAMICTEASER
            : parsed.getTokens() ? SummaryTransform.TOKENS
            : SummaryTransform.NONE;
    summary.setTransform(parsed.getBolded() ? base.bold() : base);

    for (String sourceName : parsed.getSources()) {
        summary.addSource(sourceName);
    }
    for (String destinationName : parsed.getDestinations()) {
        summary.addDestination(destinationName);
    }
    summary.setImplicit(false);
}
/**
 * Creates a summary field from its parsed definition and adds it to {@code field}.
 * The summary field always gets the "default" destination, and uses the field itself as
 * source when the parsed definition names no sources.
 *
 * @param field  the field the summary field belongs to
 * @param parsed the parsed summary field definition
 * @param type   the data type of the new summary field
 */
private void convertSummaryField(SDField field, ParsedSummaryField parsed, DataType type) {
    SummaryField created = new SummaryField(parsed.name(), type);
    convertSummaryFieldSettings(created, parsed);
    created.addDestination("default");

    boolean hasExplicitSources = ! parsed.getSources().isEmpty();
    if ( ! hasExplicitSources) {
        created.addSource(field.getName());
    }
    field.addSummaryField(created);
}
/**
 * Applies a parsed index block to the index of the same name on {@code field},
 * creating and adding that index when the field does not have it yet.
 *
 * @param field  the field owning the index
 * @param parsed the parsed index settings
 */
private void convertIndex(SDField field, ParsedIndex parsed) {
    String name = parsed.name();
    Index target = field.getIndex(name);
    if (target == null) {
        target = new Index(name);
        field.addIndex(target);
    }
    convertIndexSettings(target, parsed);
}
/**
 * Copies the settings present in a parsed index block onto {@code index}: prefix, aliases,
 * stemming, boolean index definition, interleaved features (from enable-bm25) and HNSW parameters.
 *
 * @param index  the index to configure
 * @param parsed the parsed index settings
 */
private void convertIndexSettings(Index index, ParsedIndex parsed) {
    parsed.getPrefix().ifPresent(index::setPrefix);
    for (String indexAlias : parsed.getAliases()) {
        index.addAlias(indexAlias);
    }
    parsed.getStemming().ifPresent(index::setStemming);

    var arity = parsed.getArity();
    var lowerBound = parsed.getLowerBound();
    var upperBound = parsed.getUpperBound();
    var densePostingListThreshold = parsed.getDensePostingListThreshold();
    // A boolean index definition is only created when at least one of its settings is given
    boolean hasBooleanIndexSetting = arity.isPresent()
                                     || lowerBound.isPresent()
                                     || upperBound.isPresent()
                                     || densePostingListThreshold.isPresent();
    if (hasBooleanIndexSetting) {
        index.setBooleanIndexDefiniton(
                new BooleanIndexDefinition(arity, lowerBound, upperBound, densePostingListThreshold));
    }

    parsed.getEnableBm25().ifPresent(index::setInterleavedFeatures);
    parsed.getHnswIndexParams().ifPresent(index::setHnswIndexParams);
}
/**
 * Converts a parsed field declared inside a document into an {@link SDField}, applies all of
 * its settings and adds it to {@code document}.
 *
 * @param schema   the schema owning the document
 * @param document the document type the field is added to
 * @param parsed   the parsed field definition
 * @return the newly created field
 */
SDField convertDocumentField(Schema schema, SDDocumentType document, ParsedField parsed) {
    var converted = new SDField(document, parsed.name(), context.resolveType(parsed.getType()));
    convertCommonFieldSettings(schema, converted, parsed);
    convertExtraFieldSettings(schema, converted, parsed);
    document.addField(converted);
    return converted;
}
/**
 * Converts a parsed field into an {@link SDField} owned by the schema's document, applies all
 * of its settings and registers it as an extra field on the schema.
 *
 * @param schema the schema receiving the extra field
 * @param parsed the parsed field definition
 */
void convertExtraField(Schema schema, ParsedField parsed) {
    var extraField = new SDField(schema.getDocument(), parsed.name(), context.resolveType(parsed.getType()));
    convertCommonFieldSettings(schema, extraField, parsed);
    convertExtraFieldSettings(schema, extraField, parsed);
    schema.addExtraField(extraField);
}
/**
 * Creates an index from a parsed index block and adds it to the schema.
 *
 * @param schema the schema receiving the index
 * @param parsed the parsed index settings
 */
void convertExtraIndex(Schema schema, ParsedIndex parsed) {
    var extraIndex = new Index(parsed.name());
    convertIndexSettings(extraIndex, parsed);
    schema.addIndex(extraIndex);
}
/**
 * Builds a struct proxy (an {@link SDDocumentType}) for a parsed struct declaration.
 * The proxy gets one converted field per parsed field, inherits the already registered proxies
 * of its resolved parent structs, is tied to the resolved struct type, and is finally registered
 * in the struct proxy map under the struct's full name.
 *
 * @param schema   the schema owning the struct
 * @param document the document type used as owner of the struct's fields
 * @param parsed   the parsed struct declaration
 * @return the struct proxy created for {@code parsed}
 */
SDDocumentType convertStructDeclaration(Schema schema, SDDocumentType document, ParsedStruct parsed) {
    // TODO - can we cleanup this mess
    var proxy = new SDDocumentType(parsed.name(), schema);

    for (var member : parsed.getFields()) {
        var memberType = context.resolveType(member.getType());
        var memberField = new SDField(document, member.name(), memberType);
        convertCommonFieldSettings(schema, memberField, member);
        proxy.addField(memberField);
        if (member.hasIdOverride()) {
            proxy.setFieldId(memberField, member.idOverride());
        }
    }

    // Parents are looked up among the proxies registered so far
    for (var parent : parsed.getResolvedInherits()) {
        proxy.inherit(structProxies.get(parent.getFullName()));
    }

    proxy.setStruct(context.resolveStruct(parsed));
    structProxies.put(parsed.getFullName(), proxy);
    return proxy;
}
/**
 * Resolves the annotation type for a parsed annotation and adds it to {@code document}.
 * When the annotation declares a struct, that struct is converted to a struct proxy and
 * attached to the annotation type first.
 *
 * @param schema   the schema owning the annotation
 * @param document the document type the annotation is added to
 * @param parsed   the parsed annotation
 */
void convertAnnotation(Schema schema, SDDocumentType document, ParsedAnnotation parsed) {
    SDAnnotationType annotationType = context.resolveAnnotation(parsed.name());

    parsed.getStruct().ifPresent(parsedStruct -> {
        SDDocumentType proxy = convertStructDeclaration(schema, document, parsedStruct);
        // NOTE(review): convertStructDeclaration already calls setStruct with
        // context.resolveStruct(parsed), so this looks redundant - confirm before removing.
        proxy.setStruct(context.resolveStruct(parsedStruct));
        annotationType.setSdDocType(proxy);
    });

    document.addAnnotation(annotationType);
}
}
// © 2015 - 2024 Weber Informatics LLC | Privacy Policy