// com.yahoo.schema.RankProfile Maven / Gradle / Ivy
// Copyright Vespa.ai. Licensed under the terms of the Apache 2.0 license. See LICENSE in the project root.
package com.yahoo.schema;
import ai.vespa.rankingexpression.importer.configmodelview.ImportedMlModels;
import com.google.common.collect.ImmutableMap;
import com.yahoo.config.application.api.ApplicationPackage;
import com.yahoo.config.application.api.DeployLogger;
import com.yahoo.path.Path;
import com.yahoo.search.query.profile.QueryProfileRegistry;
import com.yahoo.search.query.profile.types.FieldDescription;
import com.yahoo.search.query.profile.types.QueryProfileType;
import com.yahoo.search.query.ranking.Diversity;
import com.yahoo.schema.document.Attribute;
import com.yahoo.schema.document.ImmutableSDField;
import com.yahoo.schema.document.SDDocumentType;
import com.yahoo.schema.expressiontransforms.ExpressionTransforms;
import com.yahoo.schema.expressiontransforms.RankProfileTransformContext;
import com.yahoo.schema.expressiontransforms.InputRecorder;
import com.yahoo.schema.parser.ParseException;
import com.yahoo.search.schema.RankProfile.InputType;
import com.yahoo.searchlib.rankingexpression.ExpressionFunction;
import com.yahoo.searchlib.rankingexpression.FeatureList;
import com.yahoo.searchlib.rankingexpression.RankingExpression;
import com.yahoo.searchlib.rankingexpression.Reference;
import com.yahoo.searchlib.rankingexpression.rule.Arguments;
import com.yahoo.searchlib.rankingexpression.rule.ExpressionNode;
import com.yahoo.searchlib.rankingexpression.rule.ReferenceNode;
import com.yahoo.tensor.Tensor;
import com.yahoo.tensor.TensorType;
import java.io.IOException;
import java.io.Reader;
import java.io.StringReader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.OptionalDouble;
import java.util.Set;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.function.Supplier;
import java.util.logging.Level;
import java.util.stream.Collectors;
import java.util.stream.Stream;
/**
* Represents a rank profile - a named set of ranking settings
*
* @author bratseth
*/
public class RankProfile implements Cloneable {
/** Name given to the function holding the first-phase ranking expression */
public final static String FIRST_PHASE = "firstphase";
/** Name given to the function holding the second-phase ranking expression */
public final static String SECOND_PHASE = "secondphase";
/** Name given to the function holding the global-phase ranking expression */
public final static String GLOBAL_PHASE = "globalphase";
/** The schema-unique name of this rank profile */
private final String name;
/** The schema owning this profile, or null if global (owned by a model) */
private final ImmutableSchema schema;
/** The names of the profiles this inherits, in the order they were added */
private final List inheritedNames = new ArrayList<>();
/** The resolved inherited profiles, or null when not resolved. */
private List inherited;
/** The match phase settings declared in this, or null if none are declared here */
private MatchPhaseSettings matchPhase = null;
/** The diversity settings declared in this, or null if none are declared here */
private DiversitySettings diversity = null;
/** The rank settings declared directly in this profile (not including inherited ones) */
protected Set rankSettings = new java.util.LinkedHashSet<>();
/** The ranking expression to be used for first phase */
private RankingExpressionFunction firstPhaseRanking = null;
/** The ranking expression to be used for second phase */
private RankingExpressionFunction secondPhaseRanking = null;
/** The ranking expression to be used for global-phase */
private RankingExpressionFunction globalPhaseRanking = null;
/** Number of hits to be reranked in second phase, -1 means use default */
private int rerankCount = -1;
/** Number of hits to be reranked in global-phase, -1 means use default */
private int globalPhaseRerankCount = -1;
/** Mysterious attribute */
private int keepRankCount = -1;
// For the three counts below a negative value means "not set in this profile":
// the getters then fall back to the value uniquely inherited, or -1
private int numThreadsPerSearch = -1;
private int minHitsPerThread = -1;
private int numSearchPartitions = -1;
// For the four limits below null means "not set in this profile":
// the getters then fall back to the value uniquely inherited, or empty
private Double termwiseLimit = null;
private Double postFilterThreshold = null;
private Double approximateThreshold = null;
private Double targetHitsMaxAdjustmentFactor = null;
/** The drop limit used to drop hits with rank score less than or equal to this value */
private double rankScoreDropLimit = -Double.MAX_VALUE;
/** The second-phase drop limit; -Double.MAX_VALUE means not set in this profile */
private double secondPhaseRankScoreDropLimit = -Double.MAX_VALUE;
/** The summary features added to this profile, or null if none have been added */
private Set summaryFeatures;
/** The name of the directly inherited profile whose summary features are combined with those of this, or null */
private String inheritedSummaryFeaturesProfileName;
/** The match features added to this profile, or null if none have been added */
private Set matchFeatures;
/** The implicitly added match features which are not among the regular match features, or null */
private Set hiddenMatchFeatures;
/** The name of the directly inherited profile whose match features are combined with those of this, or null */
private String inheritedMatchFeaturesProfileName;
/** The rank features added to this profile, or null if none have been added */
private Set rankFeatures;
/** The properties of this - a multimap */
private Map> rankProperties = new LinkedHashMap<>();
/** Whether to ignore the default rank features, or null to use the inherited value */
private Boolean ignoreDefaultRankFeatures = null;
/** The functions declared in this, by function name */
private Map functions = new LinkedHashMap<>();
// This cache must be invalidated every time modifications are done to 'functions'.
private CachedFunctions allFunctionsCached = null;
/** The inputs declared in this, by reference */
private Map inputs = new LinkedHashMap<>();
/** The constants declared in this, by constant name */
private Map constants = new LinkedHashMap<>();
/** The onnx models declared in this, by model name */
private final Map onnxModels = new LinkedHashMap<>();
private Set filterFields = new HashSet<>();
/** The registry used to look up the profiles inherited by this */
private final RankProfileRegistry rankProfileRegistry;
/** The attribute types registered through addAttributeType */
private final TypeSettings attributeTypes = new TypeSettings();
/** Lazily populated cache of schema.allFieldsList(); stays null until first needed */
private List allFieldsList;
/** The strict setting declared in this, or null if not declared here */
private Boolean strict;
/** The significance model setting declared in this, or null if not declared here */
private Boolean useSignificanceModel;
private final ApplicationPackage applicationPackage;
/** Receives warnings produced while building this profile, such as functions defined twice */
private final DeployLogger deployLogger;
/**
* Creates a new rank profile for a particular schema
*
* @param name the name of the new profile
* @param schema the schema owning this profile
* @param rankProfileRegistry the {@link com.yahoo.schema.RankProfileRegistry} to use for storing
* and looking up rank profiles.
*/
public RankProfile(String name, Schema schema, RankProfileRegistry rankProfileRegistry) {
// Arguments are evaluated left to right, so the null check on schema runs (and fails with a
// descriptive message) before schema.applicationPackage() and schema.getDeployLogger() dereference it
this(name, Objects.requireNonNull(schema, "schema cannot be null"),
schema.applicationPackage(), schema.getDeployLogger(),
rankProfileRegistry);
}
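/**
* Creates a rank profile, which is global if the given schema is null
*
* @param name the name of the new profile, cannot be null
* @param schema the schema owning this profile, or null if it is global
* @param applicationPackage the application package this profile is part of
* @param deployLogger the logger receiving warnings produced by this profile
* @param rankProfileRegistry the registry to use for looking up inherited rank profiles
*/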
public RankProfile(String name, Schema schema, ApplicationPackage applicationPackage, DeployLogger deployLogger,
                   RankProfileRegistry rankProfileRegistry) {
    // Reject a missing name before any state is assigned
    Objects.requireNonNull(name, "name cannot be null");
    this.name = name;
    this.schema = schema;
    this.applicationPackage = applicationPackage;
    this.deployLogger = deployLogger;
    this.rankProfileRegistry = rankProfileRegistry;
}
/** Returns the schema-unique name of this rank profile */
public String name() {
    return name;
}
/** Returns the search definition owning this, or null if it is global */
public ImmutableSchema schema() {
    return schema;
}
/** Returns the application this is part of */
public ApplicationPackage applicationPackage() { return applicationPackage; }
/**
 * Returns a stream over all fields of the owning schema, or an empty stream if this has no schema.
 * The field list is fetched from the schema on first use and reused afterwards.
 */
private Stream allFields() {
    if (schema != null) {
        if (allFieldsList == null)
            allFieldsList = schema.allFieldsList();
        return allFieldsList.stream();
    }
    return Stream.empty();
}
/** Returns a stream over all imported fields of the owning schema, or an empty stream if this has no schema */
private Stream allImportedFields() {
    if (schema == null)
        return Stream.empty();
    return schema.allImportedFields();
}
/**
* Returns whether type checking should fail if this profile accesses query features that are
* not defined in query profile types.
*
* Default is false.
*/
public boolean isStrict() {
    // An undeclared (null) value counts as not strict
    return Boolean.TRUE.equals(declaredStrict());
}
/** Returns the strict value declared in this or any parent profile. */
public Boolean declaredStrict() {
    if (strict == null) {
        // Not declared here: use the value the parents agree on, or null if none declare it
        return uniquelyInherited(RankProfile::declaredStrict, "strict").orElse(null);
    }
    return strict;
}
/** Sets whether this profile is strict. Null means not declared here, leaving it to inherited profiles. */
public void setStrict(Boolean strict) { this.strict = strict; }
/** Sets whether to use the significance model. Null means not declared here, leaving it to inherited profiles. */
public void setUseSignificanceModel(Boolean useSignificanceModel) { this.useSignificanceModel = useSignificanceModel; }
/** Returns the value declared in this, otherwise the value uniquely inherited, otherwise false */
public boolean useSignificanceModel() {
    if (useSignificanceModel == null) {
        // Disabled by default
        return uniquelyInherited(RankProfile::useSignificanceModel, "use-model").orElse(false);
    }
    return useSignificanceModel;
}
/**
* Adds a profile to those inherited by this.
* The profile must belong to this schema (directly or by inheritance).
*/
public void inherit(String inheritedName) {
    inheritedNames.add(inheritedName);
    // Discard any earlier resolution so the new name is included the next time parents are resolved
    inherited = null;
}
/** Returns the names of the profiles this inherits, if any. */
public List inheritedNames() {
    return Collections.unmodifiableList(inheritedNames);
}
/** Returns the rank profiles inherited by this. */
private List inherited() {
if (inheritedNames.isEmpty()) return List.of();
// Reuse the earlier resolution; inherit() resets this to null whenever a name is added
if (inherited != null) return inherited;
// Assigned before the cycle check below: any call to inherited() on this profile made
// from now on returns this list instead of resolving again
inherited = resolveInheritedProfiles(schema);
// Log of the fully qualified names visited during the cycle check, starting with this profile
List children = new ArrayList<>();
children.add(createFullyQualifiedName());
inherited.forEach(profile -> verifyNoInheritanceCycle(children, profile));
return inherited;
}
/** Returns "schemaName.profileName", or just the profile name when this has no schema */
private String createFullyQualifiedName() {
    if (schema == null)
        return name();
    return schema.getName() + "." + name();
}
/**
 * Verifies that neither the given parent, nor anything it inherits (transitively), has the same
 * fully qualified name as the first element of children.
 *
 * @param children the fully qualified names visited so far, where the first element is the profile being
 *                 checked. Each visited profile is appended and never removed, so this works as a visit log
 *                 which is included in the error message
 * @param parent the inherited profile to check
 * @throws IllegalArgumentException if the inheritance leads back to the first element of children
 */
private void verifyNoInheritanceCycle(List children, RankProfile parent) {
children.add(parent.createFullyQualifiedName());
String root = children.get(0);
if (root.equals(parent.createFullyQualifiedName()))
throw new IllegalArgumentException("There is a cycle in the inheritance for rank-profile '" + root + "' = " + children);
// Continue upwards through everything the parent inherits
for (RankProfile parentInherited : parent.inherited())
verifyNoInheritanceCycle(children, parentInherited);
}
/**
 * Looks up the profile of each inherited name, in declaration order.
 * Without a schema the names are looked up among the global profiles.
 *
 * @throws IllegalArgumentException if an inherited name cannot be resolved
 */
private List resolveInheritedProfiles(ImmutableSchema schema) {
    List resolved = new ArrayList<>();
    for (String inheritedName : inheritedNames) {
        RankProfile parent;
        if (schema == null)
            parent = rankProfileRegistry.getGlobal(inheritedName);
        else
            parent = resolveInheritedProfile(schema, inheritedName);

        if (parent == null) {
            Object owner = (schema() != null) ? schema() : " global rank profiles";
            throw new IllegalArgumentException("rank-profile '" + name() + "' inherits '" + inheritedName +
                                               "', but this is not found in " + owner);
        }
        resolved.add(parent);
    }
    return resolved;
}
/**
 * Resolves a single inherited profile name in the context of the given schema.
 * Returns whatever the registry lookup returns, which the caller treats as "not found" when null.
 */
private RankProfile resolveInheritedProfile(ImmutableSchema schema, String inheritedName) {
    SDDocumentType documentType = schema.getDocument();
    if (documentType == null)
        return rankProfileRegistry.get(schema.getName(), inheritedName);

    boolean seeminglyInheritsSelf = name.equals(inheritedName);
    if (seeminglyInheritsSelf) {
        // If you seemingly inherit yourself, you are actually referencing a rank-profile in one of your inherited schemas
        for (SDDocumentType baseType : documentType.getInheritedTypes()) {
            RankProfile fromBase = rankProfileRegistry.resolve(baseType, inheritedName);
            if (fromBase != null)
                return fromBase;
        }
    }
    return rankProfileRegistry.resolve(documentType, inheritedName);
}
/** Returns whether this profile inherits (directly or indirectly) the given profile name. */
public boolean inherits(String name) {
    for (RankProfile parent : inherited()) {
        // Either a direct parent has this name, or one of its ancestors has
        if (parent.name().equals(name) || parent.inherits(name))
            return true;
    }
    return false;
}
/** Validates the given settings and makes them the match phase settings declared in this */
public void setMatchPhase(MatchPhaseSettings settings) {
    // Validate first, so invalid settings never replace the current ones
    settings.checkValid();
    matchPhase = settings;
}
/** Returns the match phase settings declared in this, otherwise those uniquely inherited, or null if none */
public MatchPhaseSettings getMatchPhase() {
    if (matchPhase == null)
        return uniquelyInherited(RankProfile::getMatchPhase, "match phase settings").orElse(null);
    return matchPhase;
}
/** Validates the given settings and makes them the diversity settings declared in this */
public void setDiversity(DiversitySettings value) {
    // Validate first, so invalid settings never replace the current ones
    value.checkValid();
    this.diversity = value;
}
/** Returns the diversity settings declared in this, otherwise those uniquely inherited, or null if none */
public DiversitySettings getDiversity() {
    if (diversity == null)
        return uniquelyInherited(RankProfile::getDiversity, "diversity settings").orElse(null);
    return diversity;
}
/** Returns the uniquely determined property, where non-empty is defined as non-null */
private Optional uniquelyInherited(Function propertyRetriever,
                                   String propertyDescription) {
    return uniquelyInherited(propertyRetriever, value -> value != null, propertyDescription);
}
/**
* Returns the property retrieved by the given function, if it is only present in a single unique variant
* among all profiles inherited by this, or empty if not present.
* Note that for properties that don't implement a values-based equals this reverts to the stricter condition that
* only one inherited profile can define a non-empty value at all.
*
* @throws IllegalArgumentException if the inherited profiles defines multiple different values of the property
*/
private Optional uniquelyInherited(Function propertyRetriever,
                                   Predicate nonEmptyValueFilter,
                                   String propertyDescription) {
    // Collapse the non-empty values of all directly inherited profiles into their distinct values
    var distinctValues = inherited().stream()
                                    .map(propertyRetriever)
                                    .filter(nonEmptyValueFilter)
                                    .collect(Collectors.toSet());
    if (distinctValues.size() > 1)
        throw new IllegalArgumentException("Only one of the profiles inherited by " + this + " can contain " +
                                           propertyDescription + ", but it is present in multiple");
    // Zero or one value left: empty when none is present, otherwise the single value
    return distinctValues.stream().findAny();
}
/** Adds a rank setting to those declared in this profile */
public void addRankSetting(RankSetting rankSetting) { rankSettings.add(rankSetting); }
/** Creates a rank setting from the given field name, type and value, and adds it to this profile */
public void addRankSetting(String fieldName, RankSetting.Type type, Object value) {
    RankSetting setting = new RankSetting(fieldName, type, value);
    addRankSetting(setting);
}
/**
* Returns the rank setting of a field, or null if there is no such rank setting in this profile
*
* @param field the field whose settings to return
* @param type the type that the field is required to be
* @return the rank setting found, or null
*/
RankSetting getDeclaredRankSetting(String field, RankSetting.Type type) {
    var declared = declaredRankSettingIterator();
    while (declared.hasNext()) {
        RankSetting candidate = declared.next();
        boolean matches = candidate.getFieldName().equals(field) && candidate.getType() == type;
        if (matches)
            return candidate;
    }
    return null;
}
/**
* Returns a rank setting of field or index, or null if there is no such rank setting in this profile or one it
* inherits
*
* @param field the field whose settings to return
* @param type the type that the field is required to be
* @return the rank setting found, or null
*/
public RankSetting getRankSetting(String field, RankSetting.Type type) {
    RankSetting declared = getDeclaredRankSetting(field, type);
    if (declared == null) {
        // Not declared here: use the setting the parents agree on, if any
        return uniquelyInherited(p -> p.getRankSetting(field, type), "rank setting " + type + " on " + field).orElse(null);
    }
    return declared;
}
/**
* Returns the rank settings in this rank profile
*
* @return an iterator for the declared rank setting
*/
public Iterator declaredRankSettingIterator() {
    // Iterate through a read-only wrapper so callers cannot remove settings
    var readOnlySettings = Collections.unmodifiableSet(rankSettings);
    return readOnlySettings.iterator();
}
/**
* Returns all settings in this profile or any profile it inherits
*
* @return an iterator for all rank settings of this
*/
public Iterator rankSettingIterator() {
    var allSettings = rankSettings();
    return allSettings.iterator();
}
/**
* Returns a snapshot of the rank settings of this and everything it inherits.
* Changes to the returned set will not be reflected in this rank profile.
*/
public Set rankSettings() {
    Set fromParents = new LinkedHashSet<>();
    for (RankProfile parent : inherited()) {
        for (RankSetting setting : parent.rankSettings()) {
            // add returns false when an earlier parent already contributed an equal setting
            boolean added = fromParents.add(setting);
            if ( ! added)
                throw new IllegalArgumentException(setting + " is present in " + parent + " inherited by " +
                                                   this + ", but is also present in another profile inherited by it");
        }
    }

    // TODO: Here we do things in the wrong order to not break tests. Reverse this.
    Set combined = new LinkedHashSet<>(rankSettings);
    combined.addAll(fromParents);
    return combined;
}
/** Adds a constant to this profile, replacing any constant already declared here with the same name */
public void add(Constant constant) {
    var key = constant.name();
    constants.put(key, constant);
}
/** Returns an unmodifiable view of the constants declared in this */
public Map declaredConstants() {
    return Collections.unmodifiableMap(constants);
}
/** Returns an unmodifiable view of the constants available in this */
public Map constants() {
    Map allConstants = new HashMap<>();
    // Constants from the parents come first: two parents may not both provide the same name
    for (var inheritedProfile : inherited()) {
        for (var constant : inheritedProfile.constants().values()) {
            if (allConstants.containsKey(constant.name()))
                throw new IllegalArgumentException(constant + " is present in " +
                                                   inheritedProfile + " inherited by " +
                                                   this + ", but is also present in another profile inherited by it");
            allConstants.put(constant.name(), constant);
        }
    }

    // Constants of the schema override inherited ones, and those declared here override both
    if (schema != null)
        allConstants.putAll(schema.constants());
    allConstants.putAll(constants);
    return allConstants;
}
/** Adds an onnx model to this profile, replacing any model already declared here with the same name */
public void add(OnnxModel model) {
    var key = model.getName();
    onnxModels.put(key, model);
}
/** Returns an unmodifiable map of the onnx models declared in this. */
public Map declaredOnnxModels() {
    // Wrapped so callers cannot modify the internal map, matching declaredConstants()
    return Collections.unmodifiableMap(onnxModels);
}
/** Returns an unmodifiable map of the onnx models available in this. */
public Map onnxModels() {
    Map allModels = new HashMap<>();
    // Models from the parents come first: two parents may not both provide the same name
    for (var inheritedProfile : inherited()) {
        for (var model : inheritedProfile.onnxModels().values()) {
            if (allModels.containsKey(model.getName()))
                throw new IllegalArgumentException(model + " is present in " +
                                                   inheritedProfile + " inherited by " +
                                                   this + ", but is also present in another profile inherited by it");
            allModels.put(model.getName(), model);
        }
    }

    // Models of the schema override inherited ones, and those declared here override both
    if (schema != null)
        allModels.putAll(schema.onnxModels());
    allModels.putAll(onnxModels);
    return allModels;
}
/** Registers the type of the given attribute with the attribute type settings of this profile */
public void addAttributeType(String attributeName, String attributeType) { attributeTypes.addType(attributeName, attributeType); }
/** Returns the attribute types registered in this profile */
public Map getAttributeTypes() { return attributeTypes.getTypes(); }
/**
* Returns the ranking expression to use by this. This expression must not be edited.
* Returns null if no expression is set.
*/
public RankingExpression getFirstPhaseRanking() {
    RankingExpressionFunction firstPhase = getFirstPhase();
    return firstPhase == null ? null : firstPhase.function.getBody();
}
/** Returns the first-phase function declared in this, otherwise the one uniquely inherited, or null if none */
public RankingExpressionFunction getFirstPhase() {
    if (firstPhaseRanking == null)
        return uniquelyInherited(RankProfile::getFirstPhase, "first-phase expression").orElse(null);
    return firstPhaseRanking;
}
/** Sets the first-phase ranking to the given expression, wrapped as a non-inlined function without arguments */
void setFirstPhaseRanking(RankingExpression rankingExpression) {
    ExpressionFunction firstPhase = new ExpressionFunction(FIRST_PHASE, List.of(), rankingExpression);
    this.firstPhaseRanking = new RankingExpressionFunction(firstPhase, false);
}
/**
 * Parses the given expression and sets it as the first-phase ranking of this.
 *
 * @throws IllegalArgumentException if the expression cannot be parsed
 */
public void setFirstPhaseRanking(String expression) {
    try {
        var parsed = parseRankingExpression(FIRST_PHASE, List.of(), expression);
        firstPhaseRanking = new RankingExpressionFunction(parsed, false);
    }
    catch (ParseException e) {
        throw new IllegalArgumentException("Invalid first-phase function", e);
    }
}
/**
* Returns the ranking expression to use by this. This expression must not be edited.
* Returns null if no expression is set.
*/
public RankingExpression getSecondPhaseRanking() {
    RankingExpressionFunction secondPhase = getSecondPhase();
    return secondPhase == null ? null : secondPhase.function().getBody();
}
/** Returns the second-phase function declared in this, otherwise the one uniquely inherited, or null if none */
public RankingExpressionFunction getSecondPhase() {
    if (secondPhaseRanking == null)
        return uniquelyInherited(RankProfile::getSecondPhase, "second-phase expression").orElse(null);
    return secondPhaseRanking;
}
/**
 * Parses the given expression and sets it as the second-phase ranking of this.
 *
 * @throws IllegalArgumentException if the expression cannot be parsed
 */
public void setSecondPhaseRanking(String expression) {
    try {
        var parsed = parseRankingExpression(SECOND_PHASE, List.of(), expression);
        secondPhaseRanking = new RankingExpressionFunction(parsed, false);
    } catch (ParseException e) {
        throw new IllegalArgumentException("Invalid second-phase function", e);
    }
}
/**
 * Returns the global-phase ranking expression to use by this,
 * or null if no expression is set.
 */
public RankingExpression getGlobalPhaseRanking() {
    RankingExpressionFunction globalPhase = getGlobalPhase();
    return globalPhase == null ? null : globalPhase.function().getBody();
}
/** Returns the global-phase function declared in this, otherwise the one uniquely inherited, or null if none */
public RankingExpressionFunction getGlobalPhase() {
    if (globalPhaseRanking == null)
        return uniquelyInherited(RankProfile::getGlobalPhase, "global-phase expression").orElse(null);
    return globalPhaseRanking;
}
/**
 * Parses the given expression and sets it as the global-phase ranking of this.
 *
 * @throws IllegalArgumentException if the expression cannot be parsed
 */
public void setGlobalPhaseRanking(String expression) {
    try {
        var parsed = parseRankingExpression(GLOBAL_PHASE, List.of(), expression);
        globalPhaseRanking = new RankingExpressionFunction(parsed, false);
    } catch (ParseException e) {
        throw new IllegalArgumentException("Invalid global-phase function", e);
    }
}
// TODO: Below we have duplicate methods for summary and match features: Encapsulate this in a single parametrized
// class instead (and probably make rank features work the same).
/**
* Sets the name this should inherit the summary features of.
* Without setting this, this will either have the summary features of the single parent setting them,
* or if summary features are set in this, only have the summary features in this.
* With this set the resulting summary features of this will be the superset of those defined in this and
* the final (with inheritance included) summary features of the given parent.
* The profile must be one which is directly inherited by this.
*/
public void setInheritedSummaryFeatures(String parentProfile) {
    // Only profiles listed as direct parents qualify; indirectly inherited ones are rejected
    if ( ! inheritedNames().contains(parentProfile))
        throw new IllegalArgumentException("This can only inherit the summary features of a directly inherited profile, " +
                                           "but is attempting to inherit '" + parentProfile + "'");
    this.inheritedSummaryFeaturesProfileName = parentProfile;
}
/**
* Sets the name of a profile this should inherit the match features of.
* Without setting this, this will either have the match features of the single parent setting them,
* or if match features are set in this, only have the match features in this.
* With this set the resulting match features of this will be the superset of those defined in this and
* the final (with inheritance included) match features of the given parent.
* The profile must be one which is directly inherited by this.
*
*/
public void setInheritedMatchFeatures(String parentProfile) {
    // Only profiles listed as direct parents qualify; indirectly inherited ones are rejected
    if ( ! inheritedNames().contains(parentProfile))
        throw new IllegalArgumentException("This can only inherit the match features of a directly inherited profile, " +
                                           "but is attempting to inherit '" + parentProfile + "'");
    this.inheritedMatchFeaturesProfileName = parentProfile;
}
/** Returns a read-only view of the summary features to use in this profile. This is never null */
public Set getSummaryFeatures() {
    // Nothing declared here: use the features the parents agree on, if any
    if (summaryFeatures == null)
        return uniquelyInherited(RankProfile::getSummaryFeatures, f -> ! f.isEmpty(), "summary features")
                       .orElse(Set.of());

    // Declared here without an explicit parent to combine with: only the features of this apply
    if (inheritedSummaryFeaturesProfileName == null)
        return Collections.unmodifiableSet(summaryFeatures);

    // Declared here and an explicit parent is named: combine the features of both
    RankProfile parent = inherited().stream()
                                    .filter(p -> p.name().equals(inheritedSummaryFeaturesProfileName))
                                    .findAny()
                                    .orElseThrow();
    Set combined = new HashSet<>();
    combined.addAll(parent.getSummaryFeatures());
    combined.addAll(summaryFeatures);
    return Collections.unmodifiableSet(combined);
}
/** Returns a read-only view of the match features to use in this profile. This is never null */
public Set getMatchFeatures() {
    // Nothing declared here: use the features the parents agree on, if any
    if (matchFeatures == null)
        return uniquelyInherited(RankProfile::getMatchFeatures, f -> ! f.isEmpty(), "match features")
                       .orElse(Set.of());

    // Declared here without an explicit parent to combine with: only the features of this apply
    if (inheritedMatchFeaturesProfileName == null)
        return Collections.unmodifiableSet(matchFeatures);

    // Declared here and an explicit parent is named: combine the features of both
    RankProfile parent = inherited().stream()
                                    .filter(p -> p.name().equals(inheritedMatchFeaturesProfileName))
                                    .findAny()
                                    .orElseThrow();
    Set combined = new HashSet<>();
    combined.addAll(parent.getMatchFeatures());
    combined.addAll(matchFeatures);
    return Collections.unmodifiableSet(combined);
}
/**
 * Returns a read-only view of the hidden match features of this profile, or those uniquely inherited
 * if none are set here. This is never null.
 */
public Set getHiddenMatchFeatures() {
    if (hiddenMatchFeatures == null)
        return uniquelyInherited(RankProfile::getHiddenMatchFeatures, f -> ! f.isEmpty(), "hidden match features")
                       .orElse(Set.of());
    return Collections.unmodifiableSet(hiddenMatchFeatures);
}
/**
 * Adds every feature in the given feature lists which is not already among the match features of this
 * to the hidden match features of this.
 */
private void addImplicitMatchFeatures(List list) {
    if (hiddenMatchFeatures == null)
        hiddenMatchFeatures = new LinkedHashSet<>();
    var visibleFeatures = getMatchFeatures();
    for (var featureList : list) {
        for (ReferenceNode candidate : featureList) {
            // Features already returned as regular match features do not need to be hidden ones as well
            if (visibleFeatures.contains(candidate)) continue;
            hiddenMatchFeatures.add(candidate);
        }
    }
}
/** Adds the content of the given feature list to the internal list of summary features. */
public void addSummaryFeatures(FeatureList features) {
    // Created on first use: a null set means no summary features are declared in this profile
    if (summaryFeatures == null)
        summaryFeatures = new LinkedHashSet<>();
    features.forEach(summaryFeatures::add);
}
/** Adds the content of the given feature list to the internal list of match features. */
public void addMatchFeatures(FeatureList features) {
    // Created on first use: a null set means no match features are declared in this profile
    if (matchFeatures == null)
        matchFeatures = new LinkedHashSet<>();
    features.forEach(matchFeatures::add);
}
/** Returns a read-only view of the rank features to use in this profile. This is never null */
public Set getRankFeatures() {
    if (rankFeatures != null) return Collections.unmodifiableSet(rankFeatures);
    // The description is used in the error message when several parents define different rank features
    return uniquelyInherited(RankProfile::getRankFeatures, f -> ! f.isEmpty(), "rank features")
                   .orElse(Set.of());
}
/**
* Adds the content of the given feature list to the internal list of rank features.
*
* @param features The features to add.
*/
public void addRankFeatures(FeatureList features) {
    // Created on first use: a null set means no rank features are declared in this profile
    if (rankFeatures == null)
        rankFeatures = new LinkedHashSet<>();
    features.forEach(rankFeatures::add);
}
/** Returns a read only flattened list view of the rank properties to use in this profile. This is never null. */
public List getRankProperties() {
    List flattened = new ArrayList<>();
    // Concatenate the per-name lists in the iteration order of the property map
    getRankPropertyMap().values().forEach(flattened::addAll);
    return Collections.unmodifiableList(flattened);
}
/** Returns a read only map view of the rank properties to use in this profile. This is never null. */
public Map> getRankPropertyMap() {
// No parents: only the properties declared here (if any) apply
if (rankProperties.isEmpty() && inherited().isEmpty()) return Map.of();
if (inherited().isEmpty()) return Collections.unmodifiableMap(rankProperties);
// With parents: at most one distinct non-empty property map may be inherited, otherwise this throws
var inheritedProperties = uniquelyInherited(RankProfile::getRankPropertyMap, m -> ! m.isEmpty(), "rank-properties")
.orElse(Map.of());
if (rankProperties.isEmpty()) return inheritedProperties;
// Neither is null
Map> combined = new LinkedHashMap<>(inheritedProperties);
// A name declared here replaces the whole inherited value list for that name
combined.putAll(rankProperties); // Don't combine values across inherited properties
return Collections.unmodifiableMap(combined);
}
/** Adds a rank property value under the given name, keeping any values already added for that name */
public void addRankProperty(String name, String parameter) {
    RankProperty property = new RankProperty(name, parameter);
    addRankProperty(property);
}
/*
* set a rank-property that should be a single-value parameter;
* if the same name is used multiple times, that parameter must be identical each time.
*/
public void setRankProperty(String name, String parameter) {
    var existing = rankProperties.get(name);
    if (existing == null) {
        // First time this name is set
        addRankProperty(new RankProperty(name, parameter));
        return;
    }
    if (existing.size() != 1)
        throw new IllegalStateException("setRankProperty used for multi-valued property " + name);

    // Setting the same value again is a no-op, but changing it is an error
    var currentValue = existing.get(0).getValue();
    if (currentValue.equals(parameter)) return;
    throw new IllegalArgumentException("setRankProperty would change property " + name + " from " + currentValue + " to " + parameter);
}
/** Appends the given property to the list of properties having the same name, creating the list if needed */
private void addRankProperty(RankProperty rankProperty) {
    // Just the usual multimap semantics here
    var sameNamed = rankProperties.computeIfAbsent(rankProperty.getName(), (String key) -> new ArrayList<>(1));
    sameNamed.add(rankProperty);
}
/** Sets the number of hits to rerank in second phase. A negative value means not set in this profile. */
public void setRerankCount(int rerankCount) {
    this.rerankCount = rerankCount;
}
/** Returns the rerank count set in this, otherwise the one uniquely inherited, or -1 if not set anywhere */
public int getRerankCount() {
    if (rerankCount < 0)
        return uniquelyInherited(RankProfile::getRerankCount, c -> c >= 0, "rerank-count").orElse(-1);
    return rerankCount;
}
/** Sets the number of hits to rerank in global-phase. A negative value means not set in this profile. */
public void setGlobalPhaseRerankCount(int count) {
    this.globalPhaseRerankCount = count;
}
/** Returns the global-phase rerank count set in this, otherwise the one uniquely inherited, or -1 if not set anywhere */
public int getGlobalPhaseRerankCount() {
    if (globalPhaseRerankCount < 0)
        return uniquelyInherited(RankProfile::getGlobalPhaseRerankCount, c -> c >= 0, "global-phase rerank-count").orElse(-1);
    return globalPhaseRerankCount;
}
/** Sets the number of threads per search. A negative value means not set in this profile. */
public void setNumThreadsPerSearch(int numThreads) {
    this.numThreadsPerSearch = numThreads;
}
/** Returns the threads per search set in this, otherwise the value uniquely inherited, or -1 if not set anywhere */
public int getNumThreadsPerSearch() {
    if (numThreadsPerSearch < 0)
        return uniquelyInherited(RankProfile::getNumThreadsPerSearch, n -> n >= 0, "num-threads-per-search").orElse(-1);
    return numThreadsPerSearch;
}
/** Sets the minimum number of hits per thread. A negative value means not set in this profile. */
public void setMinHitsPerThread(int minHits) {
    this.minHitsPerThread = minHits;
}
/** Returns the min hits per thread set in this, otherwise the value uniquely inherited, or -1 if not set anywhere */
public int getMinHitsPerThread() {
    if (minHitsPerThread >= 0) return minHitsPerThread;
    // The description is used in the error message when several parents define different values
    return uniquelyInherited(RankProfile::getMinHitsPerThread, n -> n >= 0, "min-hits-per-thread").orElse(-1);
}
/** Sets the number of search partitions. A negative value means not set in this profile. */
public void setNumSearchPartitions(int numSearchPartitions) {
    this.numSearchPartitions = numSearchPartitions;
}
/** Returns the search partitions set in this, otherwise the value uniquely inherited, or -1 if not set anywhere */
public int getNumSearchPartitions() {
    if (numSearchPartitions < 0)
        return uniquelyInherited(RankProfile::getNumSearchPartitions, n -> n >= 0, "num-search-partitions").orElse(-1);
    return numSearchPartitions;
}
/** Sets the termwise limit declared in this profile */
public void setTermwiseLimit(double termwiseLimit) {
    this.termwiseLimit = termwiseLimit;
}
/** Sets the post filter threshold declared in this profile */
public void setPostFilterThreshold(double threshold) {
    this.postFilterThreshold = threshold;
}
/** Sets the approximate threshold declared in this profile */
public void setApproximateThreshold(double threshold) {
    this.approximateThreshold = threshold;
}
/** Sets the target hits max adjustment factor declared in this profile */
public void setTargetHitsMaxAdjustmentFactor(double factor) {
    this.targetHitsMaxAdjustmentFactor = factor;
}
/** Returns the termwise limit set in this, otherwise the one uniquely inherited, or empty if not set anywhere */
public OptionalDouble getTermwiseLimit() {
    if (termwiseLimit == null) {
        return uniquelyInherited(RankProfile::getTermwiseLimit, OptionalDouble::isPresent, "termwise-limit")
                       .orElse(OptionalDouble.empty());
    }
    return OptionalDouble.of(termwiseLimit);
}
/** Returns the post filter threshold set in this, otherwise the one uniquely inherited, or empty if not set anywhere */
public OptionalDouble getPostFilterThreshold() {
    if (postFilterThreshold == null)
        return uniquelyInherited(RankProfile::getPostFilterThreshold, OptionalDouble::isPresent, "post-filter-threshold")
                       .orElse(OptionalDouble.empty());
    return OptionalDouble.of(postFilterThreshold);
}
/** Returns the approximate threshold set in this, otherwise the one uniquely inherited, or empty if not set anywhere */
public OptionalDouble getApproximateThreshold() {
    if (approximateThreshold == null)
        return uniquelyInherited(RankProfile::getApproximateThreshold, OptionalDouble::isPresent, "approximate-threshold")
                       .orElse(OptionalDouble.empty());
    return OptionalDouble.of(approximateThreshold);
}
/** Returns the adjustment factor set in this, otherwise the one uniquely inherited, or empty if not set anywhere */
public OptionalDouble getTargetHitsMaxAdjustmentFactor() {
    if (targetHitsMaxAdjustmentFactor == null)
        return uniquelyInherited(RankProfile::getTargetHitsMaxAdjustmentFactor, OptionalDouble::isPresent, "target-hits-max-adjustment-factor")
                       .orElse(OptionalDouble.empty());
    return OptionalDouble.of(targetHitsMaxAdjustmentFactor);
}
/** Whether we should ignore the default rank features. Set to null to use inherited */
public void setIgnoreDefaultRankFeatures(Boolean ignoreDefaultRankFeatures) { this.ignoreDefaultRankFeatures = ignoreDefaultRankFeatures; }
/** Returns the value set in this, otherwise the value uniquely inherited, otherwise false */
public Boolean getIgnoreDefaultRankFeatures() {
    if (ignoreDefaultRankFeatures == null)
        return uniquelyInherited(RankProfile::getIgnoreDefaultRankFeatures, "ignore-default-rank-features").orElse(false);
    return ignoreDefaultRankFeatures;
}
/** Sets the keep rank count. A negative value means not set in this profile. */
public void setKeepRankCount(int rerankArraySize) {
    this.keepRankCount = rerankArraySize;
}
/** Returns the keep rank count set in this, otherwise the one uniquely inherited, or -1 if not set anywhere */
public int getKeepRankCount() {
    if (keepRankCount < 0)
        return uniquelyInherited(RankProfile::getKeepRankCount, c -> c >= 0, "keep-rank-count").orElse(-1);
    return keepRankCount;
}
/** Sets the rank score drop limit. -Double.MAX_VALUE means not set in this profile. */
public void setRankScoreDropLimit(double rankScoreDropLimit) {
    this.rankScoreDropLimit = rankScoreDropLimit;
}
/** Returns the drop limit set in this, otherwise the one uniquely inherited, otherwise the value held by this */
public double getRankScoreDropLimit() {
    boolean setHere = rankScoreDropLimit > -Double.MAX_VALUE;
    if (setHere)
        return rankScoreDropLimit;
    return uniquelyInherited(RankProfile::getRankScoreDropLimit, c -> c > -Double.MAX_VALUE, "rank.score-drop-limit")
                   .orElse(rankScoreDropLimit);
}
/** Sets the second-phase rank score drop limit. -Double.MAX_VALUE means not set in this profile. */
public void setSecondPhaseRankScoreDropLimit(double limit) {
    this.secondPhaseRankScoreDropLimit = limit;
}
/** Returns the second-phase drop limit set in this, otherwise the one uniquely inherited, otherwise the value held by this */
public double getSecondPhaseRankScoreDropLimit() {
    boolean setHere = secondPhaseRankScoreDropLimit > -Double.MAX_VALUE;
    if (setHere) return secondPhaseRankScoreDropLimit;
    return uniquelyInherited(RankProfile::getSecondPhaseRankScoreDropLimit, c -> c > -Double.MAX_VALUE, "second-phase rank-score-drop-limit")
                   .orElse(secondPhaseRankScoreDropLimit);
}
/**
 * Parses the given expression into a function with the given name and arguments, and adds it to this.
 *
 * @throws IllegalArgumentException if the expression cannot be parsed
 */
public void addFunction(String name, List arguments, String expression, boolean inline) {
    try {
        var parsed = parseRankingExpression(name, arguments, expression);
        addFunction(parsed, inline);
    } catch (ParseException e) {
        throw new IllegalArgumentException("Invalid function '" + name + "'", e);
    }
}
/** Adds a function and returns it */
public RankingExpressionFunction addFunction(ExpressionFunction function, boolean inline) {
    RankingExpressionFunction added = new RankingExpressionFunction(function, inline);
    // A redefinition is only warned about: the latest definition replaces the earlier one below
    boolean alreadyDefined = functions.containsKey(function.getName());
    if (alreadyDefined)
        deployLogger.log(Level.WARNING, "Function '" + function.getName() + "' is defined twice " +
                                        "in rank profile '" + this.name + "'");
    functions.put(function.getName(), added);
    // The cached function view is stale once 'functions' has changed
    allFunctionsCached = null;
    return added;
}
/**
* Adds the type of an input feature consumed by this profile.
* All inputs must either be declared through this or in query profile types,
* otherwise they are assumed to be scalars.
*/
public void addInput(Reference reference, Input input) {
if (inputs.containsKey(reference)) {
Input existing = inputs().get(reference);
if (! input.equals(existing))
throw new IllegalArgumentException("Duplicate input: Has both " + input + " and existing " + existing);
}
inputs.put(reference, input);
}
/**
 * Returns an unmodifiable map of the inputs of this, including all inputs of the profiles this inherits.
 * Inputs declared in this profile take precedence over inherited ones.
 *
 * @throws IllegalArgumentException if two inherited profiles declare the same input differently
 */
// This is less restrictive than most other constructs in allowing inputs to be defined in all parent profiles
// because inputs are tied closer to functions than the profile itself.
public Map inputs() {
    if (inherited().isEmpty()) {
        if (inputs.isEmpty()) return Map.of();
        return Collections.unmodifiableMap(inputs);
    }

    // Merge the parents' inputs first, then overlay the ones declared here
    Map<Reference, Input> combined = new LinkedHashMap<>();
    for (var parent : inherited()) {
        for (var entry : parent.inputs().entrySet()) {
            var reference = entry.getKey();
            var declared = entry.getValue();
            Input previous = combined.get(reference);
            boolean conflicting = previous != null && ! previous.equals(declared);
            if (conflicting)
                throw new IllegalArgumentException(this + " inherits " + parent + " which contains " +
                                                   declared + ", but this is already defined as " +
                                                   previous + " in another profile this inherits");
            combined.put(reference, declared);
        }
    }
    combined.putAll(inputs);
    return Collections.unmodifiableMap(combined);
}
/**
 * Holds a phase, an attribute name and an operation string.
 * addMutateOperation turns these into "vespa.mutate.[phase]" rank properties.
 */
public static class MutateOperation {

    /** The phase a mutate operation is tied to. */
    public enum Phase {
        on_match,
        on_first_phase,
        on_second_phase,
        on_summary
    }

    final Phase phase;
    final String attribute;
    final String operation;

    public MutateOperation(Phase phase, String attribute, String operation) {
        this.operation = operation;
        this.attribute = attribute;
        this.phase = phase;
    }

}
// The mutate operations added through addMutateOperation, in the order they were added.
private final List mutateOperations = new ArrayList<>();
/**
 * Records the given mutate operation and adds its attribute and operation
 * as rank properties under "vespa.mutate.[phase]".
 */
public void addMutateOperation(MutateOperation op) {
    mutateOperations.add(op);

    String phaseKey = "vespa.mutate." + op.phase.toString();
    String attributeKey = phaseKey + ".attribute";
    String operationKey = phaseKey + ".operation";
    addRankProperty(attributeKey, op.attribute);
    addRankProperty(operationKey, op.operation);
}
/** Convenience overload: wraps the arguments in a MutateOperation and adds it. */
public void addMutateOperation(MutateOperation.Phase phase, String attribute, String operation) {
    MutateOperation op = new MutateOperation(phase, attribute, operation);
    addMutateOperation(op);
}
/** Returns the list of mutate operations added to this profile. This is the internal list, not a copy. */
public List getMutateOperations() {
    return mutateOperations;
}
/**
 * Returns the function with the given name declared in this profile,
 * or else the one uniquely inherited from the profiles this inherits, or null if there is none.
 */
public RankingExpressionFunction findFunction(String name) {
    RankingExpressionFunction declaredHere = functions.get(name);
    if (declaredHere == null) {
        String description = "function '" + name + "'";
        return uniquelyInherited(profile -> profile.findFunction(name), description).orElse(null);
    }
    return declaredHere;
}
/**
 * Returns an unmodifiable snapshot of the functions in this, including inherited functions.
 * The snapshot is served from a cache which is refreshed first if needed.
 */
public Map getFunctions() {
    updateCachedFunctions();
    CachedFunctions cached = allFunctionsCached;
    return cached.allRankingExpressionFunctions;
}
/** Returns the cached expression functions of this, refreshing the cache first if needed. */
private ImmutableMap getExpressionFunctions() {
    updateCachedFunctions();
    CachedFunctions cached = allFunctionsCached;
    return cached.allExpressionFunctions;
}
/** Rebuilds the function cache from all gathered functions when it is missing or stale. */
private void updateCachedFunctions() {
    if ( ! needToUpdateFunctionCache()) return;
    allFunctionsCached = new CachedFunctions(gatherAllFunctions());
}
/**
 * Returns an unmodifiable map of the functions of this and of all profiles this inherits.
 * Functions declared in this profile take precedence over inherited ones.
 *
 * @throws IllegalArgumentException if the same function name is contributed by more than one inherited profile
 */
private Map gatherAllFunctions() {
    if (inherited().isEmpty()) {
        if (functions.isEmpty()) return Map.of();
        return Collections.unmodifiableMap(new LinkedHashMap<>(functions));
    }

    // Merge the parents' functions first, then overlay the ones declared here
    Map<String, RankingExpressionFunction> merged = new LinkedHashMap<>();
    for (var parent : inherited()) {
        for (var entry : parent.getFunctions().entrySet()) {
            var functionName = entry.getKey();
            var inheritedFunction = entry.getValue();
            if (merged.containsKey(functionName))
                throw new IllegalArgumentException(this + " inherits " + parent + " which contains " +
                                                   inheritedFunction + ", but this function is already " +
                                                   "defined in another profile this inherits");
            merged.put(functionName, inheritedFunction);
        }
    }
    merged.putAll(functions);
    return Collections.unmodifiableMap(merged);
}
/** Returns true if the function cache of this, or of any profile this inherits, must be rebuilt. */
private boolean needToUpdateFunctionCache() {
    for (RankProfile parent : inherited()) {
        if (parent.needToUpdateFunctionCache()) return true;
    }
    return allFunctionsCached == null;
}
/** Returns the filter fields held by this profile only. This is the internal set, not a copy. */
public Set filterFields() {
    return filterFields;
}
/**
 * Returns all filter fields in this profile and any profile it inherits.
 * Inherited fields come first, followed by the fields of this profile.
 */
public Set allFilterFields() {
    var fromParents = uniquelyInherited(RankProfile::allFilterFields,
                                        fields -> ! fields.isEmpty(),
                                        "filter fields").orElse(Set.of());
    if ( ! fromParents.isEmpty()) {
        var union = new LinkedHashSet<>(fromParents);
        union.addAll(filterFields());
        return union;
    }
    return Collections.unmodifiableSet(filterFields);
}
/**
 * Parses the given expression into a function with the given name and arguments.
 * The expression is trimmed before parsing and is read through openRankingExpressionReader,
 * so it may be either an inline expression or a "file:" reference.
 *
 * @param name the name of the function to create
 * @param arguments the arguments of the function
 * @param expression the expression text, or a "file:" reference to it
 * @return the parsed function
 * @throws ParseException if the expression is empty or cannot be parsed
 * @throws RuntimeException if reading the expression fails with an IOException
 */
private ExpressionFunction parseRankingExpression(String name, List arguments, String expression) throws ParseException {
    String trimmed = expression.trim();
    if (trimmed.isEmpty())
        throw new ParseException("Empty expression");

    try (Reader rankingExpressionReader = openRankingExpressionReader(name, trimmed)) {
        return new ExpressionFunction(name, arguments, new RankingExpression(name, rankingExpressionReader));
    }
    catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) {
        // Close the quote opened before the expression so the message is well-formed
        ParseException exception = new ParseException("Invalid expression '" + trimmed + "'");
        throw (ParseException)exception.initCause(e);
    }
    catch (IOException e) {
        throw new RuntimeException("IOException parsing ranking expression '" + name + "'", e);
    }
}
/**
 * Returns the file name of a "file:" expression reference: the text after the "file:" prefix, trimmed,
 * with the ranking expression file suffix appended unless already present.
 */
private static String extractFileName(String expression) {
    String suffix = ApplicationPackage.RANKEXPRESSION_NAME_SUFFIX;
    String fileName = expression.substring("file:".length()).trim();
    return fileName.endsWith(suffix) ? fileName : fileName + suffix;
}
/**
 * Returns a reader of the given expression: a reader of the expression text itself,
 * or, if it starts with "file:", of the ranking expression file it references in the schema.
 *
 * @throws IllegalArgumentException if the referenced file name contains a directory separator
 */
private Reader openRankingExpressionReader(String expName, String expression) {
    if (expression.startsWith("file:")) {
        String fileName = extractFileName(expression);
        Path.fromString(fileName); // No ".."
        boolean inOtherDirectory = fileName.contains("/"); // See ticket 4102122
        if (inOtherDirectory)
            throw new IllegalArgumentException("In " + name() + ", " + expName + ", ranking references file '" +
                                               fileName + "' in a different directory, which is not supported.");
        return schema.getRankingExpression(fileName);
    }
    return new StringReader(expression);
}
/**
 * Shallow clones this: the collections listed below are copied into new collections
 * holding the same elements, while the elements themselves are shared with this.
 */
@Override
public RankProfile clone() {
    RankProfile copy;
    try {
        copy = (RankProfile)super.clone();
    }
    catch (CloneNotSupportedException e) {
        throw new RuntimeException("Won't happen", e);
    }

    // Shared with the original, not copied
    copy.matchPhase = this.matchPhase; // hmm?
    copy.diversity = this.diversity;

    // Feature sets may be unset (null), which is preserved
    if (summaryFeatures != null)
        copy.summaryFeatures = new LinkedHashSet<>(this.summaryFeatures);
    else
        copy.summaryFeatures = null;
    if (matchFeatures != null)
        copy.matchFeatures = new LinkedHashSet<>(this.matchFeatures);
    else
        copy.matchFeatures = null;
    if (rankFeatures != null)
        copy.rankFeatures = new LinkedHashSet<>(this.rankFeatures);
    else
        copy.rankFeatures = null;

    copy.rankSettings = new LinkedHashSet<>(this.rankSettings);
    copy.rankProperties = new LinkedHashMap<>(this.rankProperties);
    copy.inputs = new LinkedHashMap<>(this.inputs);
    copy.functions = new LinkedHashMap<>(this.functions);
    copy.allFunctionsCached = null; // the copy rebuilds its own function cache
    copy.filterFields = new HashSet<>(this.filterFields);
    copy.constants = new HashMap<>(this.constants);
    return copy;
}
/**
 * Returns a copy of this where the content is optimized for execution.
 * Compiled profiles should never be modified.
 *
 * @throws IllegalArgumentException if compilation fails with an IllegalArgumentException;
 *         the original exception is attached as the cause
 */
public RankProfile compile(QueryProfileRegistry queryProfiles, ImportedMlModels importedModels) {
    RankProfile compiled;
    try {
        compiled = this.clone();
        compiled.compileThis(queryProfiles, importedModels);
    }
    catch (IllegalArgumentException e) {
        throw new IllegalArgumentException("Rank profile '" + name() + "' is invalid", e);
    }
    return compiled;
}
/**
 * Compiles the ranking phases and functions of this profile in place.
 * This is invoked by compile on a clone of the profile.
 *
 * Steps, in order:
 * - check that no function has the same name as a constant,
 * - compile the inline functions, then the first-, second- and global-phase expressions,
 * - compile all functions (including inherited ones) and replace the functions of this by the result,
 * - run verifyNoNormalizers on the first- and second-phase expressions and the match- and summary-features,
 * - if there is a global phase, record the inputs it needs and add them as implicit match features.
 *
 * @throws IllegalArgumentException if a function and a constant share a name, or if an input needed
 *         by the global-phase expression cannot be parsed as a feature list
 */
private void compileThis(QueryProfileRegistry queryProfiles, ImportedMlModels importedModels) {
    checkNameCollisions(getFunctions(), constants());
    ExpressionTransforms expressionTransforms = new ExpressionTransforms();

    var featureTypes = featureTypes();
    // Function compiling first pass: compile inline functions without resolving other functions
    var inlineFunctions =
            compileFunctions(this::getInlineFunctions, queryProfiles, featureTypes, importedModels, Map.of(), expressionTransforms);

    firstPhaseRanking = compile(this.getFirstPhase(), queryProfiles, featureTypes, importedModels, constants(), inlineFunctions, expressionTransforms);
    secondPhaseRanking = compile(this.getSecondPhase(), queryProfiles, featureTypes, importedModels, constants(), inlineFunctions, expressionTransforms);
    globalPhaseRanking = compile(this.getGlobalPhase(), queryProfiles, featureTypes, importedModels, constants(), inlineFunctions, expressionTransforms);

    // Function compiling second pass: compile all functions and insert previously compiled inline functions
    // TODO: This merges all functions from inherited profiles too and erases inheritance information. Not good.
    functions = compileFunctions(this::getFunctions, queryProfiles, featureTypes, importedModels, inlineFunctions, expressionTransforms);
    allFunctionsCached = null;

    var context = new RankProfileTransformContext(this,
                                                  queryProfiles,
                                                  featureTypes,
                                                  importedModels,
                                                  constants(),
                                                  inlineFunctions);
    var allNormalizers = getFeatureNormalizers();
    verifyNoNormalizers("first-phase expression", firstPhaseRanking, allNormalizers, context);
    verifyNoNormalizers("second-phase expression", secondPhaseRanking, allNormalizers, context);
    for (ReferenceNode mf : getMatchFeatures()) {
        verifyNoNormalizers("match-feature " + mf, mf, allNormalizers, context);
    }
    for (ReferenceNode sf : getSummaryFeatures()) {
        verifyNoNormalizers("summary-feature " + sf, sf, allNormalizers, context);
    }

    if (globalPhaseRanking != null) {
        // Names of the inputs the global phase needs; filled in by the recorders below
        var needInputs = new HashSet<String>();
        Set<String> userDeclaredMatchFeatures = new HashSet<>();
        for (ReferenceNode mf : getMatchFeatures()) {
            userDeclaredMatchFeatures.add(mf.toString());
        }
        var recorder = new InputRecorder(needInputs);
        recorder.alreadyMatchFeatures(userDeclaredMatchFeatures);
        recorder.addKnownNormalizers(allNormalizers.keySet());
        recorder.process(globalPhaseRanking.function().getBody(), context);
        for (var normalizerName : recorder.normalizersUsed()) {
            var normalizer = allNormalizers.get(normalizerName);
            var func = functions.get(normalizer.input());
            if (func != null) {
                // The normalizer input is a function: record what that function needs instead,
                // unless the function itself is already a declared match feature
                verifyNoNormalizers("normalizer input " + normalizer.input(), func, allNormalizers, context);
                if (! userDeclaredMatchFeatures.contains(normalizer.input())) {
                    var subRecorder = new InputRecorder(needInputs);
                    subRecorder.alreadyMatchFeatures(userDeclaredMatchFeatures);
                    subRecorder.process(func.function().getBody(), context);
                }
            } else {
                needInputs.add(normalizer.input());
            }
        }
        List<FeatureList> addIfMissing = new ArrayList<>();
        for (String input : needInputs) {
            // constant(...) and query(...) inputs are not added as match features
            if (input.startsWith("constant(") || input.startsWith("query(")) {
                continue;
            }
            try {
                addIfMissing.add(new FeatureList(input));
            } catch (com.yahoo.searchlib.rankingexpression.parser.ParseException e) {
                // Keep the parse error as the cause so the reason for the failure is not lost
                throw new IllegalArgumentException("invalid input in global-phase expression: "+input, e);
            }
        }
        addImplicitMatchFeatures(addIfMissing);
    }
}
/**
 * Verifies that no function has the same name as a constant.
 *
 * @throws IllegalArgumentException if the constant feature form of a function name is a key in constants
 */
private void checkNameCollisions(Map functions, Map constants) {
    for (var functionName : functions.keySet()) {
        var constantFeature = FeatureNames.asConstantFeature(functionName);
        boolean collides = constants.containsKey(constantFeature);
        if (collides)
            throw new IllegalArgumentException("Cannot have both a constant and function named '" +
                                               functionName + "'");
    }
}
/** Returns the functions of this (including inherited ones) which are marked inline, keyed by name. */
private Map getInlineFunctions() {
    var inlineEntries = getFunctions().entrySet().stream()
                                      .filter(entry -> entry.getValue().inline());
    return inlineEntries.collect(Collectors.toMap(Map.Entry::getKey, Map.Entry::getValue));
}
private Map compileFunctions(Supplier
© 2015 - 2024 Weber Informatics LLC | Privacy Policy