org.apache.solr.handler.clustering.EngineParameters Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-clustering Show documentation
Show all versions of solr-clustering Show documentation
Apache Solr (module: clustering)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.handler.clustering;
import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.LinkedHashSet;
import java.util.Objects;
import java.util.Set;
import org.apache.solr.common.params.SolrParams;
/**
* {@link Engine} configuration parameters (and other parameters that may tweak clustering
* algorithms on a per-request basis).
*
* @lucene.experimental
*/
public final class EngineParameters implements Cloneable {
  /** Common prefix for configuration of engine settings. */
  private static final String PARAM_PREFIX = "clustering.";

  /**
   * @see #algorithmName()
   */
  public static final String PARAM_ALGORITHM = PARAM_PREFIX + "algorithm";

  /**
   * @see #maxLabels()
   */
  public static final String PARAM_MAX_LABELS = PARAM_PREFIX + "maxLabels";

  /**
   * @see #includeSubclusters()
   */
  public static final String PARAM_INCLUDE_SUBCLUSTERS = PARAM_PREFIX + "includeSubclusters";

  /**
   * @see #includeOtherTopics()
   */
  public static final String PARAM_INCLUDE_OTHER_TOPICS = PARAM_PREFIX + "includeOtherTopics";

  /**
   * @see #language()
   */
  public static final String PARAM_LANGUAGE = PARAM_PREFIX + "language";

  /**
   * @see #languageField()
   */
  public static final String PARAM_LANGUAGE_FIELD = PARAM_PREFIX + "languageField";

  /**
   * @see #resources()
   */
  public static final String PARAM_RESOURCES = PARAM_PREFIX + "resources";

  /**
   * @see #fields()
   */
  public static final String PARAM_FIELDS = PARAM_PREFIX + "fields";

  /**
   * @see #preferQueryContext()
   */
  public static final String PARAM_PREFER_QUERY_CONTEXT = PARAM_PREFIX + "preferQueryContext";

  /**
   * @see #contextSize()
   */
  public static final String PARAM_CONTEXT_SIZE = PARAM_PREFIX + "contextSize";

  /**
   * @see #contextCount()
   */
  public static final String PARAM_CONTEXT_COUNT = PARAM_PREFIX + "contextCount";

  /**
   * @see #PARAM_MAX_LABELS
   */
  private int maxLabels = Integer.MAX_VALUE;

  /**
   * @see #PARAM_INCLUDE_SUBCLUSTERS
   */
  private boolean includeSubclusters = true;

  /**
   * @see #PARAM_INCLUDE_OTHER_TOPICS
   */
  private boolean includeOtherTopics = true;

  /**
   * @see #PARAM_ALGORITHM
   */
  private String algorithmName;

  /**
   * @see #PARAM_RESOURCES
   */
  private String resources;

  /**
   * @see #PARAM_LANGUAGE
   */
  private String language = "English";

  /**
   * @see #PARAM_LANGUAGE_FIELD
   */
  private String languageField;

  /**
   * @see #PARAM_PREFER_QUERY_CONTEXT
   */
  private boolean preferQueryContext;

  /**
   * @see #PARAM_CONTEXT_SIZE
   */
  private int contextSize = 80 * 4;

  /**
   * @see #PARAM_CONTEXT_COUNT
   */
  private int contextCount = 3;

  /**
   * @see #PARAM_FIELDS
   */
  private LinkedHashSet<String> fields = new LinkedHashSet<>();

  /** Non-engine configuration parameters (algorithm parameters). */
  private LinkedHashMap<String, String> otherParameters = new LinkedHashMap<>();

  /**
   * Unique-value document identifier field. This is required for clustering since clusters only
   * reference documents by their ID field's value.
   */
  private String docIdField;

  /**
   * Creates engine parameters starting from the built-in defaults and applying any values present
   * in {@code params}.
   *
   * @param params the parameters to read engine settings from.
   */
  EngineParameters(SolrParams params) {
    extractFrom(params);
  }

  /**
   * Extract parameter values from the given {@link SolrParams}.
   *
   * <p>Only keys actually present in {@code params} are applied; settings whose key is absent keep
   * their current value. Field names given under {@link #PARAM_FIELDS} are added to the fields
   * already configured. Keys that are not one of the {@code PARAM_*} constants are preserved in
   * {@link #otherParameters()}, with multiple values joined using {@code ", "}.
   *
   * @param params the parameters to read from.
   * @return this instance.
   */
  private EngineParameters extractFrom(SolrParams params) {
    params.stream()
        .forEachOrdered(
            e -> {
              switch (e.getKey()) {
                case PARAM_MAX_LABELS:
                  maxLabels = params.getInt(PARAM_MAX_LABELS);
                  break;
                case PARAM_INCLUDE_SUBCLUSTERS:
                  includeSubclusters = params.getBool(PARAM_INCLUDE_SUBCLUSTERS);
                  break;
                case PARAM_INCLUDE_OTHER_TOPICS:
                  includeOtherTopics = params.getBool(PARAM_INCLUDE_OTHER_TOPICS);
                  break;
                case PARAM_ALGORITHM:
                  algorithmName = params.get(PARAM_ALGORITHM);
                  break;
                case PARAM_RESOURCES:
                  resources = params.get(PARAM_RESOURCES);
                  break;
                case PARAM_LANGUAGE:
                  language = params.get(PARAM_LANGUAGE);
                  break;
                case PARAM_LANGUAGE_FIELD:
                  languageField = params.get(PARAM_LANGUAGE_FIELD);
                  break;
                case PARAM_PREFER_QUERY_CONTEXT:
                  preferQueryContext = params.getBool(PARAM_PREFER_QUERY_CONTEXT);
                  break;
                case PARAM_CONTEXT_COUNT:
                  contextCount = params.getPrimitiveInt(PARAM_CONTEXT_COUNT);
                  break;
                case PARAM_CONTEXT_SIZE:
                  contextSize = params.getPrimitiveInt(PARAM_CONTEXT_SIZE);
                  break;
                case PARAM_FIELDS:
                  addFields(params.get(PARAM_FIELDS));
                  break;
                default:
                  // Unrecognized parameter. Preserve it.
                  String[] value = e.getValue();
                  if (value != null) {
                    if (value.length == 1) {
                      otherParameters.put(e.getKey(), value[0]);
                    } else {
                      otherParameters.put(e.getKey(), String.join(", ", value));
                    }
                  }
                  break;
              }
            });
    return this;
  }

  /**
   * Parses a field list and adds every field name it contains to {@link #fields}.
   *
   * <p>Names may be separated by commas, whitespace or any mix of both. Empty tokens (from leading,
   * trailing or doubled separators) are ignored so that no empty field name is ever registered.
   *
   * @param fieldList the raw value of {@link #PARAM_FIELDS}; {@code null} adds nothing.
   */
  private void addFields(String fieldList) {
    if (fieldList == null) {
      return;
    }
    Arrays.stream(fieldList.split("[,\\s]+"))
        .filter(name -> !name.isEmpty())
        .forEach(fields::add);
  }

  /**
   * @return Maximum number of returned cluster labels (even if the algorithm returns more).
   */
  int maxLabels() {
    return maxLabels;
  }

  /**
   * @return If {@code true}, include subclusters in response (if the algorithm produces
   *     hierarchical clustering).
   */
  boolean includeSubclusters() {
    return includeSubclusters;
  }

  /**
   * @return If {@code true}, include a synthetic cluster called "Other Topics" that consists of all
   *     documents not assigned to any other cluster.
   */
  boolean includeOtherTopics() {
    return includeOtherTopics;
  }

  /**
   * @return Name of the clustering algorithm to use (as loaded via the service extension point
   *     {@link org.carrot2.clustering.ClusteringAlgorithm}).
   */
  String algorithmName() {
    return algorithmName;
  }

  /**
   * @return Return Solr component-configuration relative language resources path.
   */
  String resources() {
    return resources;
  }

  /**
   * @return Name of the default language to use for clustering. The corresponding {@link
   *     org.carrot2.language.LanguageComponents} must be available (loaded via service provider
   *     extension).
   */
  String language() {
    return language;
  }

  /**
   * @return Name of the field that carries each document's language. {@code null} value means all
   *     documents will be clustered according to the default {@link #language()}. If not {@code
   *     null} and the document's field has a missing value, it will be clustered using the default
   *     {@link #language()} as well.
   */
  String languageField() {
    return languageField;
  }

  /**
   * @return Names of all fields whose textual content will be passed to the clustering engine. In
   *     the {@link #PARAM_FIELDS} request parameter the names are comma or space separated. The
   *     returned set is this object's own set, in insertion order.
   */
  Set<String> fields() {
    return fields;
  }

  /**
   * @return Returns {@code true} if clustering should try to extract context fragments around the
   *     matching query regions rather than use full field content. Such context snippets typically
   *     cluster well because they carry a more compact and query-related information.
   */
  boolean preferQueryContext() {
    return preferQueryContext;
  }

  /**
   * @return Returns the maximum query context window to use if {@link #preferQueryContext()} is
   *     {@code true}.
   */
  int contextSize() {
    return contextSize;
  }

  /**
   * @return Returns the maximum number of different, non-contiguous query context snippets from a
   *     single field if {@link #preferQueryContext()} is {@code true}.
   */
  int contextCount() {
    return contextCount;
  }

  /**
   * @return Parameters that are not engine settings (keys other than the {@code PARAM_*}
   *     constants), in the order they were encountered.
   */
  LinkedHashMap<String, String> otherParameters() {
    return otherParameters;
  }

  /**
   * Creates an independent copy of this object.
   *
   * <p>{@code super.clone()} only produces a shallow copy, so both mutable collections are replaced
   * with fresh copies. Without this the clone would share {@link #fields} with the original and
   * any later change made through the clone would leak back into the original's defaults.
   *
   * @return a copy whose collections can be modified without affecting this instance.
   */
  @Override
  protected EngineParameters clone() {
    try {
      EngineParameters clone = (EngineParameters) super.clone();
      clone.otherParameters = new LinkedHashMap<>(this.otherParameters);
      clone.fields = new LinkedHashSet<>(this.fields);
      return clone;
    } catch (CloneNotSupportedException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * @return Return a copy of the argument with any parameters present in {@code params} overriding
   *     this object defaults.
   */
  EngineParameters derivedFrom(SolrParams params) {
    EngineParameters cloned = this.clone();
    cloned.extractFrom(params);
    return cloned;
  }

  /**
   * @return Name of the unique-value document identifier field.
   * @throws NullPointerException if the field has not been set via {@link
   *     #setDocIdField(String)}.
   */
  String docIdField() {
    return Objects.requireNonNull(docIdField, "docIdField has not been set");
  }

  /**
   * Sets the unique-value document identifier field.
   *
   * @param docIdField name of the identifier field.
   * @throws NullPointerException if {@code docIdField} is {@code null}.
   */
  void setDocIdField(String docIdField) {
    this.docIdField = Objects.requireNonNull(docIdField, "docIdField must not be null");
  }

  /**
   * @return A new set with the names of all fields that have to be loaded for clustering: the
   *     content {@link #fields()}, the {@link #docIdField()} and, if it is set and not blank, the
   *     {@link #languageField()}.
   * @throws NullPointerException if the document identifier field has not been set.
   */
  Set<String> getFieldsToLoad() {
    Set<String> toLoad = new LinkedHashSet<>(fields());
    toLoad.add(docIdField());
    String languageField = languageField();
    if (languageField != null && !languageField.isBlank()) {
      toLoad.add(languageField);
    }
    return toLoad;
  }
}