org.apache.solr.ltr.feature.FieldValueFeature Maven / Gradle / Ivy
Show all versions of solr-ltr Show documentation
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.solr.ltr.feature;
import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;
/**
* This feature returns the value of a field in the current document. The field must have
* stored="true" or docValues="true" properties. Example configuration:
*
*
* {
* "name": "rawHits",
* "class": "org.apache.solr.ltr.feature.FieldValueFeature",
* "params": {
* "field": "hits"
* }
* }
*
*
* There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. The
* chosen scorer depends on the field attributes.
*
*
FieldValueFeatureScorer (FVFS): used for stored=true if docValues=false
*
*
NumericDocValuesFVFS: used for docValues=true, if docValueType == NUMERIC
*
*
SortedDocValuesFVFS: used for docValues=true, if docValueType == SORTED
*
*
DefaultValueFVFS: used for docValues=true, a fallback scorer that is used on segments where no
* document has a value set in the field of this feature
*
*
Use {@link LegacyFieldValueFeature} for the pre 9.4 behaviour of not using DocValues when
* docValues=true is combined with stored=true.
*/
public class FieldValueFeature extends Feature {
protected boolean useDocValuesForStored = true;
private String field;
private Set fieldAsSet;
public String getField() {
return field;
}
public void setField(String field) {
this.field = field;
fieldAsSet = Collections.singleton(field);
}
@Override
public LinkedHashMap paramsToMap() {
final LinkedHashMap params = defaultParamsToMap();
params.put("field", field);
return params;
}
@Override
protected void validate() throws FeatureException {
if (field == null || field.isEmpty()) {
throw new FeatureException(getClass().getSimpleName() + ": field must be provided");
}
}
public FieldValueFeature(String name, Map params) {
super(name, params);
}
@Override
public FeatureWeight createWeight(
IndexSearcher searcher,
boolean needsScores,
SolrQueryRequest request,
Query originalQuery,
Map efi)
throws IOException {
return new FieldValueFeatureWeight(searcher, request, originalQuery, efi);
}
public class FieldValueFeatureWeight extends FeatureWeight {
private final SchemaField schemaField;
public FieldValueFeatureWeight(
IndexSearcher searcher,
SolrQueryRequest request,
Query originalQuery,
Map efi) {
super(FieldValueFeature.this, searcher, request, originalQuery, efi);
if (searcher instanceof SolrIndexSearcher) {
schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field);
} else { // some tests pass a null or a non-SolrIndexSearcher searcher
schemaField = null;
}
}
/**
* Override this method in sub classes that wish to use not an absolute time but an interval
* such as document age or remaining shelf life relative to a specific date or relative to now.
*
* @param val value of the field
* @return value after transformation
*/
protected long readNumericDocValuesDate(long val) {
return val;
}
/**
* Return a FeatureScorer that uses docValues or storedFields if no docValues are present
*
* @param context the segment this FeatureScorer is working with
* @return FeatureScorer for the current segment and field
* @throws IOException as defined by abstract class Feature
*/
@Override
public FeatureScorer scorer(LeafReaderContext context) throws IOException {
if (schemaField != null
&& (!schemaField.stored() || useDocValuesForStored)
&& schemaField.hasDocValues()) {
final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field);
final DocValuesType docValuesType =
fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE;
if (DocValuesType.NUMERIC.equals(docValuesType)) {
return new NumericDocValuesFieldValueFeatureScorer(
this,
context,
DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS),
schemaField.getType().getNumberType());
} else if (DocValuesType.SORTED.equals(docValuesType)) {
return new SortedDocValuesFieldValueFeatureScorer(
this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
} else if (DocValuesType.NONE.equals(docValuesType)) {
// Using a fallback feature scorer because this segment has no documents with a doc value
// for the current field
return new DefaultValueFieldValueFeatureScorer(
this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
throw new IllegalArgumentException(
"Doc values type " + docValuesType.name() + " of field " + field + " is not supported");
}
return new FieldValueFeatureScorer(
this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
}
/** A FeatureScorer that reads the stored value for a field */
public class FieldValueFeatureScorer extends FeatureScorer {
private final LeafReaderContext context;
public FieldValueFeatureScorer(
FeatureWeight weight, LeafReaderContext context, DocIdSetIterator itr) {
super(weight, itr);
this.context = context;
}
@Override
public float score() throws IOException {
try {
final Document document = context.reader().document(itr.docID(), fieldAsSet);
final IndexableField indexableField = document.getField(field);
if (indexableField == null) {
return getDefaultValue();
}
final Number number = indexableField.numericValue();
if (number != null) {
return number.floatValue();
} else {
final String string = indexableField.stringValue();
if (string.length() == 1) {
// boolean values in the index are encoded with the
// a single char contained in TRUE_TOKEN or FALSE_TOKEN
// (see BoolField)
if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
return 1;
}
if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
return 0;
}
}
}
} catch (final IOException e) {
throw new FeatureException(
e.toString() + ": " + "Unable to extract feature for " + name, e);
}
return getDefaultValue();
}
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
/** A FeatureScorer that reads the numeric docValues for a field */
public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer {
private final NumericDocValues docValues;
private final NumberType numberType;
public NumericDocValuesFieldValueFeatureScorer(
final FeatureWeight weight,
final LeafReaderContext context,
final DocIdSetIterator itr,
final NumberType numberType) {
super(weight, itr);
this.numberType = numberType;
NumericDocValues docValues;
try {
docValues = DocValues.getNumeric(context.reader(), field);
} catch (IOException e) {
throw new IllegalArgumentException("Could not read numeric docValues for field " + field);
}
this.docValues = docValues;
}
@Override
public float score() throws IOException {
if (docValues.advanceExact(itr.docID())) {
return readNumericDocValues();
}
return FieldValueFeature.this.getDefaultValue();
}
/**
* Read the numeric value for a field and convert the different number types to float.
*
* @return The numeric value that the docValues contain for the current document
* @throws IOException if docValues cannot be read
*/
private float readNumericDocValues() throws IOException {
if (NumberType.FLOAT.equals(numberType)) {
// convert float value that was stored as long back to float
return Float.intBitsToFloat((int) docValues.longValue());
} else if (NumberType.DOUBLE.equals(numberType)) {
// handle double value conversion
return (float) Double.longBitsToDouble(docValues.longValue());
} else if (NumberType.DATE.equals(numberType)) {
return readNumericDocValuesDate(docValues.longValue());
}
// just take the long value
return docValues.longValue();
}
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
/** A FeatureScorer that reads the sorted docValues for a field */
public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer {
private final SortedDocValues docValues;
public SortedDocValuesFieldValueFeatureScorer(
final FeatureWeight weight, final LeafReaderContext context, final DocIdSetIterator itr) {
super(weight, itr);
SortedDocValues docValues;
try {
docValues = DocValues.getSorted(context.reader(), field);
} catch (IOException e) {
throw new IllegalArgumentException("Could not read sorted docValues for field " + field);
}
this.docValues = docValues;
}
@Override
public float score() throws IOException {
if (docValues.advanceExact(itr.docID())) {
int ord = docValues.ordValue();
return readSortedDocValues(docValues.lookupOrd(ord));
}
return FieldValueFeature.this.getDefaultValue();
}
/**
* Interprets the bytesRef either as true / false token or tries to read it as number string
*
* @param bytesRef the value of the field that should be used as score
* @return the input converted to a number
*/
private float readSortedDocValues(BytesRef bytesRef) {
String string = bytesRef.utf8ToString();
if (string.length() == 1) {
// boolean values in the index are encoded with the
// a single char contained in TRUE_TOKEN or FALSE_TOKEN
// (see BoolField)
if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
return 1;
}
if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
return 0;
}
}
return FieldValueFeature.this.getDefaultValue();
}
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
/**
* A FeatureScorer that always returns the default value.
*
* It is used as a fallback for cases when a segment does not have any documents that contain
* doc values for a field. By doing so, we prevent a fallback to the FieldValueFeatureScorer,
* which would also return the default value but in a less performant way because it would first
* try to read the stored fields for the doc (which aren't present).
*/
public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer {
public DefaultValueFieldValueFeatureScorer(
final FeatureWeight weight, final DocIdSetIterator itr) {
super(weight, itr);
}
@Override
public float score() throws IOException {
return FieldValueFeature.this.getDefaultValue();
}
@Override
public float getMaxScore(int upTo) throws IOException {
return Float.POSITIVE_INFINITY;
}
}
}
}