All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.ltr.feature.FieldValueFeature Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.ltr.feature;

import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * This feature returns the value of a field in the current document. The field must have
 * stored="true" or docValues="true" properties. Example configuration:
 *
 * 
 * {
 *   "name":  "rawHits",
 *   "class": "org.apache.solr.ltr.feature.FieldValueFeature",
 *   "params": {
 *     "field": "hits"
 *   }
 * }
 * 
* *

There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. The * chosen scorer depends on the field attributes. * *

FieldValueFeatureScorer (FVFS): used for stored=true if docValues=false * *

NumericDocValuesFVFS: used for docValues=true, if docValueType == NUMERIC * *

SortedDocValuesFVFS: used for docValues=true, if docValueType == SORTED * *

DefaultValueFVFS: used for docValues=true, a fallback scorer that is used on segments where no * document has a value set in the field of this feature * *

Use {@link LegacyFieldValueFeature} for the pre 9.4 behaviour of not using DocValues when * docValues=true is combined with stored=true. */ public class FieldValueFeature extends Feature { protected boolean useDocValuesForStored = true; private String field; private Set fieldAsSet; public String getField() { return field; } public void setField(String field) { this.field = field; fieldAsSet = Collections.singleton(field); } @Override public LinkedHashMap paramsToMap() { final LinkedHashMap params = defaultParamsToMap(); params.put("field", field); return params; } @Override protected void validate() throws FeatureException { if (field == null || field.isEmpty()) { throw new FeatureException(getClass().getSimpleName() + ": field must be provided"); } } public FieldValueFeature(String name, Map params) { super(name, params); } @Override public FeatureWeight createWeight( IndexSearcher searcher, boolean needsScores, SolrQueryRequest request, Query originalQuery, Map efi) throws IOException { return new FieldValueFeatureWeight(searcher, request, originalQuery, efi); } public class FieldValueFeatureWeight extends FeatureWeight { private final SchemaField schemaField; public FieldValueFeatureWeight( IndexSearcher searcher, SolrQueryRequest request, Query originalQuery, Map efi) { super(FieldValueFeature.this, searcher, request, originalQuery, efi); if (searcher instanceof SolrIndexSearcher) { schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); } else { // some tests pass a null or a non-SolrIndexSearcher searcher schemaField = null; } } /** * Override this method in sub classes that wish to use not an absolute time but an interval * such as document age or remaining shelf life relative to a specific date or relative to now. * * @param val value of the field * @return value after transformation */ protected long readNumericDocValuesDate(long val) { return val; } /** * Return a FeatureScorer that uses docValues or storedFields if no docValues are present * * @param context the segment this FeatureScorer is working with * @return FeatureScorer for the current segment and field * @throws IOException as defined by abstract class Feature */ @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { if (schemaField != null && (!schemaField.stored() || useDocValuesForStored) && schemaField.hasDocValues()) { final FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); final DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE; if (DocValuesType.NUMERIC.equals(docValuesType)) { return new NumericDocValuesFieldValueFeatureScorer( this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType().getNumberType()); } else if (DocValuesType.SORTED.equals(docValuesType)) { return new SortedDocValuesFieldValueFeatureScorer( this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } else if (DocValuesType.NONE.equals(docValuesType)) { // Using a fallback feature scorer because this segment has no documents with a doc value // for the current field return new DefaultValueFieldValueFeatureScorer( this, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } throw new IllegalArgumentException( "Doc values type " + docValuesType.name() + " of field " + field + " is not supported"); } return new FieldValueFeatureScorer( this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); } /** A FeatureScorer that reads the stored value for a field */ public class FieldValueFeatureScorer extends FeatureScorer { private final LeafReaderContext context; public FieldValueFeatureScorer( FeatureWeight weight, LeafReaderContext context, DocIdSetIterator itr) { super(weight, itr); this.context = context; } @Override public float score() throws IOException { try { final Document document = context.reader().document(itr.docID(), fieldAsSet); final IndexableField indexableField = document.getField(field); if (indexableField == null) { return getDefaultValue(); } final Number number = indexableField.numericValue(); if (number != null) { return number.floatValue(); } else { final String string = indexableField.stringValue(); if (string.length() == 1) { // boolean values in the index are encoded with the // a single char contained in TRUE_TOKEN or FALSE_TOKEN // (see BoolField) if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { return 1; } if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { return 0; } } } } catch (final IOException e) { throw new FeatureException( e.toString() + ": " + "Unable to extract feature for " + name, e); } return getDefaultValue(); } @Override public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } /** A FeatureScorer that reads the numeric docValues for a field */ public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer { private final NumericDocValues docValues; private final NumberType numberType; public NumericDocValuesFieldValueFeatureScorer( final FeatureWeight weight, final LeafReaderContext context, final DocIdSetIterator itr, final NumberType numberType) { super(weight, itr); this.numberType = numberType; NumericDocValues docValues; try { docValues = DocValues.getNumeric(context.reader(), field); } catch (IOException e) { throw new IllegalArgumentException("Could not read numeric docValues for field " + field); } this.docValues = docValues; } @Override public float score() throws IOException { if (docValues.advanceExact(itr.docID())) { return readNumericDocValues(); } return FieldValueFeature.this.getDefaultValue(); } /** * Read the numeric value for a field and convert the different number types to float. * * @return The numeric value that the docValues contain for the current document * @throws IOException if docValues cannot be read */ private float readNumericDocValues() throws IOException { if (NumberType.FLOAT.equals(numberType)) { // convert float value that was stored as long back to float return Float.intBitsToFloat((int) docValues.longValue()); } else if (NumberType.DOUBLE.equals(numberType)) { // handle double value conversion return (float) Double.longBitsToDouble(docValues.longValue()); } else if (NumberType.DATE.equals(numberType)) { return readNumericDocValuesDate(docValues.longValue()); } // just take the long value return docValues.longValue(); } @Override public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } /** A FeatureScorer that reads the sorted docValues for a field */ public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer { private final SortedDocValues docValues; public SortedDocValuesFieldValueFeatureScorer( final FeatureWeight weight, final LeafReaderContext context, final DocIdSetIterator itr) { super(weight, itr); SortedDocValues docValues; try { docValues = DocValues.getSorted(context.reader(), field); } catch (IOException e) { throw new IllegalArgumentException("Could not read sorted docValues for field " + field); } this.docValues = docValues; } @Override public float score() throws IOException { if (docValues.advanceExact(itr.docID())) { int ord = docValues.ordValue(); return readSortedDocValues(docValues.lookupOrd(ord)); } return FieldValueFeature.this.getDefaultValue(); } /** * Interprets the bytesRef either as true / false token or tries to read it as number string * * @param bytesRef the value of the field that should be used as score * @return the input converted to a number */ private float readSortedDocValues(BytesRef bytesRef) { String string = bytesRef.utf8ToString(); if (string.length() == 1) { // boolean values in the index are encoded with the // a single char contained in TRUE_TOKEN or FALSE_TOKEN // (see BoolField) if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) { return 1; } if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) { return 0; } } return FieldValueFeature.this.getDefaultValue(); } @Override public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } /** * A FeatureScorer that always returns the default value. * *

It is used as a fallback for cases when a segment does not have any documents that contain * doc values for a field. By doing so, we prevent a fallback to the FieldValueFeatureScorer, * which would also return the default value but in a less performant way because it would first * try to read the stored fields for the doc (which aren't present). */ public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer { public DefaultValueFieldValueFeatureScorer( final FeatureWeight weight, final DocIdSetIterator itr) { super(weight, itr); } @Override public float score() throws IOException { return FieldValueFeature.this.getDefaultValue(); } @Override public float getMaxScore(int upTo) throws IOException { return Float.POSITIVE_INFINITY; } } } }





© 2015 - 2025 Weber Informatics LLC | Privacy Policy