org.apache.solr.ltr.feature.FieldValueFeature Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of solr-ltr Show documentation
Apache Solr Learning to Rank Package
There is a newer version: 9.7.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.ltr.feature;

import java.io.IOException;
import java.util.Collections;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.BytesRef;
import org.apache.solr.request.SolrQueryRequest;
import org.apache.solr.schema.BoolField;
import org.apache.solr.schema.NumberType;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.SolrIndexSearcher;

/**
 * This feature returns the value of a field in the current document. The field must have
 * stored="true" or docValues="true" properties. Example configuration:
 *
 * <pre>{
 *   "name":  "rawHits",
 *   "class": "org.apache.solr.ltr.feature.FieldValueFeature",
 *   "params": {
 *     "field": "hits"
 *   }
 * }</pre>
 *
 * <p>There are 4 different types of FeatureScorers that a FieldValueFeatureWeight may use. The
 * chosen scorer depends on the field attributes.
 *
 * <ul>
 *   <li>FieldValueFeatureScorer (FVFS): used for stored=true if docValues=false
 *   <li>NumericDocValuesFVFS: used for docValues=true, if docValueType == NUMERIC
 *   <li>SortedDocValuesFVFS: used for docValues=true, if docValueType == SORTED
 *   <li>DefaultValueFVFS: used for docValues=true, a fallback scorer that is used on segments
 *       where no document has a value set in the field of this feature
 * </ul>
 *
 * <p>Use {@link LegacyFieldValueFeature} for the pre 9.4 behaviour of not using DocValues when
 * docValues=true is combined with stored=true.
 */
public class FieldValueFeature extends Feature {

  /**
   * Whether docValues are used for a field that is stored and also has docValues; consulted when
   * a scorer is chosen for a segment.
   */
  protected boolean useDocValuesForStored = true;

  /** Name of the field whose value is returned as the feature value. */
  private String field;

  /** Singleton set holding the field name; used as the field filter when loading a document. */
  private Set<String> fieldAsSet;

  /**
   * Returns the configured field name.
   *
   * @return the name of the field this feature reads, or {@code null} if none has been set
   */
  public String getField() {
    return this.field;
  }

  /**
   * Sets the field to read and rebuilds the singleton set that holds its name.
   *
   * @param field name of the field whose value is used as the feature value
   */
  public void setField(String field) {
    this.fieldAsSet = Collections.singleton(field);
    this.field = field;
  }

  /**
   * Builds the parameter map of this feature: the map returned by {@code defaultParamsToMap()}
   * with the configured field name added.
   *
   * @return the parameter map, containing the field name under the key {@code "field"}
   */
  @Override
  public LinkedHashMap<String, Object> paramsToMap() {
    final LinkedHashMap<String, Object> params = defaultParamsToMap();
    params.put("field", field);
    return params;
  }

  /**
   * Verifies that a field name has been configured.
   *
   * @throws FeatureException if the field is {@code null} or empty
   */
  @Override
  protected void validate() throws FeatureException {
    final boolean fieldMissing = (field == null) || field.isEmpty();
    if (fieldMissing) {
      throw new FeatureException(getClass().getSimpleName() + ": field must be provided");
    }
  }

  /**
   * Creates the feature; both arguments are handed to the super class constructor unchanged.
   *
   * @param name name of the feature
   * @param params parameters of the feature
   */
  public FieldValueFeature(String name, Map<String, Object> params) {
    super(name, params);
  }

  /**
   * Creates the weight that produces the per-segment scorers of this feature.
   *
   * @param searcher searcher the weight is created for; used to look up the schema field
   * @param needsScores not used by this implementation
   * @param request the current request, passed on to the weight
   * @param originalQuery the original query, passed on to the weight
   * @param efi external feature information, passed on to the weight
   * @return a new {@link FieldValueFeatureWeight}
   * @throws IOException as declared by the overridden method
   */
  @Override
  public FeatureWeight createWeight(
      IndexSearcher searcher,
      boolean needsScores,
      SolrQueryRequest request,
      Query originalQuery,
      Map<String, String[]> efi)
      throws IOException {
    return new FieldValueFeatureWeight(searcher, request, originalQuery, efi);
  }

  public class FieldValueFeatureWeight extends FeatureWeight {
    /** Schema definition of the field, or {@code null} when it could not be looked up. */
    private final SchemaField schemaField;

    /**
     * Creates the weight and resolves the schema field when the searcher provides a schema.
     *
     * @param searcher searcher the weight is created for; the schema is only consulted if this is
     *     a {@link SolrIndexSearcher}
     * @param request the current request
     * @param originalQuery the original query
     * @param efi external feature information
     */
    public FieldValueFeatureWeight(
        IndexSearcher searcher,
        SolrQueryRequest request,
        Query originalQuery,
        Map<String, String[]> efi) {
      super(FieldValueFeature.this, searcher, request, originalQuery, efi);
      if (searcher instanceof SolrIndexSearcher) {
        schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field);
      } else { // some tests pass a null or a non-SolrIndexSearcher searcher
        schemaField = null;
      }
    }

    /**
     * Hook for converting the numeric doc value of a date field before it is used as the feature
     * value. This implementation returns the value unchanged; sub classes may override it to
     * produce an interval instead of an absolute time, such as document age or remaining shelf
     * life relative to a specific date or relative to now.
     *
     * @param val value of the field
     * @return value after transformation
     */
    protected long readNumericDocValuesDate(long val) {
      return val;
    }

    /**
     * Picks the FeatureScorer for one segment. A docValues-backed scorer is used when the schema
     * field has docValues and is either not stored or docValues are enabled for stored fields;
     * otherwise the scorer that reads stored fields is returned.
     *
     * @param context the segment this FeatureScorer is working with
     * @return FeatureScorer for the current segment and field
     * @throws IllegalArgumentException if the docValues type of the field in this segment is not
     *     NUMERIC, SORTED or NONE
     * @throws IOException as defined by abstract class Feature
     */
    @Override
    public FeatureScorer scorer(LeafReaderContext context) throws IOException {
      final boolean readFromDocValues =
          schemaField != null
              && (!schemaField.stored() || useDocValuesForStored)
              && schemaField.hasDocValues();
      if (!readFromDocValues) {
        return new FieldValueFeatureScorer(
            this, context, DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
      }

      final FieldInfo info = context.reader().getFieldInfos().fieldInfo(field);
      final DocValuesType type = (info == null) ? DocValuesType.NONE : info.getDocValuesType();
      final DocIdSetIterator allDocs = DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS);

      switch (type) {
        case NUMERIC:
          return new NumericDocValuesFieldValueFeatureScorer(
              this, context, allDocs, schemaField.getType().getNumberType());
        case SORTED:
          return new SortedDocValuesFieldValueFeatureScorer(this, context, allDocs);
        case NONE:
          // Fallback scorer: this segment has no documents with a doc value for the field
          return new DefaultValueFieldValueFeatureScorer(this, allDocs);
        default:
          throw new IllegalArgumentException(
              "Doc values type " + type.name() + " of field " + field + " is not supported");
      }
    }

    /**
     * A FeatureScorer that reads the stored value for a field.
     *
     * <p>Numeric values are returned as float, single-character boolean tokens are mapped to 1 or
     * 0, and every other case (missing field, no usable value) yields the feature's default value.
     */
    public class FieldValueFeatureScorer extends FeatureScorer {

      private final LeafReaderContext context;

      /**
       * @param weight the weight this scorer belongs to
       * @param context the segment whose stored fields are read
       * @param itr iterator that positions the scorer on the current document
       */
      public FieldValueFeatureScorer(
          FeatureWeight weight, LeafReaderContext context, DocIdSetIterator itr) {
        super(weight, itr);
        this.context = context;
      }

      /**
       * Loads the stored field of the current document and converts it to a float.
       *
       * @return the numeric value, 1 / 0 for a boolean token, or the default value otherwise
       * @throws FeatureException if the stored document cannot be read
       */
      @Override
      public float score() throws IOException {

        try {
          // only the feature's field is loaded, not the whole document
          final Document document = context.reader().document(itr.docID(), fieldAsSet);
          final IndexableField indexableField = document.getField(field);
          if (indexableField == null) {
            return getDefaultValue();
          }
          final Number number = indexableField.numericValue();
          if (number != null) {
            return number.floatValue();
          }
          // stringValue() may be null (e.g. for a binary stored field); fall through to the
          // default value in that case instead of failing with a NullPointerException
          final String string = indexableField.stringValue();
          if (string != null && string.length() == 1) {
            // boolean values in the index are encoded with the
            // a single char contained in TRUE_TOKEN or FALSE_TOKEN
            // (see BoolField)
            if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
              return 1;
            }
            if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
              return 0;
            }
          }
        } catch (final IOException e) {
          throw new FeatureException(
              e.toString() + ": " + "Unable to extract feature for " + name, e);
        }
        return getDefaultValue();
      }

      /** Reports positive infinity as the maximum score for any range of documents. */
      @Override
      public float getMaxScore(int upTo) throws IOException {
        return Float.POSITIVE_INFINITY;
      }
    }

    /** A FeatureScorer that reads the numeric docValues for a field */
    public final class NumericDocValuesFieldValueFeatureScorer extends FeatureScorer {
      private final NumericDocValues docValues;
      private final NumberType numberType;

      /**
       * @param weight the weight this scorer belongs to
       * @param context the segment whose numeric docValues are read
       * @param itr iterator that positions the scorer on the current document
       * @param numberType number type of the schema field; decides how the raw long is decoded
       * @throws IllegalArgumentException if the numeric docValues cannot be obtained; the
       *     underlying IOException is attached as the cause
       */
      public NumericDocValuesFieldValueFeatureScorer(
          final FeatureWeight weight,
          final LeafReaderContext context,
          final DocIdSetIterator itr,
          final NumberType numberType) {
        super(weight, itr);
        this.numberType = numberType;

        try {
          this.docValues = DocValues.getNumeric(context.reader(), field);
        } catch (IOException e) {
          // keep the original exception as the cause so the root failure is not lost
          throw new IllegalArgumentException(
              "Could not read numeric docValues for field " + field, e);
        }
      }

      /**
       * @return the decoded doc value of the current document, or the feature's default value if
       *     the document has no value for the field
       */
      @Override
      public float score() throws IOException {
        if (docValues.advanceExact(itr.docID())) {
          return readNumericDocValues();
        }
        return FieldValueFeature.this.getDefaultValue();
      }

      /**
       * Read the numeric value for a field and convert the different number types to float.
       *
       * @return The numeric value that the docValues contain for the current document
       * @throws IOException if docValues cannot be read
       */
      private float readNumericDocValues() throws IOException {
        final long raw = docValues.longValue();
        // equals() on the constant keeps this safe when numberType is null
        if (NumberType.FLOAT.equals(numberType)) {
          // convert float value that was stored as long back to float
          return Float.intBitsToFloat((int) raw);
        } else if (NumberType.DOUBLE.equals(numberType)) {
          // handle double value conversion
          return (float) Double.longBitsToDouble(raw);
        } else if (NumberType.DATE.equals(numberType)) {
          return readNumericDocValuesDate(raw);
        }
        // just take the long value
        return raw;
      }

      /** Reports positive infinity as the maximum score for any range of documents. */
      @Override
      public float getMaxScore(int upTo) throws IOException {
        return Float.POSITIVE_INFINITY;
      }
    }

    /** A FeatureScorer that reads the sorted docValues for a field */
    public final class SortedDocValuesFieldValueFeatureScorer extends FeatureScorer {
      private final SortedDocValues docValues;

      /**
       * @param weight the weight this scorer belongs to
       * @param context the segment whose sorted docValues are read
       * @param itr iterator that positions the scorer on the current document
       * @throws IllegalArgumentException if the sorted docValues cannot be obtained; the
       *     underlying IOException is attached as the cause
       */
      public SortedDocValuesFieldValueFeatureScorer(
          final FeatureWeight weight, final LeafReaderContext context, final DocIdSetIterator itr) {
        super(weight, itr);

        try {
          this.docValues = DocValues.getSorted(context.reader(), field);
        } catch (IOException e) {
          // keep the original exception as the cause so the root failure is not lost
          throw new IllegalArgumentException(
              "Could not read sorted docValues for field " + field, e);
        }
      }

      /**
       * @return 1 or 0 if the current document's value is a boolean token, otherwise (including
       *     when the document has no value) the feature's default value
       */
      @Override
      public float score() throws IOException {
        if (docValues.advanceExact(itr.docID())) {
          int ord = docValues.ordValue();
          return readSortedDocValues(docValues.lookupOrd(ord));
        }
        return FieldValueFeature.this.getDefaultValue();
      }

      /**
       * Interprets the bytesRef as a true / false token. Any other content, including number
       * strings, is not parsed and results in the feature's default value.
       *
       * @param bytesRef the value of the field that should be used as score
       * @return 1 for the true token, 0 for the false token, the default value otherwise
       */
      private float readSortedDocValues(BytesRef bytesRef) {
        String string = bytesRef.utf8ToString();
        if (string.length() == 1) {
          // boolean values in the index are encoded with the
          // a single char contained in TRUE_TOKEN or FALSE_TOKEN
          // (see BoolField)
          if (string.charAt(0) == BoolField.TRUE_TOKEN[0]) {
            return 1;
          }
          if (string.charAt(0) == BoolField.FALSE_TOKEN[0]) {
            return 0;
          }
        }
        return FieldValueFeature.this.getDefaultValue();
      }

      /** Reports positive infinity as the maximum score for any range of documents. */
      @Override
      public float getMaxScore(int upTo) throws IOException {
        return Float.POSITIVE_INFINITY;
      }
    }

    /**
     * A FeatureScorer whose score is always the feature's default value.
     *
     * <p>It serves as the fallback for segments in which no document has doc values for the field.
     * Using it avoids falling back to the FieldValueFeatureScorer, which would also end up
     * returning the default value, but only after a slower attempt to read stored fields that
     * aren't present.
     */
    public final class DefaultValueFieldValueFeatureScorer extends FeatureScorer {
      public DefaultValueFieldValueFeatureScorer(
          final FeatureWeight weight, final DocIdSetIterator itr) {
        super(weight, itr);
      }

      /** Returns the default value of the enclosing feature, whatever the current document is. */
      @Override
      public float score() throws IOException {
        final float fallback = FieldValueFeature.this.getDefaultValue();
        return fallback;
      }

      /** Reports positive infinity as the maximum score for any range of documents. */
      @Override
      public float getMaxScore(int upTo) throws IOException {
        return Float.POSITIVE_INFINITY;
      }
    }
  }
}