org.apache.solr.uninverting.UninvertingReader Maven / Gradle / Ivy

Show more of this group Show more artifacts with this name
Show all versions of solr-core Show documentation
Apache Solr Core
There is a newer version: 9.6.1
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.uninverting;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Map;
import java.util.function.Function;

import org.apache.lucene.document.BinaryDocValuesField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.index.BinaryDocValues;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocValuesType;
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.FilterDirectoryReader;
import org.apache.lucene.index.FilterLeafReader;
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.SortedDocValues;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.solr.uninverting.FieldCache.CacheEntry;

/**
 * A FilterReader that exposes indexed values as if they also had
 * docvalues.
 * <p>
 * This is accomplished by "inverting the inverted index" or "uninversion".
 * <p>
 * The uninversion process happens lazily: upon the first request for the
 * field's docvalues (e.g. via {@link org.apache.lucene.index.LeafReader#getNumericDocValues(String)}
 * or similar), it will create the docvalues on-the-fly if needed and cache it,
 * based on the core cache key of the wrapped LeafReader.
 */
public class UninvertingReader extends FilterLeafReader {

  /**
   * Specifies the type of uninversion to apply for the field.
   */
  public enum Type {
    /**
     * Single-valued Integer, (e.g. indexed with {@link org.apache.lucene.document.IntPoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    INTEGER_POINT,
    /**
     * Single-valued Long, (e.g. indexed with {@link org.apache.lucene.document.LongPoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    LONG_POINT,
    /**
     * Single-valued Float, (e.g. indexed with {@link org.apache.lucene.document.FloatPoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    FLOAT_POINT,
    /**
     * Single-valued Double, (e.g. indexed with {@link org.apache.lucene.document.DoublePoint})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     */
    DOUBLE_POINT,
    /**
     * Single-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #INTEGER_POINT} instead.
     */
    @Deprecated
    LEGACY_INTEGER,
    /**
     * Single-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #LONG_POINT} instead.
     */
    @Deprecated
    LEGACY_LONG,
    /**
     * Single-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #FLOAT_POINT} instead.
     */
    @Deprecated
    LEGACY_FLOAT,
    /**
     * Single-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link NumericDocValuesField}.
     * @deprecated Index with points and use {@link #DOUBLE_POINT} instead.
     */
    @Deprecated
    LEGACY_DOUBLE,
    /**
     * Single-valued Binary, (e.g. indexed with {@link StringField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link BinaryDocValuesField}.
     */
    BINARY,
    /**
     * Single-valued Binary, (e.g. indexed with {@link StringField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedDocValuesField}.
     */
    SORTED,
    /**
     * Multi-valued Binary, (e.g. indexed with {@link StringField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_BINARY,
    /**
     * Multi-valued Integer, (e.g. indexed with {@link org.apache.solr.legacy.LegacyIntField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_INTEGER,
    /**
     * Multi-valued Float, (e.g. indexed with {@link org.apache.solr.legacy.LegacyFloatField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_FLOAT,
    /**
     * Multi-valued Long, (e.g. indexed with {@link org.apache.solr.legacy.LegacyLongField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_LONG,
    /**
     * Multi-valued Double, (e.g. indexed with {@link org.apache.solr.legacy.LegacyDoubleField})
     * <p>
     * Fields with this type act as if they were indexed with
     * {@link SortedSetDocValuesField}.
     */
    SORTED_SET_DOUBLE

  }

  /**
   * Wraps a provided {@link DirectoryReader}, looking up each field's uninversion type in a map.
   * The map's {@code get} method is used as the mapper, so a field with no entry yields
   * {@code null}.
   *
   * @param reader input directory reader
   * @param mapping field name to uninversion type
   * @return a wrapped directory reader
   * @see #wrap(DirectoryReader, Function)
   */
  public static DirectoryReader wrap(DirectoryReader reader, Map<String, Type> mapping) throws IOException {
    return wrap(reader, mapping::get);
  }

  /**
   * Wraps a provided {@link DirectoryReader}. Note that for convenience, the returned reader
   * can be used normally (e.g. passed to {@link DirectoryReader#openIfChanged(DirectoryReader)})
   * and so on.
   *
   * @param in input directory reader
   * @param mapper function to map a field name to an uninversion type.  A Null result means to not uninvert.
   * @return a wrapped directory reader
   */
  public static DirectoryReader wrap(DirectoryReader in, Function<String, Type> mapper) throws IOException {
    return new UninvertingDirectoryReader(in, mapper);
  }

  /**
   * A {@link FilterDirectoryReader} that passes every leaf through
   * {@link UninvertingReader#wrap(LeafReader, Function)} using one shared mapper.
   */
  static class UninvertingDirectoryReader extends FilterDirectoryReader {
    /** Maps a field name to its uninversion type; kept so re-wrapped readers use the same mapper. */
    final Function<String, Type> mapper;

    /**
     * @param in directory reader to wrap
     * @param mapper function to map a field name to an uninversion type
     */
    public UninvertingDirectoryReader(DirectoryReader in, final Function<String, Type> mapper) throws IOException {
      // SubReaderWrapper is subclassed anonymously here; it captures the (final) mapper parameter.
      super(in, new FilterDirectoryReader.SubReaderWrapper() {
        @Override
        public LeafReader wrap(LeafReader reader) {
          return UninvertingReader.wrap(reader, mapper);
        }
      });
      this.mapper = mapper;
    }

    @Override
    protected DirectoryReader doWrapDirectoryReader(DirectoryReader in) throws IOException {
      return new UninvertingDirectoryReader(in, mapper);
    }

    // NOTE: delegating the cache helpers is wrong since this wrapper alters the
    // content of the reader, it is only fine to do that because Solr ALWAYS
    // consumes index readers through this wrapper

    @Override
    public CacheHelper getReaderCacheHelper() {
      return in.getReaderCacheHelper();
    }
  }

  /**
   * Create a new UninvertingReader with the specified mapping, wrapped around the input.  It may be deemed that there
   * is no mapping to do, in which case the input is returned.
   * <p>
   * A field's reported {@link DocValuesType} is changed only when it currently has none, the
   * mapping returns a non-null {@link Type} for it, and the field carries the index structure that
   * type reads from (points for the {@code *_POINT} types, the inverted index for all others).
   * Every other field is carried over unchanged, so the returned reader's {@link FieldInfos}
   * always lists the same fields as the input's.
   * <p>
   * Expert: This should almost never be used. Use {@link #wrap(DirectoryReader, Function)} instead.
   *
   * @param in leaf reader to wrap
   * @param mapping function to map a field name to an uninversion type; a null result means to not uninvert
   * @return {@code in} itself when no field changes, otherwise a new wrapping reader
   * @lucene.internal
   */
  public static LeafReader wrap(LeafReader in, Function<String, Type> mapping) {
    boolean wrap = false;

    // Calculate a new FieldInfos that has DocValuesType where we didn't before
    final FieldInfos originalInfos = in.getFieldInfos();
    ArrayList<FieldInfo> newFieldInfos = new ArrayList<>(originalInfos.size());
    for (FieldInfo fi : originalInfos) {
      DocValuesType type = resolveDocValuesType(fi, mapping);
      if (type != fi.getDocValuesType()) { // we changed it
        wrap = true;
        newFieldInfos.add(new FieldInfo(fi.name, fi.number, fi.hasVectors(), fi.omitsNorms(),
            fi.hasPayloads(), fi.getIndexOptions(), type, fi.getDocValuesGen(), fi.attributes(),
            fi.getPointDataDimensionCount(), fi.getPointIndexDimensionCount(), fi.getPointNumBytes(), fi.isSoftDeletesField()));
      } else {
        newFieldInfos.add(fi);
      }
    }
    if (!wrap) {
      return in;
    }
    FieldInfos fieldInfos = new FieldInfos(newFieldInfos.toArray(new FieldInfo[0]));
    return new UninvertingReader(in, mapping, fieldInfos);
  }

  /**
   * Decides which {@link DocValuesType} the wrapper should report for one field.
   * <p>
   * Returns the field's existing type whenever the field cannot be uninverted. Previously a
   * mapped type that did not match the field's index structure caused the field to be skipped
   * altogether, which removed it from the rebuilt {@link FieldInfos}; it is now left as-is.
   */
  private static DocValuesType resolveDocValuesType(FieldInfo fi, Function<String, Type> mapping) {
    final DocValuesType declared = fi.getDocValuesType();
    if (declared != DocValuesType.NONE) {
      return declared; // already has docValues
    }
    // fields which currently don't have docValues, but are uninvertable (indexed or points data present)
    final boolean hasPostings = fi.getIndexOptions() != IndexOptions.NONE;
    final boolean hasSingleDimensionPoints = fi.getPointNumBytes() > 0 && fi.getPointDataDimensionCount() == 1;
    if (!hasPostings && !hasSingleDimensionPoints) {
      return declared;
    }
    final Type t = mapping.apply(fi.name); // could definitely return null, thus still can't uninvert it
    if (t == null) {
      return declared;
    }
    switch (t) {
      case INTEGER_POINT:
      case LONG_POINT:
      case FLOAT_POINT:
      case DOUBLE_POINT:
        // type uses points
        return fi.getPointDataDimensionCount() == 0 ? declared : DocValuesType.NUMERIC;
      case LEGACY_INTEGER:
      case LEGACY_LONG:
      case LEGACY_FLOAT:
      case LEGACY_DOUBLE:
        // type uses inverted index
        return hasPostings ? DocValuesType.NUMERIC : declared;
      case BINARY:
        return hasPostings ? DocValuesType.BINARY : declared;
      case SORTED:
        return hasPostings ? DocValuesType.SORTED : declared;
      case SORTED_SET_BINARY:
      case SORTED_SET_INTEGER:
      case SORTED_SET_FLOAT:
      case SORTED_SET_LONG:
      case SORTED_SET_DOUBLE:
        return hasPostings ? DocValuesType.SORTED_SET : declared;
      default:
        throw new AssertionError();
    }
  }

  /** Maps a field name to its uninversion type; a null result means the field is not uninverted. */
  final Function<String, Type> mapping;
  /** The field infos reported by {@link #getFieldInfos()} in place of the wrapped reader's. */
  final FieldInfos fieldInfos;

  /**
   * Instances are created through {@link #wrap(LeafReader, Function)}.
   *
   * @param in leaf reader being wrapped
   * @param mapping function to map a field name to an uninversion type
   * @param fieldInfos field infos this reader should report
   */
  private UninvertingReader(LeafReader in, Function<String, Type> mapping, FieldInfos fieldInfos) {
    super(in);
    this.mapping = mapping;
    this.fieldInfos = fieldInfos;
  }

  /** Returns the field infos supplied to this reader at construction time. */
  @Override
  public FieldInfos getFieldInfos() {
    return this.fieldInfos;
  }

  /**
   * Returns numeric docvalues for {@code field}. Values already available from the superclass
   * lookup win; otherwise the field's mapped type selects the {@link FieldCache} parser used to
   * obtain them.
   *
   * @return the docvalues, or {@code null} when the field has no mapping or is mapped to a
   *         non-numeric type
   */
  @Override
  public NumericDocValues getNumericDocValues(String field) throws IOException {
    final NumericDocValues existing = super.getNumericDocValues(field);
    if (existing != null) {
      return existing;
    }
    final Type mapped = getType(field);
    if (mapped == null) {
      return null;
    }
    switch (mapped) {
      case INTEGER_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.INT_POINT_PARSER);
      case FLOAT_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.FLOAT_POINT_PARSER);
      case LONG_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LONG_POINT_PARSER);
      case DOUBLE_POINT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.DOUBLE_POINT_PARSER);
      case LEGACY_INTEGER:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_INT_PARSER);
      case LEGACY_FLOAT:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_FLOAT_PARSER);
      case LEGACY_LONG:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_LONG_PARSER);
      case LEGACY_DOUBLE:
        return FieldCache.DEFAULT.getNumerics(in, field, FieldCache.LEGACY_DOUBLE_PARSER);
      default:
        // BINARY, SORTED and the SORTED_SET_* types are not numeric
        return null;
    }
  }

  /**
   * Returns binary docvalues for {@code field}: the wrapped reader's own values when present,
   * otherwise {@link FieldCache} terms if the field is mapped to {@link Type#BINARY}.
   *
   * @return the docvalues, or {@code null} when neither source applies
   */
  @Override
  public BinaryDocValues getBinaryDocValues(String field) throws IOException {
    final BinaryDocValues existing = in.getBinaryDocValues(field);
    if (existing != null) {
      return existing;
    }
    if (getType(field) != Type.BINARY) {
      return null;
    }
    return FieldCache.DEFAULT.getTerms(in, field);
  }

  /**
   * Returns sorted docvalues for {@code field}: the wrapped reader's own values when present,
   * otherwise the {@link FieldCache} terms index if the field is mapped to {@link Type#SORTED}.
   *
   * @return the docvalues, or {@code null} when neither source applies
   */
  @Override
  public SortedDocValues getSortedDocValues(String field) throws IOException {
    final SortedDocValues existing = in.getSortedDocValues(field);
    if (existing != null) {
      return existing;
    }
    if (getType(field) != Type.SORTED) {
      return null;
    }
    return FieldCache.DEFAULT.getTermsIndex(in, field);
  }
  
  /**
   * Returns sorted-set docvalues for {@code field}: the wrapped reader's own values when present,
   * otherwise {@link FieldCache} doc-term-ords selected by the field's mapped
   * {@code SORTED_SET_*} type.
   *
   * @return the docvalues, or {@code null} when the field has no mapping or is mapped to a type
   *         that is not one of the {@code SORTED_SET_*} types
   */
  @Override
  public SortedSetDocValues getSortedSetDocValues(String field) throws IOException {
    final SortedSetDocValues existing = in.getSortedSetDocValues(field);
    if (existing != null) {
      return existing;
    }
    final Type mapped = getType(field);
    if (mapped == null) {
      return null;
    }
    switch (mapped) {
      case SORTED_SET_INTEGER:
      case SORTED_SET_FLOAT:
        return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT32_TERM_PREFIX);
      case SORTED_SET_LONG:
      case SORTED_SET_DOUBLE:
        return FieldCache.DEFAULT.getDocTermOrds(in, field, FieldCache.INT64_TERM_PREFIX);
      case SORTED_SET_BINARY:
        return FieldCache.DEFAULT.getDocTermOrds(in, field, null);
      default:
        // single-valued types are not served as sorted-set docvalues
        return null;
    }
  }

  /**
   * Looks up the uninversion type for a field by applying the mapping function to its name.
   *
   * @return whatever the mapping yields for {@code field}, which is null when there is no mapping
   */
  private Type getType(String field) {
    final Type mapped = mapping.apply(field);
    return mapped;
  }

  // NOTE: delegating the cache helpers is wrong since this wrapper alters the
  // content of the reader, it is only fine to do that because Solr ALWAYS
  // consumes index readers through this wrapper

  /** Returns the wrapped reader's core cache helper unchanged. */
  @Override
  public CacheHelper getCoreCacheHelper() {
    final CacheHelper delegateHelper = in.getCoreCacheHelper();
    return delegateHelper;
  }

  /** Returns the wrapped reader's reader cache helper unchanged. */
  @Override
  public CacheHelper getReaderCacheHelper() {
    final CacheHelper delegateHelper = in.getReaderCacheHelper();
    return delegateHelper;
  }

  /** Describes this reader as {@code Uninverting(<wrapped reader>)}. */
  @Override
  public String toString() {
    final StringBuilder description = new StringBuilder("Uninverting(");
    description.append(in.toString());
    description.append(")");
    return description.toString();
  }
  
  /**
   * Return information about the backing cache: one description string per cache entry plus the
   * summed RAM usage of all entry values, rendered in human-readable units.
   *
   * @lucene.internal
   */
  public static FieldCacheStats getUninvertedStats() {
    final CacheEntry[] cacheEntries = FieldCache.DEFAULT.getCacheEntries();
    final String[] descriptions = new String[cacheEntries.length];
    long ramBytes = 0;
    int slot = 0;
    for (CacheEntry entry : cacheEntries) {
      descriptions[slot++] = entry.toString();
      ramBytes += entry.getValue().ramBytesUsed();
    }
    return new FieldCacheStats(RamUsageEstimator.humanReadableUnits(ramBytes), descriptions);
  }

  /** Returns the number of entries currently reported by the backing {@link FieldCache}. */
  public static int getUninvertedStatsSize() {
    final CacheEntry[] cacheEntries = FieldCache.DEFAULT.getCacheEntries();
    return cacheEntries.length;
  }

  /**
   * Holder for a snapshot of the backing cache: a total size string and one description per entry.
   * @lucene.internal
   */
  public static class FieldCacheStats {
    /** Total size of the cache as supplied to the constructor. */
    public String totalSize;
    /** Per-entry description strings as supplied to the constructor. */
    public String[] info;

    /**
     * @param totalSize value stored in {@link #totalSize}
     * @param info array stored (not copied) in {@link #info}
     */
    public FieldCacheStats(String totalSize, String[] info) {
      this.info = info;
      this.totalSize = totalSize;
    }

  }
}