All Downloads are FREE. The search and download functionality uses the official Maven repository.

org.apache.solr.search.TopLevelJoinQuery Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.solr.search;

import java.io.IOException;
import java.lang.invoke.MethodHandles;
import org.apache.lucene.index.DocValues;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.SortedSetDocValues;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.ConstantScoreScorer;
import org.apache.lucene.search.ConstantScoreWeight;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.TwoPhaseIterator;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.LongBitSet;
import org.apache.solr.common.SolrException;
import org.apache.solr.schema.IndexSchema;
import org.apache.solr.schema.SchemaField;
import org.apache.solr.search.join.MultiValueTermOrdinalCollector;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * {@link JoinQuery} implementation using global (top-level) DocValues ordinals to efficiently
 * compare values in the "from" and "to" fields.
 */
public class TopLevelJoinQuery extends JoinQuery {
  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());

  /**
   * Creates a top-level join query by handing every argument, unchanged, to the {@link JoinQuery}
   * constructor.
   *
   * @param fromField name of the field on the "from" side of the join
   * @param toField name of the field on the "to" side of the join
   * @param coreName forwarded to {@link JoinQuery}; {@link SelfJoin} passes {@code null} here
   * @param subQuery forwarded to {@link JoinQuery} (presumably the query that selects the "from"
   *     documents; confirm against {@link JoinQuery})
   */
  public TopLevelJoinQuery(String fromField, String toField, String coreName, Query subQuery) {
    super(fromField, toField, coreName, subQuery);
  }

  /**
   * Creates the {@link Weight} that performs the join using top-level DocValues ordinals.
   *
   * <p>If {@code searcher} is not a {@link SolrIndexSearcher}, the superclass implementation is
   * used instead. Otherwise the ordinals of the "from" field that match the sub-query are collected
   * up front, translated into ordinals of the "to" field, and the returned constant-score weight
   * accepts every document that holds at least one of those "to" ordinals.
   *
   * @param searcher the searcher executing this query
   * @param scoreMode passed to the fallback weight and to each {@link ConstantScoreScorer}
   * @param boost boost handed to the returned {@link ConstantScoreWeight}
   * @return the join weight; a weight matching no documents when either field has no values
   * @throws IOException declared by the overridden method; note that an {@link IOException} raised
   *     while the ordinals are being prepared is rethrown wrapped in a {@link RuntimeException}
   */
  @Override
  public Weight createWeight(IndexSearcher searcher, ScoreMode scoreMode, float boost)
      throws IOException {
    final boolean isSolrSearcher = searcher instanceof SolrIndexSearcher;
    if (!isSolrSearcher) {
      log.debug(
          "Falling back to JoinQueryWeight because searcher [{}] is not the required SolrIndexSearcher",
          searcher);
      return super.createWeight(searcher, scoreMode, boost);
    }

    // The JoinQueryWeight is only built to obtain the searchers for both sides of the join.
    final JoinQueryWeight joinWeight =
        new JoinQueryWeight((SolrIndexSearcher) searcher, ScoreMode.COMPLETE_NO_SCORES, 1.0f);
    final SolrIndexSearcher fromSide = joinWeight.fromSearcher;
    final SolrIndexSearcher toSide = joinWeight.toSearcher;

    try {
      final SortedSetDocValues fromValues = validateAndFetchDocValues(fromSide, fromField, "from");
      final SortedSetDocValues toValues = validateAndFetchDocValues(toSide, toField, "to");

      // Nothing can join when either side has no values at all.
      if (fromValues.getValueCount() == 0 || toValues.getValueCount() == 0) {
        return createNoMatchesWeight(boost);
      }

      final LongBitSet matchingFromOrds =
          findFieldOrdinalsMatchingQuery(q, fromField, fromSide, fromValues);
      final LongBitSet matchingToOrds = new LongBitSet(toValues.getValueCount());
      final BitsetBounds toBounds =
          convertFromOrdinalsIntoToField(matchingFromOrds, fromValues, matchingToOrds, toValues);

      final boolean toFieldIsMultiValued =
          toSide.getSchema().getFieldOrNull(toField).multiValued();

      return new ConstantScoreWeight(this, boost) {
        @Override
        public boolean isCacheable(LeafReaderContext ctx) {
          return false;
        }

        @Override
        public Scorer scorer(LeafReaderContext context) throws IOException {
          // No "to" ordinal was selected, so no segment can contain a match.
          if (toBounds.lower == BitsetBounds.NO_MATCHES) {
            return null;
          }

          // Only documents with a value in the "to" field are candidates in this segment.
          final LeafReader segmentReader = context.reader();
          final DocIdSetIterator toApproximation;
          if (toFieldIsMultiValued) {
            toApproximation = segmentReader.getSortedSetDocValues(toField);
          } else {
            toApproximation = segmentReader.getSortedDocValues(toField);
          }
          if (toApproximation == null) {
            return null;
          }

          final int segmentDocBase = context.docBase;
          final TwoPhaseIterator ordinalMatcher =
              new TwoPhaseIterator(toApproximation) {
                @Override
                public float matchCost() {
                  return 10.0F;
                }

                @Override
                public boolean matches() throws IOException {
                  // The top-level doc values are addressed by index-wide doc id.
                  final int topLevelDocId = segmentDocBase + approximation.docID();
                  if (!toValues.advanceExact(topLevelDocId)) {
                    return false;
                  }
                  long ord;
                  while ((ord = toValues.nextOrd()) != -1L) {
                    if (matchingToOrds.get(ord)) {
                      return true;
                    }
                  }
                  return false;
                }
              };

          return new ConstantScoreScorer(this, this.score(), scoreMode, ordinalMatcher);
        }
      };
    } catch (IOException e) {
      throw new RuntimeException(e);
    }
  }

  /**
   * Builds a weight whose scorer is {@code null} for every segment, i.e. one that never matches.
   *
   * @param boost boost handed to the {@link ConstantScoreWeight}
   * @return a non-cacheable weight that matches no documents
   */
  private Weight createNoMatchesWeight(float boost) {
    final Weight matchNothing =
        new ConstantScoreWeight(this, boost) {
          @Override
          public boolean isCacheable(LeafReaderContext ctx) {
            return false;
          }

          @Override
          public Scorer scorer(LeafReaderContext context) throws IOException {
            return null;
          }
        };
    return matchNothing;
  }

  /**
   * Checks that {@code fieldName} exists in the searcher's schema and has docValues, then returns
   * its top-level doc values as a {@link SortedSetDocValues}.
   *
   * <p>Single-valued fields are exposed through {@link DocValues#singleton} so that callers can
   * treat both kinds of field uniformly.
   *
   * @param solrSearcher searcher whose schema and slow atomic reader are consulted
   * @param fieldName name of the field to validate and load
   * @param querySide label ("from" or "to") used in error messages
   * @return the doc values of {@code fieldName} read from the searcher's slow atomic reader
   * @throws SolrException with {@code BAD_REQUEST} if the field is unknown or lacks docValues
   * @throws IOException if the doc values cannot be read
   */
  private SortedSetDocValues validateAndFetchDocValues(
      SolrIndexSearcher solrSearcher, String fieldName, String querySide) throws IOException {
    final SchemaField schemaField = solrSearcher.getSchema().getFieldOrNull(fieldName);

    if (schemaField == null) {
      final String message = querySide + " field '" + fieldName + "' does not exist";
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message);
    }

    if (!schemaField.hasDocValues()) {
      final String message =
          "'top-level' join queries require both 'from' and 'to' fields to have docValues, but "
              + querySide
              + " field ["
              + fieldName
              + "] does not.";
      throw new SolrException(SolrException.ErrorCode.BAD_REQUEST, message);
    }

    final LeafReader topLevelReader = solrSearcher.getSlowAtomicReader();
    return schemaField.multiValued()
        ? DocValues.getSortedSet(topLevelReader, fieldName)
        : DocValues.singleton(DocValues.getSorted(topLevelReader, fieldName));
  }

  /**
   * Runs {@code q} against {@code searcher} with a {@link MultiValueTermOrdinalCollector} and
   * returns the bit set that collector was given to fill.
   *
   * @param q query to execute
   * @param field field name passed to the collector
   * @param searcher searcher the query is executed on
   * @param docValues doc values passed to the collector; their value count sizes the bit set
   * @return bit set with one bit per ordinal of {@code docValues}, as populated by the collector
   * @throws IOException if the search fails
   */
  private static LongBitSet findFieldOrdinalsMatchingQuery(
      Query q, String field, SolrIndexSearcher searcher, SortedSetDocValues docValues)
      throws IOException {
    final LongBitSet matchedOrdinals = new LongBitSet(docValues.getValueCount());

    final Collector ordinalCollector =
        new MultiValueTermOrdinalCollector(field, docValues, matchedOrdinals);
    searcher.search(q, ordinalCollector);

    return matchedOrdinals;
  }

  /**
   * Translates every ordinal set in {@code fromOrdBitSet} into the matching ordinal of the "to"
   * field, if the same term exists there, and records it in {@code toOrdBitSet}.
   *
   * <p>Each term lookup in the "to" field starts at the most recently matched "to" ordinal rather
   * than at 0.
   *
   * @param fromOrdBitSet ordinals of the "from" field to translate
   * @param fromDocValues doc values used to resolve "from" ordinals into terms
   * @param toOrdBitSet output bit set; a bit is set for each "to" ordinal whose term was found
   * @param toDocValues doc values in which the terms are looked up
   * @return the first and last "to" ordinals that were set; {@code lower} is {@link
   *     BitsetBounds#NO_MATCHES} (and {@code upper} is 0) when no term was found
   * @throws IOException if a term or ordinal lookup fails
   */
  protected BitsetBounds convertFromOrdinalsIntoToField(
      LongBitSet fromOrdBitSet,
      SortedSetDocValues fromDocValues,
      LongBitSet toOrdBitSet,
      SortedSetDocValues toDocValues)
      throws IOException {
    long lowestToOrd = BitsetBounds.NO_MATCHES;
    long highestToOrd = 0;

    for (long fromOrd = 0; fromOrd < fromOrdBitSet.length(); fromOrd++) {
      // Jump straight to the next selected "from" ordinal; a negative result means none remain.
      fromOrd = fromOrdBitSet.nextSetBit(fromOrd);
      if (fromOrd < 0) {
        break;
      }

      final BytesRef term = fromDocValues.lookupOrd(fromOrd);
      final long toOrd = lookupTerm(toDocValues, term, highestToOrd);
      if (toOrd < 0) {
        continue; // the term does not occur in the "to" field
      }

      toOrdBitSet.set(toOrd);
      if (lowestToOrd == BitsetBounds.NO_MATCHES) {
        lowestToOrd = toOrd;
      }
      highestToOrd = toOrd;
    }

    return new BitsetBounds(lowestToOrd, highestToOrd);
  }

  /**
   * Binary search for {@code key} among the terms of {@code docValues}, in the manner of
   * SortedSetDocValues.lookupTerm(BytesRef), except that the search window begins at
   * {@code startOrd} instead of 0 so callers can skip ordinals they have already ruled out.
   *
   * @param docValues doc values whose terms are searched
   * @param key term to find
   * @param startOrd lowest ordinal included in the search
   * @return the ordinal of {@code key} if found; otherwise {@code -(insertionPoint + 1)}, which is
   *     always negative
   * @throws IOException if an ordinal lookup fails
   */
  private long lookupTerm(SortedSetDocValues docValues, BytesRef key, long startOrd)
      throws IOException {
    long lo = startOrd;
    long hi = docValues.getValueCount() - 1;

    while (lo <= hi) {
      final long probe = (lo + hi) >>> 1;
      final int order = docValues.lookupOrd(probe).compareTo(key);

      if (order == 0) {
        return probe; // exact match
      }
      if (order < 0) {
        lo = probe + 1; // probed term sorts before the key: continue in the upper half
      } else {
        hi = probe - 1; // probed term sorts after the key: continue in the lower half
      }
    }

    return -(lo + 1); // not present
  }

  /**
   * Immutable holder for the lowest and highest "to" ordinals produced by
   * {@code convertFromOrdinalsIntoToField}.
   */
  protected static class BitsetBounds {
    /**
     * Value of {@link #lower} when no "to" ordinal was set; {@code createWeight} checks for it to
     * skip scoring entirely.
     */
    public static final long NO_MATCHES = -1L;

    /** First (lowest) "to" ordinal that was set, or {@link #NO_MATCHES}. */
    public final long lower;

    /** Last (highest) "to" ordinal that was set; not consulted by the code visible in this file. */
    public final long upper;

    /**
     * @param lower first "to" ordinal that was set, or {@link #NO_MATCHES}
     * @param upper last "to" ordinal that was set
     */
    public BitsetBounds(long lower, long upper) {
      this.lower = lower;
      this.upper = upper;
    }
  }

  /**
   * A {@link TopLevelJoinQuery} specialization for joins whose "from" and "to" sides use the same
   * core and the same field, so that no ordinal conversion is necessary.
   */
  static class SelfJoin extends TopLevelJoinQuery {
    /**
     * @param joinField field used as both the "from" and the "to" field
     * @param subQuery forwarded to the {@link TopLevelJoinQuery} constructor
     */
    public SelfJoin(String joinField, Query subQuery) {
      super(joinField, joinField, null, subQuery);
    }

    /**
     * Copies the "from" ordinals directly into {@code toOrdBitSet}, since both sides share one
     * ordinal space in a self-join, and reports the first and last set bits as bounds.
     *
     * <p>{@code fromDocValues} and {@code toDocValues} are not used by this implementation.
     */
    @Override
    protected BitsetBounds convertFromOrdinalsIntoToField(
        LongBitSet fromOrdBitSet,
        SortedSetDocValues fromDocValues,
        LongBitSet toOrdBitSet,
        SortedSetDocValues toDocValues)
        throws IOException {
      // Same field on both sides: every "from" ordinal is already the right "to" ordinal.
      toOrdBitSet.or(fromOrdBitSet);

      // Boundary ordinals, scanning forward from the first bit and backward from the last one.
      final long highestIndex = toOrdBitSet.length() - 1;
      return new BitsetBounds(toOrdBitSet.nextSetBit(0), toOrdBitSet.prevSetBit(highestIndex));
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy