All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.facet.DrillSidewaysScorer Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
package org.apache.lucene.facet;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;
import java.util.Collections;

import org.apache.lucene.index.AtomicReaderContext;
import org.apache.lucene.search.Collector;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.Scorer;
import org.apache.lucene.search.Weight;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.FixedBitSet;

class DrillSidewaysScorer extends Scorer {

  //private static boolean DEBUG = false;

  private final Collector drillDownCollector;

  private final DocsAndCost[] dims;

  // DrillDown DocsEnums:
  private final Scorer baseScorer;

  private final AtomicReaderContext context;

  final boolean scoreSubDocsAtOnce;

  private static final int CHUNK = 2048;
  private static final int MASK = CHUNK-1;

  private int collectDocID = -1;
  private float collectScore;

  DrillSidewaysScorer(Weight w, AtomicReaderContext context, Scorer baseScorer, Collector drillDownCollector,
                      DocsAndCost[] dims, boolean scoreSubDocsAtOnce) {
    super(w);
    this.dims = dims;
    this.context = context;
    this.baseScorer = baseScorer;
    this.drillDownCollector = drillDownCollector;
    this.scoreSubDocsAtOnce = scoreSubDocsAtOnce;
  }

  @Override
  public void score(Collector collector) throws IOException {
    //if (DEBUG) {
    //  System.out.println("\nscore: reader=" + context.reader());
    //}
    //System.out.println("score r=" + context.reader());
    collector.setScorer(this);
    if (drillDownCollector != null) {
      drillDownCollector.setScorer(this);
      drillDownCollector.setNextReader(context);
    }
    for (DocsAndCost dim : dims) {
      dim.sidewaysCollector.setScorer(this);
      dim.sidewaysCollector.setNextReader(context);
    }

    // TODO: if we ever allow null baseScorer ... it will
    // mean we DO score docs out of order ... hmm, or if we
    // change up the order of the conjuntions below
    assert baseScorer != null;

    // Position all scorers to their first matching doc:
    baseScorer.nextDoc();
    int numBits = 0;
    for (DocsAndCost dim : dims) {
      if (dim.disi != null) {
        dim.disi.nextDoc();
      } else if (dim.bits != null) {
        numBits++;
      }
    }

    final int numDims = dims.length;

    Bits[] bits = new Bits[numBits];
    Collector[] bitsSidewaysCollectors = new Collector[numBits];

    DocIdSetIterator[] disis = new DocIdSetIterator[numDims-numBits];
    Collector[] sidewaysCollectors = new Collector[numDims-numBits];
    long drillDownCost = 0;
    int disiUpto = 0;
    int bitsUpto = 0;
    for (int dim=0;dim docID) {
          if (failedCollector != null) {
            // More than one dim fails on this document, so
            // it's neither a hit nor a near-miss; move to
            // next doc:
            docID = baseScorer.nextDoc();
            continue nextDoc;
          } else {
            failedCollector = sidewaysCollectors[i];
          }
        }
      }

      // TODO: for the "non-costly Bits" we really should
      // have passed them down as acceptDocs, but
      // unfortunately we cannot distinguish today betwen
      // "bits() is so costly that you should apply it last"
      // from "bits() is so cheap that you should apply it
      // everywhere down low"

      // Fold in Filter Bits last, since they may be costly:
      for(int i=0;i= 1) {
              missingDims[slot] = 2;
              counts[slot] = 2;
              //if (DEBUG) {
              //  System.out.println("    set docID=" + docID + " missingDim=2 id=" + context.reader().document(docID).get("id"));
              //}
            } else {
              counts[slot] = 1;
              //if (DEBUG) {
              //  System.out.println("    set docID=" + docID + " missingDim=" + missingDims[slot] + " id=" + context.reader().document(docID).get("id"));
              //}
            }
          }

          docID = disi.nextDoc();
        }
      }

      // After this we can "upgrade" to conjunction, because
      // any doc not seen by either dim 0 or dim 1 cannot be
      // a hit or a near miss:

      //if (DEBUG) {
      //  System.out.println("  baseScorer");
      //}

      // Fold in baseScorer, using advance:
      int filledCount = 0;
      int slot0 = 0;
      while (slot0 < CHUNK && (slot0 = seen.nextSetBit(slot0)) != -1) {
        int ddDocID = docIDs[slot0];
        assert ddDocID != -1;

        int baseDocID = baseScorer.docID();
        if (baseDocID < ddDocID) {
          baseDocID = baseScorer.advance(ddDocID);
        }
        if (baseDocID == ddDocID) {
          //if (DEBUG) {
          //  System.out.println("    keep docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
          //}
          scores[slot0] = baseScorer.score();
          filledSlots[filledCount++] = slot0;
          counts[slot0]++;
        } else {
          //if (DEBUG) {
          //  System.out.println("    no docID=" + ddDocID + " id=" + context.reader().document(ddDocID).get("id"));
          //}
          docIDs[slot0] = -1;

          // TODO: we could jump slot0 forward to the
          // baseDocID ... but we'd need to set docIDs for
          // intervening slots to -1
        }
        slot0++;
      }
      seen.clear(0, CHUNK);

      if (filledCount == 0) {
        if (nextChunkStart >= maxDoc) {
          break;
        }
        nextChunkStart += CHUNK;
        continue;
      }
      
      // TODO: factor this out & share w/ union scorer,
      // except we start from dim=2 instead:
      for (int dim=2;dim= dim) {
              // TODO: single-valued dims will always be true
              // below; we could somehow specialize
              if (missingDims[slot] >= dim) {
                //if (DEBUG) {
                //  System.out.println("    set docID=" + docID + " count=" + (dim+2));
                //}
                missingDims[slot] = dim+1;
                counts[slot] = dim+2;
              } else {
                //if (DEBUG) {
                //  System.out.println("    set docID=" + docID + " missing count=" + (dim+1));
                //}
                counts[slot] = dim+1;
              }
            }

            // TODO: sometimes use advance?
            docID = disi.nextDoc();
          }
        }
      }

      // Collect:
      //if (DEBUG) {
      //  System.out.println("  now collect: " + filledCount + " hits");
      //}
      for (int i=0;i= maxDoc) {
        break;
      }

      nextChunkStart += CHUNK;
    }
  }

  private void collectHit(Collector collector, Collector[] sidewaysCollectors) throws IOException {
    //if (DEBUG) {
    //  System.out.println("      hit");
    //}

    collector.collect(collectDocID);
    if (drillDownCollector != null) {
      drillDownCollector.collect(collectDocID);
    }

    // TODO: we could "fix" faceting of the sideways counts
    // to do this "union" (of the drill down hits) in the
    // end instead:

    // Tally sideways counts:
    for (int dim=0;dim {
    // Iterator for docs matching this dim's filter, or ...
    DocIdSetIterator disi;
    // Random access bits:
    Bits bits;
    Collector sidewaysCollector;
    String dim;

    @Override
    public int compareTo(DocsAndCost other) {
      if (disi == null) {
        if (other.disi == null) {
          return 0;
        } else {
          return 1;
        }
      } else if (other.disi == null) {
        return -1;
      } else if (disi.cost() < other.disi.cost()) {
        return -1;
      } else if (disi.cost() > other.disi.cost()) {
        return 1;
      } else {
        return 0;
      }
    }
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy