All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.solr.search.DocSetCollector Maven / Gradle / Ivy

There is a newer version: 9.7.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.solr.search;

import java.io.IOException;
import java.util.ArrayList;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.search.Scorable;
import org.apache.lucene.search.ScoreMode;
import org.apache.lucene.search.SimpleCollector;
import org.apache.lucene.util.FixedBitSet;

/**
 *
 */

public class DocSetCollector extends SimpleCollector {
  int pos=0;
  FixedBitSet bits;
  final int maxDoc;
  final int smallSetSize;
  int base;

  // in case there aren't that many hits, we may not want a very sparse
  // bit array.  Optimistically collect the first few docs in an array
  // in case there are only a few.
  final ExpandingIntArray scratch;

  public DocSetCollector(int maxDoc) {
    this(DocSetUtil.smallSetSize(maxDoc), maxDoc);
  }

  public DocSetCollector(int smallSetSize, int maxDoc) {
    this.smallSetSize = smallSetSize;
    this.maxDoc = maxDoc;
    this.scratch = new ExpandingIntArray(smallSetSize);
  }

  @Override
  public void collect(int doc) throws IOException {
    doc += base;
    // optimistically collect the first docs in an array
    // in case the total number will be small enough to represent
    // as a small set like SortedIntDocSet instead...
    // Storing in this array will be quicker to convert
    // than scanning through a potentially huge bit vector.
    // FUTURE: when search methods all start returning docs in order, maybe
    // we could have a ListDocSet() and use the collected array directly.
    if (pos < smallSetSize) {
      scratch.add(pos, doc);
    } else {
      // this conditional could be removed if BitSet was preallocated, but that
      // would take up more memory, and add more GC time...
      if (bits==null) bits = new FixedBitSet(maxDoc);
      bits.set(doc);
    }

    pos++;
  }

  /** The number of documents that have been collected */
  public int size() {
    return pos;
  }

  public DocSet getDocSet() {
    if (pos<=scratch.size()) {
      // assumes docs were collected in sorted order!
      return new SortedIntDocSet(scratch.toArray(), pos);
//    } else if (pos == maxDoc) {
//      return new MatchAllDocSet(maxDoc);  // a bunch of code currently relies on BitDocSet (either explicitly, or implicitly for performance)
    } else {
      // set the bits for ids that were collected in the array
      scratch.copyTo(bits);
      return new BitDocSet(bits,pos);
    }
  }

  @Override
  public void setScorer(Scorable scorer) throws IOException {
  }

  @Override
  public ScoreMode scoreMode() {
    return ScoreMode.COMPLETE_NO_SCORES;
  }

  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    this.base = context.docBase;
  }

  protected static class ExpandingIntArray {
    private static final int[] EMPTY = new int[0];
    private int[] currentAddArray = null;
    private int indexForNextAddInCurrentAddArray = 0;
    private int size = 0;
    private final int smallSetSize;
    private ArrayList arrays;

    public ExpandingIntArray(int smallSetSize) {
      this.smallSetSize = smallSetSize;
      this.currentAddArray = EMPTY;
    }

    private void addNewArray() {
      int arrSize = Math.max(10, currentAddArray.length << 1);
      arrSize = Math.min(arrSize, smallSetSize - size); // max out at the smallSetSize
      this.currentAddArray = new int[arrSize];
      if (arrays == null) {
        arrays = new ArrayList<>();
      }
      arrays.add(this.currentAddArray);
      indexForNextAddInCurrentAddArray = 0;
      // System.out.println("### ALLOCATED " + this + " " + arrSize + " smallSetSize="+smallSetSize + " left=" + (smallSetSize-size));
    }

    public void add(int index, int value) {
      // assert index == size; // only appending is supported
      if (indexForNextAddInCurrentAddArray >= currentAddArray.length) {
        addNewArray();
      }
      currentAddArray[indexForNextAddInCurrentAddArray++] = value;
      size++;
    }

    public void copyTo(FixedBitSet bits) {
      if (size > 0) {
        int resultPos = 0;
        for (int i = 0; i < arrays.size(); i++) {
          int[] srcArray = arrays.get(i);
          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
          for (int j = 0; j < intsToCopy; j++) {
            bits.set(srcArray[j]);
          }
          resultPos += intsToCopy;
        }
        assert resultPos == size;
      }
    }

    public int[] toArray() {
      int[] result = new int[size];
      if (size > 0) {
        int resultPos = 0;
        for (int i = 0; i < arrays.size(); i++) {
          int[] srcArray = arrays.get(i);
          int intsToCopy = (i < (arrays.size() - 1)) ? srcArray.length : indexForNextAddInCurrentAddArray;
          System.arraycopy(srcArray, 0, result, resultPos, intsToCopy);
          resultPos += intsToCopy;
        }
        assert resultPos == size;
      }
      return result;
    }

    public int size() {
      return size;
    }
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy