org.apache.lucene.search.CheckHits Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-test-framework Show documentation
Apache Lucene (module: test-framework)
There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.Random;

import junit.framework.Assert;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Utility class for asserting expected hits in tests.
 */
public class CheckHits {
  
  /**
   * Some explain methods calculate their values through a slightly
   * different order of operations from the actual scoring method ...
   * this allows for a small amount of relative variation
   */
  public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.001f;
  
  /**
   * In general we use a relative epsilon, but some tests do crazy things
   * like boost documents with 0, creating tiny tiny scores where the
   * relative difference is large but the absolute difference is tiny.
   * We ensure the epsilon is always at least this big.
   */
  public static float EXPLAIN_SCORE_TOLERANCE_MINIMUM = 1e-6f;
    
  /**
   * Tests that all documents up to maxDoc which are *not* in the
   * expected result set have an explanation which indicates that
   * the document does not match.
   *
   * @param q the query to explain
   * @param defaultFieldName used for displaying the query in assertion messages
   * @param searcher the searcher whose reader supplies maxDoc and which produces the explanations
   * @param results the document ids expected to match; these are skipped
   * @throws IOException propagated from {@code searcher.explain}
   */
  public static void checkNoMatchExplanations(Query q, String defaultFieldName,
                                              IndexSearcher searcher, int[] results)
    throws IOException {

    String d = q.toString(defaultFieldName);

    // Doc ids that are expected to match and therefore must not be checked.
    Set<Integer> ignore = new TreeSet<>();
    for (int result : results) {
      ignore.add(result);
    }

    int maxDoc = searcher.getIndexReader().maxDoc();
    for (int doc = 0; doc < maxDoc; doc++) {
      if (ignore.contains(doc)) {
        continue;
      }

      Explanation exp = searcher.explain(q, doc);
      Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
                             exp);
      Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
                         " doesn't indicate non-match: " + exp.toString(),
                         exp.isMatch());
    }

  }
  
  /**
   * Tests that a query matches an expected set of documents using a
   * {@link Collector}.
   *
   * <p>
   * Note that when using the collector API, documents will be collected
   * if they "match" regardless of what their score is.
   * </p>
   *
   * <p>
   * The comparison is done once against {@code searcher} itself and then
   * again against each searcher returned by
   * {@code QueryUtils.wrapUnderlyingReader} for the edge values -1, 0 and 1.
   * </p>
   *
   * @param random source of randomness handed to {@code QueryUtils}
   * @param query the query to test
   * @param defaultFieldName used for displaying the query in assertion messages
   * @param searcher the searcher to test the query against
   * @param results a list of documentIds that must match the query
   * @throws IOException propagated from the searches
   * @see #checkHits
   */
  public static void checkHitCollector(Random random, Query query, String defaultFieldName,
                                       IndexSearcher searcher, int[] results)
    throws IOException {

    QueryUtils.check(random,query,searcher);

    Set<Integer> correct = new TreeSet<>();
    for (int result : results) {
      correct.add(result);
    }

    // The collector writes into 'actual'; the same collector and set are
    // reused (after clear()) for every wrapped searcher below.
    final Set<Integer> actual = new TreeSet<>();
    final Collector c = new SetCollector(actual);

    searcher.search(query, c);
    Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
                        correct, actual);

    for (int i = -1; i < 2; i++) {
      actual.clear();
      IndexSearcher s = QueryUtils.wrapUnderlyingReader
        (random, searcher, i);
      s.search(query, c);
      Assert.assertEquals("Wrap Reader " + i + ": " +
                          query.toString(defaultFieldName),
                          correct, actual);
    }
  }

  /**
   * Just collects document ids into a set.
   *
   * <p>
   * Each collected id is offset by the {@code docBase} of the most recently
   * supplied {@link LeafReaderContext} before it is added to the set.
   * </p>
   */
  public static class SetCollector extends SimpleCollector {
    /** Destination set; owned by the caller and mutated by {@link #collect(int)}. */
    final Set<Integer> bag;

    /** docBase of the leaf currently being collected. */
    private int base = 0;

    /**
     * @param bag the set that collected document ids are added to
     */
    public SetCollector(Set<Integer> bag) {
      this.bag = bag;
    }

    /** Scores are not needed, so the scorer is ignored. */
    @Override
    public void setScorer(Scorer scorer) throws IOException {}

    /** Adds {@code doc} plus the current docBase to the set. */
    @Override
    public void collect(int doc) {
      bag.add(doc + base);
    }

    /** Remembers the docBase of the leaf that is about to be collected. */
    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      base = context.docBase;
    }

    @Override
    public boolean needsScores() {
      return false;
    }
  }

  /**
   * Tests that a query matches an expected set of documents using the
   * top-hits search API.
   *
   * <p>
   * Note that when using the Hits API, documents will only be returned
   * if they have a positive normalized score.
   * </p>
   *
   * <p>
   * At most 1000 hits are requested from the searcher, so {@code results}
   * is compared against at most 1000 matching documents.
   * </p>
   *
   * @param random source of randomness handed to {@code QueryUtils}
   * @param query the query to test
   * @param searcher the searcher to test the query against
   * @param defaultFieldName used for displaying the query in assertion messages
   * @param results a list of documentIds that must match the query
   * @throws IOException propagated from the search
   * @see #checkHitCollector
   */
  public static void checkHits(
        Random random,
        Query query,
        String defaultFieldName,
        IndexSearcher searcher,
        int[] results)
          throws IOException {

    ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;

    Set<Integer> correct = new TreeSet<>();
    for (int result : results) {
      correct.add(result);
    }

    Set<Integer> actual = new TreeSet<>();
    for (ScoreDoc hit : hits) {
      actual.add(hit.doc);
    }

    Assert.assertEquals(query.toString(defaultFieldName), correct, actual);

    // Additional generic query checks run only after the hit sets agree.
    QueryUtils.check(random, query,searcher, LuceneTestCase.rarely(random));
  }

  /**
   * Tests that the hits have an expected order of documents.
   *
   * @param mes prefix for the assertion messages
   * @param results the expected document ids, in the expected order
   * @param hits the actual hits to verify
   */
  public static void checkDocIds(String mes, int[] results, ScoreDoc[] hits) {
    // 'results' is the expectation and 'hits' the observed value; JUnit's
    // assertEquals takes (message, expected, actual) in that order.
    Assert.assertEquals(mes + " nr of hits", results.length, hits.length);
    for (int i = 0; i < results.length; i++) {
      Assert.assertEquals(mes + " doc nrs for hit " + i, results[i], hits[i].doc);
    }
  }

  /**
   * Tests that two hit lists both contain the expected documents in the
   * expected order, and that the two lists agree with each other as judged
   * by {@link #checkEqual}.
   *
   * @param query the query the hits came from; passed on to {@link #checkEqual}
   * @param hits1 the first hit list
   * @param hits2 the second hit list
   * @param results the expected document ids, in the expected order
   */
  public static void checkHitsQuery(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2,
                                    int[] results) {
    // Each list is first checked against the expectation on its own ...
    checkDocIds("hits1", results, hits1);
    checkDocIds("hits2", results, hits2);
    // ... and only then are the two lists compared with each other.
    checkEqual(query, hits1, hits2);
  }

  /**
   * Fails unless the two hit lists have the same length and, position by
   * position, the same document id and scores that differ by no more than
   * an absolute tolerance of 1.0e-6.
   *
   * @param query the query the hits came from; only used in failure messages
   * @param hits1 the first hit list
   * @param hits2 the second hit list
   */
  public static void checkEqual(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2) {
    final float scoreTolerance = 1.0e-6f;
    if (hits1.length != hits2.length) {
      Assert.fail("Unequal lengths: hits1="+hits1.length+",hits2="+hits2.length);
    }
    for (int i = 0; i < hits1.length; i++) {
      if (hits1[i].doc != hits2[i].doc) {
        Assert.fail("Hit " + i + " docnumbers don't match\n"
                + hits2str(hits1, hits2,0,0)
                + "for query:" + query.toString());
      }

      // The doc ids are known to be equal here (Assert.fail above throws),
      // so only the scores remain to be compared.
      if (Math.abs(hits1[i].score -  hits2[i].score) > scoreTolerance)
      {
        Assert.fail("Hit " + i + ", doc nrs " + hits1[i].doc + " and " + hits2[i].doc
                      + "\nunequal       : " + hits1[i].score
                      + "\n           and: " + hits2[i].score
                      + "\nfor query:" + query.toString());
      }
    }
  }

  public static String hits2str(ScoreDoc[] hits1, ScoreDoc[] hits2, int start, int end) {
    StringBuilder sb = new StringBuilder();
    int len1=hits1==null ? 0 : hits1.length;
    int len2=hits2==null ? 0 : hits2.length;
    if (end<=0) {
      end = Math.max(len1,len2);
    }

      sb.append("Hits length1=").append(len1).append("\tlength2=").append(len2);

    sb.append('\n');
    for (int i=start; i 0) {
      if (detail.length==1) {
        // simple containment, unless it's a freq of: (which lets a query explain how the freq is calculated), 
        // just verify contained expl has same score
        if (!expl.getDescription().endsWith("with freq of:"))
          verifyExplanation(q,doc,score,deep,detail[0]);
      } else {
        // explanation must either:
        // - end with one of: "product of:", "sum of:", "max of:", or
        // - have "max plus  times others" (where  is float).
        float x = 0;
        String descr = expl.getDescription().toLowerCase(Locale.ROOT);
        boolean productOf = descr.endsWith("product of:");
        boolean sumOf = descr.endsWith("sum of:");
        boolean maxOf = descr.endsWith("max of:");
        boolean maxTimesOthers = false;
        if (!(productOf || sumOf || maxOf)) {
          // maybe 'max plus x times others'
          int k1 = descr.indexOf("max plus ");
          if (k1>=0) {
            k1 += "max plus ".length();
            int k2 = descr.indexOf(" ",k1);
            try {
              x = Float.parseFloat(descr.substring(k1,k2).trim());
              if (descr.substring(k2).trim().equals("times others of:")) {
                maxTimesOthers = true;
              }
            } catch (NumberFormatException e) {
            }
          }
        }
        // TODO: this is a TERRIBLE assertion!!!!
        Assert.assertTrue(
            q+": multi valued explanation description=\""+descr
            +"\" must be 'max of plus x times others' or end with 'product of'"
            +" or 'sum of:' or 'max of:' - "+expl,
            productOf || sumOf || maxOf || maxTimesOthers);
        float sum = 0;
        float product = 1;
        float max = 0;
        for (int i=0; i