All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.CheckHits Maven / Gradle / Ivy

There is a newer version: 10.1.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.Random;

import junit.framework.Assert;

import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.LuceneTestCase;

/**
 * Utility class for asserting expected hits in tests.
 */
public class CheckHits {
  
  /**
   * Some explain methods calculate their values through a slightly
   * different order of operations from the actual scoring method ...
   * this allows for a small amount of relative variation.
   */
  public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.001f;
  
  /**
   * In general we use a relative epsilon, but some tests do crazy things
   * like boost documents with 0, creating tiny tiny scores where the
   * relative difference is large but the absolute difference is tiny.
   * We ensure that the epsilon is always at least this big.
   */
  public static float EXPLAIN_SCORE_TOLERANCE_MINIMUM = 1e-6f;
    
  /**
   * Asserts that every document id below the reader's {@code maxDoc} which is
   * <em>not</em> listed in {@code results} has a non-null explanation that
   * reports the document as a non-match.
   *
   * @param q the query to explain
   * @param defaultFieldName field name handed to {@code q.toString(String)} when
   *        rendering the query in assertion messages
   * @param searcher the searcher used to compute explanations; the
   *        {@code maxDoc()} of its index reader bounds the ids that are checked
   * @param results document ids expected to match; these ids are skipped
   * @throws IOException propagated from the searcher calls made here
   */
  public static void checkNoMatchExplanations(Query q, String defaultFieldName,
                                              IndexSearcher searcher, int[] results)
    throws IOException {

    final String d = q.toString(defaultFieldName);

    // Typed set rather than a raw Set, so element types are checked at compile time.
    final Set<Integer> ignore = new TreeSet<>();
    for (int expected : results) {
      ignore.add(expected);
    }

    final int maxDoc = searcher.getIndexReader().maxDoc();
    for (int doc = 0; doc < maxDoc; doc++) {
      if (ignore.contains(doc)) {
        // Expected hit: there is no "non-match" explanation to verify.
        continue;
      }

      Explanation exp = searcher.explain(q, doc);
      Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
                             exp);
      Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
                         " doesn't indicate non-match: " + exp.toString(),
                         exp.isMatch());
    }

  }
  
  /**
   * Tests that a query matches an expected set of documents using a
   * {@link Collector}.
   *
   * <p>Note that when collecting through a collector, documents will be collected
   * if they "match" regardless of what their score is.
   *
   * @param random source of randomness forwarded to the {@code QueryUtils} helpers
   * @param query the query to test
   * @param defaultFieldName used for displaying the query in assertion messages
   * @param searcher the searcher to test the query against
   * @param results a list of documentIds that must match the query
   * @throws IOException propagated from the searcher and {@code QueryUtils} calls
   * @see #checkHits
   */
  public static void checkHitCollector(Random random, Query query, String defaultFieldName,
                                       IndexSearcher searcher, int[] results)
    throws IOException {

    QueryUtils.check(random, query, searcher);

    // Typed sets rather than raw Sets, so element types are checked at compile time.
    final Set<Integer> correct = new TreeSet<>();
    for (int expected : results) {
      correct.add(expected);
    }

    final Set<Integer> actual = new TreeSet<>();
    final Collector c = new SetCollector(actual);

    searcher.search(query, c);
    Assert.assertEquals("Simple: " + query.toString(defaultFieldName), correct, actual);

    // Repeat the check against searchers produced by QueryUtils.wrapUnderlyingReader
    // for edge values -1, 0 and 1; the same collector is reused, so the shared
    // result set is cleared before each run.
    for (int i = -1; i < 2; i++) {
      actual.clear();
      IndexSearcher s = QueryUtils.wrapUnderlyingReader(random, searcher, i);
      s.search(query, c);
      Assert.assertEquals("Wrap Reader " + i + ": " + query.toString(defaultFieldName),
                          correct, actual);
    }
  }

  /**
   * Just collects document ids into a set.
   *
   * <p>Each collected id is the per-leaf doc id plus the {@code docBase} of the
   * leaf most recently passed to {@link #doSetNextReader}.
   */
  public static class SetCollector extends SimpleCollector {
    /** Destination for collected ids; supplied by the caller, not copied. */
    final Set<Integer> bag;

    /** {@code docBase} of the current leaf; 0 until a leaf has been set. */
    private int base = 0;

    /**
     * @param bag the set that collected document ids are added to
     */
    public SetCollector(Set<Integer> bag) {
      this.bag = bag;
    }

    /** Scores are not needed, so the scorer is ignored. */
    @Override
    public void setScorer(Scorer scorer) throws IOException {}

    @Override
    public void collect(int doc) {
      bag.add(doc + base);
    }

    @Override
    protected void doSetNextReader(LeafReaderContext context) throws IOException {
      base = context.docBase;
    }

    @Override
    public boolean needsScores() {
      return false;
    }
  }

  /**
   * Tests that a query matches an expected set of documents using Hits.
   *

* Note that when using the Hits API, documents will only be returned * if they have a positive normalized score. *

* @param query the query to test * @param searcher the searcher to test the query against * @param defaultFieldName used for displaing the query in assertion messages * @param results a list of documentIds that must match the query * @see #checkHitCollector */ public static void checkHits( Random random, Query query, String defaultFieldName, IndexSearcher searcher, int[] results) throws IOException { ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs; Set correct = new TreeSet<>(); for (int i = 0; i < results.length; i++) { correct.add(Integer.valueOf(results[i])); } Set actual = new TreeSet<>(); for (int i = 0; i < hits.length; i++) { actual.add(Integer.valueOf(hits[i].doc)); } Assert.assertEquals(query.toString(defaultFieldName), correct, actual); QueryUtils.check(random, query,searcher, LuceneTestCase.rarely(random)); } /** Tests that a Hits has an expected order of documents */ public static void checkDocIds(String mes, int[] results, ScoreDoc[] hits) { Assert.assertEquals(mes + " nr of hits", hits.length, results.length); for (int i = 0; i < results.length; i++) { Assert.assertEquals(mes + " doc nrs for hit " + i, results[i], hits[i].doc); } } /** Tests that two queries have an expected order of documents, * and that the two queries have the same score values. 
*/ public static void checkHitsQuery( Query query, ScoreDoc[] hits1, ScoreDoc[] hits2, int[] results) { checkDocIds("hits1", results, hits1); checkDocIds("hits2", results, hits2); checkEqual(query, hits1, hits2); } public static void checkEqual(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2) { final float scoreTolerance = 1.0e-6f; if (hits1.length != hits2.length) { Assert.fail("Unequal lengths: hits1="+hits1.length+",hits2="+hits2.length); } for (int i = 0; i < hits1.length; i++) { if (hits1[i].doc != hits2[i].doc) { Assert.fail("Hit " + i + " docnumbers don't match\n" + hits2str(hits1, hits2,0,0) + "for query:" + query.toString()); } if ((hits1[i].doc != hits2[i].doc) || Math.abs(hits1[i].score - hits2[i].score) > scoreTolerance) { Assert.fail("Hit " + i + ", doc nrs " + hits1[i].doc + " and " + hits2[i].doc + "\nunequal : " + hits1[i].score + "\n and: " + hits2[i].score + "\nfor query:" + query.toString()); } } } public static String hits2str(ScoreDoc[] hits1, ScoreDoc[] hits2, int start, int end) { StringBuilder sb = new StringBuilder(); int len1=hits1==null ? 0 : hits1.length; int len2=hits2==null ? 0 : hits2.length; if (end<=0) { end = Math.max(len1,len2); } sb.append("Hits length1=").append(len1).append("\tlength2=").append(len2); sb.append('\n'); for (int i=start; i 0) { if (detail.length==1) { // simple containment, unless it's a freq of: (which lets a query explain how the freq is calculated), // just verify contained expl has same score if (!expl.getDescription().endsWith("with freq of:")) verifyExplanation(q,doc,score,deep,detail[0]); } else { // explanation must either: // - end with one of: "product of:", "sum of:", "max of:", or // - have "max plus times others" (where is float). 
float x = 0; String descr = expl.getDescription().toLowerCase(Locale.ROOT); boolean productOf = descr.endsWith("product of:"); boolean sumOf = descr.endsWith("sum of:"); boolean maxOf = descr.endsWith("max of:"); boolean maxTimesOthers = false; if (!(productOf || sumOf || maxOf)) { // maybe 'max plus x times others' int k1 = descr.indexOf("max plus "); if (k1>=0) { k1 += "max plus ".length(); int k2 = descr.indexOf(" ",k1); try { x = Float.parseFloat(descr.substring(k1,k2).trim()); if (descr.substring(k2).trim().equals("times others of:")) { maxTimesOthers = true; } } catch (NumberFormatException e) { } } } // TODO: this is a TERRIBLE assertion!!!! Assert.assertTrue( q+": multi valued explanation description=\""+descr +"\" must be 'max of plus x times others' or end with 'product of'" +" or 'sum of:' or 'max of:' - "+expl, productOf || sumOf || maxOf || maxTimesOthers); float sum = 0; float product = 1; float max = 0; for (int i=0; i




© 2015 - 2025 Weber Informatics LLC | Privacy Policy