org.apache.lucene.search.CheckHits Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-test-framework Show documentation
Show all versions of lucene-test-framework Show documentation
Apache Lucene (module: test-framework)
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search;
import java.io.IOException;
import java.util.Locale;
import java.util.Set;
import java.util.TreeSet;
import java.util.Random;
import junit.framework.Assert;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.util.LuceneTestCase;
/**
* Utility class for asserting expected hits in tests.
*/
public class CheckHits {
/**
 * Some explain methods calculate their values through a slightly
 * different order of operations from the actual scoring method ...
 * this allows for a small amount of relative variation.
 */
public static float EXPLAIN_SCORE_TOLERANCE_DELTA = 0.001f;
/**
 * In general we use a relative epsilon, but some tests do crazy things
 * like boost documents with 0, creating tiny tiny scores where the
 * relative difference is large but the absolute difference is tiny.
 * We ensure that the epsilon is always at least this big.
 */
public static float EXPLAIN_SCORE_TOLERANCE_MINIMUM = 1e-6f;
/**
 * Tests that all documents up to maxDoc which are *not* in the
 * expected result set have an explanation which indicates that
 * the document does not match.
 *
 * @param q the query to explain
 * @param defaultFieldName used for displaying the query in assertion messages
 * @param searcher the searcher to explain against; its index reader supplies maxDoc
 * @param results document ids that are expected to match and are therefore skipped
 * @throws IOException propagated from the searcher calls
 */
public static void checkNoMatchExplanations(Query q, String defaultFieldName,
    IndexSearcher searcher, int[] results)
    throws IOException {
  final String d = q.toString(defaultFieldName);

  // Typed set of the documents that are allowed to match.
  final Set<Integer> ignore = new TreeSet<>();
  for (int result : results) {
    ignore.add(result);
  }

  final int maxDoc = searcher.getIndexReader().maxDoc();
  for (int doc = 0; doc < maxDoc; doc++) {
    if (ignore.contains(doc)) {
      continue;
    }
    Explanation exp = searcher.explain(q, doc);
    Assert.assertNotNull("Explanation of [["+d+"]] for #"+doc+" is null",
        exp);
    Assert.assertFalse("Explanation of [["+d+"]] for #"+doc+
        " doesn't indicate non-match: " + exp.toString(),
        exp.isMatch());
  }
}
/**
 * Tests that a query matches an expected set of documents using a
 * Collector.
 *
 * <p>
 * Note that when using the Collector API, documents will be collected
 * if they "match" regardless of what their score is.
 * </p>
 *
 * <p>
 * The query is first run against the given searcher, and then again against
 * the searchers returned by QueryUtils.wrapUnderlyingReader for the values
 * -1, 0 and 1; every run must collect exactly the expected documents.
 * </p>
 *
 * @param random source of randomness handed to QueryUtils
 * @param query the query to test
 * @param defaultFieldName used for displaying the query in assertion messages
 * @param searcher the searcher to test the query against
 * @param results a list of documentIds that must match the query
 * @throws IOException propagated from the searcher calls
 * @see #checkHits
 */
public static void checkHitCollector(Random random, Query query, String defaultFieldName,
    IndexSearcher searcher, int[] results)
    throws IOException {
  QueryUtils.check(random, query, searcher);

  final Set<Integer> correct = new TreeSet<>();
  for (int result : results) {
    correct.add(result);
  }

  // The collector writes into 'actual', which is cleared and reused for every run below.
  final Set<Integer> actual = new TreeSet<>();
  final Collector c = new SetCollector(actual);

  searcher.search(query, c);
  Assert.assertEquals("Simple: " + query.toString(defaultFieldName),
      correct, actual);

  for (int i = -1; i < 2; i++) {
    actual.clear();
    IndexSearcher s = QueryUtils.wrapUnderlyingReader(random, searcher, i);
    s.search(query, c);
    Assert.assertEquals("Wrap Reader " + i + ": " +
        query.toString(defaultFieldName),
        correct, actual);
  }
}
/**
 * Just collects document ids into a set.
 *
 * <p>
 * Each collected id is the segment-relative doc id plus the docBase of the
 * leaf currently being collected, boxed as an Integer.
 * </p>
 */
public static class SetCollector extends SimpleCollector {
  /** Destination for collected ids; supplied by the caller and never replaced. */
  final Set<Integer> bag;

  /** docBase of the leaf reader currently being collected. */
  private int base = 0;

  /**
   * @param bag the set that collected document ids are added to
   */
  public SetCollector(Set<Integer> bag) {
    this.bag = bag;
  }

  /** Ignores the scorer: this collector only records document ids. */
  @Override
  public void setScorer(Scorer scorer) throws IOException {}

  /** Adds {@code doc + base} to the bag. */
  @Override
  public void collect(int doc) {
    bag.add(doc + base);
  }

  /** Remembers the docBase of the new leaf so that collect can offset by it. */
  @Override
  protected void doSetNextReader(LeafReaderContext context) throws IOException {
    base = context.docBase;
  }

  /** Scores are not needed. */
  @Override
  public boolean needsScores() {
    return false;
  }
}
/**
 * Tests that a query matches an expected set of documents using the
 * top hits returned by the searcher.
 *
 * <p>
 * Only the first 1000 hits returned by the searcher are considered; their
 * document ids, compared as a set, must equal the expected ids. Afterwards
 * the query is passed to QueryUtils.check.
 * </p>
 *
 * @param random source of randomness handed to QueryUtils and LuceneTestCase.rarely
 * @param query the query to test
 * @param defaultFieldName used for displaying the query in assertion messages
 * @param searcher the searcher to test the query against
 * @param results a list of documentIds that must match the query
 * @throws IOException propagated from the searcher calls
 * @see #checkHitCollector
 */
public static void checkHits(
    Random random,
    Query query,
    String defaultFieldName,
    IndexSearcher searcher,
    int[] results)
    throws IOException {
  ScoreDoc[] hits = searcher.search(query, 1000).scoreDocs;

  final Set<Integer> correct = new TreeSet<>();
  for (int result : results) {
    correct.add(result);
  }

  final Set<Integer> actual = new TreeSet<>();
  for (ScoreDoc hit : hits) {
    actual.add(hit.doc);
  }

  Assert.assertEquals(query.toString(defaultFieldName), correct, actual);
  QueryUtils.check(random, query, searcher, LuceneTestCase.rarely(random));
}
/**
 * Tests that a list of hits has the expected documents in the expected order.
 *
 * @param mes prefix for the assertion messages
 * @param results the expected document ids, in order
 * @param hits the hits to check
 */
public static void checkDocIds(String mes, int[] results, ScoreDoc[] hits) {
  // Expected value first, actual second, so a failure message reports them the right way round
  // (and matches the per-hit assertion below).
  Assert.assertEquals(mes + " nr of hits", results.length, hits.length);
  for (int i = 0; i < results.length; i++) {
    Assert.assertEquals(mes + " doc nrs for hit " + i, results[i], hits[i].doc);
  }
}
/**
 * Verifies that each of the two hit lists contains the expected documents in
 * the expected order, and then that the two lists agree with each other
 * according to checkEqual.
 *
 * @param query the query the hits belong to; handed on to checkEqual
 * @param hits1 the first list of hits
 * @param hits2 the second list of hits
 * @param results the expected document ids, in the expected order
 */
public static void checkHitsQuery(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2, int[] results) {
  // Each list is checked against the expectation on its own first ...
  checkDocIds("hits1", results, hits1);
  checkDocIds("hits2", results, hits2);

  // ... and only then are the two lists compared with one another.
  checkEqual(query, hits1, hits2);
}
/**
 * Asserts that two hit lists have the same length, the same document id at
 * every position, and scores that differ by no more than 1.0e-6 (absolute)
 * at every position.
 *
 * @param query the query the hits belong to; only used in failure messages
 * @param hits1 the first list of hits
 * @param hits2 the second list of hits
 */
public static void checkEqual(Query query, ScoreDoc[] hits1, ScoreDoc[] hits2) {
  final float scoreTolerance = 1.0e-6f;
  if (hits1.length != hits2.length) {
    Assert.fail("Unequal lengths: hits1="+hits1.length+",hits2="+hits2.length);
  }
  for (int i = 0; i < hits1.length; i++) {
    if (hits1[i].doc != hits2[i].doc) {
      Assert.fail("Hit " + i + " docnumbers don't match\n"
          + hits2str(hits1, hits2,0,0)
          + "for query:" + query.toString());
    }
    // Assert.fail always throws, so the doc ids are known to be equal here;
    // only the scores remain to be compared.
    if (Math.abs(hits1[i].score - hits2[i].score) > scoreTolerance) {
      Assert.fail("Hit " + i + ", doc nrs " + hits1[i].doc + " and " + hits2[i].doc
          + "\nunequal : " + hits1[i].score
          + "\n and: " + hits2[i].score
          + "\nfor query:" + query.toString());
    }
  }
}
public static String hits2str(ScoreDoc[] hits1, ScoreDoc[] hits2, int start, int end) {
StringBuilder sb = new StringBuilder();
int len1=hits1==null ? 0 : hits1.length;
int len2=hits2==null ? 0 : hits2.length;
if (end<=0) {
end = Math.max(len1,len2);
}
sb.append("Hits length1=").append(len1).append("\tlength2=").append(len2);
sb.append('\n');
for (int i=start; i 0) {
if (detail.length==1) {
// simple containment, unless it's a freq of: (which lets a query explain how the freq is calculated),
// just verify contained expl has same score
if (!expl.getDescription().endsWith("with freq of:"))
verifyExplanation(q,doc,score,deep,detail[0]);
} else {
// explanation must either:
// - end with one of: "product of:", "sum of:", "max of:", or
// - have "max plus x times others" (where x is a float).
float x = 0;
String descr = expl.getDescription().toLowerCase(Locale.ROOT);
boolean productOf = descr.endsWith("product of:");
boolean sumOf = descr.endsWith("sum of:");
boolean maxOf = descr.endsWith("max of:");
boolean maxTimesOthers = false;
if (!(productOf || sumOf || maxOf)) {
// maybe 'max plus x times others'
int k1 = descr.indexOf("max plus ");
if (k1>=0) {
k1 += "max plus ".length();
int k2 = descr.indexOf(" ",k1);
try {
x = Float.parseFloat(descr.substring(k1,k2).trim());
if (descr.substring(k2).trim().equals("times others of:")) {
maxTimesOthers = true;
}
} catch (NumberFormatException e) {
}
}
}
// TODO: this is a TERRIBLE assertion!!!!
Assert.assertTrue(
q+": multi valued explanation description=\""+descr
+"\" must be 'max of plus x times others' or end with 'product of'"
+" or 'sum of:' or 'max of:' - "+expl,
productOf || sumOf || maxOf || maxTimesOthers);
float sum = 0;
float product = 1;
float max = 0;
for (int i=0; i
© 2015 - 2025 Weber Informatics LLC | Privacy Policy