org.apache.lucene.benchmark.quality.QualityStats Maven / Gradle / Ivy
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.PrintWriter;
import java.text.NumberFormat;
import java.util.ArrayList;
import java.util.Locale;

/**
 * Results of quality benchmark run for a single query or for a set of queries.
 * <p>
 * Results are fed in rank order through {@link #addResult(int, boolean, long)};
 * precision, recall, average precision and reciprocal rank are updated
 * incrementally on every call.
 */
public class QualityStats {

  /** Number of points for which precision is computed. */
  public static final int MAX_POINTS = 20;

  /** Blank padding used to align the columns written by {@link #log}. */
  private static final String PADDING = "                                    ";

  private double maxGoodPoints;
  private double recall;
  /** Precision at rank i is kept in pAt[i]; pAt[0] is unused. */
  private final double[] pAt;
  private double pReleventSum = 0;
  private double numPoints = 0;
  private double numGoodPoints = 0;
  private double mrr = 0;
  private long searchTime;
  private long docNamesExtractTime;

  /**
   * A certain rank in which a relevant doc was found.
   */
  public static class RecallPoint {
    private final int rank;
    private final double recall;

    private RecallPoint(int rank, double recall) {
      this.rank = rank;
      this.recall = recall;
    }

    /** Returns the rank: where on the list of returned docs this relevant doc appeared. */
    public int getRank() {
      return rank;
    }

    /** Returns the recall: how many relevant docs were returned up to this point, inclusive. */
    public double getRecall() {
      return recall;
    }
  }

  private final ArrayList<RecallPoint> recallPoints;

  /**
   * Construct a QualityStats object with anticipated maximal number of relevant hits.
   * @param maxGoodPoints maximal possible relevant hits.
   * @param searchTime search time for the measured query, in milliseconds.
   */
  public QualityStats(double maxGoodPoints, long searchTime) {
    this.maxGoodPoints = maxGoodPoints;
    this.searchTime = searchTime;
    this.recallPoints = new ArrayList<>();
    pAt = new double[MAX_POINTS + 1]; // pAt[0] unused.
  }

  /**
   * Add a (possibly relevant) doc.
   * @param n rank of the added doc (its ordinal position within the query results).
   *          Ranks must be added consecutively, starting at 1.
   * @param isRelevant true if the added doc is relevant, false otherwise.
   * @param docNameExtractTime time spent extracting the name of this doc, in milliseconds;
   *          accumulated into the total reported by {@link #getDocNamesExtractTime()}.
   * @throws IllegalArgumentException if <code>n</code> is not the rank immediately
   *          following the last added one.
   */
  public void addResult(int n, boolean isRelevant, long docNameExtractTime) {
    if (Math.abs(numPoints + 1 - n) > 1E-6) {
      throw new IllegalArgumentException("point " + n + " illegal after " + numPoints + " points!");
    }
    if (isRelevant) {
      numGoodPoints += 1;
      recallPoints.add(new RecallPoint(n, numGoodPoints));
      if (recallPoints.size() == 1 && n <= 5) { // first point, but only within 5 top scores.
        mrr = 1.0 / n;
      }
    }
    numPoints = n;
    double p = numGoodPoints / numPoints;
    if (isRelevant) {
      pReleventSum += p;
    }
    if (n < pAt.length) {
      pAt[n] = p;
    }
    // Without judgments (maxGoodPoints <= 0) there is no denominator for recall,
    // so the current precision is used in its place.
    recall = maxGoodPoints <= 0 ? p : numGoodPoints / maxGoodPoints;
    docNamesExtractTime += docNameExtractTime;
  }

  /**
   * Return the precision at rank n:
   * |{relevant hits within first <code>n</code> hits}| / <code>n</code>.
   * @param n requested precision point, must be at least 1 and at most {@link #MAX_POINTS}.
   * @throws IllegalArgumentException if <code>n</code> is outside that range.
   */
  public double getPrecisionAt(int n) {
    if (n < 1 || n > MAX_POINTS) {
      throw new IllegalArgumentException("n=" + n + " - but it must be in [1," + MAX_POINTS + "] range!");
    }
    if (n > numPoints) {
      // Fewer than n results were added: the relevant hits found so far are
      // spread over n positions.
      return (numPoints * pAt[(int) numPoints]) / n;
    }
    return pAt[n];
  }

  /**
   * Return the average precision at recall points.
   */
  public double getAvp() {
    return maxGoodPoints == 0 ? 0 : pReleventSum / maxGoodPoints;
  }

  /**
   * Return the recall: |{relevant hits found}| / |{relevant hits existing}|.
   */
  public double getRecall() {
    return recall;
  }

  /**
   * Log information on this QualityStats object.
   * @param title optional title line; skipped when null or blank.
   * @param paddLines number of empty lines written before and after the report.
   * @param logger Logger.
   * @param prefix prefix before each log line; null is treated as the empty string.
   */
  public void log(String title, int paddLines, PrintWriter logger, String prefix) {
    for (int i = 0; i < paddLines; i++) {
      logger.println();
    }
    if (title != null && title.trim().length() > 0) {
      logger.println(title);
    }
    prefix = prefix == null ? "" : prefix;
    NumberFormat nf = NumberFormat.getInstance(Locale.ROOT);
    nf.setMaximumFractionDigits(3);
    nf.setMinimumFractionDigits(3);
    nf.setGroupingUsed(true);
    int M = 19;
    logger.println(prefix + format("Search Seconds: ", M) +
        fracFormat(nf.format((double) searchTime / 1000)));
    logger.println(prefix + format("DocName Seconds: ", M) +
        fracFormat(nf.format((double) docNamesExtractTime / 1000)));
    logger.println(prefix + format("Num Points: ", M) +
        fracFormat(nf.format(numPoints)));
    logger.println(prefix + format("Num Good Points: ", M) +
        fracFormat(nf.format(numGoodPoints)));
    logger.println(prefix + format("Max Good Points: ", M) +
        fracFormat(nf.format(maxGoodPoints)));
    logger.println(prefix + format("Average Precision: ", M) +
        fracFormat(nf.format(getAvp())));
    logger.println(prefix + format("MRR: ", M) +
        fracFormat(nf.format(getMRR())));
    logger.println(prefix + format("Recall: ", M) +
        fracFormat(nf.format(getRecall())));
    for (int i = 1; i < (int) numPoints && i < pAt.length; i++) {
      logger.println(prefix + format("Precision At " + i + ": ", M) +
          fracFormat(nf.format(getPrecisionAt(i))));
    }
    for (int i = 0; i < paddLines; i++) {
      logger.println();
    }
  }

  /** Right-pads <code>s</code> with blanks to at least <code>minLen</code> characters. */
  private static String format(String s, int minLen) {
    s = (s == null ? "" : s);
    int n = Math.max(minLen, s.length());
    return (s + PADDING).substring(0, n);
  }

  /**
   * Left-pads the integer part of a formatted number to at least 6 characters,
   * so that decimal points line up in the log.
   */
  private static String fracFormat(String frac) {
    int k = frac.indexOf('.');
    if (k < 0) {
      // No decimal point (e.g. a formatted NaN): treat the whole text as the integer part.
      k = frac.length();
    }
    String s1 = PADDING + frac.substring(0, k);
    int n = Math.max(k, 6);
    s1 = s1.substring(s1.length() - n);
    return s1 + frac.substring(k);
  }

  /**
   * Create a QualityStats object that is the average of the input QualityStats objects.
   * Times are averaged over all inputs; all other measures are averaged only over
   * inputs that have a positive number of known relevant docs.
   * @param stats array of input stats to be averaged.
   * @return an average over the input stats.
   */
  public static QualityStats average(QualityStats[] stats) {
    QualityStats avg = new QualityStats(0, 0);
    if (stats.length == 0) {
      // weird, no stats to average!
      return avg;
    }
    int m = 0; // queries with positive judgements
    // aggregate
    for (int i = 0; i < stats.length; i++) {
      avg.searchTime += stats[i].searchTime;
      avg.docNamesExtractTime += stats[i].docNamesExtractTime;
      if (stats[i].maxGoodPoints > 0) {
        m++;
        avg.numGoodPoints += stats[i].numGoodPoints;
        avg.numPoints += stats[i].numPoints;
        avg.pReleventSum += stats[i].getAvp();
        avg.recall += stats[i].recall;
        avg.mrr += stats[i].getMRR();
        avg.maxGoodPoints += stats[i].maxGoodPoints;
        for (int j = 1; j < avg.pAt.length; j++) {
          avg.pAt[j] += stats[i].getPrecisionAt(j);
        }
      }
    }
    assert m > 0 : "Fishy: no \"good\" queries!";
    // take average: times go by all queries, other measures go by "good" queries only.
    avg.searchTime /= stats.length;
    avg.docNamesExtractTime /= stats.length;
    avg.numGoodPoints /= m;
    avg.numPoints /= m;
    avg.recall /= m;
    avg.mrr /= m;
    avg.maxGoodPoints /= m;
    for (int j = 1; j < avg.pAt.length; j++) {
      avg.pAt[j] /= m;
    }
    avg.pReleventSum /= m;                 // this is now the mean average precision
    avg.pReleventSum *= avg.maxGoodPoints; // rescale so that getAvp() returns that mean

    return avg;
  }

  /**
   * Returns the time it took to extract doc names for judging the measured query, in milliseconds.
   */
  public long getDocNamesExtractTime() {
    return docNamesExtractTime;
  }

  /**
   * Returns the maximal number of good points, as given at construction
   * (or averaged by {@link #average(QualityStats[])}).
   */
  public double getMaxGoodPoints() {
    return maxGoodPoints;
  }

  /**
   * Returns the number of good points (only relevant points).
   */
  public double getNumGoodPoints() {
    return numGoodPoints;
  }

  /**
   * Returns the number of points (both relevant and irrelevant points).
   */
  public double getNumPoints() {
    return numPoints;
  }

  /**
   * Returns the recall points, one per relevant doc added, in rank order.
   */
  public RecallPoint[] getRecallPoints() {
    return recallPoints.toArray(new RecallPoint[0]);
  }

  /**
   * Returns the Mean reciprocal rank over the queries or RR for a single query.
   * <p>
   * Reciprocal rank is defined as <code>1/r</code> where <code>r</code> is the
   * rank of the first correct result, or <code>0</code> if there are no correct
   * results within the top 5 results.
   * <p>
   * This follows the definition in
   * <i>Question Answering - CNLP at the TREC-10 Question Answering Track</i>.
   */
  public double getMRR() {
    return mrr;
  }

  /**
   * Returns the search time in milliseconds for the measured query.
   */
  public long getSearchTime() {
    return searchTime;
  }
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy