All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.benchmark.quality.trec.TrecJudge Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.quality.trec;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.PrintWriter;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.StringTokenizer;

import org.apache.lucene.benchmark.quality.Judge;
import org.apache.lucene.benchmark.quality.QualityQuery;

/**
 * Judges whether a given document is relevant to a given quality query, based on the TREC format for judgements.
 */
public class TrecJudge implements Judge {

  HashMap judgements;
  
  /**
   * Constructor from a reader.
   * 

* Expected input format: *

   *     qnum  0   doc-name     is-relevant
   * 
* Two sample lines: *
 
   *     19    0   doc303       1
   *     19    0   doc7295      0
   * 
* @param reader where judgments are read from. * @throws IOException If there is a low-level I/O error. */ public TrecJudge (BufferedReader reader) throws IOException { judgements = new HashMap<>(); QRelJudgement curr = null; String zero = "0"; String line; try { while (null!=(line=reader.readLine())) { line = line.trim(); if (line.length()==0 || '#'==line.charAt(0)) { continue; } StringTokenizer st = new StringTokenizer(line); String queryID = st.nextToken(); st.nextToken(); String docName = st.nextToken(); boolean relevant = !zero.equals(st.nextToken()); assert !st.hasMoreTokens() : "wrong format: "+line+" next: "+st.nextToken(); if (relevant) { // only keep relevant docs if (curr==null || !curr.queryID.equals(queryID)) { curr = judgements.get(queryID); if (curr==null) { curr = new QRelJudgement(queryID); judgements.put(queryID,curr); } } curr.addRelevandDoc(docName); } } } finally { reader.close(); } } // inherit javadocs @Override public boolean isRelevant(String docName, QualityQuery query) { QRelJudgement qrj = judgements.get(query.getQueryID()); return qrj!=null && qrj.isRelevant(docName); } /** single Judgement of a trec quality query */ private static class QRelJudgement { private String queryID; private HashMap relevantDocs; QRelJudgement(String queryID) { this.queryID = queryID; relevantDocs = new HashMap<>(); } public void addRelevandDoc(String docName) { relevantDocs.put(docName,docName); } boolean isRelevant(String docName) { return relevantDocs.containsKey(docName); } public int maxRecall() { return relevantDocs.size(); } } // inherit javadocs @Override public boolean validateData(QualityQuery[] qq, PrintWriter logger) { HashMap missingQueries = new HashMap<>(judgements); ArrayList missingJudgements = new ArrayList<>(); for (int i=0; i0) { isValid = false; if (logger!=null) { logger.println("WARNING: "+missingJudgements.size()+" queries have no judgments! 
- "); for (int i=0; i0) { isValid = false; if (logger!=null) { logger.println("WARNING: "+missingQueries.size()+" judgments match no query! - "); for (final String id : missingQueries.keySet()) { logger.println(" "+id); } } } return isValid; } // inherit javadocs @Override public int maxRecall(QualityQuery query) { QRelJudgement qrj = judgements.get(query.getQueryID()); if (qrj!=null) { return qrj.maxRecall(); } return 0; } }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy