All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.benchmark.quality.trec.QueryDriver Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.quality.trec;


import org.apache.lucene.benchmark.quality.utils.SimpleQQParser;
import org.apache.lucene.benchmark.quality.utils.SubmissionReport;
import org.apache.lucene.benchmark.quality.*;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.store.FSDirectory;

import java.io.BufferedReader;
import java.io.OutputStreamWriter;
import java.io.PrintWriter;
import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.HashSet;
import java.util.Set;


/**
 * Command-line tool for doing a TREC evaluation run.
 *
 * <p>Expects four or five arguments, in order: {@code topicsFile}, {@code qrelsFile},
 * {@code submissionFile}, {@code indexDir} and an optional {@code querySpec}. The query spec
 * is a string whose letters select the topic fields to query on ({@code T}=title,
 * {@code D}=description, {@code N}=narrative); it defaults to {@code "T"}.
 **/
public class QueryDriver {

  /**
   * Runs the quality benchmark described by the command-line arguments and logs a summary
   * to standard output.
   *
   * <p>Prints a usage message to standard error and exits with status 1 when the number of
   * arguments is not 4 or 5.
   *
   * @param args {@code topicsFile qrelsFile submissionFile indexDir [querySpec]}
   * @throws Exception if any file or the index cannot be opened or read, or the run fails
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 4 || args.length > 5) {
      System.err.println("Usage: QueryDriver <topicsFile> <qrelsFile> <submissionFile> <indexDir> [querySpec]");
      System.err.println("topicsFile: input file containing queries");
      System.err.println("qrelsFile: input file containing relevance judgements");
      System.err.println("submissionFile: output submission file for trec_eval");
      System.err.println("indexDir: index directory");
      System.err.println("querySpec: string composed of fields to use in query consisting of T=title,D=description,N=narrative:");
      System.err.println("\texample: TD (query on Title + Description). The default is T (title only)");
      System.exit(1);
    }

    Path topicsFile = Paths.get(args[0]);
    Path qrelsFile = Paths.get(args[1]);
    Path submissionFile = Paths.get(args[2]);
    Path indexDir = Paths.get(args[3]);
    String fieldSpec = args.length == 5 ? args[4] : "T"; // default to Title-only if not specified.

    int maxResults = 1000;
    String docNameField = "docname";

    // Wraps System.out, so it is flushed at the end but intentionally never closed.
    PrintWriter logger = new PrintWriter(new OutputStreamWriter(System.out, Charset.defaultCharset()), true);

    // Every resource is released on both the success and the failure path. The input readers
    // stay open for the whole run, as in the original flow; closing them here is safe even if
    // a callee already closed them, because Closeable.close() is a no-op on a closed stream.
    try (PrintWriter submissionOut =
             new PrintWriter(Files.newBufferedWriter(submissionFile, StandardCharsets.UTF_8));
         FSDirectory dir = FSDirectory.open(indexDir);
         IndexReader reader = DirectoryReader.open(dir);
         BufferedReader topicsIn = Files.newBufferedReader(topicsFile, StandardCharsets.UTF_8);
         BufferedReader qrelsIn = Files.newBufferedReader(qrelsFile, StandardCharsets.UTF_8)) {

      SubmissionReport submitLog = new SubmissionReport(submissionOut, "lucene");
      IndexSearcher searcher = new IndexSearcher(reader);

      // use trec utilities to read trec topics into quality queries
      TrecTopicsReader qReader = new TrecTopicsReader();
      QualityQuery[] qqs = qReader.readQueries(topicsIn);

      // prepare judge, with trec utilities that read from a QRels file
      Judge judge = new TrecJudge(qrelsIn);

      // validate topics & judgments match each other
      judge.validateData(qqs, logger);

      // set the parsing of quality queries into Lucene queries.
      QualityQueryParser qqParser = new SimpleQQParser(queryFields(fieldSpec), "body");

      // run the benchmark
      QualityBenchmark qrun = new QualityBenchmark(qqs, qqParser, searcher, docNameField);
      qrun.setMaxResults(maxResults);
      QualityStats[] stats = qrun.execute(judge, submitLog, logger);

      // print an average sum of the results
      QualityStats avg = QualityStats.average(stats);
      avg.log("SUMMARY", 2, logger, "  ");
    } finally {
      logger.flush();
    }
  }

  /**
   * Translates a query spec into the topic field names it selects.
   *
   * @param fieldSpec string that may contain {@code T}, {@code D} and/or {@code N}; any
   *     other characters are ignored
   * @return the selected field names ("title", "description", "narrative") with no
   *     duplicates; empty when the spec contains none of the three letters
   */
  private static String[] queryFields(String fieldSpec) {
    Set<String> fieldSet = new HashSet<>();
    if (fieldSpec.indexOf('T') >= 0) {
      fieldSet.add("title");
    }
    if (fieldSpec.indexOf('D') >= 0) {
      fieldSet.add("description");
    }
    if (fieldSpec.indexOf('N') >= 0) {
      fieldSet.add("narrative");
    }
    return fieldSet.toArray(new String[0]);
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy