All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.benchmark.quality.trec.TrecTopicsReader Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.benchmark.quality.trec;

import java.io.BufferedReader;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;

import org.apache.lucene.benchmark.quality.QualityQuery;

/**
 * Read TREC topics.
 *
 * Expects this topic format -
 *
 *   <top>
 *   <num> Number: nnn
 *     
 *   <title> title of the topic
 *     
 *   <desc> Description:
 *   description of the topic
 *     
 *   <narr> Narrative:
 *   "story" composed by assessors.
 *    
 *   </top>
 * 
* Comment lines starting with '#' are ignored. */ public class TrecTopicsReader { private static final String newline = System.getProperty("line.separator"); /** * Constructor for Trec's TopicsReader */ public TrecTopicsReader() { super(); } /** * Read quality queries from trec format topics file. * @param reader where queries are read from. * @return the result quality queries. * @throws IOException if cannot read the queries. */ public QualityQuery[] readQueries(BufferedReader reader) throws IOException { ArrayList res = new ArrayList<>(); StringBuilder sb; try { while (null!=(sb=read(reader,"",null,false,false))) { HashMap fields = new HashMap<>(); // id sb = read(reader,"",null,true,false); int k = sb.indexOf(":"); String id = sb.substring(k+1).trim(); // title sb = read(reader,"",null,true,false); k = sb.indexOf(">"); String title = sb.substring(k+1).trim(); // description read(reader,"<desc>",null,false,false); sb.setLength(0); String line = null; while ((line = reader.readLine()) != null) { if (line.startsWith("<narr>")) break; if (sb.length() > 0) sb.append(' '); sb.append(line); } String description = sb.toString().trim(); // narrative sb.setLength(0); while ((line = reader.readLine()) != null) { if (line.startsWith("</top>")) break; if (sb.length() > 0) sb.append(' '); sb.append(line); } String narrative = sb.toString().trim(); // we got a topic! fields.put("title",title); fields.put("description",description); fields.put("narrative", narrative); QualityQuery topic = new QualityQuery(id,fields); res.add(topic); } } finally { reader.close(); } // sort result array (by ID) QualityQuery qq[] = res.toArray(new QualityQuery[0]); Arrays.sort(qq); return qq; } // read until finding a line that starts with the specified prefix private StringBuilder read (BufferedReader reader, String prefix, StringBuilder sb, boolean collectMatchLine, boolean collectAll) throws IOException { sb = (sb==null ? 
new StringBuilder() : sb); String sep = ""; while (true) { String line = reader.readLine(); if (line==null) { return null; } if (line.startsWith(prefix)) { if (collectMatchLine) { sb.append(sep).append(line); sep = newline; } break; } if (collectAll) { sb.append(sep).append(line); sep = newline; } } //System.out.println("read: "+sb); return sb; } } </code></pre> <br/> <br/> <!--<div id="right-banner">--> <!--</div>--> <!--<div id="left-banner">--> <!--</div>--> <div class='clear'></div> </main> </div> <br/><br/> <div class="align-center">© 2015 - 2024 <a href="/legal-notice.php">Weber Informatics LLC</a> | <a href="/data-protection.php">Privacy Policy</a></div> <br/><br/><br/><br/><br/><br/> </body> </html>