All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.benchmark.byTask.tasks.ReadTask Maven / Gradle / Ivy

There is a newer version: 9.11.1
Show newest version
package org.apache.lucene.benchmark.byTask.tasks;

/**
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import java.io.IOException;
import java.util.Collection;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.benchmark.byTask.PerfRunData;
import org.apache.lucene.benchmark.byTask.feeds.QueryMaker;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Fieldable;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.TopFieldCollector;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.Sort;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.InvalidTokenOffsetsException;
import org.apache.lucene.store.Directory;


/**
 * Read index (abstract) task.
 * Sub classes implement withSearch(), withWarm(), withTraverse() and withRetrieve()
 * methods to configure the actual action.
 * 

*

Note: All ReadTasks reuse the reader if it is already open. * Otherwise a reader is opened at start and closed at the end. *

* The search.num.hits config parameter sets * the top number of hits to collect during searching. *

Other side effects: none. */ public abstract class ReadTask extends PerfTask { public ReadTask(PerfRunData runData) { super(runData); } public int doLogic() throws Exception { int res = 0; boolean closeReader = false; // open reader or use existing one IndexReader ir = getRunData().getIndexReader(); if (ir == null) { Directory dir = getRunData().getDirectory(); ir = IndexReader.open(dir); closeReader = true; //res++; //this is confusing, comment it out } // optionally warm and add num docs traversed to count if (withWarm()) { Document doc = null; for (int m = 0; m < ir.maxDoc(); m++) { if (!ir.isDeleted(m)) { doc = ir.document(m); res += (doc == null ? 0 : 1); } } } if (withSearch()) { res++; final IndexSearcher searcher; if (closeReader) { searcher = new IndexSearcher(ir); } else { searcher = getRunData().getIndexSearcher(); } QueryMaker queryMaker = getQueryMaker(); Query q = queryMaker.makeQuery(); Sort sort = getSort(); TopDocs hits; final int numHits = numHits(); if (numHits > 0) { if (sort != null) { // TODO: change the following to create TFC with in/out-of order // according to whether the query's Scorer. TopFieldCollector collector = TopFieldCollector.create(sort, numHits, true, withScore(), withMaxScore(), false); searcher.search(q, collector); hits = collector.topDocs(); } else { hits = searcher.search(q, numHits); } //System.out.println("q=" + q + ":" + hits.totalHits + " total hits"); if (withTraverse()) { final ScoreDoc[] scoreDocs = hits.scoreDocs; int traversalSize = Math.min(scoreDocs.length, traversalSize()); if (traversalSize > 0) { boolean retrieve = withRetrieve(); int numHighlight = Math.min(numToHighlight(), scoreDocs.length); Analyzer analyzer = getRunData().getAnalyzer(); BenchmarkHighlighter highlighter = null; if (numHighlight > 0) { highlighter = getBenchmarkHighlighter(q); } for (int m = 0; m < traversalSize; m++) { int id = scoreDocs[m].doc; res++; if (retrieve) { Document document = retrieveDoc(ir, id); res += document != null ? 1 : 0; if (numHighlight > 0 && m < numHighlight) { Collection/**/ fieldsToHighlight = getFieldsToHighlight(document); for (Iterator iterator = fieldsToHighlight.iterator(); iterator.hasNext();) { String field = (String) iterator.next(); String text = document.get(field); res += highlighter.doHighlight(ir, id, field, document, analyzer, text); } } } } } } } searcher.close(); } if (closeReader) { ir.close(); } return res; } protected Document retrieveDoc(IndexReader ir, int id) throws IOException { return ir.document(id); } /** * Return query maker used for this task. */ public abstract QueryMaker getQueryMaker(); /** * Return true if search should be performed. */ public abstract boolean withSearch(); /** * Return true if warming should be performed. */ public abstract boolean withWarm(); /** * Return true if, with search, results should be traversed. */ public abstract boolean withTraverse(); /** Whether scores should be computed (only useful with * field sort) */ public boolean withScore() { return true; } /** Whether maxScores should be computed (only useful with * field sort) */ public boolean withMaxScore() { return true; } /** * Specify the number of hits to traverse. Tasks should override this if they want to restrict the number * of hits that are traversed when {@link #withTraverse()} is true. Must be greater than 0. *

* Read task calculates the traversal as: Math.min(hits.length(), traversalSize()) * * @return Integer.MAX_VALUE */ public int traversalSize() { return Integer.MAX_VALUE; } static final int DEFAULT_SEARCH_NUM_HITS = 10; private int numHits; public void setup() throws Exception { super.setup(); numHits = getRunData().getConfig().get("search.num.hits", DEFAULT_SEARCH_NUM_HITS); } /** * Specify the number of hits to retrieve. Tasks should override this if they want to restrict the number * of hits that are collected during searching. Must be greater than 0. * * @return 10 by default, or search.num.hits config if set. */ public int numHits() { return numHits; } /** * Return true if, with search & results traversing, docs should be retrieved. */ public abstract boolean withRetrieve(); /** * Set to the number of documents to highlight. * * @return The number of the results to highlight. O means no docs will be highlighted. */ public int numToHighlight() { return 0; } /** * @deprecated Use {@link #getBenchmarkHighlighter(Query)} */ final Highlighter getHighlighter(Query q) { // not called return null; } /** * Return an appropriate highlighter to be used with * highlighting tasks */ protected BenchmarkHighlighter getBenchmarkHighlighter(Query q){ return null; } /** * @return the maximum number of highlighter fragments * @deprecated Please define getBenchmarkHighlighter instead */ final int maxNumFragments(){ // not called -- we switched this method to final to // force any external subclasses to cutover to // getBenchmarkHighlighter instead return 10; } /** * * @return true if the highlighter should merge contiguous fragments * @deprecated Please define getBenchmarkHighlighter instead */ final boolean isMergeContiguousFragments(){ // not called -- we switched this method to final to // force any external subclasses to cutover to // getBenchmarkHighlighter instead return false; } /** * @deprecated Please define getBenchmarkHighlighter instead */ final int doHighlight(TokenStream ts, String text, Highlighter highlighter, boolean mergeContiguous, int maxFragments) throws IOException, InvalidTokenOffsetsException { // not called -- we switched this method to final to // force any external subclasses to cutover to // getBenchmarkHighlighter instead return 0; } protected Sort getSort() { return null; } /** * Define the fields to highlight. Base implementation returns all fields * @param document The Document * @return A Collection of Field names (Strings) */ protected Collection/**/ getFieldsToHighlight(Document document) { List/**/ fieldables = document.getFields(); Set/**/ result = new HashSet(fieldables.size()); for (Iterator iterator = fieldables.iterator(); iterator.hasNext();) { Fieldable fieldable = (Fieldable) iterator.next(); result.add(fieldable.name()); } return result; } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy