All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.pageseeder.flint.lucene.query.SearchResults Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2015 Allette Systems (Australia)
 * http://www.allette.com.au
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.pageseeder.flint.lucene.query;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.SortField;
import org.apache.lucene.search.TopFieldDocs;
import org.pageseeder.flint.IndexException;
import org.pageseeder.flint.indexing.FlintDocument;
import org.pageseeder.flint.indexing.FlintField;
import org.pageseeder.flint.indexing.FlintField.NumericType;
import org.pageseeder.flint.lucene.LuceneIndexIO;
import org.pageseeder.flint.lucene.search.Fields;
import org.pageseeder.flint.lucene.util.Dates;
import org.pageseeder.flint.lucene.util.Highlighter;
import org.pageseeder.xmlwriter.XML.NamespaceAware;
import org.pageseeder.xmlwriter.XMLStringWriter;
import org.pageseeder.xmlwriter.XMLWritable;
import org.pageseeder.xmlwriter.XMLWriter;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import java.io.IOException;
import java.text.ParseException;
import java.util.*;

/**
 * A container for search results.
 *
 * 

Use this class to serialise Lucene Search results as XML. * *

Note: the current implementation is a "throw away" object, once the toXML method has been * called, this instance is useless. * *

This class is not synchronized. * * @author Christophe Lauret (Weborganic) * @author Jean-Baptiste Reure (Weborganic) * @author William Liem (Allette Systems) * * @version 10 February 2012 */ public final class SearchResults implements XMLWritable { /** * Logger. */ private static final Logger LOGGER = LoggerFactory.getLogger(SearchResults.class); /** * Types of values formatted in the result. */ private enum ValueType {STRING, DATE, DATETIME, LONG, DOUBLE, INT, FLOAT} /** * The maximum length for a field to expand. */ private static final int MAX_FIELD_VALUE_LENGTH = 1000; /** * One minute in milliseconds. */ private static final int ONE_MINUTE_IN_MS = 60000; /** * One hour in milliseconds. */ private static final int ONE_HOUR_IN_MS = 3600000; /** * The actual search results from Lucene. */ private final ScoreDoc[] _scoredocs; /** * Fields used for sorting. */ private final SortField[] _sortfields; /** * Indicates the paging information. */ private final SearchPaging _paging; /** * The query used to produce these results. */ private final SearchQuery _query; /** * The analyzer used. */ private final Analyzer _analyzer; /** * The index searcher used. */ private final IndexSearcher _searcher; /** * The index I/O. */ private final SearchReaders readers; /** * List of fields to get the extract from. */ private final List extractFields = new ArrayList<>(); /** * The total number of results. */ private final int totalNbOfResults; // State variables // --------------------------------------------------------------------------------------------- /** * A state variable to indicate whether the search results instance can still be accessed. */ private boolean _terminated = false; /** * The timezone offset used to adjust the correct date and time. */ private int timezoneOffset; // Constructors // --------------------------------------------------------------------------------------------- /** * Creates a new SearchResults. * * @param query The search query that was used to produce these results. * @param docs The actual search results from Lucene in TopFieldDocs. * @param paging The paging configuration. * @param readers The list of readers to be released at the end * @param searcher The Lucene searcher. * */ public SearchResults(SearchQuery query, TopFieldDocs docs, SearchPaging paging, Map readers, IndexSearcher searcher) { this(query, null, docs.scoreDocs, docs.fields, (int) docs.totalHits.value, paging, new SearchReaders(readers), searcher); } /** * Creates a new SearchResults. * * @param query The search query that was used to produce these results. * @param docs The actual search results from Lucene in TopFieldDocs. * @param paging The paging configuration. * @param io The IndexIO object, used to release the searcher when terminated * @param searcher The Lucene searcher. * */ public SearchResults(SearchQuery query, TopFieldDocs docs, SearchPaging paging, LuceneIndexIO io, IndexSearcher searcher) { this(query, null, docs.scoreDocs, docs.fields, (int) docs.totalHits.value, paging, new SearchReaders(io), searcher); } /** * Creates a new SearchResults. * * @param query The search query that was used to produce these results. * @param docs The actual search results from Lucene in ScoreDoc. * @param paging The paging configuration. * @param io The IndexIO object, used to release the searcher when terminated * @param searcher The Lucene searcher. * */ public SearchResults(SearchQuery query, ScoreDoc[] docs, int totalHits, SearchPaging paging, LuceneIndexIO io, IndexSearcher searcher) { this(query, null, docs, null, totalHits, paging, new SearchReaders(io), searcher); } /** * Creates a new SearchResults. * * @param query The search query that was used to produce these results. * @param docs The actual search results from Lucene in ScoreDoc. * @param paging The paging configuration. * @param readers The list of readers to be released at the end * @param searcher The Lucene searcher. * */ public SearchResults(SearchQuery query, ScoreDoc[] docs, int totalHits, SearchPaging paging, Map readers, IndexSearcher searcher) { this(query, null, docs, null, totalHits, paging, new SearchReaders(readers), searcher); } /** * Creates a new SearchResults. * * @param query The original query * @param hits The actual search results from Lucene in ScoreDoc. * @param sortf The Field used to sort the results * @param paging The paging configuration. * @param readers The IndexIO object, used to release the searcher when terminated * @param searcher The Lucene searcher. */ public SearchResults(SearchQuery query, Analyzer analyzer, ScoreDoc[] hits, SortField[] sortf, int totalResults, SearchPaging paging, SearchReaders readers, IndexSearcher searcher) { this._query = query; this._analyzer = analyzer; this._scoredocs = hits; this._sortfields = sortf; this._paging = paging != null? paging : new SearchPaging(); this._searcher = searcher; this.readers = readers; this.totalNbOfResults = totalResults; // default timezone is the server's TimeZone tz = TimeZone.getDefault(); this.timezoneOffset = tz.getRawOffset(); // take daylight savings into account if (tz.inDaylightTime(new Date())) { this.timezoneOffset += ONE_HOUR_IN_MS; } } // Basic public methods // --------------------------------------------------------------------------------------------- /** * Add field names to get extracts from. * The order matters here as the first extract found is the one included in the results. * * @param fields list of field names */ public void addExtractFields(List fields) { this.extractFields.addAll(fields); } /** * Add a field name to get extracts from. * The order matters here as the first extract found is the one included in the results. * * @param field new field name */ public void addExtractField(String field) { this.extractFields.add(field); } /** * Returns the total number of results. * * @return the total number of results. */ public int getTotalNbOfResults() { return this.totalNbOfResults; } /** * Indicates whether the search results are empty. * * @return true if the results are empty; * false if there is more than one hit. */ public boolean isEmpty() { return this.totalNbOfResults == 0; } /** * Sets the time zone to use when formatting the results as XML. * * @param timezoneInMinutes the timezone offset in minutes (difference with GMT) */ public void setTimeZone(int timezoneInMinutes) { this.timezoneOffset = timezoneInMinutes * ONE_MINUTE_IN_MS; } /** * @return the index searcher, can be used to compute facets * * @throws IndexException if the results have been terminated and the searcher closed */ public IndexSearcher searcher() throws IndexException { if (this._terminated) throw new IndexException("Cannot retrieve searcher after termination", new IllegalStateException()); return this._searcher; } /** * @return the original query */ public SearchQuery query() { return this._query; } /** * Serialises the search results as XML. * * @param xml The XML writer. * * @throws IOException Should there be any I/O exception while writing the XML. */ @Override public void toXML(XMLWriter xml) throws IOException { xml.openElement("search-results", true); int firsthit = this._paging.getFirstHit(); int lasthit = this._paging.getLastHit(this.totalNbOfResults); // Include query if (this._query != null) { xml.openElement("query", true); xml.attribute("lucene", this._query.toQuery().toString()); this._query.toXML(xml); xml.closeElement(); } // Display some metadata on the search toMetadataXML(xml); // Returned documents xml.openElement("documents", true); // Iterate over the hits to find the extracts for (int i = firsthit - 1; i < lasthit; i++) { String score = Float.toString(this._scoredocs[i].score); Document doc = this._searcher.storedFields().document(this._scoredocs[i].doc); String extractXML = null; if (this._query != null && this._analyzer != null) { Highlighter highlighter = new Highlighter(this._query.toQuery(), this._searcher.getIndexReader(), this._analyzer); for (IndexableField f : doc.getFields()) { if (this.extractFields.isEmpty() || this.extractFields.contains(f.name())) { String extract = highlighter.highlight(f.name(), f.stringValue(), 200); // Documents.extract(Fields.toString(f), t.text(), 200); if (extract != null) { XMLStringWriter xsw = new XMLStringWriter(NamespaceAware.No); xsw.openElement("extract"); xsw.attribute("from", f.name()); xsw.writeXML(extract); xsw.closeElement(); extractXML = xsw.toString(); break; } } } } // document as XML documentToXML(doc, extractXML, score, this.timezoneOffset, xml); } // close 'documents' xml.closeElement(); // close 'results' xml.closeElement(); // close everything terminate(); } public static void documentToXML(Document doc, int timezoneOffset, XMLWriter xml) throws IOException { documentToXML(doc, null, null, timezoneOffset, xml); } private static void documentToXML(Document doc, String extract, String score, int timezoneOffset, XMLWriter xml) throws IOException { xml.openElement("document", true); if (score != null) xml.element("score", score); if (extract != null) xml.writeXML(extract); // display the value of each field for (IndexableField f : doc.getFields()) { // Retrieve the value String value = Fields.toString(f); ValueType type = ValueType.STRING; // check for numeric value Number number = f.numericValue(); if (number != null) { if (number instanceof Long) type = ValueType.LONG; else if (number instanceof Double) type = ValueType.DOUBLE; else if (number instanceof Integer) type = ValueType.INT; else if (number instanceof Float) type = ValueType.FLOAT; // format dates using ISO 8601 when possible } else if (value != null && value.length() > 0 && f.name().contains("date") && Dates.isLuceneDate(value)) { try { if (value.length() > 8) { value = Dates.toISODateTime(value, timezoneOffset); type = ValueType.DATETIME; } else { value = Dates.toISODate(value); if (value.length() == 10) { type = ValueType.DATE; } } } catch (ParseException ex) { LOGGER.warn("Unparseable date found {}", value); } } // unnecessary to return the full value of long fields if (value != null && value.length() < MAX_FIELD_VALUE_LENGTH) { xml.openElement("field"); xml.attribute("name", f.name()); // Display the correct attributes so that we know we can format the date if (type == ValueType.DATE) { xml.attribute("date", value); } else if (type == ValueType.DATETIME) { xml.attribute("datetime", value); } else if (type == ValueType.LONG) { xml.attribute("numeric-type", "long"); } else if (type == ValueType.DOUBLE) { xml.attribute("numeric-type", "double"); } else if (type == ValueType.FLOAT) { xml.attribute("numeric-type", "float"); } else if (type == ValueType.INT) { xml.attribute("numeric-type", "int"); } if (f.binaryValue() != null) xml.attribute("compressed", "true"); xml.writeText(value); xml.closeElement(); } } // close 'document' xml.closeElement(); } public static void flintDocumentToXML(FlintDocument doc, int timezoneOffset, XMLWriter xml) throws IOException { flintDocumentToXML(doc, null, null, timezoneOffset, xml); } public static void flintDocumentToXML(FlintDocument doc, String extract, String score, int timezoneOffset, XMLWriter xml) throws IOException { xml.openElement("result", true); if (score != null) xml.attribute("score", score); if (extract != null) xml.writeXML(extract); // display the value of each field for (FlintField f : doc.fields()) { // Retrieve the value String value = f.value() == null ? null : f.value().toString(); ValueType type = ValueType.STRING; // check for numeric value NumericType nt = f.numeric(); if (nt != null) { if (nt == NumericType.LONG) type = ValueType.LONG; else if (nt == NumericType.DOUBLE) type = ValueType.DOUBLE; else if (nt == NumericType.INT) type = ValueType.INT; else if (nt == NumericType.FLOAT) type = ValueType.FLOAT; // format dates using ISO 8601 when possible } else if (value != null && value.length() > 0 && f.name().contains("date") && Dates.isLuceneDate(value)) { try { if (value.length() > 8) { value = Dates.toISODateTime(value, timezoneOffset); type = ValueType.DATETIME; } else { value = Dates.toISODate(value); if (value.length() == 10) { type = ValueType.DATE; } } } catch (ParseException ex) { LOGGER.warn("Unparseable date found {}", value); } } // unnecessary to return the full value of long fields if (value != null && value.length() < MAX_FIELD_VALUE_LENGTH) { xml.openElement("field"); xml.attribute("name", f.name()); // Display the correct attributes so that we know we can format the date if (type == ValueType.DATE) { xml.attribute("date", value); } else if (type == ValueType.DATETIME) { xml.attribute("datetime", value); } else if (type == ValueType.LONG) { xml.attribute("numeric-type", "long"); } else if (type == ValueType.DOUBLE) { xml.attribute("numeric-type", "double"); } else if (type == ValueType.FLOAT) { xml.attribute("numeric-type", "float"); } else if (type == ValueType.INT) { xml.attribute("numeric-type", "int"); } xml.writeText(value); xml.closeElement(); } } // close 'document' xml.closeElement(); } public int getFirstHit() { return this._paging.getFirstHit(); } public int getLastHit() { return this._paging.getLastHit(this.totalNbOfResults); } /** * Return the actual results. * * @return the search results. * * @throws IndexException If the search results have already been terminated. */ public ScoreDoc[] getScoreDoc() throws IndexException { if (this._terminated) throw new IndexException("Cannot retrieve documents after termination", new IllegalStateException()); return this._scoredocs; } /** * Load a document from the index. * *

Note this * * @param id the id of the document * @return the document object loaded from the index, could be null * * @throws IndexException if the index is invalid */ public Document getDocument(int id) throws IndexException { if (this._terminated) throw new IndexException("Cannot retrieve documents after termination", new IllegalStateException()); try { return this._searcher.storedFields().document(id); } catch (CorruptIndexException e) { LOGGER.error("Failed to retrieve a document because of a corrupted Index", e); throw new IndexException("Failed to retrieve a document because of a corrupted Index", e); } catch (IOException ioe) { LOGGER.error("Failed to retrieve a document because of an I/O problem", ioe); throw new IndexException("Failed to retrieve a document because of an I/O problem", ioe); } } /** * Release all references to the searcher. * *

Does nothing if the results have already been terminated. * */ public void terminate() { if (this._terminated) return; this.readers.release(this._searcher); this._terminated = true; } /** * Provides an iterable class over the Lucene documents. * *

This allows Lucene documents from these results to be iterated over in a for each loop: *

   *   for (Document doc : results.documents()) {
   *     ...
   *   }
   * 
* * @return an iterable class over the Lucene documents. * * @throws IllegalStateException If these results have been closed (terminated already). */ public Iterable documents() { if (this._terminated) throw new IllegalStateException(); return new DocIterable(this._paging.getFirstHit() - 1, getLastHit()); } // Private helpers // ---------------------------------------------------------------------------------------------- /** * Write the search results metadata as XML. * * @param xml The XML writer * * @throws IOException Should an error occur while writing the XML */ private void toMetadataXML(XMLWriter xml) throws IOException { SearchPaging page = this._paging; int total = this.totalNbOfResults; // Display some metadata on the search xml.openElement("metadata", true); xml.openElement("hits", true); xml.element("per-page", Integer.toString(page.getHitsPerPage())); xml.element("total", Integer.toString(total)); xml.closeElement(); xml.openElement("page", true); xml.element("first-hit", Integer.toString(page.getFirstHit())); xml.element("last-hit", Integer.toString(page.getLastHit(total))); xml.element("current", Integer.toString(page.getPage())); xml.element("last", Integer.toString(page.getPageCount(total))); xml.closeElement(); if (this._sortfields != null) { xml.openElement("sort-fields", true); for (SortField field : this._sortfields) { xml.element("field", field.getField()); } xml.closeElement(); } xml.closeElement(); } // Private classes // ---------------------------------------------------------------------------------------------- /** * An iterable class over the documents in these results. * * @author christophe Lauret * @version 6 October 2011 */ private final class DocIterable implements Iterable { private final int _start; private final int _end; public DocIterable(int start, int end) { this._start = start; this._end = end; } /** * Provides an iterable class over the Lucene documents. * *

this can be used in a for each loop * * @return an iterable class over the Lucene documents. */ @Override public Iterator iterator() { return new DocIterator(this._start, this._end); } } /** * An iterator over the documents in these results. * * @author Christophe Lauret * @author Jean-Baptiste Reure * @version 16 August 2013 */ private final class DocIterator implements Iterator { /** * The index searcher used. */ private final IndexSearcher searcher = SearchResults.this._searcher; /** * The actual search results from Lucene. */ private final ScoreDoc[] scoredocs = SearchResults.this._scoredocs; /** * The current index for this iterator. */ private int index; /** * The current index for this iterator. */ private final int endIndex; public DocIterator(int start, int end) { this.index = start; this.endIndex = end; } @Override public boolean hasNext() { return this.index < this.scoredocs.length && this.index < this.endIndex; } @Override public Document next() { if (!hasNext()) throw new NoSuchElementException(); try { return this.searcher.storedFields().document(this.scoredocs[this.index++].doc); } catch (IOException ex) { throw new IllegalStateException("Error retrieving document", ex); } } /** * @throws UnsupportedOperationException as it's not possible */ @Override public void remove() { throw new UnsupportedOperationException("Cannot remove documents from search results"); } } private static class SearchReaders { private final LuceneIndexIO _single; private final Map _readers = new HashMap<>(); public SearchReaders(Map readers) { this._readers.putAll(readers); this._single = null; } public SearchReaders(LuceneIndexIO io) { this._single = io; } public void release(IndexSearcher searcher) { if (this._single != null) { this._single.releaseSearcher(searcher); } for (Map.Entry io : this._readers.entrySet()) io.getKey().releaseReader(io.getValue()); } } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy