All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.suggest.DocumentDictionary Maven / Gradle / Ivy

There is a newer version: 2024.11.18751.20241128T090041Z-241100
Show newest version
package org.apache.lucene.search.suggest;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
import java.io.IOException;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Set;

import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;

/**
 * 

* Dictionary with terms, weights and optionally payload information * taken from stored/indexed fields in a Lucene index. *

* NOTE: *
    *
  • * The term and (optionally) payload fields have to be * stored *
  • *
  • * The weight field can be stored or can be a {@link NumericDocValues}. * If the weight field is not defined, the value of the weight is 0 *
  • *
  • * if any of the term or (optionally) payload fields supplied * do not have a value for a document, then the document is * skipped by the dictionary *
  • *
*/ public class DocumentDictionary implements Dictionary { /** {@link IndexReader} to load documents from */ protected final IndexReader reader; /** Field to read payload from */ protected final String payloadField; private final String field; private final String weightField; /** * Creates a new dictionary with the contents of the fields named field * for the terms and weightField for the weights that will be used for * the corresponding terms. */ public DocumentDictionary(IndexReader reader, String field, String weightField) { this(reader, field, weightField, null); } /** * Creates a new dictionary with the contents of the fields named field * for the terms, weightField for the weights that will be used for the * the corresponding terms and payloadField for the corresponding payloads * for the entry. */ public DocumentDictionary(IndexReader reader, String field, String weightField, String payloadField) { this.reader = reader; this.field = field; this.weightField = weightField; this.payloadField = payloadField; } @Override public InputIterator getEntryIterator() throws IOException { return new DocumentInputIterator(payloadField!=null); } /** Implements {@link InputIterator} from stored fields. */ protected class DocumentInputIterator implements InputIterator { private final int docCount; private final Set relevantFields; private final boolean hasPayloads; private final Bits liveDocs; private int currentDocId = -1; private long currentWeight; private BytesRef currentPayload; private final NumericDocValues weightValues; /** * Creates an iterator over term, weight and payload fields from the lucene * index. setting withPayload to false, implies an iterator * over only term and weight. */ public DocumentInputIterator(boolean hasPayloads) throws IOException { this.hasPayloads = hasPayloads; docCount = reader.maxDoc() - 1; weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null; liveDocs = (reader.leaves().size() > 0) ? MultiFields.getLiveDocs(reader) : null; relevantFields = getRelevantFields(new String [] {field, weightField, payloadField}); } @Override public long weight() { return currentWeight; } @Override public Comparator getComparator() { return null; } @Override public BytesRef next() throws IOException { while (currentDocId < docCount) { currentDocId++; if (liveDocs != null && !liveDocs.get(currentDocId)) { continue; } Document doc = reader.document(currentDocId, relevantFields); BytesRef tempPayload = null; BytesRef tempTerm = null; if (hasPayloads) { IndexableField payload = doc.getField(payloadField); if (payload == null || (payload.binaryValue() == null && payload.stringValue() == null)) { continue; } tempPayload = (payload.binaryValue() != null) ? payload.binaryValue() : new BytesRef(payload.stringValue()); } IndexableField fieldVal = doc.getField(field); if (fieldVal == null || (fieldVal.binaryValue() == null && fieldVal.stringValue() == null)) { continue; } tempTerm = (fieldVal.stringValue() != null) ? new BytesRef(fieldVal.stringValue()) : fieldVal.binaryValue(); currentPayload = tempPayload; currentWeight = getWeight(doc, currentDocId); return tempTerm; } return null; } @Override public BytesRef payload() { return currentPayload; } @Override public boolean hasPayloads() { return hasPayloads; } /** * Returns the value of the weightField for the current document. * Retrieves the value for the weightField if its stored (using doc) * or if its indexed as {@link NumericDocValues} (using docId) for the document. * If no value is found, then the weight is 0. */ protected long getWeight(Document doc, int docId) { IndexableField weight = doc.getField(weightField); if (weight != null) { // found weight as stored return (weight.numericValue() != null) ? weight.numericValue().longValue() : 0; } else if (weightValues != null) { // found weight as NumericDocValue return weightValues.get(docId); } else { // fall back return 0; } } private Set getRelevantFields(String... fields) { Set relevantFields = new HashSet(); for (String relevantField : fields) { if (relevantField != null) { relevantFields.add(relevantField); } } return relevantFields; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy