All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.lucene.search.suggest.DocumentDictionary Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.lucene.search.suggest;

import java.io.IOException;
import java.util.Collections;
import java.util.HashSet;
import java.util.Set;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.MultiBits;
import org.apache.lucene.index.MultiDocValues;
import org.apache.lucene.index.NumericDocValues;
import org.apache.lucene.index.StoredFields;
import org.apache.lucene.search.spell.Dictionary;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;

/**
 * Dictionary with terms, weights, payload (optional) and contexts (optional) information taken from
 * stored/indexed fields in a Lucene index. NOTE:
 *
 * 
    *
  • The term field has to be stored; if it is missing, the document is skipped. *
  • The payload and contexts field are optional and are not required to be stored. *
  • The weight field can be stored or can be a {@link NumericDocValues}. If the weight field is * not defined, the value of the weight is 0 *
*/ public class DocumentDictionary implements Dictionary { /** {@link IndexReader} to load documents from */ protected final IndexReader reader; /** {@link StoredFields} for this reader */ protected final StoredFields storedFields; /** Field to read payload from */ protected final String payloadField; /** Field to read contexts from */ protected final String contextsField; private final String field; private final String weightField; /** * Creates a new dictionary with the contents of the fields named field for the terms * and weightField for the weights that will be used for the corresponding terms. */ public DocumentDictionary(IndexReader reader, String field, String weightField) throws IOException { this(reader, field, weightField, null); } /** * Creates a new dictionary with the contents of the fields named field for the * terms, weightField for the weights that will be used for the the corresponding * terms and payloadField for the corresponding payloads for the entry. */ public DocumentDictionary( IndexReader reader, String field, String weightField, String payloadField) throws IOException { this(reader, field, weightField, payloadField, null); } /** * Creates a new dictionary with the contents of the fields named field for the * terms, weightField for the weights that will be used for the the corresponding * terms, payloadField for the corresponding payloads for the entry and * contextsField for associated contexts. */ public DocumentDictionary( IndexReader reader, String field, String weightField, String payloadField, String contextsField) throws IOException { this.reader = reader; this.storedFields = reader.storedFields(); this.field = field; this.weightField = weightField; this.payloadField = payloadField; this.contextsField = contextsField; } @Override public InputIterator getEntryIterator() throws IOException { return new DocumentInputIterator(payloadField != null, contextsField != null); } /** Implements {@link InputIterator} from stored fields. */ protected class DocumentInputIterator implements InputIterator { private final int docCount; private final Set relevantFields; private final boolean hasPayloads; private final boolean hasContexts; private final Bits liveDocs; private int currentDocId = -1; private long currentWeight = 0; private BytesRef currentPayload = null; private Set currentContexts; private final NumericDocValues weightValues; IndexableField[] currentDocFields = new IndexableField[0]; int nextFieldsPosition = 0; /** * Creates an iterator over term, weight and payload fields from the lucene index. setting * withPayload to false, implies an iterator over only term and weight. */ public DocumentInputIterator(boolean hasPayloads, boolean hasContexts) throws IOException { this.hasPayloads = hasPayloads; this.hasContexts = hasContexts; docCount = reader.maxDoc() - 1; weightValues = (weightField != null) ? MultiDocValues.getNumericValues(reader, weightField) : null; liveDocs = (reader.leaves().size() > 0) ? MultiBits.getLiveDocs(reader) : null; relevantFields = getRelevantFields(new String[] {field, weightField, payloadField, contextsField}); } @Override public long weight() { return currentWeight; } @Override public BytesRef next() throws IOException { while (true) { if (nextFieldsPosition < currentDocFields.length) { // Still values left from the document IndexableField fieldValue = currentDocFields[nextFieldsPosition++]; if (fieldValue.binaryValue() != null) { return fieldValue.binaryValue(); } else if (fieldValue.stringValue() != null) { return new BytesRef(fieldValue.stringValue()); } else { continue; } } if (currentDocId == docCount) { // Iterated over all the documents. break; } currentDocId++; if (liveDocs != null && !liveDocs.get(currentDocId)) { continue; } Document doc = storedFields.document(currentDocId, relevantFields); BytesRef tempPayload = null; if (hasPayloads) { IndexableField payload = doc.getField(payloadField); if (payload != null) { if (payload.binaryValue() != null) { tempPayload = payload.binaryValue(); } else if (payload.stringValue() != null) { tempPayload = new BytesRef(payload.stringValue()); } } // in case that the iterator has payloads configured, use empty values // instead of null for payload if (tempPayload == null) { tempPayload = new BytesRef(); } } Set tempContexts; if (hasContexts) { tempContexts = new HashSet<>(); final IndexableField[] contextFields = doc.getFields(contextsField); for (IndexableField contextField : contextFields) { if (contextField.binaryValue() != null) { tempContexts.add(contextField.binaryValue()); } else if (contextField.stringValue() != null) { tempContexts.add(new BytesRef(contextField.stringValue())); } else { continue; } } } else { tempContexts = Collections.emptySet(); } currentDocFields = doc.getFields(field); nextFieldsPosition = 0; if (currentDocFields.length == 0) { // no values in this document continue; } IndexableField fieldValue = currentDocFields[nextFieldsPosition++]; BytesRef tempTerm; if (fieldValue.binaryValue() != null) { tempTerm = fieldValue.binaryValue(); } else if (fieldValue.stringValue() != null) { tempTerm = new BytesRef(fieldValue.stringValue()); } else { continue; } currentPayload = tempPayload; currentContexts = tempContexts; currentWeight = getWeight(doc, currentDocId); return tempTerm; } return null; } @Override public BytesRef payload() { return currentPayload; } @Override public boolean hasPayloads() { return hasPayloads; } /** * Returns the value of the weightField for the current document. Retrieves the * value for the weightField if it's stored (using doc) or if it's * indexed as {@link NumericDocValues} (using docId) for the document. If no value * is found, then the weight is 0. */ protected long getWeight(Document doc, int docId) throws IOException { IndexableField weight = doc.getField(weightField); if (weight != null) { // found weight as stored return (weight.numericValue() != null) ? weight.numericValue().longValue() : 0; } else if (weightValues != null) { // found weight as NumericDocValue if (weightValues.docID() < docId) { weightValues.advance(docId); } if (weightValues.docID() == docId) { return weightValues.longValue(); } else { // missing return 0; } } else { // fall back return 0; } } private Set getRelevantFields(String... fields) { Set relevantFields = new HashSet<>(); for (String relevantField : fields) { if (relevantField != null) { relevantFields.add(relevantField); } } return relevantFields; } @Override public Set contexts() { if (hasContexts) { return currentContexts; } return null; } @Override public boolean hasContexts() { return hasContexts; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy