org.elasticsearch.search.lookup.LeafIndexLookup Maven / Gradle / Ivy
The newest version!
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.elasticsearch.search.lookup;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexReaderContext;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.LeafReaderContext;
import org.apache.lucene.index.ReaderUtil;
import org.apache.lucene.search.IndexSearcher;
import org.elasticsearch.ElasticsearchException;
import org.elasticsearch.common.util.MinimalMap;
import java.io.IOException;
import java.util.HashMap;
import java.util.Map;
public class LeafIndexLookup extends MinimalMap {
// Current reader from which we can get the term vectors. No info on term
// and field statistics.
private final LeafReader reader;
// The parent reader from which we can get proper field and term
// statistics
private final IndexReader parentReader;
// we need this later to get the field and term statistics of the shard
private final IndexSearcher indexSearcher;
// current docId
private int docId = -1;
// stores the objects that are used in the script. we maintain this map
// because we do not want to re-initialize the objects each time a field is
// accessed
private final Map indexFields = new HashMap<>();
// number of documents per shard. cached here because the computation is
// expensive
private int numDocs = -1;
// the maximum doc number of the shard.
private int maxDoc = -1;
// number of deleted documents per shard. cached here because the
// computation is expensive
private int numDeletedDocs = -1;
public int numDocs() {
if (numDocs == -1) {
numDocs = parentReader.numDocs();
}
return numDocs;
}
public int maxDoc() {
if (maxDoc == -1) {
maxDoc = parentReader.maxDoc();
}
return maxDoc;
}
public int numDeletedDocs() {
if (numDeletedDocs == -1) {
numDeletedDocs = parentReader.numDeletedDocs();
}
return numDeletedDocs;
}
public LeafIndexLookup(LeafReaderContext ctx) {
reader = ctx.reader();
parentReader = ReaderUtil.getTopLevelContext(ctx).reader();
indexSearcher = new IndexSearcher(parentReader);
indexSearcher.setQueryCache(null);
}
public void setDocument(int docId) {
if (this.docId == docId) { // if we are called with the same docId,
// nothing to do
return;
}
// We assume that docs are processed in ascending order of id. If this
// is not the case, we would have to re initialize all posting lists in
// IndexFieldTerm. TODO: Instead of assert we could also call
// setReaderInFields(); here?
if (this.docId > docId) {
// This might happen if the same SearchLookup is used in different
// phases, such as score and fetch phase.
// In this case we do not want to re initialize posting list etc.
// because we do not even know if term and field statistics will be
// needed in this new phase.
// Therefore we just remove all IndexFieldTerms.
indexFields.clear();
}
this.docId = docId;
setNextDocIdInFields();
}
protected void setNextDocIdInFields() {
for (IndexField stat : indexFields.values()) {
stat.setDocIdInTerms(this.docId);
}
}
/*
* TODO: here might be potential for running time improvement? If we knew in
* advance which terms are requested, we could provide an array which the
* user could then iterate over.
*/
@Override
public IndexField get(Object key) {
String stringField = (String) key;
IndexField indexField = indexFields.get(key);
if (indexField == null) {
try {
indexField = new IndexField(stringField, this);
indexFields.put(stringField, indexField);
} catch (IOException e) {
throw new ElasticsearchException(e.getMessage());
}
}
return indexField;
}
/*
* Get the lucene term vectors. See
* https://lucene.apache.org/core/4_0_0/core/org/apache/lucene/index/Fields.html
* *
*/
public Fields termVectors() throws IOException {
assert reader != null;
return reader.getTermVectors(docId);
}
LeafReader getReader() {
return reader;
}
public int getDocId() {
return docId;
}
public IndexReader getParentReader() {
if (parentReader == null) {
return reader;
}
return parentReader;
}
public IndexSearcher getIndexSearcher() {
return indexSearcher;
}
public IndexReaderContext getReaderContext() {
return getParentReader().getContext();
}
}