All downloads are FREE. The search and download functionality uses the official Maven repository.

org.elasticsearch.search.lookup.IndexFieldTerm Maven / Gradle / Ivy

The newest version!
/*
 * Licensed to Elasticsearch under one or more contributor
 * license agreements. See the NOTICE file distributed with
 * this work for additional information regarding copyright
 * ownership. Elasticsearch licenses this file to you under
 * the Apache License, Version 2.0 (the "License"); you may
 * not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.elasticsearch.search.lookup;

import org.apache.lucene.index.Fields;
import org.apache.lucene.index.FilterLeafReader.FilterPostingsEnum;
import org.apache.lucene.index.LeafReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermContext;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.TermStatistics;
import org.apache.lucene.util.Bits;
import org.apache.lucene.util.BytesRef;
import org.elasticsearch.ElasticsearchException;

import java.io.IOException;
import java.util.Iterator;

/**
 * Holds all information on a particular term in a field.
 * <p>
 * An instance is bound to one field/term pair and to the reader and document
 * supplied by the {@link LeafIndexLookup} given to the constructor. It exposes
 * the document frequency ({@link #df()}), the total term frequency
 * ({@link #ttf()}), the term frequency in the current document ({@link #tf()})
 * and an iterator backed by a {@link PositionIterator}.
 */
public class IndexFieldTerm implements Iterable {
    // NOTE(review): Iterable and iterator() are raw types here; presumably they
    // should be parameterized with the element type produced by
    // PositionIterator -- confirm against that class before tightening.

    // The posting list for this term. Never null once setReader() has run: when
    // the term or field does not exist in the segment, an empty stand-in is
    // installed instead.
    PostingsEnum postings;

    // The IndexLookup.FLAG_* bits this term was requested with (frequencies,
    // positions, offsets, payloads, cache).
    private final int flags;

    private final String fieldName;

    private final String term;

    private final PositionIterator iterator;

    // for lucene calls
    private final Term identifier;

    private final TermStatistics termStats;

    // Frequency of the term in the document last passed to setDocument();
    // 0 when the term does not occur in that document.
    private int freq = 0;

    /**
     * Creates the term lookup, positions it on the reader and document currently
     * held by {@code indexLookup}, and fetches the term statistics.
     *
     * @param term        the term text; must not be null
     * @param fieldName   the field the term belongs to; must not be null
     * @param indexLookup source of the reader, document id, searcher and reader context
     * @param flags       combination of {@code IndexLookup.FLAG_*} bits
     * @throws ElasticsearchException if the postings or the term statistics cannot be read
     */
    public IndexFieldTerm(String term, String fieldName, LeafIndexLookup indexLookup, int flags) {
        assert fieldName != null;
        assert term != null;
        assert indexLookup != null;
        this.fieldName = fieldName;
        this.term = term;
        this.flags = flags;
        this.identifier = new Term(fieldName, term);
        // The iterator has to exist before setDocument() runs, because
        // setDocument() calls iterator.nextDoc().
        this.iterator = (flags & IndexLookup.FLAG_CACHE) != 0
                ? new CachedPositionIterator(this)
                : new PositionIterator(this);
        setReader(indexLookup.getReader());
        setDocument(indexLookup.getDocId());
        try {
            termStats = indexLookup.getIndexSearcher().termStatistics(identifier,
                    TermContext.build(indexLookup.getReaderContext(), identifier));
        } catch (IOException e) {
            throw new ElasticsearchException("Cannot get term statistics: ", e);
        }
    }

    /**
     * Returns the document frequency of the term as reported by the term statistics.
     */
    public long df() throws IOException {
        return termStats.docFreq();
    }

    /**
     * Returns the total term frequency of the term, that is, how often the
     * term appears in any document, as reported by the term statistics.
     */
    public long ttf() throws IOException {
        return termStats.totalTermFreq();
    }

    /**
     * Returns the frequency of the term in the document last passed to
     * {@link #setDocument(int)}, or 0 if the term does not occur in it.
     */
    public int tf() throws IOException {
        return freq;
    }

    /**
     * Moves this term to the given document: advances the posting list if it
     * is still before {@code docId}, records the term frequency (0 when the
     * posting list does not land exactly on {@code docId}) and notifies the
     * position iterator.
     *
     * @param docId the document to position on
     * @throws ElasticsearchException if reading the postings fails
     */
    public void setDocument(int docId) {
        assert (postings != null);
        try {
            int current = postings.docID();
            // Only ever move forward; a posting list already past docId is left alone.
            if (current < docId) {
                current = postings.advance(docId);
            }
            freq = (current == docId) ? postings.freq() : 0;
            iterator.nextDoc();
        } catch (IOException e) {
            throw new ElasticsearchException("While trying to initialize term positions in IndexFieldTerm.setDocument() ", e);
        }
    }

    /**
     * Returns the position iterator after resetting it.
     */
    @Override
    public Iterator iterator() {
        return iterator.reset();
    }

    /*
     * A user might decide inside a script to call get with _POSITIONS and then
     * a second time with _PAYLOADS. If the positions were recorded but the
     * payloads were not, the user will not have access to them. Therefore, throw
     * an exception here explaining how to call get().
     */
    public void validateFlags(int flags2) {
        // Every bit requested in flags2 must already be present in this.flags.
        // Comparing with != (rather than <) also works if a flag uses the sign bit.
        if ((this.flags & flags2) != flags2) {
            throw new ElasticsearchException("You must call get with all required flags! Instead of " + getCalledStatement(flags2)
                    + "call " + getCallStatement(flags2 | this.flags) + " once");
        }
    }

    // When the reader changes, we have to get the posting list for this term
    // and reader. Falls back to an empty posting list when the segment has no
    // such term or field.
    private void setReader(LeafReader reader) {
        try {
            final PostingsEnum found = getPostings(convertToLuceneFlags(flags), reader);
            postings = (found != null) ? found : emptyPostings();
        } catch (IOException e) {
            throw new ElasticsearchException("Unable to get postings for field " + fieldName + " and term " + term, e);
        }
    }

    // Builds a PostingsEnum that iterates over no documents; used to fake out
    // the postings when the term or field is missing from a segment.
    private static PostingsEnum emptyPostings() {
        final DocIdSetIterator empty = DocIdSetIterator.empty();
        return new PostingsEnum() {
            @Override
            public int docID() {
                return empty.docID();
            }

            @Override
            public int nextDoc() throws IOException {
                return empty.nextDoc();
            }

            @Override
            public int advance(int target) throws IOException {
                return empty.advance(target);
            }

            @Override
            public long cost() {
                return empty.cost();
            }

            @Override
            public int freq() throws IOException {
                return 1;
            }

            @Override
            public int nextPosition() throws IOException {
                return -1;
            }

            @Override
            public int startOffset() throws IOException {
                return -1;
            }

            @Override
            public int endOffset() throws IOException {
                return -1;
            }

            @Override
            public BytesRef getPayload() throws IOException {
                return null;
            }
        };
    }

    // Translates IndexLookup.FLAG_* bits into the matching PostingsEnum flags.
    private static int convertToLuceneFlags(int flags) {
        int luceneFlags = PostingsEnum.NONE;
        if ((flags & IndexLookup.FLAG_FREQUENCIES) != 0) {
            luceneFlags |= PostingsEnum.FREQS;
        }
        if ((flags & IndexLookup.FLAG_POSITIONS) != 0) {
            luceneFlags |= PostingsEnum.POSITIONS;
        }
        if ((flags & IndexLookup.FLAG_PAYLOADS) != 0) {
            luceneFlags |= PostingsEnum.PAYLOADS;
        }
        if ((flags & IndexLookup.FLAG_OFFSETS) != 0) {
            luceneFlags |= PostingsEnum.OFFSETS;
        }
        return luceneFlags;
    }

    // Looks up the posting list of the term in the given reader. Returns null
    // when the reader has no fields, the field has no terms, or the term is not
    // found. When the reader reports live docs, the result is wrapped so that
    // documents not marked live are skipped.
    private PostingsEnum getPostings(int luceneFlags, LeafReader reader) throws IOException {
        assert identifier.field() != null;
        assert identifier.bytes() != null;
        final Fields fields = reader.fields();
        if (fields == null) {
            return null;
        }
        final Terms terms = fields.terms(identifier.field());
        if (terms == null) {
            return null;
        }
        final TermsEnum termsEnum = terms.iterator();
        if (!termsEnum.seekExact(identifier.bytes())) {
            return null;
        }
        // The current postings instance is handed in as the reuse candidate.
        final PostingsEnum raw = termsEnum.postings(postings, luceneFlags);
        final Bits liveDocs = reader.getLiveDocs();
        return (liveDocs == null) ? raw : skipNonLive(raw, liveDocs);
    }

    // Wraps a posting list so that nextDoc()/advance() only stop on documents
    // whose bit is set in liveDocs.
    private static PostingsEnum skipNonLive(PostingsEnum in, final Bits liveDocs) {
        return new FilterPostingsEnum(in) {
            // Starting from candidate, move forward until a live document or
            // the end of the posting list is reached.
            private int doNext(int candidate) throws IOException {
                int doc = candidate;
                while (doc != NO_MORE_DOCS && !liveDocs.get(doc)) {
                    doc = super.nextDoc();
                }
                return doc;
            }

            @Override
            public int nextDoc() throws IOException {
                return doNext(super.nextDoc());
            }

            @Override
            public int advance(int target) throws IOException {
                return doNext(super.advance(target));
            }
        };
    }

    // Describes the two get() calls the user made: the one this term was
    // created with and the one now being validated. Padded with a space on
    // both sides for embedding in the error message.
    private String getCalledStatement(int flags2) {
        final String recordedCall = getCallStatement(getFlagsString(flags));
        final String requestedCall = getCallStatement(getFlagsString(flags2));
        return " " + recordedCall + " and " + requestedCall + " ";
    }

    // Renders the script expression for a get() call with the given flags text.
    private String getCallStatement(String calledFlags) {
        return "_index['" + this.fieldName + "'].get('" + this.term + "', " + calledFlags + ")";
    }

    // Renders the script expression for a get() call with the given flag bits,
    // padded with a space on both sides.
    private String getCallStatement(int flags2) {
        return " " + getCallStatement(getFlagsString(flags2)) + " ";
    }

    // Renders the set flag bits as "_A | _B | ...". Returns null when none of
    // the known flags is set.
    private String getFlagsString(int flags2) {
        final StringBuilder names = new StringBuilder();
        appendFlagName(names, flags2, IndexLookup.FLAG_FREQUENCIES, "_FREQUENCIES");
        appendFlagName(names, flags2, IndexLookup.FLAG_POSITIONS, "_POSITIONS");
        appendFlagName(names, flags2, IndexLookup.FLAG_OFFSETS, "_OFFSETS");
        appendFlagName(names, flags2, IndexLookup.FLAG_PAYLOADS, "_PAYLOADS");
        appendFlagName(names, flags2, IndexLookup.FLAG_CACHE, "_CACHE");
        return names.length() == 0 ? null : names.toString();
    }

    // Appends name to names (separated by " | " from earlier entries) when the
    // given flag bit is set in flags.
    private static void appendFlagName(StringBuilder names, int flags, int flag, String name) {
        if ((flags & flag) == 0) {
            return;
        }
        if (names.length() > 0) {
            names.append(" | ");
        }
        names.append(name);
    }

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy