org.apache.lucene.search.vectorhighlight.FieldTermStack Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of lucene-highlighter Show documentation
Show all versions of lucene-highlighter Show documentation
This is the highlighter for apache lucene java
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.search.vectorhighlight;
import java.io.IOException;
import java.util.Collections;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Set;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.PostingsEnum;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.CharsRefBuilder;
/**
* {@code FieldTermStack} is a stack that keeps query terms in the specified field of the
* document to be highlighted.
*/
public class FieldTermStack {

  private final String fieldName;

  // Terms of the document that matched the query, sorted by position. After construction only
  // the first TermInfo of each position remains in the list; further terms at the same position
  // are reachable through TermInfo#getNext().
  LinkedList<TermInfo> termList = new LinkedList<>();

  /**
   * Builds the stack from the term vector of {@code fieldName} in document {@code docId}.
   *
   * <p>The stack is left empty when the query has no term set for the field, when the document
   * has no term vectors, when the field's term vector is missing or has no positions, or when a
   * posting reports a negative start offset (no offsets stored).
   *
   * @param reader IndexReader of the index
   * @param docId document id to be highlighted
   * @param fieldName field of the document to be highlighted
   * @param fieldQuery FieldQuery object
   * @throws IOException If there is a low-level I/O error
   */
  public FieldTermStack(
      IndexReader reader, int docId, String fieldName, final FieldQuery fieldQuery)
      throws IOException {
    this.fieldName = fieldName;

    Set<String> termSet = fieldQuery.getTermSet(fieldName);
    // just return to make null snippet if un-matched fieldName specified when fieldMatch == true
    if (termSet == null) {
      return;
    }

    final Fields vectors = reader.termVectors().get(docId);
    if (vectors == null) {
      // null snippet
      return;
    }

    final Terms vector = vectors.terms(fieldName);
    if (vector == null || vector.hasPositions() == false) {
      // null snippet
      return;
    }

    final CharsRefBuilder spare = new CharsRefBuilder();
    final TermsEnum termsEnum = vector.iterator();
    PostingsEnum dpEnum = null;
    BytesRef text;

    final int numDocs = reader.maxDoc();

    while ((text = termsEnum.next()) != null) {
      spare.copyUTF8Bytes(text);
      final String term = spare.toString();
      if (!termSet.contains(term)) {
        // not a query term, nothing to highlight
        continue;
      }

      // reuse the previous enum instance where the implementation allows it
      dpEnum = termsEnum.postings(dpEnum, PostingsEnum.POSITIONS);
      dpEnum.nextDoc();

      // For weight look here:
      // http://lucene.apache.org/core/3_6_0/api/core/org/apache/lucene/search/DefaultSimilarity.html
      // weight = ln(numDocs / (docFreq + 1)) + 1
      final float weight =
          (float)
              (Math.log(numDocs / (double) (reader.docFreq(new Term(fieldName, text)) + 1)) + 1.0);

      final int freq = dpEnum.freq();
      for (int i = 0; i < freq; i++) {
        final int pos = dpEnum.nextPosition();
        if (dpEnum.startOffset() < 0) {
          return; // no offsets, null snippet
        }
        termList.add(new TermInfo(term, dpEnum.startOffset(), dpEnum.endOffset(), pos, weight));
      }
    }

    // sort by position
    Collections.sort(termList);

    // now look for dups at the same position, linking them together
    int currentPos = -1;
    TermInfo previous = null; // last TermInfo linked into the current position's ring
    TermInfo first = null; // head of the current position's ring (the one kept in termList)
    Iterator<TermInfo> iterator = termList.iterator();
    while (iterator.hasNext()) {
      TermInfo current = iterator.next();
      if (current.position == currentPos) {
        // same position as the ring head: chain it and drop it from the list itself
        assert previous != null;
        previous.setNext(current);
        previous = current;
        iterator.remove();
      } else {
        // a new position starts: close the previous ring back onto its head
        if (previous != null) {
          previous.setNext(first);
        }
        previous = first = current;
        currentPos = current.position;
      }
    }
    // close the ring of the last position
    if (previous != null) {
      previous.setNext(first);
    }
  }

  /**
   * @return field name
   */
  public String getFieldName() {
    return fieldName;
  }

  /**
   * Removes and returns the head of the term list.
   *
   * @return the top TermInfo object of the stack, or {@code null} if the stack is empty
   */
  public TermInfo pop() {
    return termList.poll();
  }

  /**
   * @param termInfo the TermInfo object to be put on the top of the stack
   */
  public void push(TermInfo termInfo) {
    termList.push(termInfo);
  }

  /**
   * to know whether the stack is empty
   *
   * @return true if the stack is empty, false if not
   */
  public boolean isEmpty() {
    return termList == null || termList.size() == 0;
  }

  /**
   * Single term with its position/offsets in the document and IDF weight. It is Comparable but
   * considers only position; {@link #equals(Object)} and {@link #hashCode()} likewise look at the
   * position alone.
   */
  public static class TermInfo implements Comparable<TermInfo> {
    private final String text;
    private final int startOffset;
    private final int endOffset;
    private final int position;

    // IDF-weight of this term
    private final float weight;

    // pointer to other TermInfo's at the same position.
    // this is a circular list, so with no syns, just points to itself
    private TermInfo next;

    TermInfo(String text, int startOffset, int endOffset, int position, float weight) {
      this.text = text;
      this.startOffset = startOffset;
      this.endOffset = endOffset;
      this.position = position;
      this.weight = weight;
      this.next = this;
    }

    void setNext(TermInfo next) {
      this.next = next;
    }

    /** Returns the next TermInfo at this same position. This is a circular list! */
    public TermInfo getNext() {
      return next;
    }

    /** Returns the term text. */
    public String getText() {
      return text;
    }

    /** Returns the start offset recorded for this occurrence. */
    public int getStartOffset() {
      return startOffset;
    }

    /** Returns the end offset recorded for this occurrence. */
    public int getEndOffset() {
      return endOffset;
    }

    /** Returns the position of this occurrence. */
    public int getPosition() {
      return position;
    }

    /** Returns the IDF weight assigned to this term. */
    public float getWeight() {
      return weight;
    }

    @Override
    public String toString() {
      return text + '(' + startOffset + ',' + endOffset + ',' + position + ')';
    }

    /** Orders by position only. */
    @Override
    public int compareTo(TermInfo o) {
      // Integer.compare avoids the overflow that plain subtraction can produce
      return Integer.compare(this.position, o.position);
    }

    @Override
    public int hashCode() {
      final int prime = 31;
      int result = 1;
      result = prime * result + position;
      return result;
    }

    @Override
    public boolean equals(Object obj) {
      if (this == obj) {
        return true;
      }
      if (obj == null) {
        return false;
      }
      if (getClass() != obj.getClass()) {
        return false;
      }
      TermInfo other = (TermInfo) obj;
      return position == other.position;
    }
  }
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy