
// com.jaeksoft.searchlib.snippet.SnippetVectors
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2013 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.snippet;
import it.unimi.dsi.fastutil.Arrays;
import it.unimi.dsi.fastutil.Swapper;
import it.unimi.dsi.fastutil.ints.IntComparator;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Iterator;
import java.util.List;
import org.apache.commons.lang.ArrayUtils;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.index.TermPositionVector;
import org.apache.lucene.index.TermVectorOffsetInfo;
import com.jaeksoft.searchlib.SearchLibException;
import com.jaeksoft.searchlib.analysis.CompiledAnalyzer;
import com.jaeksoft.searchlib.analysis.TokenTerm;
import com.jaeksoft.searchlib.function.expression.SyntaxError;
import com.jaeksoft.searchlib.index.ReaderInterface;
import com.jaeksoft.searchlib.query.ParseException;
import com.jaeksoft.searchlib.schema.FieldValueItem;
import com.jaeksoft.searchlib.util.Timer;
class SnippetVectors {
/**
 * A single occurrence of a searched term inside a field value.
 * <p>
 * Instances carry the character offsets of the occurrence, the index of the
 * term in the searched terms array and the token position. The two mutable
 * flags are working state used while the list of occurrences is filtered.
 */
static class SnippetVector {

    /** Start offset, as reported by the {@link TermVectorOffsetInfo}. */
    public final int start;

    /** End offset, as reported by the {@link TermVectorOffsetInfo}. */
    public final int end;

    /** Index of the term in the array of searched terms. */
    public final int term;

    /** Token position of this occurrence. */
    public final int position;

    /** Set to {@code true} by {@code removeIncludes} to discard this item. */
    public boolean remove;

    /**
     * Items whose flag is still {@code false} are dropped by
     * {@code removeNonQuery}. Presumably set by
     * {@code SnippetQueries.checkQueries} -- not visible in this file.
     */
    public boolean query;

    /**
     * Builds an occurrence from the offsets and position of a term.
     *
     * @param term
     *            index of the term in the searched terms array
     * @param termVectorOffsetInfo
     *            supplies the start and end offsets
     * @param position
     *            token position of the occurrence
     */
    private SnippetVector(final int term,
            final TermVectorOffsetInfo termVectorOffsetInfo,
            final int position) {
        // Both flags start cleared; later filtering passes switch them on.
        this.remove = false;
        this.query = false;
        this.term = term;
        this.position = position;
        this.start = termVectorOffsetInfo.getStartOffset();
        this.end = termVectorOffsetInfo.getEndOffset();
    }

    @Override
    public String toString() {
        final StringBuilder sb = new StringBuilder("Term: ");
        sb.append(term);
        sb.append(" Start: ").append(start);
        sb.append(" End: ").append(end);
        sb.append(" Pos:").append(position);
        return sb.toString();
    }
}
/**
 * Builds the list of term occurrences of a document field that take part in
 * the snippet queries, and returns an iterator over them.
 * <p>
 * The steps are: obtain a term position vector for the field, collect one
 * {@link SnippetVector} per occurrence of each searched term, drop the
 * occurrences marked by {@code removeIncludes}, let the snippet queries
 * flag the occurrences they match, and finally keep only the flagged ones.
 * Each step is measured with its own child {@link Timer}.
 *
 * @param docId
 *            identifier of the document in the reader
 * @param reader
 *            index reader used to look up a stored term vector
 * @param snippetQueries
 *            provides the searched terms and flags the matching occurrences
 * @param fieldName
 *            name of the field to inspect
 * @param values
 *            field values, analyzed when the index has no position vector
 * @param analyzer
 *            analyzer used for that fallback; may be {@code null}
 * @param parentTimer
 *            parent of the timers created by this method
 * @param expiration
 *            passed through to {@code SnippetQueries.checkQueries}
 * @return an iterator over the retained occurrences, or {@code null} when
 *         there is no searched term or no term vector could be obtained
 */
final static Iterator<SnippetVector> extractTermVectorIterator(
        final int docId, final ReaderInterface reader,
        final SnippetQueries snippetQueries, final String fieldName,
        List<FieldValueItem> values, CompiledAnalyzer analyzer,
        final Timer parentTimer, final long expiration) throws IOException,
        ParseException, SyntaxError, SearchLibException {
    if (ArrayUtils.isEmpty(snippetQueries.terms))
        return null;

    Timer t = new Timer(parentTimer, "getTermPositionVector " + fieldName);
    TermPositionVector termVector = getTermPositionVector(
            snippetQueries.terms, reader, docId, fieldName, values,
            analyzer, t);
    t.end(null);
    if (termVector == null)
        return null;

    Collection<SnippetVector> vectors = new ArrayList<SnippetVector>();

    t = new Timer(parentTimer, "populate");
    populate(termVector, snippetQueries.terms, vectors, t);
    t.end(null);

    t = new Timer(parentTimer, "removeIncludes");
    vectors = removeIncludes(vectors);
    t.end(null);

    t = new Timer(parentTimer, "checkQueries");
    snippetQueries.checkQueries(vectors, t, expiration);
    t.end(null);

    t = new Timer(parentTimer, "removeNonQuery");
    vectors = removeNonQuery(vectors);
    t.end(null);

    return vectors.iterator();
}
/**
 * Returns a term position vector for the given document field.
 * <p>
 * The vector stored in the index is used when the reader returns one that
 * is a {@link TermPositionVector}. Otherwise, if an analyzer is supplied,
 * the field values are analyzed and a {@link SnippetTermPositionVector} is
 * built from the resulting tokens.
 *
 * @param terms
 *            searched terms, handed to the fallback vector
 * @param readerInterface
 *            index reader queried for a stored term vector
 * @param docId
 *            identifier of the document in the reader
 * @param field
 *            name of the field
 * @param values
 *            field values analyzed by the fallback; items with a
 *            {@code null} value are skipped
 * @param analyzer
 *            analyzer used by the fallback; may be {@code null}
 * @param timer
 *            not used by this method
 * @return the term position vector, or {@code null} when the index holds
 *         none and no analyzer is available
 */
private static final TermPositionVector getTermPositionVector(
        final String[] terms, final ReaderInterface readerInterface,
        final int docId, final String field, List<FieldValueItem> values,
        CompiledAnalyzer analyzer, Timer timer) throws IOException,
        SearchLibException, ParseException, SyntaxError {
    TermFreqVector termFreqVector = readerInterface.getTermFreqVector(
            docId, field);
    // instanceof is false for null, so no separate null check is needed.
    if (termFreqVector instanceof TermPositionVector)
        return (TermPositionVector) termFreqVector;
    if (analyzer == null)
        return null;

    SnippetTermPositionVector stpv = new SnippetTermPositionVector(field,
            terms);
    int positionOffset = 0;
    int characterOffset = 0;
    List<TokenTerm> tokenTerms = new ArrayList<TokenTerm>();
    for (FieldValueItem fieldValueItem : values) {
        if (fieldValueItem.value == null)
            continue;
        analyzer.populate(fieldValueItem.value, tokenTerms);
        positionOffset = stpv.addCollection(tokenTerms, characterOffset,
                positionOffset);
        // NOTE(review): the extra 1 presumably accounts for a separator
        // between consecutive values -- confirm against the snippet
        // builder.
        characterOffset += fieldValueItem.value.length() + 1;
        tokenTerms.clear();
    }
    stpv.compile();
    return stpv;
}
/**
 * Adds to {@code vectors} one {@link SnippetVector} per occurrence of each
 * searched term found in the term vector.
 * <p>
 * Terms that are absent from the vector (index {@code -1}) are skipped.
 * Terms for which the vector holds no offsets or no positions are skipped
 * as well: Lucene documents that {@code getOffsets} and
 * {@code getTermPositions} may return {@code null} when that information
 * was not stored, and a {@link SnippetVector} needs both.
 *
 * @param termVector
 *            vector holding the offsets and positions of the field terms
 * @param terms
 *            searched terms; the index of a term in this array becomes
 *            {@link SnippetVector#term}
 * @param vectors
 *            collection receiving the occurrences
 * @param parentTimer
 *            parent of the timers created by this method
 */
private static final void populate(final TermPositionVector termVector,
        final String[] terms, final Collection<SnippetVector> vectors,
        Timer parentTimer) throws SearchLibException {
    Timer t = new Timer(parentTimer, "indexesOf");
    int[] termsIdx = termVector.indexesOf(terms, 0, terms.length);
    t.end(null);

    int i = 0;
    for (int termId : termsIdx) {
        Timer termTimer = new Timer(parentTimer, "term " + terms[i]);
        if (termId != -1) {
            t = new Timer(termTimer, "getOffsets");
            TermVectorOffsetInfo[] offsets = termVector.getOffsets(termId);
            t.end(null);

            t = new Timer(termTimer, "getTermPositions");
            int[] positions = termVector.getTermPositions(termId);
            t.end(null);

            // Without both offsets and positions no occurrence can be
            // described; skip the term instead of failing with an NPE.
            if (offsets != null && positions != null) {
                t = new Timer(termTimer, "SnippetVector");
                int j = 0;
                for (TermVectorOffsetInfo offset : offsets)
                    vectors.add(new SnippetVector(i, offset,
                            positions[j++]));
                t.end(null);
            }
        }
        termTimer.end(null);
        i++;
    }
}
/**
 * Drops the occurrences that start at the same offset as a longer (or
 * equally long) occurrence.
 * <p>
 * The occurrences are sorted by start offset, then end offset. Whenever two
 * neighbours share the same start offset, the earlier one is marked with
 * {@link SnippetVector#remove}. The unmarked occurrences are returned in
 * sorted order.
 *
 * @param vectorCollection
 *            occurrences to filter; its items may get their {@code remove}
 *            flag set
 * @return a new list holding the occurrences that were not marked
 */
private static final Collection<SnippetVector> removeIncludes(
        final Collection<SnippetVector> vectorCollection) {
    SnippetVector[] vectors = vectorCollection
            .toArray(new SnippetVector[vectorCollection.size()]);
    // The constructor sorts the array in place.
    new SnippetVectorSort(vectors);

    SnippetVector last = null;
    for (SnippetVector current : vectors) {
        if (last != null && current.start == last.start
                && current.end >= last.end)
            last.remove = true;
        last = current;
    }

    List<SnippetVector> vectorList = new ArrayList<SnippetVector>(
            vectors.length);
    for (SnippetVector vector : vectors)
        if (!vector.remove)
            vectorList.add(vector);
    return vectorList;
}
/**
 * Keeps only the occurrences whose {@link SnippetVector#query} flag is set.
 *
 * @param vectors
 *            occurrences to filter; the collection itself is not modified
 * @return a new list holding the flagged occurrences, in iteration order
 */
private static final Collection<SnippetVector> removeNonQuery(
        final Collection<SnippetVector> vectors) {
    List<SnippetVector> vectorList = new ArrayList<SnippetVector>(
            vectors.size());
    for (SnippetVector vector : vectors)
        if (vector.query)
            vectorList.add(vector);
    return vectorList;
}
/**
 * Sorts an array of {@link SnippetVector} in place, by start offset and
 * then by end offset. The sort runs in the constructor.
 */
private static class SnippetVectorSort implements IntComparator, Swapper {

    private final SnippetVector[] vectors;

    /**
     * Sorts the given array in place.
     *
     * @param vectors
     *            the array to sort
     */
    private SnippetVectorSort(final SnippetVector[] vectors) {
        this.vectors = vectors;
        Arrays.quickSort(0, vectors.length, this, this);
    }

    @Override
    public final int compare(final Integer k1, final Integer k2) {
        return compare(k1.intValue(), k2.intValue());
    }

    @Override
    public final void swap(final int k1, final int k2) {
        final SnippetVector tmp = vectors[k1];
        vectors[k1] = vectors[k2];
        vectors[k2] = tmp;
    }

    /**
     * Compares the items at two indexes by start offset, then end offset.
     * Explicit comparisons are used rather than a subtraction so the result
     * cannot be corrupted by integer overflow.
     */
    @Override
    public final int compare(final int k1, final int k2) {
        final SnippetVector v1 = vectors[k1];
        final SnippetVector v2 = vectors[k2];
        if (v1.start != v2.start)
            return v1.start < v2.start ? -1 : 1;
        if (v1.end != v2.end)
            return v1.end < v2.end ? -1 : 1;
        return 0;
    }
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy