com.jaeksoft.searchlib.snippet.SnippetQueries Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of opensearchserver Show documentation
Show all versions of opensearchserver Show documentation
OpenSearchServer is a powerful, enterprise-class search engine program. Using the web user interface, the crawlers (web, file, database, ...) and the REST/RESTful API, you can quickly and easily integrate advanced full-text search capabilities into your application. OpenSearchServer runs on Windows and Linux/Unix/BSD.
The newest version!
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2013-2014 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.snippet;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import com.jaeksoft.searchlib.snippet.SnippetVectors.SnippetVector;
import com.jaeksoft.searchlib.util.Timer;
/**
 * Extracts from a Lucene query the terms that target a single field, and
 * flags the {@link SnippetVector}s that match them.
 * <p>
 * Only {@link BooleanQuery}, {@link TermQuery} and {@link PhraseQuery} are
 * inspected; any other query type is ignored. Inside a boolean query only
 * the MUST and SHOULD clauses are followed.
 * <p>
 * Every distinct term text receives an index (its position in
 * {@link #terms}). The code compares {@code SnippetVector.term} against
 * these indexes.
 */
class SnippetQueries {

    /** Name of the field whose terms are collected; other fields are skipped. */
    private final String field;

    /** Term text to term index (the index of the text in {@link #termList}). */
    private final Map<String, Integer> termMap;

    /** Distinct term texts, in order of first appearance in the query. */
    private final List<String> termList;

    /** Indexes of the terms found in term queries. */
    private final Set<Integer> termQuerySet;

    /** Indexes of the terms found in phrase queries. */
    private final Set<Integer> termPhraseSet;

    /** Phrase queries that kept at least two terms for the field. */
    private final List<TermSequence> termSequenceList;

    /** Snapshot of the distinct term texts, indexed by term index. */
    final String[] terms;

    /**
     * Parses the query and collects the terms that apply to the field.
     *
     * @param query
     *            the query to inspect; {@code null} yields no term
     * @param field
     *            the name of the field whose terms are collected
     */
    SnippetQueries(final Query query, final String field) {
        this.field = field;
        termQuerySet = new TreeSet<Integer>();
        termPhraseSet = new TreeSet<Integer>();
        termMap = new TreeMap<String, Integer>();
        termList = new ArrayList<String>();
        termSequenceList = new ArrayList<TermSequence>(2);
        parse(query);
        terms = termList.toArray(new String[termList.size()]);
    }

    /**
     * Returns the index of the term text, registering it first if it has not
     * been seen yet.
     *
     * @param term
     *            the term text
     * @return the index of the term text in the term list
     */
    private int checkTerm(final String term) {
        final Integer known = termMap.get(term);
        if (known != null)
            return known;
        final int index = termList.size();
        termMap.put(term, index);
        termList.add(term);
        return index;
    }

    /**
     * Registers the term of a term query when it targets the field.
     *
     * @param query
     *            the term query
     */
    private void parse(final TermQuery query) {
        final Term term = query.getTerm();
        if (!field.equals(term.field()))
            return;
        termQuerySet.add(checkTerm(term.text()));
    }

    /**
     * An ordered sequence of term indexes taken from a phrase query, together
     * with the slop of that query.
     */
    private static final class TermSequence {

        private final int slop;

        private final int[] terms;

        private TermSequence(final List<Integer> termPosSequence,
                final int slop) {
            terms = new int[termPosSequence.size()];
            int i = 0;
            for (Integer termPos : termPosSequence)
                terms[i++] = termPos;
            this.slop = slop;
        }
    }

    /**
     * Registers the terms of a phrase query that target the field. The phrase
     * itself is only kept when at least two of its terms were retained.
     *
     * @param query
     *            the phrase query
     */
    private void parse(final PhraseQuery query) {
        final Term[] phraseTerms = query.getTerms();
        if (phraseTerms == null)
            return;
        final List<Integer> termPosSequence = new ArrayList<Integer>(
                phraseTerms.length);
        for (Term term : phraseTerms) {
            if (!field.equals(term.field()))
                continue;
            final int pos = checkTerm(term.text());
            termPosSequence.add(pos);
            termPhraseSet.add(pos);
        }
        // Term sequences with one term are not phrase queries
        if (termPosSequence.size() <= 1)
            return;
        termSequenceList.add(new TermSequence(termPosSequence, query
                .getSlop()));
    }

    /**
     * Parses the sub-queries of the MUST and SHOULD clauses of a boolean
     * query. Clauses with any other occurrence are ignored.
     *
     * @param query
     *            the boolean query
     */
    private void parse(final BooleanQuery query) {
        final BooleanClause[] clauses = query.getClauses();
        if (clauses == null)
            return;
        for (BooleanClause clause : clauses) {
            switch (clause.getOccur()) {
            case MUST:
            case SHOULD:
                parse(clause.getQuery());
                break;
            default:
                break;
            }
        }
    }

    /**
     * Dispatches the query to the parser matching its concrete type.
     * Unsupported query types and {@code null} are silently ignored.
     *
     * @param query
     *            the query to inspect, may be {@code null}
     */
    private void parse(final Query query) {
        if (query == null)
            return;
        if (query instanceof BooleanQuery)
            parse((BooleanQuery) query);
        else if (query instanceof TermQuery)
            parse((TermQuery) query);
        else if (query instanceof PhraseQuery)
            parse((PhraseQuery) query);
    }

    /**
     * Flags every vector whose term index belongs to a term query.
     *
     * @param vectors
     *            the vectors to check
     * @param expiration
     *            time limit compared with {@link System#currentTimeMillis()};
     *            0 disables the limit
     */
    private void checkTermQueries(final Collection<SnippetVector> vectors,
            final long expiration) {
        if (termQuerySet.isEmpty())
            return;
        for (SnippetVector vector : vectors) {
            if (vector.query)
                continue;
            if (termQuerySet.contains(vector.term))
                vector.query = true;
            // The time limit is only evaluated after an unflagged vector.
            if (expiration != 0 && System.currentTimeMillis() > expiration)
                return;
        }
    }

    /**
     * Accumulates the vectors matching the successive terms of one
     * {@link TermSequence}, starting from the vector that matched its first
     * term.
     */
    private static class SequenceCollector {

        private enum Result {
            WRONG, CONTINUE, FULL
        }

        private final TermSequence termSequence;

        /** Vectors matched so far, one slot per term of the sequence. */
        private final SnippetVector[] vectors;

        /** Number of slots of {@link #vectors} already filled. */
        private int foundPos;

        /** Highest position at which the next term is still accepted. */
        private int nextPosition;

        /** Term index expected for the next slot. */
        private int nextTerm;

        private SequenceCollector(final TermSequence termSequence,
                final SnippetVector vector) {
            this.termSequence = termSequence;
            vectors = new SnippetVector[termSequence.terms.length];
            foundPos = 0;
            addVector(vector);
        }

        /**
         * Stores the vector in the next free slot and updates the expected
         * term and position.
         *
         * @return FULL when every slot is filled, CONTINUE otherwise
         */
        private Result addVector(final SnippetVector vector) {
            vectors[foundPos++] = vector;
            nextPosition = vector.position + termSequence.slop + 1;
            if (foundPos == vectors.length)
                return Result.FULL;
            nextTerm = termSequence.terms[foundPos];
            return Result.CONTINUE;
        }

        /**
         * Offers a vector to the collector.
         *
         * @return WRONG when the vector lies beyond the accepted position
         *         (presumably vectors arrive in increasing position order -
         *         TODO confirm against the caller), FULL when the sequence
         *         is complete (all its vectors are then flagged), CONTINUE
         *         otherwise
         */
        private Result collect(final SnippetVector vector) {
            if (vector.position > nextPosition)
                return Result.WRONG;
            if (vector.term != nextTerm)
                return Result.CONTINUE;
            if (addVector(vector) != Result.FULL)
                return Result.CONTINUE;
            for (SnippetVector v : vectors)
                v.query = true;
            return Result.FULL;
        }
    }

    /**
     * Flags the vectors that form one of the collected phrase sequences.
     *
     * @param vectors
     *            the vectors to check
     * @param expiration
     *            time limit compared with {@link System#currentTimeMillis()};
     *            0 disables the limit
     */
    private void checkPhraseQueries(final Collection<SnippetVector> vectors,
            final long expiration) {
        if (termSequenceList.isEmpty())
            return;
        final Set<SequenceCollector> collectors = new HashSet<SequenceCollector>();
        final List<SequenceCollector> finished = new ArrayList<SequenceCollector>();
        for (SnippetVector vector : vectors) {
            if (!termPhraseSet.contains(vector.term))
                continue;
            // Offer the vector to the collectors already in progress BEFORE
            // starting new ones: a collector must never be offered the vector
            // that started it, otherwise a phrase beginning with a repeated
            // term would be completed by a single occurrence of that term.
            for (SequenceCollector collector : collectors) {
                if (collector.collect(vector) != SequenceCollector.Result.CONTINUE)
                    finished.add(collector);
            }
            if (!finished.isEmpty()) {
                collectors.removeAll(finished);
                finished.clear();
            }
            // Start a collector for every sequence beginning with this term.
            for (TermSequence termSequence : termSequenceList) {
                if (termSequence.terms[0] == vector.term)
                    collectors.add(new SequenceCollector(termSequence,
                            vector));
            }
            if (expiration != 0 && System.currentTimeMillis() > expiration)
                return;
        }
    }

    /**
     * Runs the term query check, then the phrase query check, on the given
     * vectors. Each pass is measured with a {@link Timer} created from the
     * parent timer.
     *
     * @param vectors
     *            the vectors to check; nothing is done when {@code null}
     * @param parentTimer
     *            the timer passed to the timers created for each pass
     * @param expiration
     *            time limit compared with {@link System#currentTimeMillis()};
     *            0 disables the limit
     */
    final void checkQueries(final Collection<SnippetVector> vectors,
            final Timer parentTimer, final long expiration) {
        if (vectors == null)
            return;
        Timer t = new Timer(parentTimer, "checkTermQueries");
        checkTermQueries(vectors, expiration);
        t.end(null);
        t = new Timer(parentTimer, "checkPhraseQueries");
        checkPhraseQueries(vectors, expiration);
        t.end(null);
    }
}