
// com.jaeksoft.searchlib.query.QueryParser Maven / Gradle / Ivy
/**
* License Agreement for OpenSearchServer
*
* Copyright (C) 2013-2015 Emmanuel Keller / Jaeksoft
*
* http://www.open-search-server.com
*
* This file is part of OpenSearchServer.
*
* OpenSearchServer is free software: you can redistribute it and/or
* modify it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* OpenSearchServer is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with OpenSearchServer.
* If not, see <http://www.gnu.org/licenses/>.
**/
package com.jaeksoft.searchlib.query;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Set;
import java.util.TreeSet;
import org.antlr.v4.runtime.ANTLRInputStream;
import org.antlr.v4.runtime.BailErrorStrategy;
import org.antlr.v4.runtime.BaseErrorListener;
import org.antlr.v4.runtime.CommonTokenStream;
import org.antlr.v4.runtime.RecognitionException;
import org.antlr.v4.runtime.Recognizer;
import org.antlr.v4.runtime.tree.TerminalNode;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import com.jaeksoft.searchlib.analysis.CompiledAnalyzer;
import com.jaeksoft.searchlib.query.parser.BooleanQueryBaseListener;
import com.jaeksoft.searchlib.query.parser.BooleanQueryLexer;
import com.jaeksoft.searchlib.query.parser.BooleanQueryParser;
import com.jaeksoft.searchlib.util.StringUtils;
public class QueryParser extends BooleanQueryBaseListener {
private final Set fields;
private final String defaultField;
private final int defaultOperator;
private final CompiledAnalyzer analyzer;
private final int phraseSlop;
private final Double termBoost;
private final Double phraseBoost;
private String currentField;
private int currentOperator;
private Query holdQuery;
private BooleanQuery booleanQuery;
private IOException ioError;
public QueryParser(final String defaultField, final Set fields,
final Occur occur, final CompiledAnalyzer analyzer,
final int phraseSlop, final Double termBoost,
final Double phraseBoost) {
this.defaultField = defaultField;
this.fields = fields;
this.defaultOperator = getOperator(occur);
this.analyzer = analyzer;
this.phraseSlop = phraseSlop;
this.termBoost = termBoost;
this.phraseBoost = phraseBoost;
}
final private static int getOperator(final Occur occur) {
if (occur == null)
return BooleanQueryLexer.AND;
switch (occur) {
default:
case MUST:
return BooleanQueryLexer.AND;
case MUST_NOT:
return BooleanQueryLexer.NOT;
case SHOULD:
return BooleanQueryLexer.OR;
}
}
final private void addBooleanClause(final Query query, final int operator) {
Occur occur = null;
switch (operator) {
case BooleanQueryLexer.AND:
occur = Occur.MUST;
break;
case BooleanQueryLexer.OR:
occur = Occur.SHOULD;
break;
case BooleanQueryLexer.NOT:
occur = Occur.MUST_NOT;
break;
}
booleanQuery.add(new BooleanClause(query, occur));
}
final private void addBooleanClause(final Query query) {
if (currentOperator == -1) {
holdQuery = query;
} else {
if (holdQuery != null) {
addBooleanClause(
holdQuery,
currentOperator == BooleanQueryLexer.NOT ? defaultOperator
: currentOperator);
holdQuery = null;
}
addBooleanClause(query, currentOperator);
}
currentOperator = defaultOperator;
}
final private List getWords(final String text) throws IOException {
List words = new ArrayList(1);
if (analyzer != null)
analyzer.extractTerms(text, words);
else
words.add(text);
return words;
}
final private void addTermQuery(final String text) throws IOException {
for (String word : getWords(text)) {
Term term = new Term(currentField, word);
TermQuery termQuery = new TermQuery(term);
if (termBoost != null)
termQuery.setBoost(termBoost.floatValue());
addBooleanClause(termQuery);
}
currentField = defaultField;
}
final private void addPhraseQuery(String text) throws IOException {
int s = 0;
if (text.startsWith("\""))
s = 1;
int l = text.length() + 1 - s;
if (text.endsWith("\""))
l--;
if (l > 0)
text = text.substring(s, l);
PhraseQuery phraseQuery = new PhraseQuery();
phraseQuery.setSlop(phraseSlop);
if (phraseBoost != null)
phraseQuery.setBoost(phraseBoost.floatValue());
for (String word : getWords(text))
phraseQuery.add(new Term(currentField, word));
addBooleanClause(phraseQuery);
currentField = defaultField;
}
final private void setCurrentField(final String text) throws IOException {
String field = text.endsWith(":") ? text
.substring(0, text.length() - 1) : text;
if (fields != null && fields.contains(field))
currentField = field;
else
addTermQuery(text);
}
@Override
final public void visitTerminal(final TerminalNode node) {
try {
int type = node.getSymbol().getType();
switch (type) {
case BooleanQueryLexer.AND:
case BooleanQueryLexer.OR:
case BooleanQueryLexer.NOT:
currentOperator = type;
break;
case BooleanQueryLexer.QSTRING:
addPhraseQuery(node.getText());
break;
case BooleanQueryLexer.STRING:
addTermQuery(node.getText());
break;
case BooleanQueryLexer.FIELD:
setCurrentField(node.getText());
break;
case BooleanQueryLexer.WS:
currentField = defaultField;
break;
default:
break;
}
} catch (IOException e) {
ioError = e;
}
}
private final class ErrorListener extends BaseErrorListener {
@Override
public void syntaxError(Recognizer, ?> recognizer,
Object offendingSymbol, int line, int charPositionInLine,
String msg, RecognitionException e) {
ioError = new IOException(StringUtils.fastConcat("line: ",
Integer.toString(line), " - pos: ",
Integer.toString(charPositionInLine), " - ", msg));
}
}
public final Query parse(String query) throws IOException {
try {
currentOperator = -1;
currentField = defaultField;
holdQuery = null;
booleanQuery = new BooleanQuery();
ioError = null;
ANTLRInputStream input = new ANTLRInputStream(query);
BooleanQueryLexer lexer = new BooleanQueryLexer(input);
ErrorListener errorListener = new ErrorListener();
lexer.removeErrorListeners();
lexer.addErrorListener(errorListener);
CommonTokenStream tokens = new CommonTokenStream(lexer);
BooleanQueryParser parser = new BooleanQueryParser(tokens);
BailErrorStrategy errorHandler = new BailErrorStrategy();
parser.setErrorHandler(errorHandler);
parser.addParseListener(this);
parser.removeErrorListeners();
parser.addErrorListener(errorListener);
parser.expression();
if (ioError != null)
throw ioError;
if (holdQuery != null)
addBooleanClause(holdQuery, currentOperator);
return booleanQuery;
} catch (org.antlr.v4.runtime.RecognitionException e) {
if (ioError != null)
throw ioError;
throw new IOException(e);
} catch (org.antlr.v4.runtime.misc.ParseCancellationException e) {
if (ioError != null)
throw ioError;
throw new IOException(e);
}
}
public final static void main(String[] arvs) throws IOException {
TreeSet fields = new TreeSet();
fields.add("field2");
fields.add("field3");
QueryParser queryParser = new QueryParser("field", fields, Occur.MUST,
null, 1, null, null);
System.out.println(queryParser.parse("word"));
System.out
.println(queryParser
.parse("word1 field2:word2 fauxfield\\:fauxword field:field3:\"quoted words\""));
System.out.println(queryParser.parse("field:\"quoted words\""));
System.out.println(queryParser.parse("\"quoted words\""));
System.out.println(queryParser.parse("\"quoted words\" word"));
System.out.println(queryParser.parse("word OR \"quoted words\""));
System.out.println(queryParser.parse("word OR \"quoted words"));
System.out.println(queryParser
.parse("word1 word2 AND \"quoted words\""));
System.out.println(queryParser
.parse("word1 OU word2 \"quoted words\" NON unwanted"));
System.out.println(queryParser.parse("\"\""));
System.out.println(queryParser.parse("\"non ending quote"));
System.out.println(queryParser.parse("22\""));
System.out.println(queryParser.parse("\""));
System.out.println(queryParser.parse("OU OU"));
System.out.println(queryParser.parse("é \"é\""));
}
}
// © 2015 - 2025 Weber Informatics LLC | Privacy Policy