// org.apache.jackrabbit.oak.query.fulltext.FullTextParser Maven / Gradle / Ivy
/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package org.apache.jackrabbit.oak.query.fulltext;
import java.text.ParseException;
import java.util.ArrayList;
import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
/**
 * A parser for fulltext condition literals. The grammar is defined in the
 * JCR 2.0 specification, section 6.7.19 "FullTextSearch",
 * as follows (a bit simplified):
 * <pre>
 * FullTextSearchLiteral ::= Disjunct {' OR ' Disjunct}
 * Disjunct ::= Term {' ' Term}
 * Term ::= ['-'] SimpleTerm
 * SimpleTerm ::= Word | '"' Word {' ' Word} '"'
 * </pre>
 */
public class FullTextParser {

    /** Name of the property the parsed terms apply to; passed through to each term. */
    private String propertyName;

    /** The fulltext expression being parsed. */
    private String text;

    /** Index into {@link #text} of the next character to be read. */
    private int parseIndex;

    /**
     * Parses a fulltext condition literal.
     *
     * @param propertyName the property name handed to every created term
     * @param text the fulltext expression to parse
     * @return the expression returned by the top-level OR node's {@code simplify()}
     * @throws ParseException if the text is not a valid fulltext expression
     */
    public static FullTextExpression parse(String propertyName, String text) throws ParseException {
        FullTextParser p = new FullTextParser();
        p.propertyName = propertyName;
        p.text = text;
        return p.parseOr();
    }

    /**
     * Parses one or more conjunctions separated by the keyword {@code "OR "}.
     *
     * @return the simplified disjunction of the parsed conjunctions
     * @throws ParseException on a syntax error in one of the conjunctions
     */
    FullTextExpression parseOr() throws ParseException {
        ArrayList<FullTextExpression> list = new ArrayList<FullTextExpression>();
        list.add(parseAnd());
        while (parseIndex < text.length() && isAtOrKeyword()) {
            // skip the three characters of "OR "
            parseIndex += 3;
            list.add(parseAnd());
        }
        FullTextOr or = new FullTextOr(list);
        return or.simplify();
    }

    /**
     * Parses a sequence of terms up to the end of the input or up to the
     * next {@code "OR "} keyword (which is not consumed).
     *
     * @return the simplified conjunction of the parsed terms
     * @throws ParseException on a syntax error in one of the terms
     */
    FullTextExpression parseAnd() throws ParseException {
        ArrayList<FullTextExpression> list = new ArrayList<FullTextExpression>();
        list.add(parseTerm());
        while (parseIndex < text.length() && !isAtOrKeyword()) {
            list.add(parseTerm());
        }
        FullTextAnd and = new FullTextAnd(list);
        return and.simplify();
    }

    /**
     * Parses a single term: an optional leading {@code '-'} (negation),
     * followed by either a quoted phrase or an unquoted word, optionally
     * followed by {@code '^'} and a numeric boost. A backslash escapes the
     * character that follows it.
     *
     * @return the result of {@code simplify()} on the created term
     * @throws ParseException if the input ends unexpectedly, a closing quote
     *         is followed by something other than a space or {@code '^'},
     *         or the term is empty
     */
    FullTextExpression parseTerm() throws ParseException {
        if (parseIndex >= text.length()) {
            throw getSyntaxError("term");
        }
        boolean not = false;
        StringBuilder buff = new StringBuilder();
        char c = text.charAt(parseIndex);
        // A '-' only negates when it is directly followed by a non-space
        // character; otherwise it is treated as an ordinary word character.
        if (c == '-' && parseIndex < text.length() - 1
                && text.charAt(parseIndex + 1) != ' ') {
            c = text.charAt(++parseIndex);
            not = true;
        }
        boolean escaped = false;
        boolean boosted = false;
        if (c == '\"' || (c == '\'' && FullTextSearchImpl.JACKRABBIT_2_SINGLE_QUOTED_PHRASE)) {
            // Quoted phrase; single quotes behave exactly like double quotes
            // when the compatibility flag is enabled.
            final char quote = c;
            final String quoteName = quote == '\"' ? "double quote" : "single quote";
            parseIndex++;
            while (true) {
                if (parseIndex >= text.length()) {
                    throw getSyntaxError(quoteName);
                }
                c = text.charAt(parseIndex++);
                if (c == '\\') {
                    escaped = true;
                    if (parseIndex >= text.length()) {
                        throw getSyntaxError("escaped char");
                    }
                    buff.append(text.charAt(parseIndex++));
                } else if (c == quote) {
                    if (parseIndex < text.length()) {
                        char next = text.charAt(parseIndex);
                        if (next == '^') {
                            boosted = true;
                        } else if (next != ' ') {
                            throw getSyntaxError("space");
                        }
                    }
                    // skip the character after the closing quote (' ' or '^');
                    // at the end of the input this moves past the end, which
                    // all later bounds checks tolerate
                    parseIndex++;
                    break;
                } else {
                    buff.append(c);
                }
            }
        } else {
            // Unquoted word: ends at '^' (boost follows), at a character
            // <= ' ' (whitespace / control), or at the end of the input.
            do {
                c = text.charAt(parseIndex++);
                if (c == '\\') {
                    escaped = true;
                    if (parseIndex >= text.length()) {
                        throw getSyntaxError("escaped char");
                    }
                    buff.append(text.charAt(parseIndex++));
                } else if (c == '^') {
                    boosted = true;
                    break;
                } else if (c <= ' ') {
                    skipWhitespace();
                    break;
                } else {
                    buff.append(c);
                }
            } while (parseIndex < text.length());
        }
        // The boost is read before the empty-term check so that the error
        // position reported for an empty term stays the same as before.
        String boost = boosted ? parseBoost() : null;
        if (buff.length() == 0) {
            throw getSyntaxError("term");
        }
        String termText = buff.toString();
        FullTextTerm term = new FullTextTerm(propertyName, termText, not, escaped, boost);
        return term.simplify();
    }

    /**
     * Checks whether the input at the current position starts with the
     * keyword {@code "OR "}, without copying the remaining input.
     *
     * @return true if the next three characters are {@code "OR "}
     */
    private boolean isAtOrKeyword() {
        return text.startsWith("OR ", parseIndex);
    }

    /**
     * Advances the parse position past all consecutive characters that are
     * less than or equal to a space.
     */
    private void skipWhitespace() {
        while (parseIndex < text.length() && text.charAt(parseIndex) <= ' ') {
            parseIndex++;
        }
    }

    /**
     * Reads the boost value that follows a {@code '^'}: the longest run of
     * digits and dots. The first character that is not part of the value is
     * consumed as well.
     *
     * @return the boost as a string (possibly empty)
     */
    private String parseBoost() {
        StringBuilder b = new StringBuilder();
        while (parseIndex < text.length()) {
            char c = text.charAt(parseIndex++);
            if ((c < '0' || c > '9') && c != '.') {
                break;
            }
            b.append(c);
        }
        return b.toString();
    }

    /**
     * Builds a syntax error whose message shows the input with the marker
     * {@code (*)} inserted at the error position.
     *
     * @param expected what the parser expected at this position, or null
     * @return the exception to throw; its error offset is the parse position
     *         clamped to the range of valid indexes of the input
     */
    private ParseException getSyntaxError(String expected) {
        int index = Math.max(0, Math.min(parseIndex, text.length() - 1));
        String query = text.substring(0, index) + "(*)" + text.substring(index).trim();
        if (expected != null) {
            query += "; expected: " + expected;
        }
        return new ParseException("FullText expression: " + query, index);
    }
}