All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.oak.query.fulltext.FullTextParser Maven / Gradle / Ivy

There is a newer version: 1.62.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.query.fulltext;

import java.text.ParseException;
import java.util.ArrayList;

import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;


/**
 * A parser for fulltext condition literals. The grammar is defined in the
 * 
 * JCR 2.0 specification, 6.7.19 FullTextSearch,
 * as follows (a bit simplified):
 * 
 * FullTextSearchLiteral ::= Disjunct {' OR ' Disjunct}
 * Disjunct ::= Term {' ' Term}
 * Term ::= ['-'] SimpleTerm
 * SimpleTerm ::= Word | '"' Word {' ' Word} '"'
 * 
*/ public class FullTextParser { private String propertyName; private String text; private int parseIndex; public static FullTextExpression parse(String propertyName, String text) throws ParseException { FullTextParser p = new FullTextParser(); p.propertyName = propertyName; p.text = text; FullTextExpression e = p.parseOr(); return e; } FullTextExpression parseOr() throws ParseException { ArrayList list = new ArrayList(); list.add(parseAnd()); while (parseIndex < text.length()) { if (text.substring(parseIndex).startsWith("OR ")) { parseIndex += 3; list.add(parseAnd()); } else { break; } } FullTextOr or = new FullTextOr(list); return or.simplify(); } FullTextExpression parseAnd() throws ParseException { ArrayList list = new ArrayList(); list.add(parseTerm()); while (parseIndex < text.length()) { if (text.substring(parseIndex).startsWith("OR ")) { break; } list.add(parseTerm()); } FullTextAnd and = new FullTextAnd(list); return and.simplify(); } FullTextExpression parseTerm() throws ParseException { if (parseIndex >= text.length()) { throw getSyntaxError("term"); } boolean not = false; StringBuilder buff = new StringBuilder(); char c = text.charAt(parseIndex); if (c == '-' && parseIndex < text.length() - 1 && text.charAt(parseIndex + 1) != ' ') { c = text.charAt(++parseIndex); not = true; } boolean escaped = false; String boost = null; if (c == '\"') { parseIndex++; while (true) { if (parseIndex >= text.length()) { throw getSyntaxError("double quote"); } c = text.charAt(parseIndex++); if (c == '\\') { escaped = true; if (parseIndex >= text.length()) { throw getSyntaxError("escaped char"); } c = text.charAt(parseIndex++); buff.append(c); } else if (c == '\"') { if (parseIndex < text.length()) { if (text.charAt(parseIndex) == '^') { boost = ""; } else if (text.charAt(parseIndex) != ' ') { throw getSyntaxError("space"); } } parseIndex++; break; } else { buff.append(c); } } } else if (c == '\'' && FullTextSearchImpl.JACKRABBIT_2_SINGLE_QUOTED_PHRASE) { // basically the same as double quote parseIndex++; while (true) { if (parseIndex >= text.length()) { throw getSyntaxError("single quote"); } c = text.charAt(parseIndex++); if (c == '\\') { escaped = true; if (parseIndex >= text.length()) { throw getSyntaxError("escaped char"); } c = text.charAt(parseIndex++); buff.append(c); } else if (c == '\'') { if (parseIndex < text.length()) { if (text.charAt(parseIndex) == '^') { boost = ""; } else if (text.charAt(parseIndex) != ' ') { throw getSyntaxError("space"); } } parseIndex++; break; } else { buff.append(c); } } } else { do { c = text.charAt(parseIndex++); if (c == '\\') { escaped = true; if (parseIndex >= text.length()) { throw getSyntaxError("escaped char"); } c = text.charAt(parseIndex++); buff.append(c); } else if (c == '^') { boost = ""; break; } else if (c <= ' ') { while (parseIndex < text.length()) { c = text.charAt(parseIndex); if (c > ' ') { break; } parseIndex++; } break; } else { buff.append(c); } } while (parseIndex < text.length()); } if (boost != null) { StringBuilder b = new StringBuilder(); while (parseIndex < text.length()) { c = text.charAt(parseIndex++); if ((c < '0' || c > '9') && c != '.') { break; } b.append(c); } boost = b.toString(); } if (buff.length() == 0) { throw getSyntaxError("term"); } String text = buff.toString(); FullTextTerm term = new FullTextTerm(propertyName, text, not, escaped, boost); return term.simplify(); } private ParseException getSyntaxError(String expected) { int index = Math.max(0, Math.min(parseIndex, text.length() - 1)); String query = text.substring(0, index) + "(*)" + text.substring(index).trim(); if (expected != null) { query += "; expected: " + expected; } return new ParseException("FullText expression: " + query, index); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy