All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.oak.query.SimpleExcerptProvider Maven / Gradle / Ivy

There is a newer version: 1.62.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.oak.query;

import java.util.BitSet;
import java.util.HashSet;
import java.util.Set;

import com.google.common.base.Splitter;
import com.google.common.collect.ImmutableSet;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.PropertyValue;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.query.ast.AndImpl;
import org.apache.jackrabbit.oak.query.ast.ConstraintImpl;
import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
import org.apache.jackrabbit.oak.query.ast.LiteralImpl;
import org.apache.jackrabbit.oak.query.ast.OrImpl;
import org.apache.jackrabbit.oak.plugins.memory.PropertyValues;

import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;

/**
 * This class can extract excerpts from node.
 */
class SimpleExcerptProvider {

    static final String REP_EXCERPT_FN = "rep:excerpt(.)";
    static final String EXCERPT_END = "
"; static final String EXCERPT_BEGIN = "
"; private static int maxFragmentSize = 150; private SimpleExcerptProvider() { } static String getExcerpt(String path, String columnName, Query query, boolean highlight) { if (path == null) { return null; } Tree t = query.getTree(path); if (t == null || !t.exists()) { return null; } columnName = extractExcerptProperty(columnName); if (columnName != null && columnName.contains("/")) { for (String p : PathUtils.elements(PathUtils .getParentPath(columnName))) { if (t.hasChild(p)) { t = t.getChild(p); } else { return null; } } columnName = PathUtils.getName(columnName); } StringBuilder text = new StringBuilder(); String separator = ""; for (PropertyState p : t.getProperties()) { if (p.getType().tag() == Type.STRING.tag() && (columnName == null || columnName.equalsIgnoreCase(p .getName()))) { text.append(separator); separator = " "; for (String v : p.getValue(Type.STRINGS)) { text.append(v); } } } Set searchToken = extractFulltext(query); if (highlight && searchToken != null) { return highlight(text, searchToken); } return noHighlight(text); } private static String extractExcerptProperty(String column) { // most frequent case first if (REP_EXCERPT_FN.equalsIgnoreCase(column)) { return null; } return column.substring(column.indexOf("(") + 1, column.indexOf(")")); } private static Set extractFulltext(Query q) { // TODO instanceof should not be used if (q instanceof QueryImpl) { return extractFulltext(((QueryImpl) q).getConstraint()); } return ImmutableSet.of(); } private static Set extractFulltext(ConstraintImpl c) { Set tokens = new HashSet(); // TODO instanceof should not be used, // as it will break without us noticing if we extend the AST if (c instanceof FullTextSearchImpl) { FullTextSearchImpl f = (FullTextSearchImpl) c; if (f.getFullTextSearchExpression() instanceof LiteralImpl) { LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression(); tokens.add(l.getLiteralValue().getValue(Type.STRING)); } } if (c instanceof AndImpl) { for (ConstraintImpl constraint : ((AndImpl) c).getConstraints()) { tokens.addAll(extractFulltext(constraint)); } } if (c instanceof OrImpl) { for (ConstraintImpl constraint : ((OrImpl) c).getConstraints()) { tokens.addAll(extractFulltext(constraint)); } } return tokens; } private static Set tokenize(Set in) { Set tokens = new HashSet(); for (String s : in) { tokens.addAll(tokenize(s)); } return tokens; } private static Set tokenize(String in) { Set out = new HashSet(); StringBuilder token = new StringBuilder(); boolean quote = false; for (int i = 0; i < in.length(); ) { final int c = in.codePointAt(i); int length = Character.charCount(c); switch (c) { case ' ': if (quote) { token.append(' '); } else if (token.length() > 0) { out.add(token.toString()); token = new StringBuilder(); } break; case '"': case '\'': if (quote) { quote = false; if (token.length() > 0) { out.add(token.toString()); token = new StringBuilder(); } } else { quote = true; } break; default: token.append(new String(Character.toChars(c))); } i += length; } if (token.length() > 0) { out.add(token.toString()); } return out; } private static String noHighlight(StringBuilder text) { if (text.length() > maxFragmentSize) { int lastSpace = text.lastIndexOf(" ", maxFragmentSize); if (lastSpace != -1) { text.setLength(lastSpace); } else { text.setLength(maxFragmentSize); } text.append(" ..."); } StringBuilder excerpt = new StringBuilder("
"); excerpt.append(encodeIllegalXMLCharacters(text.toString())); excerpt.append("
"); return excerpt.toString(); } static String highlight(StringBuilder text, Set searchToken) { Set tokens = tokenize(searchToken); String escaped = encodeIllegalXMLCharacters(text.toString()); BitSet highlight = new BitSet(); for (String token : tokens) { highlight(escaped, highlight, token); } StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); boolean strong = false; for (int i = 0; i < escaped.length(); i++) { if (highlight.get(i) && !strong) { strong = true; excerpt.append(""); } else if (!highlight.get(i) && strong) { strong = false; excerpt.append(""); } excerpt.append(escaped.charAt(i)); } if (strong) { excerpt.append(""); } excerpt.append(EXCERPT_END); return excerpt.toString(); } private static void highlight(String text, BitSet highlightBits, String token) { boolean isLike = false; if (token.endsWith("*")) { if (token.length() == 1) { // don't highlight the '*' character itself return; } token = token.substring(0, token.length() - 1); isLike = true; } int index = 0; while (index < text.length()) { index = text.indexOf(token, index); if (index < 0) { break; } int endIndex = index + token.length(); if (isLike) { int nextSpace = text.indexOf(" ", endIndex); if (nextSpace != -1) { endIndex = nextSpace; } else { endIndex = text.length(); } } while (index < endIndex) { highlightBits.set(index++); } } } static PropertyValue getExcerpt(PropertyValue value) { Splitter listSplitter = Splitter.on(',').trimResults().omitEmptyStrings(); StringBuilder excerpt = new StringBuilder(EXCERPT_BEGIN); for (String v : listSplitter.splitToList(value.toString())) { excerpt.append(v); } excerpt.append(EXCERPT_END); return PropertyValues.newString(excerpt.toString()); } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy