All downloads are free. Search and download functionality uses the official Maven repository.

org.apache.jackrabbit.oak.query.fulltext.SimpleExcerptProvider Maven / Gradle / Ivy

There is a newer version: 1.64.0
Show newest version
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.oak.query.fulltext;

import static org.apache.jackrabbit.util.Text.encodeIllegalXMLCharacters;

import java.util.HashSet;
import java.util.Set;

import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Tree;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.query.Query;
import org.apache.jackrabbit.oak.query.QueryImpl;
import org.apache.jackrabbit.oak.query.ast.AndImpl;
import org.apache.jackrabbit.oak.query.ast.ConstraintImpl;
import org.apache.jackrabbit.oak.query.ast.FullTextSearchImpl;
import org.apache.jackrabbit.oak.query.ast.LiteralImpl;
import org.apache.jackrabbit.oak.query.ast.OrImpl;

import com.google.common.collect.ImmutableSet;

public class SimpleExcerptProvider {

    private static final String REP_EXCERPT_FN = "rep:excerpt(.)";

    private static int maxFragmentSize = 150;

    public static String getExcerpt(String path, String columnName,
            Query query, boolean highlight) {
        if (path == null) {
            return null;
        }
        Tree t = query.getTree(path);
        if (t == null || !t.exists()) {
            return null;
        }
        columnName = extractExcerptProperty(columnName);
        if (columnName != null && columnName.contains("/")) {
            for (String p : PathUtils.elements(PathUtils
                    .getParentPath(columnName))) {
                if (t.hasChild(p)) {
                    t = t.getChild(p);
                } else {
                    return null;
                }
            }
            columnName = PathUtils.getName(columnName);
        }

        StringBuilder text = new StringBuilder();
        String separator = "";
        for (PropertyState p : t.getProperties()) {
            if (p.getType().tag() == Type.STRING.tag()
                    && (columnName == null || columnName.equalsIgnoreCase(p
                            .getName()))) {
                text.append(separator);
                separator = " ";
                for (String v : p.getValue(Type.STRINGS)) {
                    text.append(v);
                }
            }
        }
        Set searchToken = extractFulltext(query);
        if (highlight && searchToken != null) {
            String h = highlight(text, searchToken);
            return h;
        }
        return noHighlight(text);
    }

    private static String extractExcerptProperty(String column) {
        // most frequent case first
        if (REP_EXCERPT_FN.equalsIgnoreCase(column)) {
            return null;
        }
        return column.substring(column.indexOf("(") + 1, column.indexOf(")"));
    }

    private static Set extractFulltext(Query q) {
        // TODO instanceof should not be used
        if (q instanceof QueryImpl) {
            return extractFulltext(((QueryImpl) q).getConstraint());
        }
        return ImmutableSet.of();
    }

    private static Set extractFulltext(ConstraintImpl c) {
        Set tokens = new HashSet();
        // TODO instanceof should not be used,
        // as it will break without us noticing if we extend the AST
        if (c instanceof FullTextSearchImpl) {
            FullTextSearchImpl f = (FullTextSearchImpl) c;
            if (f.getFullTextSearchExpression() instanceof LiteralImpl) {
                LiteralImpl l = (LiteralImpl) f.getFullTextSearchExpression();
                tokens.add(l.getLiteralValue().getValue(Type.STRING));
            }
        }
        if (c instanceof AndImpl) {
            AndImpl a = (AndImpl) c;
            tokens.addAll(extractFulltext(a.getConstraint1()));
            tokens.addAll(extractFulltext(a.getConstraint2()));
        }
        if (c instanceof OrImpl) {
            OrImpl o = (OrImpl) c;
            tokens.addAll(extractFulltext(o.getConstraint1()));
            tokens.addAll(extractFulltext(o.getConstraint2()));
        }
        return tokens;
    }

    private static Set tokenize(Set in) {
        Set tokens = new HashSet();
        for (String s : in) {
            tokens.addAll(tokenize(s));
        }
        return tokens;
    }

    private static Set tokenize(String in) {
        Set out = new HashSet();
        StringBuilder token = new StringBuilder();
        boolean quote = false;
        for (int i = 0; i < in.length();) {
            final int c = in.codePointAt(i);
            int length = Character.charCount(c);
            switch (c) {
            case ' ':
                if (quote) {
                    token.append(' ');
                } else if (token.length() > 0) {
                    out.add(token.toString());
                    token = new StringBuilder();
                }
                break;
            case '"':
            case '\'':
                if (quote) {
                    quote = false;
                    if (token.length() > 0) {
                        out.add(token.toString());
                        token = new StringBuilder();
                    }
                } else {
                    quote = true;
                }
                break;
            default:
                token.append(new String(Character.toChars(c)));
            }
            i += length;
        }
        if (token.length() > 0) {
            out.add(token.toString());
        }
        return out;
    }

    private static String noHighlight(StringBuilder text) {
        if (text.length() > maxFragmentSize) {
            int lastSpace = text.lastIndexOf(" ", maxFragmentSize);
            if (lastSpace != -1) {
                text.setLength(lastSpace);
            } else {
                text.setLength(maxFragmentSize);
            }
            text.append(" ...");
        }
        StringBuilder excerpt = new StringBuilder("
"); excerpt.append(encodeIllegalXMLCharacters(text.toString())); excerpt.append("
"); return excerpt.toString(); } private static String highlight(StringBuilder text, Set searchToken) { Set tokens = tokenize(searchToken); text = new StringBuilder(encodeIllegalXMLCharacters(text.toString())); for (String token : tokens) { text = replaceAll(text, token, "", ""); } StringBuilder excerpt = new StringBuilder("
"); excerpt.append(text.toString()); excerpt.append("
"); return excerpt.toString(); } private static StringBuilder replaceAll(StringBuilder in, String token, String start, String end) { boolean isLike = false; if (token.endsWith("*")) { token = token.substring(0, token.length() - 1); isLike = true; } int index = in.indexOf(token); while (index != -1) { int endIndex = index + token.length(); if (isLike) { int nextSpace = in.indexOf(" ", endIndex); if (nextSpace != -1) { endIndex = nextSpace; } else { endIndex = in.length(); } } String current = in.substring(index, endIndex); StringBuilder newToken = new StringBuilder(start); newToken.append(current); newToken.append(end); String newTokenS = newToken.toString(); in.replace(index, index + current.length(), newTokenS); index = in.indexOf(token, in.lastIndexOf(newTokenS) + newTokenS.length()); } return in; } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy