All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.apache.jackrabbit.oak.plugins.index.lucene.LuceneDocumentMaker Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.jackrabbit.oak.plugins.index.lucene;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import com.google.common.collect.Iterables;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.plugins.index.lucene.IndexDefinition.IndexingRule;
import org.apache.jackrabbit.oak.plugins.index.lucene.binary.BinaryTextExtractor;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.FacetsConfigProvider;
import org.apache.jackrabbit.oak.plugins.index.lucene.util.FunctionIndexProcessor;
import org.apache.jackrabbit.oak.plugins.memory.StringPropertyState;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DoubleDocValuesField;
import org.apache.lucene.document.DoubleField;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.LongField;
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedDocValuesField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.facet.FacetsConfig;
import org.apache.lucene.facet.sortedset.SortedSetDocValuesFacetField;
import org.apache.lucene.util.BytesRef;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.google.common.base.Preconditions.checkNotNull;
import static org.apache.jackrabbit.oak.commons.PathUtils.getName;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newAncestorsField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newDepthField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newFulltextField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPathField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.FieldFactory.newPropertyField;
import static org.apache.jackrabbit.oak.plugins.index.lucene.util.ConfigUtil.getPrimaryTypeName;

/**
 * Builds the Lucene {@link Document} for a single node, driven by an
 * {@link IndexDefinition} and the {@link IndexingRule} applicable to that node.
 */
public class LuceneDocumentMaker {
    private static final Logger log = LoggerFactory.getLogger(LuceneDocumentMaker.class);
    // Extracts text from binary properties; when null, binary properties yield no fields.
    private final BinaryTextExtractor textExtractor;
    // Source of the FacetsConfig; when null, faceting is treated as disabled.
    private final FacetsConfigProvider facetsConfigProvider;
    // Index definition this maker produces documents for (never null).
    private final IndexDefinition definition;
    // Rule that decides which properties are indexed and how (never null).
    private final IndexingRule indexingRule;
    // Provider of custom (augmented) fields; when null, no custom fields are added.
    private final IndexAugmentorFactory augmentorFactory;
    // Path of the node being indexed (never null).
    private final String path;

    /**
     * Creates a document maker without a binary text extractor, facets
     * configuration provider or index augmentor (all three are passed as
     * {@code null} to the full constructor).
     *
     * @param definition   the index definition
     * @param indexingRule the indexing rule to apply to the node
     * @param path         the path of the node being indexed
     */
    public LuceneDocumentMaker(@Nonnull IndexDefinition definition,
                               @Nonnull IndexingRule indexingRule,
                               @Nonnull String path) {
        this(null, null, null, definition, indexingRule, path);
    }

    /**
     * Creates a document maker with optional collaborators.
     *
     * @param textExtractor        extractor for binary properties, or {@code null}
     *                             to skip binary text extraction
     * @param facetsConfigProvider provider of the facets configuration, or
     *                             {@code null} to disable faceting
     * @param augmentorFactory     factory for custom field providers, or
     *                             {@code null} to add no custom fields
     * @param definition           the index definition (must not be null)
     * @param indexingRule         the indexing rule to apply (must not be null)
     * @param path                 the path of the node being indexed (must not be null)
     */
    public LuceneDocumentMaker(@Nullable BinaryTextExtractor textExtractor,
                               @Nullable FacetsConfigProvider facetsConfigProvider,
                               @Nullable IndexAugmentorFactory augmentorFactory,
                               @Nonnull IndexDefinition definition,
                               @Nonnull IndexingRule indexingRule,
                               @Nonnull String path) {
        // Mandatory arguments: validated in the same order as before.
        this.definition = checkNotNull(definition);
        this.indexingRule = checkNotNull(indexingRule);
        this.path = checkNotNull(path);

        // Optional collaborators are stored exactly as supplied.
        this.textExtractor = textExtractor;
        this.facetsConfigProvider = facetsConfigProvider;
        this.augmentorFactory = augmentorFactory;
    }

    /**
     * Builds the document for the given node state, treating the call as a
     * non-update with no modified properties.
     *
     * @param state the node state to index
     * @return the document, or {@code null} if no document is to be created
     * @throws IOException declared by the three-argument overload this delegates to
     */
    @CheckForNull
    public Document makeDocument(NodeState state) throws IOException {
        final boolean isUpdate = false;
        return makeDocument(state, isUpdate, Collections.<PropertyState>emptyList());
    }

    /**
     * Builds the Lucene document for the given node state.
     *
     * <p>All visible properties (plus a synthetic node-name property) are run
     * through the indexing rule; aggregates, null / not-null checks, function
     * restrictions and augmented fields are then added.</p>
     *
     * @param state              the node state to index
     * @param isUpdate           {@code true} if the node was updated rather than added
     * @param propertiesModified properties that were modified or removed; consulted
     *                           only when nothing else marked the document dirty
     * @return the document, or {@code null} when this is an update without
     *         relevant changes, or when nothing was indexed and the rule does
     *         not index all nodes of the matching type
     * @throws IOException declared for callers; not thrown directly by the code in this method
     */
    @CheckForNull
    public Document makeDocument(NodeState state, boolean isUpdate, List<PropertyState> propertiesModified)
            throws IOException {
        boolean facet = false;
        boolean dirty = false;
        List<Field> fields = new ArrayList<Field>();
        // Name of the node, derived once from the path and reused below.
        String name = getName(path);

        //We 'intentionally' are indexing node names only on root state as we don't support indexing relative or
        //regex for node name indexing
        PropertyState nodenamePS = new StringPropertyState(FieldNames.NODE_NAME, name);
        for (PropertyState property : Iterables.concat(state.getProperties(), Collections.singleton(nodenamePS))) {
            String pname = property.getName();

            // Hidden properties are skipped, except the synthetic node-name property
            if (!isVisible(pname) && !FieldNames.NODE_NAME.equals(pname)) {
                continue;
            }

            PropertyDefinition pd = indexingRule.getConfig(pname);
            if (pd == null || !pd.index) {
                continue;
            }

            if (pd.ordered) {
                dirty |= addTypedOrderedFields(fields, property, pname, pd);
            }

            dirty |= indexProperty(path, fields, state, property, pname, pd);

            facet |= pd.facet;
        }

        boolean[] dirties = indexAggregates(path, fields, state);
        dirty |= dirties[0]; // any (aggregate) indexing happened
        facet |= dirties[1]; // facet indexing during (index-time) aggregation
        dirty |= indexNullCheckEnabledProps(path, fields, state);
        dirty |= indexFunctionRestrictions(path, fields, state);
        dirty |= indexNotNullCheckEnabledProps(path, fields, state);

        dirty |= augmentCustomFields(path, fields, state);

        // Check if a node having a single property was modified/deleted
        if (!dirty) {
            dirty = indexIfSinglePropertyRemoved(propertiesModified);
        }

        if (isUpdate && !dirty) {
            // updated the state but had no relevant changes
            return null;
        }

        if (indexingRule.isNodeNameIndexed()) {
            addNodeNameField(fields, name);
            dirty = true;
        }

        //For property index no use making an empty document if
        //none of the properties are indexed
        if (!indexingRule.indexesAllNodesOfMatchingType() && !dirty) {
            return null;
        }

        Document document = new Document();
        document.add(newPathField(path));

        if (indexingRule.isFulltextEnabled()) {
            document.add(newFulltextField(name));
        }

        if (definition.evaluatePathRestrictions()) {
            document.add(newAncestorsField(PathUtils.getParentPath(path)));
            document.add(newDepthField(path));
        }

        // because of LUCENE-5833 we have to merge the suggest fields into a single one
        Field suggestField = null;
        for (Field f : fields) {
            if (FieldNames.SUGGEST.equals(f.name())) {
                if (suggestField == null) {
                    suggestField = FieldFactory.newSuggestField(f.stringValue());
                } else {
                    suggestField = FieldFactory.newSuggestField(suggestField.stringValue(), f.stringValue());
                }
            } else {
                document.add(f);
            }
        }
        if (suggestField != null) {
            document.add(suggestField);
        }

        if (facet && isFacetingEnabled()) {
            document = getFacetsConfig().build(document);
        }

        //TODO Boost at document level

        return document;
    }

    /**
     * Adds facet fields for the given property and registers the facet field
     * name with the facets configuration.
     *
     * <p>If the property type differs from the type declared in the property
     * definition, the declared type is used for the conversion. Conversion
     * failures are logged and the property is ignored.</p>
     *
     * @param fields   list receiving the created fields
     * @param property the property to facet on
     * @param pname    the property name (used as the facet dimension)
     * @param pd       the property definition
     * @return {@code true} if the property was handled as a multi-valued string
     *         facet, or if a non-empty single string value was added
     */
    private boolean addFacetFields(List<Field> fields, PropertyState property, String pname, PropertyDefinition pd) {
        String facetFieldName = FieldNames.createFacetFieldName(pname);
        getFacetsConfig().setIndexFieldName(pname, facetFieldName);
        int tag = property.getType().tag();
        int idxDefinedTag = pd.getType();
        // Try converting type to the defined type in the index definition
        if (tag != idxDefinedTag) {
            // The property itself is passed so toString() only runs when debug logging is on
            log.debug("[{}] Facet property defined with type {} differs from property {} with type {} in "
                            + "path {}",
                    getIndexName(),
                    Type.fromTag(idxDefinedTag, false), property,
                    Type.fromTag(tag, false), path);
            tag = idxDefinedTag;
        }

        boolean fieldAdded = false;
        try {
            if (tag == Type.STRINGS.tag() && property.isArray()) {
                getFacetsConfig().setMultiValued(pname, true);
                Iterable<String> values = property.getValue(Type.STRINGS);
                for (String value : values) {
                    if (value != null && value.length() > 0) {
                        fields.add(new SortedSetDocValuesFacetField(pname, value));
                    }
                }
                // Reported as added even when every value was empty (unchanged behaviour)
                fieldAdded = true;
            } else if (tag == Type.STRING.tag()) {
                String value = property.getValue(Type.STRING);
                if (value.length() > 0) {
                    fields.add(new SortedSetDocValuesFacetField(pname, value));
                    fieldAdded = true;
                }
            }

        } catch (Throwable e) {
            // Best effort: a failing facet must not abort indexing of the node
            log.warn("[{}] Ignoring facet property. Could not convert property {} of type {} to type {} for path {}",
                    getIndexName(), pname,
                    Type.fromTag(property.getType().tag(), false),
                    Type.fromTag(tag, false), path, e);
        }
        return fieldAdded;
    }

    /**
     * Creates the index fields for a single property according to its
     * property definition.
     *
     * <p>Binary properties whose type is included by the indexing rule go
     * through {@code newBinary}. All other properties may produce typed
     * property fields, fulltext related fields (analyzed, suggest, spellcheck,
     * node-scope fulltext) and facet fields.</p>
     *
     * @param path     path of the node being indexed
     * @param fields   list receiving the created fields
     * @param state    node state owning the property
     * @param property the property to index
     * @param pname    name under which the property is indexed
     * @param pd       the property definition
     * @return {@code true} if the property contributed to the document
     */
    private boolean indexProperty(String path,
                                  List<Field> fields,
                                  NodeState state,
                                  PropertyState property,
                                  String pname,
                                  PropertyDefinition pd) {
        final int tag = property.getType().tag();
        boolean includeTypeForFullText = indexingRule.includePropertyType(tag);

        if (Type.BINARY.tag() == tag && includeTypeForFullText) {
            fields.addAll(newBinary(property, state, null, path + "@" + pname));
            // Counted as dirty even when no field was produced (unchanged behaviour)
            return true;
        }

        boolean dirty = false;
        if (pd.propertyIndex && pd.includePropertyType(tag)) {
            dirty |= addTypedFields(fields, property, pname);
        }

        if (pd.fulltextEnabled() && includeTypeForFullText) {
            for (String value : property.getValue(Type.STRINGS)) {
                if (pd.analyzed && pd.includePropertyType(tag)) {
                    String analyzedPropName = constructAnalyzedPropertyName(pname);
                    fields.add(newPropertyField(analyzedPropName, value, !pd.skipTokenization(pname), pd.stored));
                }

                if (pd.useInSuggest) {
                    fields.add(FieldFactory.newSuggestField(value));
                }

                if (pd.useInSpellcheck) {
                    fields.add(newPropertyField(FieldNames.SPELLCHECK, value, true, false));
                }

                if (pd.nodeScopeIndex) {
                    fields.add(newFulltextField(value));
                }
                dirty = true;
            }
        }

        if (pd.facet && isFacetingEnabled()) {
            dirty |= addFacetFields(fields, property, pname, pd);
        }

        return dirty;
    }

    /**
     * Returns the field name used for the analyzed form of a property.
     *
     * @param pname the property name
     * @return the analyzed field name for index format V2 or later,
     *         otherwise the property name unchanged
     */
    private String constructAnalyzedPropertyName(String pname) {
        boolean v2OrLater = definition.getVersion().isAtLeast(IndexFormatVersion.V2);
        return v2OrLater ? FieldNames.createAnalyzedFieldName(pname) : pname;
    }

    /**
     * Adds one non-stored property field per value of the given property,
     * using a field type chosen from the property type.
     *
     * @param fields   list receiving the created fields
     * @param property the property whose values are indexed
     * @param pname    the field name
     * @return {@code true} if at least one field was added
     */
    private boolean addTypedFields(List<Field> fields, PropertyState property, String pname) {
        int tag = property.getType().tag();
        int count = property.count();
        for (int i = 0; i < count; i++) {
            fields.add(newTypedField(property, pname, tag, i));
        }
        return count > 0;
    }

    /**
     * Creates the field for the value at {@code index}: LONG and DATE become
     * {@link LongField}, DOUBLE becomes {@link DoubleField}, everything else
     * (including BOOLEAN) becomes a {@link StringField}.
     */
    private static Field newTypedField(PropertyState property, String pname, int tag, int index) {
        if (tag == Type.LONG.tag()) {
            return new LongField(pname, property.getValue(Type.LONG, index), Field.Store.NO);
        }
        if (tag == Type.DATE.tag()) {
            String date = property.getValue(Type.DATE, index);
            return new LongField(pname, FieldFactory.dateToLong(date), Field.Store.NO);
        }
        if (tag == Type.DOUBLE.tag()) {
            return new DoubleField(pname, property.getValue(Type.DOUBLE, index), Field.Store.NO);
        }
        if (tag == Type.BOOLEAN.tag()) {
            return new StringField(pname, property.getValue(Type.BOOLEAN, index).toString(), Field.Store.NO);
        }
        return new StringField(pname, property.getValue(Type.STRING, index), Field.Store.NO);
    }

    /**
     * Adds the doc-values field used for ordering by the given property.
     *
     * <p>Multi-valued properties are not supported and are ignored with a
     * warning. If the property type differs from the type declared in the
     * property definition, conversion to the declared type is attempted;
     * conversion failures are logged and the property is ignored.</p>
     *
     * @param fields   list receiving the created field
     * @param property the property to order by
     * @param pname    the property name the doc-values field name is derived from
     * @param pd       the property definition
     * @return {@code true} if a field was added
     */
    private boolean addTypedOrderedFields(List<Field> fields,
                                          PropertyState property,
                                          String pname,
                                          PropertyDefinition pd) {
        // Ignore and warn if property multi-valued as not supported
        if (property.getType().isArray()) {
            log.warn(
                    "[{}] Ignoring ordered property {} of type {} for path {} as multivalued ordered property not supported",
                    getIndexName(), pname,
                    Type.fromTag(property.getType().tag(), true), path);
            return false;
        }

        int tag = property.getType().tag();
        int idxDefinedTag = pd.getType();
        // Try converting type to the defined type in the index definition
        if (tag != idxDefinedTag) {
            // The property itself is passed so toString() only runs when debug logging is on
            log.debug(
                    "[{}] Ordered property defined with type {} differs from property {} with type {} in "
                            + "path {}",
                    getIndexName(),
                    Type.fromTag(idxDefinedTag, false), property,
                    Type.fromTag(tag, false), path);
            tag = idxDefinedTag;
        }

        String name = FieldNames.createDocValFieldName(pname);
        boolean fieldAdded = false;
        Field f = null;
        try {
            if (tag == Type.LONG.tag()) {
                //TODO Distinguish fields which need to be used for search and for sort
                //If a field is only used for Sort then it can be stored with less precision
                f = new NumericDocValuesField(name, property.getValue(Type.LONG));
            } else if (tag == Type.DATE.tag()) {
                String date = property.getValue(Type.DATE);
                f = new NumericDocValuesField(name, FieldFactory.dateToLong(date));
            } else if (tag == Type.DOUBLE.tag()) {
                f = new DoubleDocValuesField(name, property.getValue(Type.DOUBLE));
            } else if (tag == Type.BOOLEAN.tag()) {
                f = new SortedDocValuesField(name,
                    new BytesRef(property.getValue(Type.BOOLEAN).toString()));
            } else if (tag == Type.STRING.tag()) {
                f = new SortedDocValuesField(name,
                    new BytesRef(property.getValue(Type.STRING)));
            }

            // Any other type yields no ordered field
            if (f != null) {
                fields.add(f);
                fieldAdded = true;
            }
        } catch (Exception e) {
            log.warn(
                    "[{}] Ignoring ordered property. Could not convert property {} of type {} to type {} for path {}",
                    getIndexName(), pname,
                    Type.fromTag(property.getType().tag(), false),
                    Type.fromTag(tag, false), path, e);
        }
        return fieldAdded;
    }

    /**
     * Tells whether a property name is visible, i.e. does not start with
     * {@code ':'}. An empty name is treated as visible instead of raising
     * {@link StringIndexOutOfBoundsException}.
     *
     * @param name the property name
     * @return {@code true} if the name does not start with a colon
     */
    private static boolean isVisible(String name) {
        return !name.startsWith(":");
    }

    /**
     * Delegates binary text extraction to the configured extractor.
     *
     * @param property the binary property
     * @param state    node state owning the property
     * @param nodePath passed through to the extractor unchanged
     * @param path     passed through to the extractor unchanged
     * @return the extractor's result, or an empty list when no extractor is configured
     */
    private List newBinary(
            PropertyState property, NodeState state, String nodePath, String path) {
        if (textExtractor != null) {
            return textExtractor.newBinary(property, state, nodePath, path);
        }
        //Skip text extraction for sync indexing
        return Collections.emptyList();
    }

    /**
     * Adds the fields supplied by the index field provider registered for the
     * indexing rule's node type, if an augmentor factory is configured.
     *
     * @param path     path of the node being indexed
     * @param fields   list receiving the augmented fields
     * @param document node state of the node being indexed
     * @return {@code true} if at least one augmented field was added
     */
    private boolean augmentCustomFields(final String path, final List<Field> fields,
                                        final NodeState document) {
        if (augmentorFactory == null) {
            return false;
        }

        boolean dirty = false;
        // Iterate the provider result directly so the element type is taken from its declaration
        for (Field field : augmentorFactory
                .getIndexFieldProvider(indexingRule.getNodeTypeName())
                .getAugmentedFields(path, document, definition.getDefinitionNodeState())) {
            fields.add(field);
            dirty = true;
        }
        return dirty;
    }

    //~-------------------------------------------------------< NullCheck Support >

    /**
     * Adds a {@code NOT_NULL_PROPS} marker field for every not-null-check
     * enabled property definition whose property exists on the node.
     *
     * @param path   path of the node being indexed (not used by this method)
     * @param fields list receiving the marker fields
     * @param state  node state being indexed
     * @return {@code true} if at least one marker field was added
     */
    private boolean indexNotNullCheckEnabledProps(String path, List<Field> fields, NodeState state) {
        boolean fieldAdded = false;
        for (PropertyDefinition pd : indexingRule.getNotNullCheckEnabledProperties()) {
            if (!isPropertyNotNull(state, pd)) {
                continue;
            }
            fields.add(new StringField(FieldNames.NOT_NULL_PROPS, pd.name, Field.Store.NO));
            fieldAdded = true;
        }
        return fieldAdded;
    }

    /**
     * Adds a {@code NULL_PROPS} marker field for every null-check enabled
     * property definition whose property is missing on the node.
     *
     * @param path   path of the node being indexed (not used by this method)
     * @param fields list receiving the marker fields
     * @param state  node state being indexed
     * @return {@code true} if at least one marker field was added
     */
    private boolean indexNullCheckEnabledProps(String path, List<Field> fields, NodeState state) {
        boolean fieldAdded = false;
        for (PropertyDefinition pd : indexingRule.getNullCheckEnabledProperties()) {
            if (!isPropertyNull(state, pd)) {
                continue;
            }
            fields.add(new StringField(FieldNames.NULL_PROPS, pd.name, Field.Store.NO));
            fieldAdded = true;
        }
        return fieldAdded;
    }

    /**
     * Evaluates every function restriction of the indexing rule against the
     * node and indexes the computed value under the function's name.
     *
     * @param path   path of the node being indexed
     * @param fields list receiving the created fields
     * @param state  node state being indexed
     * @return {@code true} if at least one function produced a non-null value
     */
    private boolean indexFunctionRestrictions(String path, List<Field> fields, NodeState state) {
        boolean fieldAdded = false;
        for (PropertyDefinition pd : indexingRule.getFunctionRestrictions()) {
            PropertyState functionValue = calculateValue(path, state, pd.functionCode);
            if (functionValue == null) {
                continue;
            }
            if (pd.ordered) {
                addTypedOrderedFields(fields, functionValue, pd.function, pd);
            }
            addTypedFields(fields, functionValue, pd.function);
            // Reported as added whenever a value was computed (unchanged behaviour)
            fieldAdded = true;
        }
        return fieldAdded;
    }

    /**
     * Evaluates a function expression for the given node, logging any runtime
     * failure before propagating it.
     *
     * @param path         path of the node
     * @param state        node state the function is evaluated against
     * @param functionCode the function code to evaluate
     * @return the value returned by {@code FunctionIndexProcessor.tryCalculateValue}
     * @throws RuntimeException rethrown unchanged after being logged
     */
    private static PropertyState calculateValue(String path, NodeState state, String[] functionCode) {
        final PropertyState computed;
        try {
            computed = FunctionIndexProcessor.tryCalculateValue(path, state, functionCode);
        } catch (RuntimeException failure) {
            String code = Arrays.toString(functionCode);
            log.error("Failed to calculate function value for {} at {}", code, path, failure);
            throw failure;
        }
        return computed;
    }

    /**
     * Checks whether any of the modified properties is relevant to this index,
     * i.e. has an indexed property definition and a type included either by
     * that definition or by the indexing rule.
     *
     * @param propertiesModified the modified properties; {@code null} is treated as empty
     * @return {@code true} if at least one modified property is relevant
     */
    private boolean indexIfSinglePropertyRemoved(List<PropertyState> propertiesModified) {
        if (propertiesModified == null) {
            return false;
        }
        for (PropertyState ps : propertiesModified) {
            PropertyDefinition pd = indexingRule.getConfig(ps.getName());
            if (pd == null || !pd.index) {
                continue;
            }
            int tag = ps.getType().tag();
            if (pd.includePropertyType(tag) || indexingRule.includePropertyType(tag)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Determine if the property as defined by PropertyDefinition exists or not.
     *
     * <p>For relative property if the intermediate nodes do not exist then property is
     * <b>not</b> considered to be null</p>
     *
     * @param state the node state to inspect
     * @param pd    the property definition
     * @return true if the property does not exist
     */
    private boolean isPropertyNull(NodeState state, PropertyDefinition pd) {
        NodeState propertyNode = getPropertyNode(state, pd);
        if (!propertyNode.exists()) {
            return false;
        }
        return !propertyNode.hasProperty(pd.nonRelativeName);
    }

    /**
     * Determine if the property as defined by PropertyDefinition exists or not.
     *
     * <p>For relative property if the intermediate nodes do not exist then property is
     * considered to be null</p>
     *
     * @param state the node state to inspect
     * @param pd    the property definition
     * @return true if the property exists
     */
    private boolean isPropertyNotNull(NodeState state, PropertyDefinition pd) {
        NodeState propertyNode = getPropertyNode(state, pd);
        if (!propertyNode.exists()) {
            return false;
        }
        return propertyNode.hasProperty(pd.nonRelativeName);
    }

    /**
     * Resolves the node that would hold the property described by the
     * definition: the given node for non-relative definitions, otherwise the
     * node reached by following {@code pd.ancestors} from it.
     *
     * @param nodeState the node to start from
     * @param pd        the property definition
     * @return the node expected to hold the property
     */
    private static NodeState getPropertyNode(NodeState nodeState, PropertyDefinition pd) {
        if (!pd.relative) {
            return nodeState;
        }
        NodeState node = nodeState;
        for (String name : pd.ancestors) {
            node = node.getChildNode(name);
        }
        return node;
    }

    /**
     * index aggregates on a certain path
     *
     * @param path the path of the node
     * @param fields the list of fields
     * @param state the node state
     * @return an array of booleans whose first element is {@code true} if any indexing has happened
     * and the second element is {@code true} if facets on any (aggregate) property have been indexed
     */
    private boolean[] indexAggregates(final String path, final List<Field> fields, final NodeState state) {
        final AtomicBoolean dirtyFlag = new AtomicBoolean();
        final AtomicBoolean facetFlag = new AtomicBoolean();
        indexingRule.getAggregate().collectAggregates(state, new Aggregate.ResultCollector() {
            @Override
            public void onResult(Aggregate.NodeIncludeResult result) {
                boolean dirty = indexAggregatedNode(path, fields, result);
                if (dirty) {
                    dirtyFlag.set(true);
                }
            }

            @Override
            public void onResult(Aggregate.PropertyIncludeResult result) {
                boolean dirty = false;
                if (result.pd.ordered) {
                    dirty |= addTypedOrderedFields(fields, result.propertyState, result.propertyPath, result.pd);
                }
                dirty |= indexProperty(path, fields, state, result.propertyState, result.propertyPath, result.pd);

                if (result.pd.facet) {
                    facetFlag.set(true);
                }
                if (dirty) {
                    dirtyFlag.set(true);
                }
            }
        });
        return new boolean[]{dirtyFlag.get(), facetFlag.get()};
    }

    /**
     * Create the fulltext field from the aggregated nodes. If result is for aggregate for a relative node
     * include then the fulltext field is created with {@code result.rootIncludePath}; otherwise the
     * single-argument fulltext field is used.
     *
     * @param path current node path
     * @param fields indexed fields
     * @param result aggregate result
     * @return true if a field was created for passed node result
     */
    private boolean indexAggregatedNode(String path, List<Field> fields, Aggregate.NodeIncludeResult result) {
        //rule for node being aggregated might be null if such nodes
        //are not indexed on there own. In such cases we rely in current
        //rule for some checks
        IndexDefinition.IndexingRule ruleAggNode = definition
                .getApplicableIndexingRule(getPrimaryTypeName(result.nodeState));
        boolean dirty = false;

        for (PropertyState property : result.nodeState.getProperties()) {
            String pname = property.getName();
            String propertyPath = PathUtils.concat(result.nodePath, pname);

            if (!isVisible(pname)) {
                continue;
            }

            //Check if type is indexed
            int type = property.getType().tag();
            if (ruleAggNode != null) {
                if (!ruleAggNode.includePropertyType(type)) {
                    continue;
                }
            } else if (!indexingRule.includePropertyType(type)) {
                continue;
            }

            //Check if any explicit property defn is defined via relative path
            // and is marked to exclude this property from being indexed. We exclude
            //it from aggregation if
            // 1. Its not to be indexed i.e. index=false
            // 2. Its explicitly excluded from aggregation i.e. excludeFromAggregation=true
            PropertyDefinition pdForRootNode = indexingRule.getConfig(propertyPath);
            if (pdForRootNode != null && (!pdForRootNode.index || pdForRootNode.excludeFromAggregate)) {
                continue;
            }

            if (Type.BINARY == property.getType()) {
                String aggregatedNodePath = PathUtils.concat(path, result.nodePath);
                //Here the fulltext is being created for aggregate root hence nodePath passed
                //should be null
                String nodePath = result.isRelativeNode() ? result.rootIncludePath : null;
                fields.addAll(newBinary(property, result.nodeState, nodePath, aggregatedNodePath + "@" + pname));
                dirty = true;
            } else {
                PropertyDefinition pd = null;
                if (ruleAggNode != null) {
                    pd = ruleAggNode.getConfig(pname);
                }

                if (pd != null && !pd.nodeScopeIndex) {
                    continue;
                }

                for (String value : property.getValue(Type.STRINGS)) {
                    Field field = result.isRelativeNode()
                            ? newFulltextField(result.rootIncludePath, value)
                            : newFulltextField(value);
                    if (pd != null) {
                        field.setBoost(pd.boost);
                    }
                    fields.add(field);
                    dirty = true;
                }
            }
        }
        return dirty;
    }

    /**
     * @return the name of the index as reported by the index definition
     */
    private String getIndexName() {
        return definition.getIndexName();
    }

    /**
     * @return {@code true} if a facets configuration provider was supplied
     */
    private boolean isFacetingEnabled() {
        return facetsConfigProvider != null;
    }

    /**
     * Must only be called when {@link #isFacetingEnabled()} is {@code true}.
     *
     * @return the facets configuration from the configured provider
     */
    private FacetsConfig getFacetsConfig() {
        return facetsConfigProvider.getFacetsConfig();
    }

    /**
     * Extracts the local name of the current node ignoring any namespace prefix
     * and adds it as a non-stored node-name field.
     *
     * @param fields list receiving the node-name field
     * @param name node name
     */
    private static void addNodeNameField(List<Field> fields, String name) {
        //TODO Need to check if it covers all cases
        int colon = name.indexOf(':');
        String value = colon < 0 ? name : name.substring(colon + 1);

        //For now just add a single term. Later we can look into using different analyzer
        //to analyze the node name and add multiple terms. Like add multiple terms for a
        //camelCase file name to allow faster like search
        fields.add(new StringField(FieldNames.NODE_NAME, value, Field.Store.NO));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy