All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.adobe.cq.searchcollections.lucene.DefaultNodeIndexer Maven / Gradle / Ivy

/*************************************************************************
 *
 * ADOBE CONFIDENTIAL
 * __________________
 *
 *  Copyright 2012 Adobe Systems Incorporated
 *  All Rights Reserved.
 *
 * NOTICE:  All information contained herein is, and remains
 * the property of Adobe Systems Incorporated and its suppliers,
 * if any.  The intellectual and technical concepts contained
 * herein are proprietary to Adobe Systems Incorporated and its
 * suppliers and are protected by trade secret or copyright law.
 * Dissemination of this information or reproduction of this material
 * is strictly forbidden unless prior written permission is obtained
 * from Adobe Systems Incorporated.
 **************************************************************************/
package com.adobe.cq.searchcollections.lucene;

import java.util.Arrays;
import java.util.List;

import javax.jcr.Node;
import javax.jcr.Property;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
import javax.jcr.Value;

import org.apache.jackrabbit.commons.JcrUtils;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.Field.TermVector;
import org.apache.lucene.facet.index.CategoryDocumentBuilder;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.TermQuery;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Default {@link NodeIndexer} implementation that converts a JCR {@link Node}
 * into a Lucene {@link Document}.
 *
 * <p>The document produced by {@link #createDocument} contains:
 * <ul>
 * <li>bookkeeping fields for identifier, primary type, path, parent path,
 * qualified name and local name of the node,</li>
 * <li>one {@code :properties} entry plus a typed field and a per-property
 * full-text field for every non-skipped, non-binary property of the node,</li>
 * <li>a shared {@code :fulltext} field that also aggregates the non-skipped,
 * non-binary property values of descendant nodes.</li>
 * </ul>
 *
 * <p>Subclasses can tune the behaviour through {@link #skipProperties()},
 * {@link #getAggregationMaxLevelsDeep()} and
 * {@link #shouldSkipChildNode(Node, Node, int)}.
 *
 * @deprecated this class is marked deprecated; no replacement is named in
 *             this file.
 */
@Deprecated
public class DefaultNodeIndexer implements NodeIndexer {

    // --- fields
    // The field names below are handed to Field constructors together with a
    // "false" flag (presumably Lucene's internName switch), hence they are
    // kept explicitly interned.
    private static final String FIELD_UUID = ":uuid".intern();

    private static final String FIELD_FULLTEXT = ":fulltext".intern();

    private static final String FIELD_PRIMARY_TYPE = "jcr:primaryType".intern();

    private static final String FIELD_PATH = ":path".intern();

    private static final String FIELD_PARENT = ":parent".intern();

    private static final String FIELD_NAME = ":name".intern();

    private static final String FIELD_LOCAL = ":local".intern();

    private static final String FIELD_PROPERTIES = ":properties".intern();

    /** Prefix of the per-property full-text field name. */
    private static final String FULLTEXT_PROPERTY_PREFIX = ":fulltext:";

    /** Wildcard character accepted at the start and/or end of a skip pattern. */
    private static final String WILDCARD = "*";

    // -- term query
    /** Template term used to derive path terms for {@link #getSubtreeQuery}. */
    private static final Term TERM_PATH = new Term(FIELD_PATH);

    private static final int DEFAULT_MAX_AGGREGATION_LEVELS = 25;

    /**
     * Property names (or patterns with a leading and/or trailing {@code *})
     * that are not indexed by default.
     */
    private static final List<String> DEFAULT_SKIP_PROPERTIES_LIST = Arrays
            .asList("sling:*", "jcr:created", "jcr:createdBy",
                    "jcr:lastModified", "jcr:lastModifiedBy", "cq:template",
                    "cq:toolbars", "cq:lastModified", "cq:lastModifiedBy",
                    "textIsRich", "isDate");

    private static final Logger log = LoggerFactory
            .getLogger(DefaultNodeIndexer.class);

    /**
     * Builds a query on the {@code :path} field for the given path.
     *
     * <p>NOTE(review): this is a plain {@link TermQuery} and the path field is
     * indexed un-analyzed, so it looks like only a document whose path equals
     * {@code path} can match (not its descendants) - confirm against callers.
     *
     * @param path the path value to query for
     * @return a term query on the path field
     */
    public Query getSubtreeQuery(final String path) {
        return new TermQuery(TERM_PATH.createTerm(path));
    }

    /**
     * Creates the Lucene document for the given node.
     *
     * @param node the node to index
     * @param categoryDocBuilder not used by this implementation
     * @return the populated document
     * @throws RepositoryException if reading the node, its properties or its
     *             descendants fails
     */
    public Document createDocument(Node node, CategoryDocumentBuilder categoryDocBuilder) throws RepositoryException {

        // Read each repository value once and reuse it for logging and fields.
        final String path = node.getPath();
        final String primaryTypeName = node.getPrimaryNodeType().getName();
        log.debug("Started to create lucene doc for {} / {}.", path,
                primaryTypeName);
        final long start = System.currentTimeMillis();
        final String identifier = node.getIdentifier();

        Document document = new Document();

        document.add(new Field(FIELD_UUID, false, identifier, Store.YES,
                Index.NOT_ANALYZED_NO_NORMS, TermVector.NO));
        document.add(new Field(FIELD_PRIMARY_TYPE, false, primaryTypeName,
                Store.YES, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO));
        document.add(new Field(FIELD_PATH, false, path, Store.YES,
                Index.NOT_ANALYZED_NO_NORMS, TermVector.NO));
        try {
            document.add(new Field(FIELD_PARENT, false, node.getParent()
                    .getPath(), Store.NO, Index.NOT_ANALYZED_NO_NORMS,
                    TermVector.NO));
        } catch (RepositoryException ignored) {
            // Deliberately best-effort: the root node has no parent, in which
            // case the document simply carries no parent field.
        }

        final String nodeName = node.getName();
        document.add(new Field(FIELD_NAME, false, nodeName, Store.NO,
                Index.NOT_ANALYZED_NO_NORMS, TermVector.NO));
        // Local name = everything after the first ':' (the whole name when
        // there is no prefix, because indexOf then yields -1).
        final String localName = nodeName.substring(nodeName.indexOf(':') + 1);
        document.add(new Field(FIELD_LOCAL, false, localName, Store.NO,
                Index.NOT_ANALYZED_NO_NORMS, TermVector.NO));

        for (Property property : JcrUtils.getProperties(node)) {
            final String propertyName = property.getName();
            if (shouldSkipProperty(propertyName)) {
                continue;
            }
            document.add(new Field(FIELD_PROPERTIES, false, propertyName,
                    Store.NO, Index.NOT_ANALYZED_NO_NORMS, TermVector.NO));
            addPropertyValues(document, property, propertyName, false);
        }
        // copy index aggregates info
        copyAggregatesFromOriginal(node, document);

        // TODO not sure if the re-alignment is needed any more: an earlier
        // revision re-added the :fulltext fields sorted so that stored fields
        // came first, followed by the remaining ones.

        log.debug("Created lucene doc for {} took {} ms.", identifier,
                System.currentTimeMillis() - start);
        return document;
    }

    /**
     * Adds the property values of the descendants of {@code node} to the
     * shared full-text field of {@code doc}.
     *
     * @param node the node whose descendants are aggregated
     * @param doc the document to add the values to
     * @throws RepositoryException if reading the descendants fails
     */
    protected void copyAggregatesFromOriginal(Node node, Document doc)
            throws RepositoryException {
        copyAggregatesFromOriginal(node, doc, 0);
    }

    /**
     * Recursive worker for {@link #copyAggregatesFromOriginal(Node, Document)}.
     * Children are visited before the properties of {@code node} itself are
     * added; the properties of the level-0 node are not added here because
     * {@link #createDocument} already indexed them.
     *
     * @param node the node currently visited
     * @param doc the document to add the values to
     * @param level depth of {@code node} below the indexed node (0 = itself)
     * @throws RepositoryException if reading the node fails
     */
    private void copyAggregatesFromOriginal(Node node, Document doc, int level)
            throws RepositoryException {
        // NOTE(review): equality check - a negative maximum returned by a
        // subclass never stops the recursion; confirm whether that is relied on.
        if (level == getAggregationMaxLevelsDeep()) {
            return;
        }
        final int childLevel = level + 1;
        for (Node child : JcrUtils.getChildNodes(node)) {
            if (shouldSkipChildNode(node, child, childLevel)) {
                continue;
            }
            copyAggregatesFromOriginal(child, doc, childLevel);
        }
        if (level > 0) {
            for (Property property : JcrUtils.getProperties(node)) {
                final String propertyName = property.getName();
                if (shouldSkipProperty(propertyName)) {
                    continue;
                }
                addPropertyValues(doc, property, propertyName, true);
            }
        }
    }

    /**
     * Extension point deciding whether a child node (and its subtree) is left
     * out of the aggregation. This implementation never skips.
     *
     * @param node the parent node
     * @param child the child node about to be aggregated
     * @param levelDeep depth of {@code child} below the indexed node
     * @return {@code true} to skip the child; always {@code false} here
     */
    protected boolean shouldSkipChildNode(Node node, Node child, int levelDeep) {
        return false;
    }

    /**
     * Tells whether the property with the given name is excluded from
     * indexing.
     *
     * <p>{@code jcr:primaryType} is always excluded because
     * {@link #createDocument} indexes the primary type as a dedicated field.
     * Otherwise the name is checked against {@link #skipProperties()}.
     *
     * @param name the property name
     * @return {@code true} if the property must not be indexed
     */
    private boolean shouldSkipProperty(String name) {
        if (FIELD_PRIMARY_TYPE.equals(name)) {
            return true;
        }
        final List<String> skips = skipProperties();
        if (skips == null) {
            return false;
        }
        for (String pattern : skips) {
            if (pattern != null && matchesSkipPattern(pattern, name)) {
                return true;
            }
        }
        return false;
    }

    /**
     * Matches a property name against one skip pattern.
     *
     * <p>Supported forms: {@code *text*} (name contains {@code text}),
     * {@code *text} (name ends with {@code text}), {@code text*} (name starts
     * with {@code text}) and plain {@code text} (exact match). The asterisks
     * are stripped before comparing; comparing against the raw pattern would
     * require the property name itself to contain a literal {@code *}.
     *
     * @param pattern the skip pattern, not {@code null}
     * @param name the property name
     * @return {@code true} if the name matches the pattern
     */
    private static boolean matchesSkipPattern(String pattern, String name) {
        final boolean leading = pattern.startsWith(WILDCARD);
        final boolean trailing = pattern.endsWith(WILDCARD);
        if (leading && trailing) {
            if (pattern.length() < 2) {
                // a lone "*" matches every name
                return true;
            }
            return name.contains(pattern.substring(1, pattern.length() - 1));
        }
        if (leading) {
            return name.endsWith(pattern.substring(1));
        }
        if (trailing) {
            return name.startsWith(pattern.substring(0, pattern.length() - 1));
        }
        return pattern.equals(name);
    }

    /**
     * Returns the names/patterns of the properties that are not indexed.
     *
     * @return the skip list; {@code null} is treated like an empty list
     */
    protected List<String> skipProperties() {
        return DEFAULT_SKIP_PROPERTIES_LIST;
    }

    /**
     * Returns how many levels of descendants are aggregated into the
     * full-text field.
     *
     * @return the maximum aggregation depth
     */
    protected int getAggregationMaxLevelsDeep() {
        return DEFAULT_MAX_AGGREGATION_LEVELS;
    }

    /**
     * Adds every value of a single- or multi-valued property to the document.
     *
     * @param document the document to add to
     * @param property the property whose values are added
     * @param name the name of {@code property}
     * @param isFullText {@code true} to feed only the shared full-text field
     * @throws RepositoryException if reading the values fails
     */
    private void addPropertyValues(Document document, Property property,
            String name, boolean isFullText) throws RepositoryException {
        if (property.isMultiple()) {
            for (Value value : property.getValues()) {
                addValue(document, name, value, isFullText);
            }
        } else {
            addValue(document, name, property.getValue(), isFullText);
        }
    }

    /**
     * Adds one property value to the document. Binary values are ignored.
     *
     * <p>Every other value is appended to the shared {@code :fulltext} field.
     * Unless {@code isFullText} is set, the value is additionally indexed
     * un-analyzed under the property name (dates, doubles, longs and decimals
     * in the string form produced by the matching {@code *Field} helper) and
     * analyzed under {@code :fulltext:<name>}.
     *
     * @param document the document to add to
     * @param name the property name
     * @param value the value to add
     * @param isFullText {@code true} to feed only the shared full-text field
     * @throws RepositoryException if the value cannot be read or converted
     */
    private void addValue(Document document, String name, Value value,
            boolean isFullText) throws RepositoryException {
        final int type = value.getType();
        if (type == PropertyType.BINARY) {
            return;
        }
        final String fulltext = value.getString();
        if (!isFullText) {
            // The typed form is only needed for the per-property field.
            final String typed;
            switch (type) {
                case PropertyType.DATE:
                    typed = DateField.dateToString(value.getDate().getTime());
                    break;
                case PropertyType.DOUBLE:
                    typed = DoubleField.doubleToString(value.getDouble());
                    break;
                case PropertyType.LONG:
                    typed = LongField.longToString(value.getLong());
                    break;
                case PropertyType.DECIMAL:
                    typed = DecimalField.decimalToString(value.getDecimal());
                    break;
                default:
                    typed = fulltext;
                    break;
            }
            document.add(new Field(name, typed, Store.NO,
                    Index.NOT_ANALYZED_NO_NORMS));
            document.add(new Field(FULLTEXT_PROPERTY_PREFIX + name, fulltext,
                    Store.NO, Index.ANALYZED_NO_NORMS));
        }
        document.add(new Field(FIELD_FULLTEXT, false, fulltext, Store.NO,
                Index.ANALYZED_NO_NORMS, TermVector.NO));
    }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy