// com.day.cq.wcm.commons.ReferenceSearch (Maven / Gradle / Ivy)

/*
 * Copyright 1997-2008 Day Management AG
 * Barfuesserplatz 6, 4001 Basel, Switzerland
 * All Rights Reserved.
 *
 * This software is the confidential and proprietary information of
 * Day Management AG, ("Confidential Information"). You shall not
 * disclose such Confidential Information and shall use it only in
 * accordance with the terms of the license agreement you entered into
 * with Day.
 */
package com.day.cq.wcm.commons;

import java.io.UnsupportedEncodingException;
import java.util.BitSet;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Map;
import java.util.Set;
import java.util.Calendar;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

import javax.jcr.Node;
import javax.jcr.NodeIterator;
import javax.jcr.Property;
import javax.jcr.PropertyIterator;
import javax.jcr.PropertyType;
import javax.jcr.RepositoryException;
import javax.jcr.Value;
import javax.jcr.query.Query;

import org.apache.jackrabbit.util.Text;
import org.apache.sling.api.SlingException;
import org.apache.sling.api.resource.PersistenceException;
import org.apache.sling.api.resource.Resource;
import org.apache.sling.api.resource.ResourceResolver;
import org.apache.sling.api.resource.ResourceUtil;
import org.apache.sling.jcr.api.SlingRepository;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import com.day.cq.commons.predicate.AbstractResourcePredicate;
import com.day.cq.wcm.api.Page;
import com.day.cq.wcm.api.PageManager;
import com.day.cq.wcm.api.WCMException;

/**
 * ReferenceSearch provides methods that search references to
 * resources (e.g. a path in a property to an image)
 */
public class ReferenceSearch {

    /**
     * Repository handed in through the deprecated
     * {@link #setRepository(SlingRepository)}. It is only stored here; none of
     * the code visible in this part of the file reads it.
     */
    private SlingRepository repository;

    /**
     * default logger
     */
    private static final Logger log = LoggerFactory.getLogger(ReferenceSearch.class);

    /**
     * Path below which references are searched. Defaults to "/", in which
     * case the queries are issued without any path prefix.
     */
    private String searchRoot = "/";

    /**
     * If true only exact references to a path are considered; otherwise
     * references to child resources of that path are included as well.
     */
    private boolean exact = false;

    /**
     * If true the {@link Info} objects created by a search do not keep the
     * page object itself, only the page title, page path and property paths.
     */
    private boolean hollow = false;

    /**
     * Maximum number of property paths collected per page info. A negative
     * value (the default) disables the limit.
     */
    private int maxReferencesPerPage = -1;
    
    /**
     * Resource predicate, can be set to filter results based on a resource predicate {@link AbstractResourcePredicate}.
     * null (the default) means that no predicate based filtering takes place.
     */
    private AbstractResourcePredicate resourcePredicate = null;

    /**
     * Stores the given repository on this instance.
     *
     * @deprecated The repository was used by the implementation to create an administrative session in
     * {@link #adjustReferences(ResourceResolver, String, String, String[])}. Instead, specify a resource resolver
     * which is sufficiently authorized to adjust references at the desired locations.
     * @param repository The repository
     * @return this
     */
    @Deprecated
    public ReferenceSearch setRepository(SlingRepository repository) {
        this.repository = repository;
        return this;
    }

    /**
     * Gets the path below which references are searched. Unless changed
     * through {@link #setSearchRoot(String)} this is '/'.
     *
     * @return the current search root
     */
    public String getSearchRoot() {
        return this.searchRoot;
    }

    /**
     * Defines the path below which references are searched. A
     * {@code null} or empty value resets the search root to '/'.
     *
     * @param searchRoot the new search root
     * @return this
     */
    public ReferenceSearch setSearchRoot(String searchRoot) {
        final boolean unspecified = searchRoot == null || searchRoot.equals("");
        this.searchRoot = unspecified ? "/" : searchRoot;
        return this;
    }

    /**
     * Tells whether only exact references are taken into account.
     *
     * @return the exact flag.
     * @see #setExact(boolean)
     */
    public boolean isExact() {
        return this.exact;
    }

    /**
     * Switches exact matching on or off. With exact matching only
     * references to the path itself are searched (and replaced); without it,
     * references to child resources of the path are included as well.
     *
     * @param exact {@code true} to perform an exact search
     * @return this
     */
    public ReferenceSearch setExact(boolean exact) {
        final ReferenceSearch self = this;
        self.exact = exact;
        return self;
    }

    /**
     * Tells whether searches produce hollow result infos.
     *
     * @return the hollow flag.
     * @see #setHollow(boolean)
     */
    public boolean isHollow() {
        return this.hollow;
    }

    /**
     * Switches hollow results on or off. A hollow result info carries only
     * properties of the page (title, path, matching property paths) and not
     * the page object itself.
     *
     * @param hollow {@code true} to perform a hollow search
     * @return this
     */
    public ReferenceSearch setHollow(boolean hollow) {
        final ReferenceSearch self = this;
        self.hollow = hollow;
        return self;
    }

    /**
     * Gets the upper bound for the number of references that are collected
     * in a single page info. The initial value is -1.
     *
     * @return the reference limit per page.
     * @see #setMaxReferencesPerPage(int)
     */
    public int getMaxReferencesPerPage() {
        return this.maxReferencesPerPage;
    }

    /**
     * Defines the upper bound for the number of references that are
     * collected in a single page info. The limit is only enforced for values
     * greater than or equal to zero.
     *
     * @param maxReferencesPerPage the maximum number of references that can be added to a page info
     * @return this
     */
    public ReferenceSearch setMaxReferencesPerPage(int maxReferencesPerPage) {
        final ReferenceSearch self = this;
        self.maxReferencesPerPage = maxReferencesPerPage;
        return self;
    }
    
    /**
     * Installs a predicate used to filter search results. During a search
     * the predicate is evaluated against the resource of each result page
     * that is still available on the result info; results rejected by the
     * predicate are dropped. Passing {@code null} disables this filtering.
     *
     * @param resourcePredicate an instance of {@link AbstractResourcePredicate}
     * @return this
     */
    public ReferenceSearch setPredicate(AbstractResourcePredicate resourcePredicate) {
        final ReferenceSearch self = this;
        self.resourcePredicate = resourcePredicate;
        return self;
    }

    /**
     * Searches for references to the given path.
     * <p>
     * The path is looked up as is and, if it contains characters that need
     * escaping, additionally in its escaped forms (lower case and upper case
     * hex digits). Matching properties are collected per containing page.
     * Pages without any matching property are dropped from the result, as are
     * pages rejected by the predicate set with
     * {@link #setPredicate(AbstractResourcePredicate)}. The predicate can only
     * be evaluated when the info still holds its page, i.e. it is not applied
     * to hollow results.
     *
     * @param resolver the resource resolver
     * @param path the path to search for
     * @return reference infos keyed by the path of the referencing page; empty
     *         if <code>path</code> is <code>null</code> or no page manager is
     *         available from the resolver
     */
    public Map<String, Info> search(ResourceResolver resolver, String path) {
        if (path == null) {
            return Collections.emptyMap();
        }
        Map<String, Info> infos = new HashMap<String, Info>();
        PageManager manager = resolver.adaptTo(PageManager.class);
        if (manager == null) {
            // without a page manager no result could be mapped to a page
            log.warn("Unable to search for references to {}: no PageManager available", path);
            return infos;
        }
        String root = searchRoot.equals("/") ? "" : searchRoot;

        // the unescaped path is searched as a quoted phrase
        Pattern pattern = getSearchPattern(path);
        String qPath = escapeIllegalXpathSearchChars(path);
        String query = String.format("%s//*[jcr:contains(., '\"%s\"')]", root, qPath);
        search(resolver, manager, infos, pattern, query);

        // also search for the escaped path (lower case and upper case hex
        // characters), if it contains special characters; every distinct
        // variant is queried only once
        Set<String> searched = new HashSet<String>();
        searched.add(path);
        String[] escapedVariants = {Text.escapePath(path), escapePathUsingUpperCaseHex(path)};
        for (String escPath : escapedVariants) {
            if (!searched.add(escPath)) {
                // same as the plain path or as a variant already queried
                continue;
            }
            Pattern escPattern = getSearchPattern(escPath);
            String qEscPath = escapeIllegalXpathSearchChars(escPath);
            query = String.format("%s//*[jcr:contains(., '%s')]", root, qEscPath);
            search(resolver, manager, infos, escPattern, query);
        }

        // filter out those infos that are empty or rejected by the predicate
        for (Iterator<Map.Entry<String, Info>> entries = infos.entrySet().iterator(); entries.hasNext();) {
            Info info = entries.next().getValue();
            boolean drop = info.getProperties().isEmpty();
            if (!drop && resourcePredicate != null && info.page != null) {
                Resource pageResource = info.page.adaptTo(Resource.class);
                drop = pageResource != null && !resourcePredicate.evaluate(pageResource);
            }
            if (drop) {
                entries.remove();
            }
        }
        return infos;
    }

    /**
     * Percent-escapes the UTF-8 bytes of the given string. A byte is copied
     * unchanged if it is flagged in {@link Text#URISaveEx} and is not the
     * '%' character itself; every other byte is written as '%' followed by
     * two upper case hex digits.
     *
     * @param string the string to escape
     * @return the escaped string
     */
    private String escapePathUsingUpperCaseHex(String string) {
        final char[] upperHex = "0123456789ABCDEF".toCharArray();
        final char marker = '%';
        final BitSet unescaped = Text.URISaveEx;

        final byte[] encoded;
        try {
            encoded = string.getBytes("utf-8");
        } catch (UnsupportedEncodingException e) {
            throw new InternalError(e.toString());
        }

        final StringBuilder result = new StringBuilder(encoded.length);
        for (int i = 0; i < encoded.length; i++) {
            // treat the byte as an unsigned value
            final int unsigned = encoded[i] & 0xff;
            final boolean mustEscape = unsigned == marker || !unescaped.get(unsigned);
            if (mustEscape) {
                result.append(marker);
                result.append(upperHex[(unsigned >> 4) & 0x0f]);
                result.append(upperHex[unsigned & 0x0f]);
            } else {
                result.append((char) unsigned);
            }
        }
        return result.toString();
    }

    /**
     * Runs the given XPath query and records, per containing page, the paths
     * of all STRING and NAME properties of the found resources whose value
     * matches the pattern.
     *
     * Resources outside of a page and resources that cannot be adapted to a
     * JCR node are skipped. A failing query is logged and leaves
     * <code>infos</code> untouched.
     *
     * @param resolver resolver used to execute the query
     * @param manager page manager used to find the containing page of a hit
     * @param infos result map keyed by page path; updated in place
     * @param pattern pattern a property value has to match
     * @param query the XPath query to execute
     */
    private void search(ResourceResolver resolver,
                        PageManager manager, Map<String, Info> infos,
                        Pattern pattern, String query) {
        log.debug("Searching for references using: {}", query);
        final Iterator<Resource> iter;
        try {
            iter = resolver.findResources(query, Query.XPATH);
        } catch (SlingException e) {
            log.warn("error finding resources", e);
            return;
        }

        final int limit = getMaxReferencesPerPage();

        // process the search results and build the result set
        while (iter.hasNext()) {
            Resource res = iter.next();
            Page page = manager.getContainingPage(res);
            if (page == null) {
                continue;
            }
            Info info = infos.get(page.getPath());
            if (info == null) {
                info = new Info(page, hollow);
                infos.put(page.getPath(), info);
            }
            Node node = res.adaptTo(Node.class);
            if (node == null) {
                // adaptTo may return null, e.g. for resources not backed by JCR
                log.debug("Skipping resource that is not backed by a node: {}", res.getPath());
                continue;
            }
            try {
                // analyze the properties of the resource
                for (PropertyIterator pIter = node.getProperties(); pIter.hasNext();) {
                    // don't add properties any further if limit is exceeded
                    if (limit >= 0 && info.getProperties().size() >= limit) {
                        break;
                    }
                    Property p = pIter.nextProperty();
                    // only check string and name properties
                    if (p.getType() != PropertyType.STRING && p.getType() != PropertyType.NAME) {
                        continue;
                    }
                    if (p.isMultiple()) {
                        // one matching value is enough to record the property
                        for (Value v : p.getValues()) {
                            if (pattern.matcher(v.getString()).find()) {
                                info.addProperty(p.getPath());
                                break;
                            }
                        }
                    } else if (pattern.matcher(p.getString()).find()) {
                        info.addProperty(p.getPath());
                    }
                }
            } catch (RepositoryException e) {
                log.error("Error while accessing " + res.getPath(), e);
            }
        }
    }

    /**
     * Adjusts all references to <code>path</code> to <code>destination</code>
     * in the pages specified by <code>refPaths</code>. If {@link #isExact()}
     * is <code>true</code> only exact references to <code>path</code> are
     * adjusted, otherwise all references to child resources are adjusted, too.
     *
     * For a page the references below its content resource are adjusted; if
     * the given path is not a page (or the page has no content) the resource
     * itself is used. Every page containing an adjusted property is touched
     * once, and the changes are committed through the resolver at the end.
     * Errors on individual paths are logged and do not abort the operation.
     *
     * The resource resolver needs to have sufficient permissions (i.e. jcr:read and
     * rep:alterProperties) on the nodes containing references.
     *
     * @param resolver resolver to operate on.
     * @param path source path
     * @param destination destination path
     * @param refPaths paths of pages to be adjusted
     * @return collection of path to properties that were adjusted
     */
    public Collection<String> adjustReferences(ResourceResolver resolver,
                                               String path, String destination,
                                               String[] refPaths) {
        if (refPaths == null) {
            return Collections.emptyList();
        }
        Set<String> adjusted = new HashSet<String>();
        for (String p : refPaths) {
            Resource r = resolver.getResource(p);
            if (r == null) {
                log.warn("Given path does not address a resource: {}", p);
                continue;
            }

            Resource content = null;
            Page page = r.adaptTo(Page.class);
            if (page == null) {
                log.warn("Given path does not address a page: {}", p);
            } else {
                content = page.getContentResource();
                if (content == null) {
                    log.warn("Given page does not have content: {}", p);
                }
            }

            // Can be a case of complex asset: fall back to the resource itself
            Node node = content != null ? content.adaptTo(Node.class) : r.adaptTo(Node.class);
            if (node == null) {
                log.warn("Unable to adjust references, no node available for: {}", p);
            } else {
                try {
                    adjusted.addAll(adjustReferences(node, path, destination));
                    // CQ5-32249 - touch the pages outside of this loop to avoid an inefficient O(n^2) algorithm
                } catch (RepositoryException e) {
                    log.error("Error while adjusting references on " + r.getPath(), e);
                }
            }

            // #22466 - moving pages does not take into account usergenerated content
            try {
                String adjustedUGCPath = adjustUserGeneratedContentReference(r, path, destination);
                if (adjustedUGCPath != null) {
                    adjusted.add(adjustedUGCPath);
                    log.info("Adjusted user generated content path {}.", adjustedUGCPath);
                }
            } catch (Exception e) {
                log.error("Error while adjusting user generated references on " + r.getPath(), e);
            }
        }

        // CQ5-32249 - do the touch calls after the above loop to avoid an inefficient O(n^2) algorithm
        PageManager pm = resolver.adaptTo(PageManager.class);
        if (pm == null) {
            log.warn("Unable to update last modified on adjusted pages: no PageManager available");
        } else {
            // #38440 - touch the pages that were adjusted; several adjusted
            // properties may live on the same page, which is touched only once
            Set<String> touched = new HashSet<String>();
            for (String pathOfAdjusted : adjusted) {
                Resource adjustedResource = resolver.getResource(pathOfAdjusted);
                if (adjustedResource == null) {
                    continue;
                }
                Page adjustedPage = pm.getContainingPage(adjustedResource);
                if (adjustedPage == null || !touched.add(adjustedPage.getPath())) {
                    continue;
                }
                try {
                    pm.touch(adjustedPage.adaptTo(Node.class), true, Calendar.getInstance(), false);
                } catch (WCMException e) {
                    log.error("could not update last modified on adjusted page [{}]: ", adjustedPage.getPath(), e);
                }
            }
        }

        // save changes
        try {
            resolver.commit();
        } catch (PersistenceException e) {
            log.error("Error while adjusting references.", e);
        }

        return adjusted;
    }

    /**
     * Adjusts all references to <code>path</code> to <code>destination</code>
     * in the properties below the specified <code>node</code>. If {@link #isExact()}
     * is <code>true</code> only exact references to <code>path</code> are
     * adjusted, otherwise all references to child resources are adjusted, too.
     *
     * This is a shorthand for the five argument variant called with
     * <code>shallow = false</code> and without any excluded properties.
     *
     * @param node (content) node to traverse
     * @param path source path
     * @param destination destination path
     * @throws RepositoryException if an error during repository access occurs
     * @return collection of paths to properties that were adjusted
     */
    public Collection<String> adjustReferences(Node node, String path, String destination)
            throws RepositoryException {
        return adjustReferences(node, path, destination, false, Collections.<String>emptySet());
    }

    /**
     * Adjusts all references to path to destination
     * in the properties below the specified node. If {@link #isExact()}
     * is true only exact references to path are
     * adjusted, otherwise all references to child resources are adjusted, too.
     *
     * @param node (content) node to adjust
     * @param path source path
     * @param destination destination path
     * @param shallow if true child nodes are not traversed
     * @param excludedProperties a set of excluded property names
     * @throws RepositoryException if an error during repository access occurs
     * @return collection of paths to properties that were adjusted
     */
    public Collection adjustReferences(Node node, String path,
                                               String destination, boolean shallow,
                                               Set excludedProperties)
            throws RepositoryException {
        Set adjusted = new HashSet();
        Pattern pattern = getReplacementPattern(path);
        String escDest = Text.escapePath(destination);
        for (PropertyIterator iter = node.getProperties(); iter.hasNext();) {
            Property p = iter.nextProperty();
            // only check string, path and name properties
            if (!excludedProperties.contains(p.getName()) &&
                    p.getType() == PropertyType.STRING || p.getType() == PropertyType.NAME || p.getType() == PropertyType.PATH) {
                if (p.isMultiple()) {
                    Value[] values = p.getValues();
                    boolean modified = false;
                    for (int i=0; inull if not matches
     */
    protected String rewrite(String value, String from, Pattern p, String to, String escTo) {
        // Rewrites a single property value. Direct (unescaped) references to
        // 'from' are replaced by 'to'; otherwise every match of pattern 'p' in
        // the value is replaced using 'escTo'. Returns null if nothing matches.

        // first check unescaped direct property value
        if (value.equals(from)) {
            return to;
        }
        if (value.startsWith(from + "#") || value.startsWith(from + ".html")) {
            // #34356 - handle cases where the path is followed by
            // an anchor or the .html suffix
            // TODO: There should be a less brittle way of doing this!
            return to + value.substring(from.length());
        }
        if (!exact && value.startsWith(from + "/")) {
            // reference to a child resource, only adjusted in non-exact mode
            return to + value.substring(from.length());
        }

        Matcher m = p.matcher(value);
        if (!m.find()) {
            return null;
        }
        // groups 1 and 3 of the pattern are kept, what lies between them is
        // replaced. escTo is quoted so that '$' and '\' in it are inserted
        // literally instead of being read as group references or escapes.
        String repl = "$1" + Matcher.quoteReplacement(escTo) + "$3";
        // StringBuffer is what Matcher.appendReplacement accepts on all Java versions
        StringBuffer ret = new StringBuffer();
        do {
            m.appendReplacement(ret, repl);
        } while (m.find());
        m.appendTail(ret);
        return ret.toString();
    }

    /**
     * Escapes characters that are illegal in an XPath search expression.
     * The characters <code>! ( ) : ^ [ ] { } ? " \ ~</code> and the space
     * are prefixed with a backslash, a single quote is doubled. All other
     * characters are copied unchanged.
     *
     * @param s the string to encode
     * @return the escaped string
     */
    public static String escapeIllegalXpathSearchChars(String s) {
        final char[] chars = s.toCharArray();
        final StringBuilder escaped = new StringBuilder(chars.length);
        for (int i = 0; i < chars.length; i++) {
            final char current = chars[i];
            switch (current) {
                case '!':
                case '(':
                case ')':
                case ':':
                case '^':
                case '[':
                case ']':
                case '{':
                case '}':
                case '?':
                case '"':
                case '\\':
                case ' ':
                case '~':
                    // protect the character with a backslash
                    escaped.append('\\');
                    break;
                case '\'':
                    // a single quote is escaped by doubling it
                    escaped.append('\'');
                    break;
                default:
                    break;
            }
            escaped.append(current);
        }
        return escaped.toString();
    }

    /**
     * Holds information about the search results for a single page: the
     * page's title and path, the paths of the properties found on it and,
     * unless the info is hollow, the page object itself.
     */
    public static final class Info {

        /** the page, or <code>null</code> for a hollow info */
        private final Page page;
        private final String pageTitle;
        private final String pagePath;

        /** paths of the properties recorded for the page */
        private final Set<String> properties = new HashSet<String>();

        /**
         * Creates a non-hollow info for the given page.
         *
         * @param page the page; must not be <code>null</code>
         */
        public Info(Page page) {
            this(page, false);
        }

        /**
         * Creates an info for the given page. Title and path are always
         * taken from the page.
         *
         * @param page the page; must not be <code>null</code>
         * @param hollow if <code>true</code> the page object itself is not kept
         */
        public Info(Page page, boolean hollow) {
            this.page = hollow ? null : page;
            this.pageTitle = page.getTitle();
            this.pagePath = page.getPath();
        }

        /**
         * Records the path of a property.
         *
         * @param path the property path
         */
        private void addProperty(String path) {
            properties.add(path);
        }

        /**
         * @return the page, or <code>null</code> if this info is hollow
         */
        public Page getPage() {
            return page;
        }

        /**
         * @return the recorded property paths (the internal set, not a copy)
         */
        public Set<String> getProperties() {
            return properties;
        }

        /**
         * @return the title of the page
         */
        public String getPageTitle() {
            return pageTitle;
        }

        /**
         * @return the path of the page
         */
        public String getPagePath() {
            return pagePath;
        }
    }

}