// org.apache.jackrabbit.oak.plugins.document.util.Utils Maven / Gradle / Ivy
//
// Show more of this group Show more artifacts with this name
// Show all versions of oak-core Show documentation
// There is a newer version: 1.62.0
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.jackrabbit.oak.plugins.document.util;

import java.io.Closeable;
import java.io.IOException;
import java.nio.charset.Charset;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.sql.Timestamp;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import java.util.SortedMap;
import java.util.TreeMap;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.AbstractIterator;

import org.apache.commons.codec.binary.Hex;
import org.apache.jackrabbit.oak.commons.PathUtils;
import org.apache.jackrabbit.oak.commons.StringUtils;
import org.apache.jackrabbit.oak.plugins.document.Collection;
import org.apache.jackrabbit.oak.plugins.document.DocumentStore;
import org.apache.jackrabbit.oak.plugins.document.NodeDocument;
import org.apache.jackrabbit.oak.plugins.document.Revision;
import org.apache.jackrabbit.oak.plugins.document.RevisionVector;
import org.apache.jackrabbit.oak.plugins.document.StableRevisionComparator;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.collect.Iterables.transform;
import static org.apache.jackrabbit.oak.plugins.document.NodeDocument.isDeletedEntry;

/**
 * Utility methods.
 */
public class Utils {
    /** Logger shared by the static helpers of this class. */
    private static final Logger LOG = LoggerFactory.getLogger(Utils.class);

    /**
     * Approximate length of a Revision string. Measured once, at class
     * initialization, from a revision built with the current time; it is used
     * to pre-size the buffer in {@code getPreviousPathFor}.
     */
    private static final int REVISION_LENGTH =
            new Revision(System.currentTimeMillis(), 0, 0).toString().length();

    /**
     * The length of path (in characters), whose UTF-8 representation can not
     * possibly be too large to be used for the primary key for the document
     * store. Read from the system property {@code oak.pathShort}; defaults to
     * 165.
     */
    static final int PATH_SHORT = Integer.getInteger("oak.pathShort", 165);

    /**
     * The maximum length of the parent path, in bytes. If the parent path is
     * longer, then the id of a document is no longer the path, but the hash of
     * the parent, and then the node name. Read from the system property
     * {@code oak.pathLong}; defaults to 350.
     */
    static final int PATH_LONG = Integer.getInteger("oak.pathLong", 350);

    /**
     * The maximum size a node name, in bytes. This is only a problem for long path.
     * Read from the system property {@code oak.nodeNameLimit}; defaults to 150.
     */
    public static final int NODE_NAME_LIMIT = Integer.getInteger("oak.nodeNameLimit", 150);

    /** Charset used to measure path lengths in bytes and to hash parent paths. */
    private static final Charset UTF_8 = Charset.forName("UTF-8");

    /**
     * A predicate for property and _deleted names: it accepts a name when
     * {@link #isPropertyName(String)} or
     * {@code NodeDocument.isDeletedEntry(String)} accepts it.
     * <p>
     * Note: the parameter carries {@code @Nullable}, but
     * {@link #isPropertyName(String)} dereferences its argument, so a
     * {@code null} input results in a {@link NullPointerException}.
     */
    public static final Predicate<String> PROPERTY_OR_DELETED = new Predicate<String>() {
        @Override
        public boolean apply(@Nullable String input) {
            return Utils.isPropertyName(input) || isDeletedEntry(input);
        }
    };

    /**
     * Returns a fresh {@link String} instance with the same characters as the
     * argument, so that the result does not carry unnecessary baggage (shared
     * strings).
     *
     * This is only needed for older versions of Java (before Java 7 update 6).
     * See also
     * http://mail.openjdk.java.net/pipermail/core-libs-dev/2012-May/010257.html
     *
     * @param x the string to copy; must not be {@code null}
     * @return a new string equal to {@code x}
     */
    public static String unshareString(String x) {
        // the explicit copy is the whole point; do not "simplify" to return x
        String copy = new String(x);
        return copy;
    }

    /**
     * Returns the depth of a path, defined as the number of {@code '/'}
     * characters it contains, with the root path {@code "/"} having depth 0.
     *
     * @param path the path; must not be {@code null}
     * @return 0 for {@code "/"}, otherwise the number of slashes in the path
     */
    public static int pathDepth(String path) {
        if (path.length() == 1 && path.charAt(0) == '/') {
            return 0;
        }
        int slashes = 0;
        // hop from slash to slash instead of inspecting every character
        for (int at = path.indexOf('/'); at >= 0; at = path.indexOf('/', at + 1)) {
            slashes++;
        }
        return slashes;
    }

    /**
     * Estimates the memory footprint, in bytes, of a map and its content.
     * <p>
     * Keys are counted as 32 bytes when they are {@link Revision}s and
     * otherwise by the estimated size of their string form. Values may be
     * {@code String}, {@code Long}, {@code Boolean}, {@code Integer}, nested
     * {@code Map} (estimated recursively) or {@code null}. On top of that a
     * fixed overhead of 112 bytes for the map and 64 bytes per entry is added.
     * The total is accumulated as a {@code long} and capped at
     * {@link Integer#MAX_VALUE}.
     *
     * @param map the map to measure, may be {@code null}
     * @return the estimated size in bytes; 0 if {@code map} is {@code null}
     * @throws IllegalArgumentException if a value has an unsupported type
     */
    public static int estimateMemoryUsage(Map<?, ?> map) {
        if (map == null) {
            return 0;
        }
        long size = 0;

        for (Entry<?, ?> e : map.entrySet()) {
            if (e.getKey() instanceof Revision) {
                size += 32;
            } else {
                size += StringUtils.estimateMemoryUsage(e.getKey().toString());
            }
            Object o = e.getValue();
            if (o instanceof String) {
                size += StringUtils.estimateMemoryUsage((String) o);
            } else if (o instanceof Long) {
                size += 16;
            } else if (o instanceof Boolean) {
                size += 8;
            } else if (o instanceof Integer) {
                size += 8;
            } else if (o instanceof Map) {
                // widen before adding so a saturated nested estimate cannot overflow
                size += 8 + (long) estimateMemoryUsage((Map<?, ?>) o);
            } else if (o == null) {
                // zero
            } else {
                throw new IllegalArgumentException("Can't estimate memory usage of " + o);
            }
        }

        // overhead for map object
        // TreeMap (80) + unmodifiable wrapper (32)
        size += 112;
        // 64 bytes per entry
        size += (long) map.size() * 64;

        if (size > Integer.MAX_VALUE) {
            LOG.debug("Estimated memory footprint larger than Integer.MAX_VALUE: {}.", size);
            size = Integer.MAX_VALUE;
        }
        return (int) size;
    }

    /**
     * Escapes a property name so that it can be used as a key in a document.
     * <ul>
     *     <li>the empty name becomes {@code "_"}</li>
     *     <li>a leading {@code '_'} or {@code '$'} is prefixed with {@code '_'}</li>
     *     <li>{@code '.'} becomes {@code "\d"} and {@code '\'} becomes {@code "\\"}</li>
     * </ul>
     * When nothing needs escaping the very same string instance is returned.
     *
     * @param propertyName the name to escape; must not be {@code null}
     * @return the escaped name
     * @see #unescapePropertyName(String)
     */
    public static String escapePropertyName(String propertyName) {
        final int length = propertyName.length();
        if (length == 0) {
            return "_";
        }
        // stays null until the first character that actually needs escaping
        StringBuilder escaped = null;
        int pos = 0;
        final char first = propertyName.charAt(0);
        if (first == '_' || first == '$') {
            escaped = new StringBuilder(length + 1).append('_').append(first);
            pos = 1;
        }
        while (pos < length) {
            final char ch = propertyName.charAt(pos);
            if (ch == '.' || ch == '\\') {
                if (escaped == null) {
                    // first escape: carry over the untouched prefix
                    escaped = new StringBuilder(propertyName.substring(0, pos));
                }
                escaped.append('\\').append(ch == '.' ? 'd' : '\\');
            } else if (escaped != null) {
                escaped.append(ch);
            }
            pos++;
        }
        if (escaped == null) {
            return propertyName;
        }
        return escaped.toString();
    }

    /**
     * Reverses {@link #escapePropertyName(String)}.
     * <p>
     * A leading {@code '_'} is dropped when the key is exactly {@code "_"} or
     * starts with {@code "__"} or {@code "_$"}. After that every backslash is
     * removed and the character following it is kept, except that
     * {@code "\d"} is turned into {@code '.'}. When the key contains no
     * backslash no buffer is allocated.
     *
     * @param key the escaped key; must not be {@code null}
     * @return the unescaped property name
     * @throws StringIndexOutOfBoundsException if the key ends with a single,
     *          unpaired backslash
     */
    public static String unescapePropertyName(String key) {
        int length = key.length();
        boolean hasEscapePrefix = key.startsWith("__")
                || key.startsWith("_$")
                || key.equals("_");
        if (hasEscapePrefix) {
            key = key.substring(1);
            length--;
        }
        // stays null as long as no backslash has been seen
        StringBuilder plain = null;
        int idx = 0;
        while (idx < length) {
            char ch = key.charAt(idx);
            if (ch != '\\') {
                if (plain != null) {
                    plain.append(ch);
                }
                idx++;
                continue;
            }
            if (plain == null) {
                plain = new StringBuilder(key.substring(0, idx));
            }
            // consume the escape pair: backslash + escaped character
            char escaped = key.charAt(idx + 1);
            plain.append(escaped == 'd' ? '.' : escaped);
            idx += 2;
        }
        if (plain == null) {
            return key;
        }
        return plain.toString();
    }

    /**
     * Tells whether a document key denotes a property: either it does not
     * start with an underscore, or it starts with one of the escape prefixes
     * {@code "__"} and {@code "_$"}.
     *
     * @param key the key to check; must not be {@code null}
     * @return {@code true} if the key is a property name
     */
    public static boolean isPropertyName(String key) {
        if (!key.startsWith("_")) {
            return true;
        }
        // a leading underscore only counts as a property when it is an escape prefix
        return key.startsWith("__") || key.startsWith("_$");
    }

    /**
     * Builds the document id for a path.
     * <p>
     * For regular paths the id is {@code <depth>:<path>}. For long paths (see
     * {@link #isLongPath(String)}) it is
     * {@code <depth>:h<hex SHA-256 of the UTF-8 parent path>/<node name>}.
     *
     * @param path the path
     * @return the id of the document for the path
     * @throws IllegalArgumentException if {@link #isLongPath(String)} rejects
     *          the node name as too long
     */
    public static String getIdFromPath(String path) {
        if (!isLongPath(path)) {
            return Utils.pathDepth(path) + ":" + path;
        }
        MessageDigest sha256;
        try {
            sha256 = MessageDigest.getInstance("SHA-256");
        } catch (NoSuchAlgorithmException e) {
            throw new RuntimeException(e);
        }
        String parentPath = PathUtils.getParentPath(path);
        String nodeName = PathUtils.getName(path);
        byte[] parentHash = sha256.digest(parentPath.getBytes(UTF_8));
        return Utils.pathDepth(path) + ":h" + Hex.encodeHexString(parentHash) + "/" + nodeName;
    }

    /**
     * Returns the parent id for the given id, if it can be determined.
     *
     * <p>{@code null} is returned in the following cases:</p>
     * <ul>
     *     <li>the id is from a long path</li>
     *     <li>the id is for the root path</li>
     *     <li>the id is for an invalid path</li>
     * </ul>
     *
     * @param id id for which the parent id needs to be determined
     * @return the parent id, or {@code null} if it cannot be determined
     */
    @CheckForNull
    public static String getParentId(String id){
        if (Utils.isIdFromLongPath(id)) {
            // the parent path cannot be recovered from a hash
            return null;
        }
        String path = Utils.getPathFromId(id);
        boolean hasParent = PathUtils.isValid(path) && !PathUtils.denotesRoot(path);
        return hasParent ? Utils.getIdFromPath(PathUtils.getParentPath(path)) : null;
    }

    /**
     * Tells whether a path is too long to be used verbatim in a document id.
     * <p>
     * A path is long when it has at least {@link #PATH_SHORT} characters and
     * the UTF-8 encoding of its parent path has at least {@link #PATH_LONG}
     * bytes.
     *
     * @param path the path to check
     * @return {@code true} if the path is long
     * @throws IllegalArgumentException if the path is long and the UTF-8
     *          encoding of its node name exceeds {@link #NODE_NAME_LIMIT} bytes
     */
    public static boolean isLongPath(String path) {
        // cheap character count first: the common short path never needs its parent
        if (path.length() >= PATH_SHORT) {
            int parentBytes = PathUtils.getParentPath(path).getBytes(UTF_8).length;
            if (parentBytes >= PATH_LONG) {
                int nameBytes = PathUtils.getName(path).getBytes(UTF_8).length;
                if (nameBytes > NODE_NAME_LIMIT) {
                    throw new IllegalArgumentException("Node name is too long: " + path);
                }
                return true;
            }
        }
        return false;
    }
    
    /**
     * Tells whether an id was created from a long path, i.e. whether the
     * character right after the first {@code ':'} is {@code 'h'}.
     *
     * @param id the id to check; must not be {@code null}
     * @return {@code true} if the id contains a hashed parent path
     */
    public static boolean isIdFromLongPath(String id) {
        int colon = id.indexOf(':');
        // startsWith with an offset is false when nothing follows the colon
        return colon != -1 && id.startsWith("h", colon + 1);
    }

    /**
     * Extracts the path from an id by returning everything after the first
     * {@code ':'}. An id without a colon is returned unchanged.
     *
     * @param id the id; must not be {@code null}
     * @return the path part of the id
     * @throws IllegalArgumentException if the id was created from a long path
     *          (see {@link #isIdFromLongPath(String)})
     */
    public static String getPathFromId(String id) {
        if (!isIdFromLongPath(id)) {
            return id.substring(id.indexOf(':') + 1);
        }
        throw new IllegalArgumentException("Id is hashed: " + id);
    }

    /**
     * Parses the depth prefix of an id, i.e. the integer in front of the first
     * {@code ':'}.
     *
     * @param id the id; must not be {@code null}
     * @return the depth encoded in the id
     * @throws IllegalArgumentException if the id has no colon or the prefix is
     *          not an integer
     */
    public static int getDepthFromId(String id) throws IllegalArgumentException {
        int colon = id.indexOf(':');
        if (colon >= 0) {
            try {
                return Integer.parseInt(id.substring(0, colon));
            } catch (NumberFormatException ignored) {
                // reported below with a message that names the whole id
            }
        }
        throw new IllegalArgumentException("Invalid id: " + id);
    }

    /**
     * Builds the path of a previous document:
     * {@code p<path>/<revision>/<height>}, where a slash is only inserted
     * after {@code path} when it does not already end with one.
     *
     * @param path the absolute path of the main document
     * @param r the revision to append
     * @param height the height to append
     * @return the path of the previous document
     * @throws IllegalArgumentException if {@code path} is not absolute
     */
    public static String getPreviousPathFor(String path, Revision r, int height) {
        if (!PathUtils.isAbsolute(path)) {
            throw new IllegalArgumentException("path must be absolute: " + path);
        }
        // 3 = leading 'p' + separator + room for a short height
        StringBuilder prev = new StringBuilder(path.length() + REVISION_LENGTH + 3)
                .append("p")
                .append(path);
        int last = prev.length() - 1;
        if ('/' != prev.charAt(last)) {
            prev.append('/');
        }
        r.toStringBuilder(prev).append("/").append(height);
        return prev.toString();
    }

    /**
     * Returns the id of a previous document, i.e. the id for the path built by
     * {@link #getPreviousPathFor(String, Revision, int)}.
     *
     * @param path the absolute path of the main document
     * @param r the revision
     * @param height the height
     * @return the id of the previous document
     */
    public static String getPreviousIdFor(String path, Revision r, int height) {
        String previousPath = getPreviousPathFor(path, r, height);
        return getIdFromPath(previousPath);
    }

    /**
     * Determines if the passed id belongs to a previous doc, i.e. whether it
     * has a non-empty prefix before the first {@code ':'} and the character
     * right after that colon is {@code 'p'}.
     *
     * @param id id to check; must not be {@code null}
     * @return true if the id belongs to a previous doc
     */
    public static boolean isPreviousDocId(String id){
        int colon = id.indexOf(':');
        // startsWith with an offset is false when nothing follows the colon
        return colon > 0 && id.startsWith("p", colon + 1);
    }

    /**
     * Determines if the passed id belongs to a leaf level previous doc, i.e.
     * a previous doc id (see {@link #isPreviousDocId(String)}) that ends with
     * {@code "/0"}.
     *
     * @param id id to check; must not be {@code null}
     * @return true if the id belongs to a leaf level previous doc
     */
    public static boolean isLeafPreviousDocId(String id){
        if (!id.endsWith("/0")) {
            return false;
        }
        return isPreviousDocId(id);
    }

    /**
     * Deep copy of a map that may contain map values.
     * <p>
     * Every value that is itself a {@link Map} is copied recursively into a
     * new {@link TreeMap}. When that value is a {@link SortedMap} its
     * comparator is reused; otherwise the copy uses natural key ordering, so
     * the keys of such a nested map must be mutually comparable. All other
     * values are put into the target as they are.
     *
     * @param source the source map
     * @param target the target map
     * @param <K> the type of the map key
     */
    public static <K> void deepCopyMap(Map<K, Object> source, Map<K, Object> target) {
        for (Entry<K, Object> e : source.entrySet()) {
            Object value = e.getValue();
            Comparator<? super K> comparator = null;
            if (value instanceof SortedMap) {
                @SuppressWarnings("unchecked")
                SortedMap<K, Object> map = (SortedMap<K, Object>) value;
                comparator = map.comparator();
            }
            if (value instanceof Map) {
                @SuppressWarnings("unchecked")
                Map<K, Object> old = (Map<K, Object>) value;
                // a null comparator makes the TreeMap fall back to natural ordering
                Map<K, Object> c = new TreeMap<K, Object>(comparator);
                deepCopyMap(old, c);
                value = c;
            }
            target.put(e.getKey(), value);
        }
    }

    /**
     * Returns the lower key limit to retrieve the children of the given
     * path. The limit is the id of a hypothetical child named {@code "a"}
     * with its last character removed.
     *
     * @param path a path.
     * @return the lower key limit.
     */
    public static String getKeyLowerLimit(String path) {
        String childId = getIdFromPath(PathUtils.concat(path, "a"));
        // drop the placeholder name again, keeping the trailing slash
        return childId.substring(0, childId.length() - 1);
    }

    /**
     * Returns the upper key limit to retrieve the children of the given
     * path. The limit is the id of a hypothetical child named {@code "z"}
     * with its last two characters replaced by {@code "0"}.
     *
     * @param path a path.
     * @return the upper key limit.
     */
    public static String getKeyUpperLimit(String path) {
        String childId = getIdFromPath(PathUtils.concat(path, "z"));
        // '0' is the character that directly follows '/' in character order
        return childId.substring(0, childId.length() - 2) + "0";
    }

    /**
     * Returns the parent id extracted from {@code fromKey}, which is usually
     * constructed using {@link #getKeyLowerLimit(String)}.
     *
     * @param fromKey key used as start key in queries
     * @return the parent id, or {@code null} if {@link #getParentId(String)}
     *          cannot determine it
     */
    @CheckForNull
    public static String getParentIdFromLowerLimit(String fromKey){
        // A key that just ends with a slash (e.g. 2:/foo/) is not a proper id;
        // append a fake name so that its parent can be computed.
        String key = fromKey.endsWith("/") ? fromKey + "a" : fromKey;
        return getParentId(key);
    }

    /**
     * Tells whether a revision tagged with the given value should be
     * considered committed. Committed revisions have a tag which equals
     * {@code "c"} or starts with {@code "c-"}.
     *
     * @param tag the tag (may be null).
     * @return true if committed; false otherwise.
     */
    public static boolean isCommitted(@Nullable String tag) {
        if (tag == null) {
            return false;
        }
        return "c".equals(tag) || tag.startsWith("c-");
    }

    /**
     * Resolves the commit revision for the given revision {@code rev} and
     * the associated commit tag. When the tag starts with {@code "c-"} the
     * revision parsed from the remainder of the tag is returned, otherwise
     * {@code rev} itself.
     *
     * @param rev a revision.
     * @param tag the associated commit tag.
     * @return the actual commit revision for rev.
     * @throws NullPointerException if {@code tag} is {@code null}
     */
    @Nonnull
    public static Revision resolveCommitRevision(@Nonnull Revision rev,
                                                 @Nonnull String tag) {
        if (checkNotNull(tag).startsWith("c-")) {
            // skip the "c-" prefix
            return Revision.fromString(tag.substring(2));
        }
        return rev;
    }

    /**
     * Closes the given object if it is of type {@link java.io.Closeable};
     * any other object (including {@code null}) is ignored. It is mostly
     * used to close Iterator/Iterables which are backed by say DBCursor.
     * An {@link IOException} thrown while closing is logged as a warning and
     * not propagated.
     *
     * @param obj object to close
     */
    public static void closeIfCloseable(Object obj){
        if (!(obj instanceof Closeable)) {
            return;
        }
        try {
            ((Closeable) obj).close();
        } catch (IOException e) {
            LOG.warn("Error occurred while closing {}", obj, e);
        }
    }

    /**
     * Provides a readable string for the given timestamp: the text form of
     * {@link Timestamp}, padded with zeros and cut to 23 characters so that
     * the fraction always shows milliseconds.
     *
     * @param timestamp milliseconds since the epoch
     * @return the formatted timestamp
     */
    public static String timestampToString(long timestamp){
        // Timestamp#toString may print fewer than three fractional digits;
        // pad before trimming to a fixed width.
        String padded = new Timestamp(timestamp).toString() + "00";
        return padded.substring(0, 23);
    }

    /**
     * Returns the revision with the newer timestamp or {@code null} if both
     * revisions are {@code null}. The implementation will return the first
     * revision if both have the same timestamp. The comparison is delegated
     * to {@link StableRevisionComparator#INSTANCE}.
     *
     * @param a the first revision (or {@code null}).
     * @param b the second revision (or {@code null}).
     * @return the revision with the newer timestamp.
     */
    @CheckForNull
    public static Revision max(@Nullable Revision a, @Nullable Revision b) {
        return max(a, b, StableRevisionComparator.INSTANCE);
    }

    /**
     * Returns the revision which is considered more recent or {@code null} if
     * both revisions are {@code null}. If only one revision is {@code null}
     * the other one is returned. The implementation will return the first
     * revision if both are considered equal. The comparison is done using the
     * provided comparator.
     *
     * @param a the first revision (or {@code null}).
     * @param b the second revision (or {@code null}).
     * @param c the comparator; only consulted when both revisions are
     *          non-{@code null}.
     * @return the revision considered more recent.
     */
    @CheckForNull
    public static Revision max(@Nullable Revision a,
                               @Nullable Revision b,
                               @Nonnull Comparator<? super Revision> c) {
        if (a == null) {
            return b;
        } else if (b == null) {
            return a;
        }
        // ">=" keeps the first argument on a tie
        return c.compare(a, b) >= 0 ? a : b;
    }

    /**
     * Returns the revision with the older timestamp or {@code null} if both
     * revisions are {@code null}. The implementation will return the first
     * revision if both have the same timestamp. The comparison is delegated
     * to {@link StableRevisionComparator#INSTANCE}.
     *
     * @param a the first revision (or {@code null}).
     * @param b the second revision (or {@code null}).
     * @return the revision with the older timestamp.
     */
    @CheckForNull
    public static Revision min(@Nullable Revision a, @Nullable Revision b) {
        return min(a, b, StableRevisionComparator.INSTANCE);
    }

    /**
     * Returns the revision which is considered older or {@code null} if
     * both revisions are {@code null}. If only one revision is {@code null}
     * the other one is returned. The implementation will return the first
     * revision if both are considered equal. The comparison is done using the
     * provided comparator.
     *
     * @param a the first revision (or {@code null}).
     * @param b the second revision (or {@code null}).
     * @param c the comparator; only consulted when both revisions are
     *          non-{@code null}.
     * @return the revision considered older.
     */
    @CheckForNull
    public static Revision min(@Nullable Revision a,
                               @Nullable Revision b,
                               @Nonnull Comparator<? super Revision> c) {
        if (a == null) {
            return b;
        } else if (b == null) {
            return a;
        }
        // "<=" keeps the first argument on a tie
        return c.compare(a, b) <= 0 ? a : b;
    }

    /**
     * Default batch size for paging through a document store. Used by
     * {@code getAllDocuments} and by the {@code getSelectedDocuments}
     * overload that takes no explicit batch size.
     */
    private static final int DEFAULT_BATCH_SIZE = 100;

    /**
     * Returns an {@link Iterable} over all {@link NodeDocument}s in the given
     * store. The returned {@linkplain Iterable} does not guarantee a consistent
     * view on the store. It may return documents that have been added to the
     * store after this method had been called. Documents are fetched in
     * batches of the default batch size.
     *
     * @param store
     *            a {@link DocumentStore}.
     * @return an {@link Iterable} over all documents in the store.
     */
    public static Iterable<NodeDocument> getAllDocuments(final DocumentStore store) {
        // no indexed property: the start value is not used for the query
        return internalGetSelectedDocuments(store, null, 0, DEFAULT_BATCH_SIZE);
    }

    /**
     * Returns the root node document of the given document store. The
     * document is looked up with
     * {@link DocumentStore#find(Collection, String)}, which means the
     * implementation is allowed to return a cached version of the document.
     * The document is therefore not guaranteed to be up-to-date.
     *
     * @param store a document store.
     * @return the root document.
     * @throws IllegalStateException if there is no root document.
     */
    @Nonnull
    public static NodeDocument getRootDocument(@Nonnull DocumentStore store) {
        NodeDocument root = store.find(Collection.NODES, Utils.getIdFromPath("/"));
        if (root != null) {
            return root;
        }
        throw new IllegalStateException("missing root document");
    }

    /**
     * Returns an {@link Iterable} over all {@link NodeDocument}s in the given
     * store matching a condition on an indexed property. The returned
     * {@link Iterable} does not guarantee a consistent view on the store.
     * It may return documents that have been added to the store after this
     * method had been called.
     *
     * @param store
     *            a {@link DocumentStore}.
     * @param indexedProperty the name of the indexed property.
     * @param startValue the lower bound value for the indexed property
     *                   (inclusive).
     * @param batchSize number of documents to fetch at once
     * @return an {@link Iterable} over all documents in the store matching the
     *         condition
     * @throws IllegalArgumentException if {@code batchSize} is less than 2.
     */
    public static Iterable<NodeDocument> getSelectedDocuments(
            DocumentStore store, String indexedProperty, long startValue, int batchSize) {
        return internalGetSelectedDocuments(store, indexedProperty, startValue, batchSize);
    }

    /**
     * Like {@link #getSelectedDocuments(DocumentStore, String, long, int)} with
     * a default {@code batchSize}.
     *
     * @param store
     *            a {@link DocumentStore}.
     * @param indexedProperty the name of the indexed property.
     * @param startValue the lower bound value for the indexed property
     *                   (inclusive).
     * @return an {@link Iterable} over all documents in the store matching the
     *         condition
     */
    public static Iterable<NodeDocument> getSelectedDocuments(
            DocumentStore store, String indexedProperty, long startValue) {
        return internalGetSelectedDocuments(store, indexedProperty, startValue, DEFAULT_BATCH_SIZE);
    }

    /**
     * Creates a lazily paging {@link Iterable} over the documents in the
     * nodes collection of the given store.
     * <p>
     * Every iterator obtained from the result queries the store in batches of
     * {@code batchSize}, using the id of the last returned document as the
     * lower key of the next query. The first batch is fetched as soon as the
     * iterator is created. Iteration ends when a query yields no documents.
     *
     * @param store the document store to query.
     * @param indexedProperty the name of the indexed property to filter on, or
     *                        {@code null} to return all documents.
     * @param startValue the start value passed to the store for the indexed
     *                   property; ignored when {@code indexedProperty} is
     *                   {@code null}.
     * @param batchSize number of documents to fetch at once.
     * @return an {@link Iterable} over the matching documents.
     * @throws IllegalArgumentException if {@code batchSize} is less than 2.
     */
    private static Iterable<NodeDocument> internalGetSelectedDocuments(
            final DocumentStore store, final String indexedProperty,
            final long startValue, final int batchSize) {
        if (batchSize < 2) {
            throw new IllegalArgumentException("batchSize must be > 1");
        }
        return new Iterable<NodeDocument>() {
            @Override
            public Iterator<NodeDocument> iterator() {
                return new AbstractIterator<NodeDocument>() {

                    // lower key for the next query; advanced as documents are returned
                    private String startId = NodeDocument.MIN_ID_VALUE;

                    private Iterator<NodeDocument> batch = nextBatch();

                    @Override
                    protected NodeDocument computeNext() {
                        // read next batch if necessary
                        if (!batch.hasNext()) {
                            batch = nextBatch();
                        }

                        NodeDocument doc;
                        if (batch.hasNext()) {
                            doc = batch.next();
                            // remember current id
                            startId = doc.getId();
                        } else {
                            doc = endOfData();
                        }
                        return doc;
                    }

                    // NOTE(review): paging relies on the store treating the lower
                    // key as exclusive, otherwise the last document of a batch
                    // would be returned twice - confirm against DocumentStore#query.
                    private Iterator<NodeDocument> nextBatch() {
                        List<NodeDocument> result = indexedProperty == null ? store.query(Collection.NODES, startId,
                                NodeDocument.MAX_ID_VALUE, batchSize) : store.query(Collection.NODES, startId,
                                NodeDocument.MAX_ID_VALUE, indexedProperty, startValue, batchSize);
                        return result.iterator();
                    }
                };
            }
        };
    }

    /**
     * Tells whether {@code path} represents one of oak's internal paths,
     * that is, whether it contains {@code "/:"} - a path element that follows
     * a slash and starts with a colon.
     *
     * @param path the path to check.
     * @return {@code true} if the path is hidden.
     */
    public static boolean isHiddenPath(@Nonnull String path) {
        return path.indexOf("/:") >= 0;
    }

    /**
     * Transforms the given {@link Iterable} from {@link String} to
     * {@link StringValue} elements. The {@link Iterable} must not have
     * {@code null} values. The result is a view created with Guava's
     * {@code Iterables.transform}, so elements are wrapped on iteration.
     *
     * @param values the strings to wrap.
     * @return the wrapped strings.
     */
    public static Iterable<StringValue> asStringValueIterable(
            @Nonnull Iterable<String> values) {
        return transform(values, new Function<String, StringValue>() {
            @Override
            public StringValue apply(String input) {
                return new StringValue(input);
            }
        });
    }

    /**
     * Transforms the given paths into ids using {@link #getIdFromPath(String)}.
     * The result is a view created with Guava's {@code Iterables.transform},
     * so ids are computed on iteration.
     *
     * @param paths the paths to convert.
     * @return the ids for the given paths.
     */
    public static Iterable<String> pathToId(@Nonnull Iterable<String> paths) {
        return transform(paths, new Function<String, String>() {
            @Override
            public String apply(String input) {
                return getIdFromPath(input);
            }
        });
    }

    /**
     * Returns the highest timestamp of all the passed external revisions.
     * A revision is considered external if the clusterId is different from the
     * passed {@code localClusterId}.
     *
     * @param revisions the revisions to consider.
     * @param localClusterId the id of the local cluster node.
     * @return the highest timestamp or {@link Long#MIN_VALUE} if none of the
     *          revisions is external.
     */
    public static long getMaxExternalTimestamp(Iterable<Revision> revisions,
                                               int localClusterId) {
        long maxTime = Long.MIN_VALUE;
        for (Revision r : revisions) {
            // revisions created by the local cluster node do not count
            if (r.getClusterId() != localClusterId) {
                maxTime = Math.max(maxTime, r.getTimestamp());
            }
        }
        return maxTime;
    }

    /**
     * Returns the given number instance as a {@code Long}. A {@code Long}
     * argument is returned as is; any other number is converted with
     * {@link Number#longValue()}.
     *
     * @param n a number or {@code null}.
     * @return the number converted to a {@code Long} or {@code null}
     *      if {@code n} is {@code null}.
     */
    public static Long asLong(@Nullable Number n) {
        if (n instanceof Long) {
            return (Long) n;
        }
        if (n != null) {
            return Long.valueOf(n.longValue());
        }
        return null;
    }

    /**
     * Returns the minimum timestamp to use for a query for child documents that
     * have been modified between {@code fromRev} and {@code toRev}.
     * <p>
     * Both vectors are first combined with {@code minRevisions} via
     * {@code pmax}; the result is the smallest timestamp among the revisions
     * by which the two combined vectors differ, or {@link Long#MAX_VALUE} if
     * they do not differ.
     *
     * @param fromRev the from revision.
     * @param toRev the to revision.
     * @param minRevisions the minimum revisions of foreign cluster nodes. These
     *                     are derived from the startTime of a cluster node.
     * @return the minimum timestamp.
     */
    public static long getMinTimestampForDiff(@Nonnull RevisionVector fromRev,
                                              @Nonnull RevisionVector toRev,
                                              @Nonnull RevisionVector minRevisions) {
        // make sure both sides have an entry for every known cluster node
        RevisionVector lower = fromRev.pmax(minRevisions);
        RevisionVector upper = toRev.pmax(minRevisions);
        // only entries that changed are relevant
        RevisionVector changedFrom = lower.difference(upper);
        RevisionVector changedTo = upper.difference(lower);
        long oldest = Long.MAX_VALUE;
        for (Revision r : changedFrom) {
            oldest = Math.min(r.getTimestamp(), oldest);
        }
        for (Revision r : changedTo) {
            oldest = Math.min(r.getTimestamp(), oldest);
        }
        return oldest;
    }

    /**
     * Returns true if all the revisions in {@code a} are greater than or equal
     * to their counterparts in {@code b}. If {@code b} contains revisions
     * for cluster nodes that are not present in {@code a}, return false.
     * Implemented by checking that {@code a.pmax(b)} equals {@code a}.
     *
     * @param a the revision vector expected to be at least as recent.
     * @param b the revision vector to compare against.
     * @return true if all the revisions in {@code a} are at least
     * as recent as their counterparts in {@code b}
     */
    public static boolean isGreaterOrEquals(@Nonnull RevisionVector a,
                                            @Nonnull RevisionVector b) {
        RevisionVector merged = a.pmax(b);
        return merged.equals(a);
    }

    /**
     * Wraps the given iterable and aborts iteration over elements when the
     * predicate on an element evaluates to {@code false}. The element that
     * fails the predicate is not returned, and no further elements are read
     * from the wrapped iterator.
     *
     * @param iterable the iterable to wrap.
     * @param p the predicate.
     * @param <T> the type of the elements.
     * @return the aborting iterable.
     * @throws NullPointerException if {@code iterable} or {@code p} is
     *          {@code null}.
     */
    public static <T> Iterable<T> abortingIterable(final Iterable<T> iterable,
                                                   final Predicate<? super T> p) {
        checkNotNull(iterable);
        checkNotNull(p);
        return new Iterable<T>() {
            @Override
            public Iterator<T> iterator() {
                final Iterator<T> it = iterable.iterator();
                return new AbstractIterator<T>() {
                    @Override
                    protected T computeNext() {
                        if (it.hasNext()) {
                            T next = it.next();
                            if (p.apply(next)) {
                                return next;
                            }
                        }
                        // either exhausted or the predicate rejected an element
                        return endOfData();
                    }
                };
            }
        };
    }
}