org.apache.jackrabbit.oak.segment.SegmentWriter

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */
package org.apache.jackrabbit.oak.segment;

import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkElementIndex;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkPositionIndex;
import static com.google.common.base.Preconditions.checkPositionIndexes;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Iterables.addAll;
import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Lists.newArrayListWithCapacity;
import static com.google.common.collect.Lists.newArrayListWithExpectedSize;
import static com.google.common.collect.Lists.partition;
import static com.google.common.collect.Maps.newHashMap;
import static com.google.common.io.ByteStreams.read;
import static java.util.Arrays.asList;
import static java.util.Collections.emptyMap;
import static java.util.Collections.nCopies;
import static org.apache.jackrabbit.oak.api.Type.BINARIES;
import static org.apache.jackrabbit.oak.api.Type.BINARY;
import static org.apache.jackrabbit.oak.api.Type.NAME;
import static org.apache.jackrabbit.oak.api.Type.NAMES;
import static org.apache.jackrabbit.oak.api.Type.STRING;
import static org.apache.jackrabbit.oak.segment.MapRecord.BUCKETS_PER_LEVEL;
import static org.apache.jackrabbit.oak.segment.RecordWriters.newNodeStateWriter;

import java.io.ByteArrayInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.SequenceInputStream;
import java.util.Arrays;
import java.util.Collection;
import java.util.List;
import java.util.Map;

import javax.annotation.CheckForNull;
import javax.annotation.Nonnull;
import javax.annotation.Nullable;
import javax.jcr.PropertyType;

import com.google.common.base.Supplier;
import com.google.common.base.Suppliers;
import com.google.common.io.Closeables;
import org.apache.commons.math3.stat.descriptive.SynchronizedDescriptiveStatistics;
import org.apache.jackrabbit.oak.api.Blob;
import org.apache.jackrabbit.oak.api.PropertyState;
import org.apache.jackrabbit.oak.api.Type;
import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean;
import org.apache.jackrabbit.oak.plugins.memory.ModifiedNodeState;
import org.apache.jackrabbit.oak.segment.WriteOperationHandler.WriteOperation;
import org.apache.jackrabbit.oak.spi.blob.BlobStore;
import org.apache.jackrabbit.oak.spi.state.ChildNodeEntry;
import org.apache.jackrabbit.oak.spi.state.DefaultNodeStateDiff;
import org.apache.jackrabbit.oak.spi.state.NodeState;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * A {@code SegmentWriter} converts nodes, properties, values, etc. to records and
 * persists them with the help of a {@link WriteOperationHandler}.
 * All public methods of this class are thread safe if and only if the
 * {@link WriteOperationHandler} passed to the constructor is thread safe.
 */
public class SegmentWriter {
    private static final Logger LOG = LoggerFactory.getLogger(SegmentWriter.class);

    static final int BLOCK_SIZE = 1 << 12; // 4kB

    @Nonnull
    private final WriterCacheManager cacheManager;

    @Nonnull
    private final SegmentStore store;

    @Nonnull
    private final SegmentReader reader;

    @CheckForNull
    private final BlobStore blobStore;

    @Nonnull
    private final WriteOperationHandler writeOperationHandler;

    @Nonnull
    private final BinaryReferenceConsumer binaryReferenceConsumer;

    /**
     * Create a new instance of a {@code SegmentWriter}. Note the thread safety properties
     * pointed out in the class comment.
     *
     * @param store      store to write to
     * @param reader     segment reader for the {@code store}
     * @param blobStore  the blob store or {@code null} for inlined blobs
     * @param cacheManager  cache manager instance for the de-duplication caches used by this writer
     * @param writeOperationHandler  handler for write operations.
     */
    public SegmentWriter(@Nonnull SegmentStore store,
                         @Nonnull SegmentReader reader,
                         @Nullable BlobStore blobStore,
                         @Nonnull WriterCacheManager cacheManager,
                         @Nonnull WriteOperationHandler writeOperationHandler,
                         @Nonnull BinaryReferenceConsumer binaryReferenceConsumer) {
        this.store = checkNotNull(store);
        this.reader = checkNotNull(reader);
        this.blobStore = blobStore;
        this.cacheManager = checkNotNull(cacheManager);
        this.writeOperationHandler = checkNotNull(writeOperationHandler);
        this.binaryReferenceConsumer = checkNotNull(binaryReferenceConsumer);
    }
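
    /*
     * Illustrative usage sketch (only the constructor signature above is taken
     * from this class): "store", "reader", "cacheManager", "handler" and
     * "consumer" stand for collaborators created by the surrounding segment
     * store setup.
     *
     *   SegmentWriter segmentWriter = new SegmentWriter(
     *           store, reader, null, cacheManager, handler, consumer);
     *   // a null blobStore means binaries are inlined into segments
     */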

    public void flush() throws IOException {
        writeOperationHandler.flush();
    }

    /**
     * @return  Statistics for the string deduplication cache or {@code null} if not available.
     */
    @CheckForNull
    public CacheStatsMBean getStringCacheStats() {
        return cacheManager.getStringCacheStats();
    }

    /**
     * @return  Statistics for the template deduplication cache or {@code null} if not available.
     */
    @CheckForNull
    public CacheStatsMBean getTemplateCacheStats() {
        return cacheManager.getTemplateCacheStats();
    }

    /**
     * @return  Statistics for the node deduplication cache or {@code null} if not available.
     */
    @CheckForNull
    public CacheStatsMBean getNodeCacheStats() {
        return cacheManager.getNodeCacheStats();
    }

    /**
     * Write a map record.
     * @param base      base map relative to which the {@code changes} are applied or
     *                  {@code null} for the empty map.
     * @param changes   the changed mapping to apply to the {@code base} map.
     * @return          the map record written
     * @throws IOException
     */
    @Nonnull
    public MapRecord writeMap(@Nullable final MapRecord base,
                              @Nonnull final Map<String, RecordId> changes)
    throws IOException {
        RecordId mapId = writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return with(writer).writeMap(base, changes);
            }
        });
        return new MapRecord(reader, mapId);
    }
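
    /*
     * Illustrative usage sketch: writing a map record on top of the empty map.
     * The key "child" is hypothetical; any previously written RecordId can be
     * used as a value. Relies on the newHashMap static import above.
     *
     *   Map<String, RecordId> changes = newHashMap();
     *   changes.put("child", segmentWriter.writeString("value"));
     *   MapRecord map = segmentWriter.writeMap(null, changes); // null base = empty map
     */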

    /**
     * Write a list record.
     * @param list  the list to write.
     * @return      the record id of the list written
     * @throws IOException
     */
    @Nonnull
    public RecordId writeList(@Nonnull final List<RecordId> list) throws IOException {
        return writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return with(writer).writeList(list);
            }
        });
    }
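
    /*
     * Illustrative usage sketch: writing a list record from previously obtained
     * record ids ("id1" and "id2" are hypothetical). Relies on the asList
     * static import above.
     *
     *   RecordId listId = segmentWriter.writeList(asList(id1, id2));
     */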

    /**
     * Write a string record.
     * @param string  the string to write.
     * @return         the record id of the string written.
     * @throws IOException
     */
    @Nonnull
    public RecordId writeString(@Nonnull final String string) throws IOException {
        return writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return with(writer).writeString(string);
            }
        });
    }
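
    /*
     * Illustrative usage sketch: string records go through the string
     * de-duplication cache, so writing the same value twice is expected to
     * yield the same record id (subject to cache eviction).
     *
     *   RecordId first = segmentWriter.writeString("hello");
     *   RecordId second = segmentWriter.writeString("hello"); // expected: first.equals(second)
     */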

    /**
     * Write a blob (as a list of block records).
     * @param blob  blob to write
     * @return      The segment blob written
     * @throws IOException
     */
    @Nonnull
    public SegmentBlob writeBlob(@Nonnull final Blob blob) throws IOException {
        RecordId blobId = writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return with(writer).writeBlob(blob);
            }
        });
        return new SegmentBlob(blobStore, blobId);
    }

    /**
     * Writes a block record containing the given block of bytes.
     *
     * @param bytes source buffer
     * @param offset offset within the source buffer
     * @param length number of bytes to write
     * @return block record identifier
     */
    @Nonnull
    public RecordId writeBlock(@Nonnull final byte[] bytes, final int offset, final int length)
    throws IOException {
        return writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return with(writer).writeBlock(bytes, offset, length);
            }
        });
    }
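
    /*
     * Illustrative usage sketch: writing a single block record from a byte
     * array (the sample payload is hypothetical; UTF_8 is imported above).
     *
     *   byte[] data = "sample payload".getBytes(UTF_8);
     *   RecordId blockId = segmentWriter.writeBlock(data, 0, data.length);
     */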

    /**
     * Writes a stream value record. The given stream is consumed and closed by
     * this method.
     *
     * @param stream stream to be written
     * @return blob for the passed {@code stream}
     * @throws IOException if the input stream could not be read or the output could not be written
     */
    @Nonnull
    public SegmentBlob writeStream(@Nonnull final InputStream stream) throws IOException {
        RecordId blobId = writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return with(writer).writeStream(stream);
            }
        });
        return new SegmentBlob(blobStore, blobId);
    }
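
    /*
     * Illustrative usage sketch: the stream is consumed and closed by this
     * method; ByteArrayInputStream (imported above) serves only as an example
     * source and "data" is a hypothetical payload.
     *
     *   SegmentBlob blob = segmentWriter.writeStream(new ByteArrayInputStream(data));
     */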

    /**
     * Write a property.
     * @param state  the property to write
     * @return       the property state written
     * @throws IOException
     */
    @Nonnull
    public SegmentPropertyState writeProperty(@Nonnull final PropertyState state)
    throws IOException {
        RecordId id = writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return with(writer).writeProperty(state);
            }
        });
        return new SegmentPropertyState(reader, id, state.getName(), state.getType());
    }
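
    /*
     * Illustrative usage sketch, assuming the PropertyStates factory from
     * org.apache.jackrabbit.oak.plugins.memory (not imported in this file):
     *
     *   PropertyState source = PropertyStates.createProperty("p", "v", STRING);
     *   SegmentPropertyState written = segmentWriter.writeProperty(source);
     */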

    /**
     * Write a node state.
     * @param state node state to write
     * @return segment node state equal to {@code state}
     * @throws IOException
     */
    @Nonnull
    public SegmentNodeState writeNode(@Nonnull final NodeState state) throws IOException {
        RecordId nodeId = writeOperationHandler.execute(new SegmentWriteOperation() {
            @Override
            public RecordId execute(SegmentBufferWriter writer) throws IOException {
                return new CompactionStats(writeNodeStats, compactNodeStats, false)
                        .writeNode(this, writer, state);
            }
        });
        return new SegmentNodeState(reader, this, nodeId);
    }
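
    /*
     * Illustrative usage sketch: persisting a node state and flushing the
     * write operation handler so the records reach the store ("state" is any
     * NodeState, e.g. obtained from a node builder).
     *
     *   SegmentNodeState persisted = segmentWriter.writeNode(state);
     *   segmentWriter.flush();
     */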

    /**
     * Write a node state, unless cancelled, using a dedicated write operation handler.
     * The write operation handler is automatically {@link WriteOperationHandler#flush() flushed}
     * once the node has been written successfully.
     * @param state   node state to write
     * @param writeOperationHandler  the write operation handler through which all write calls
     *                               induced by this call are routed.
     * @param cancel  supplier to signal cancellation of this write operation
     * @return segment node state equal to {@code state} or {@code null} if cancelled.
     * @throws IOException
     */
    @CheckForNull
    public SegmentNodeState writeNode(@Nonnull final NodeState state,
                                      @Nonnull WriteOperationHandler writeOperationHandler,
                                      @Nonnull Supplier<Boolean> cancel)
    throws IOException {
        try {
            RecordId nodeId = writeOperationHandler.execute(new SegmentWriteOperation(cancel) {
                @Override
                public RecordId execute(SegmentBufferWriter writer) throws IOException {
                    return new CompactionStats(writeNodeStats, compactNodeStats, true)
                            .writeNode(this, writer, state);
                }
            });
            writeOperationHandler.flush();
            return new SegmentNodeState(reader, this, nodeId);
        } catch (SegmentWriteOperation.CancelledWriteException ignore) {
            return null;
        }
    }
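
    /*
     * Illustrative usage sketch: the cancellable variant, here with a supplier
     * that never requests cancellation (Suppliers is imported above; "state"
     * and "handler" are hypothetical). A null return indicates cancellation.
     *
     *   SegmentNodeState persisted = segmentWriter.writeNode(
     *           state, handler, Suppliers.ofInstance(Boolean.FALSE));
     *   if (persisted == null) {
     *       // the write was cancelled
     *   }
     */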

    private final SynchronizedDescriptiveStatistics writeNodeStats = new SynchronizedDescriptiveStatistics();
    private final SynchronizedDescriptiveStatistics compactNodeStats = new SynchronizedDescriptiveStatistics();

    // FIXME OAK-4445: Collect write statistics: clean this up:
    // - It should be possible to switch the statistics on/off. There should be no
    // performance penalty when off.
    // - Expose via logging and/or MBean?
    // - What metrics should we collect? Use the Metrics API!?
    // - Decouple this from the SegmentWriter
    private static class CompactionStats {
        @Nonnull
        private final SynchronizedDescriptiveStatistics writeNodeStats;
        @Nonnull
        private final SynchronizedDescriptiveStatistics compactNodeStats;

        /*
         * {@code true} iff this is an explicit compaction (vs. an implicit
         * and deferred one triggered by a commit referring to an "old" base
         * state).
         */
        private final boolean isCompaction;

        /*
         * Total number of nodes in the subtree rooted at the node passed
         * to {@link #writeNode(SegmentWriteOperation, SegmentBufferWriter, NodeState)}
         */
        public int nodeCount;

        /*
         * Number of cache hits for a deferred compacted node
         */
        public int cacheHits;

        /*
         * Number of cache misses for a deferred compacted node
         */
        public int cacheMiss;

        /*
         * Number of nodes that were de-duplicated as the store already contained
         * them.
         */
        public int deDupNodes;

        /*
         * Number of nodes that actually had to be written as there was no de-duplication
         * and a cache miss (in case of a deferred compaction).
         */
        public int writesOps;

        public CompactionStats(
                @Nonnull SynchronizedDescriptiveStatistics writeNodeStats,
                @Nonnull SynchronizedDescriptiveStatistics compactNodeStats,
                boolean isCompaction) {
            this.writeNodeStats = writeNodeStats;
            this.compactNodeStats = compactNodeStats;
            this.isCompaction = isCompaction;
        }

        /*
         * The operation caused a deferred compaction iff it accessed the cache.
         */
        public boolean isDeferredCompactionOp() {
            return cacheHits + cacheMiss > 0;
        }

        @Nonnull
        public RecordId writeNode(
                @Nonnull SegmentWriteOperation op,
                @Nonnull SegmentBufferWriter writer,
                @Nonnull NodeState state)
        throws IOException {
            long t = System.nanoTime();
            try {
                return op.with(writer).with(this).writeNode(state, 0);
            } finally {
                if (isCompaction) {
                    LOG.info("Write node stats: {}", writeNodeStats);
                    LOG.info("Compact node stats: {}", compactNodeStats);
                    writeNodeStats.clear();
                    compactNodeStats.clear();
                } else {
                    if (isDeferredCompactionOp()) {
                        compactNodeStats.addValue(System.nanoTime() - t);
                        LOG.info(toString());
                    } else {
                        writeNodeStats.addValue(System.nanoTime() - t);
                    }
                }
            }
        }

        @Override
        public String toString() {
            return "NodeStats{" +
                "op=" + (isDeferredCompactionOp() ? "compact" : "write") +
                ", nodeCount=" + nodeCount +
                ", writeOps=" + writesOps +
                ", deDupNodes=" + deDupNodes +
                ", cacheHits=" + cacheHits +
                ", cacheMiss=" + cacheMiss +
                ", hitRate=" + (100*(double) cacheHits / ((double) cacheHits + (double) cacheMiss)) +
                '}';
        }
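
        /*
         * Illustrative note: with cacheHits = 75 and cacheMiss = 25, the hit
         * rate above evaluates to 100 * 75.0 / (75.0 + 25.0) = 75.0.
         */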
    }

    /**
     * This {@code WriteOperation} implementation is used internally to provide
     * context to a recursive chain of calls without having to pass the context
     * as a separate argument (a poor man's monad). As such it is not
     * thread safe.
     */
    private abstract class SegmentWriteOperation implements WriteOperation {

        /**
         * This exception is used internally to signal cancellation of a (recursive)
         * write node operation.
         */
        private class CancelledWriteException extends IOException {
            public CancelledWriteException() {
                super("Cancelled write operation");
            }
        }

        @Nonnull
        private final Supplier<Boolean> cancel;

        @CheckForNull
        private CompactionStats compactionStats;

        private SegmentBufferWriter writer;
        private RecordCache<String> stringCache;
        private RecordCache