org.apache.jackrabbit.oak.plugins.segment.SegmentBufferWriter Maven / Gradle / Ivy

/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

package org.apache.jackrabbit.oak.plugins.segment;

import static com.google.common.base.Charsets.UTF_8;
import static com.google.common.base.Preconditions.checkArgument;
import static com.google.common.base.Preconditions.checkNotNull;
import static com.google.common.base.Preconditions.checkState;
import static com.google.common.collect.Lists.newArrayList;
import static com.google.common.collect.Maps.newLinkedHashMap;
import static com.google.common.collect.Sets.newHashSet;
import static java.lang.System.arraycopy;
import static java.lang.System.currentTimeMillis;
import static java.lang.System.identityHashCode;
import static org.apache.jackrabbit.oak.plugins.segment.RecordWriters.newValueWriter;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.MAX_SEGMENT_SIZE;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.RECORD_ID_BYTES;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.SEGMENT_REFERENCE_LIMIT;
import static org.apache.jackrabbit.oak.plugins.segment.Segment.align;

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * This class encapsulates the state of a segment being written. It provides methods
 * for writing primitive data types and for pre-allocating buffer space in the current
 * segment. Should the current segment not have enough space left, it is flushed
 * and a fresh one is allocated.
 * <p>
 * The common usage pattern is:
 * <pre>
 *    SegmentBufferWriter writer = ...
 *    writer.prepare(...)  // allocate buffer
 *    writer.writeXYZ(...)
 * </pre>
 * <p>
 * The behaviour of this class is undefined should the pre-allocated buffer be
 * overrun by calling any of the write methods.
 */
class SegmentBufferWriter {
    private static final Logger LOG = LoggerFactory.getLogger(SegmentBufferWriter.class);

    /**
     * The set of root records (i.e. ones not referenced by other records)
     * in this segment.
     */
    private final Map<RecordId, RecordType> roots = newLinkedHashMap();

    /**
     * Identifiers of the external blob references stored in this segment.
     */
    private final List<RecordId> blobrefs = newArrayList();

    private final SegmentStore store;

    /**
     * Version of the segment storage format.
     */
    private final SegmentVersion version;

    /**
     * Id of this writer.
     */
    private final String wid;

    private final SegmentTracker tracker;

    /**
     * The segment write buffer, filled from the end to the beginning
     * (see OAK-629).
     */
    private byte[] buffer;

    private Segment segment;

    /**
     * The number of bytes already written (or allocated). Counted from
     * the end of the buffer.
     */
    private int length;

    /**
     * Current write position within the buffer. Grows up when raw data
     * is written, but shifted downwards by the prepare methods.
     */
    private int position;

    public SegmentBufferWriter(SegmentStore store, SegmentVersion version, String wid)
            throws IOException {
        this.store = store;
        this.version = version;
        this.wid = (wid == null ? "w-" + identityHashCode(this) : wid);
        this.tracker = store.getTracker();
        this.buffer = createNewBuffer(version);
        newSegment(this.wid);
    }
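    // An informal sketch of the write buffer while records are being added
    // (records are written back to front, see OAK-629). After each prepare()
    // call the invariant position == buffer.length - length holds:
    //
    //   +--------------+---------------------+------------------------+
    //   | fixed header | free space          | records, newest first  |
    //   +--------------+---------------------+------------------------+
    //   0                                    ^ position               ^ buffer.length
    //                                        |<------- length ------->|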
    /**
     * Allocate a new segment and write the segment meta data.
     * The segment meta data is a string of the format
     * {@code "{wid=W,sno=S,gc=G,t=T}"} where:
     * <ul>
     * <li>{@code W} is the writer id {@code wid},</li>
     * <li>{@code S} is a unique, increasing sequence number corresponding to the
     * allocation order of the segments in this store,</li>
     * <li>{@code G} is the garbage collection generation (i.e. the number of
     * compaction cycles that have been run),</li>
     * <li>{@code T} is a time stamp according to {@link System#currentTimeMillis()}.</li>
     * </ul>
     * The segment meta data is guaranteed to be the first string record in a segment.
     *
     * @param wid the writer id
     */
    private void newSegment(String wid) throws IOException {
        this.segment = new Segment(tracker, buffer);
        String metaInfo =
                "{\"wid\":\"" + wid + '"' +
                ",\"sno\":" + tracker.getNextSegmentNo() +
                ",\"gc\":" + tracker.getCompactionMap().getGeneration() +
                ",\"t\":" + currentTimeMillis() + "}";
        byte[] data = metaInfo.getBytes(UTF_8);
        newValueWriter(data.length, data).write(this);
    }
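    // For illustration only, with made-up values: the meta data record of the
    // third segment allocated by writer "w-123" in garbage collection
    // generation 0 would read
    //
    //   {"wid":"w-123","sno":3,"gc":0,"t":1432214400000}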
    static byte[] createNewBuffer(SegmentVersion v) {
        byte[] buffer = new byte[Segment.MAX_SEGMENT_SIZE];
        buffer[0] = '0';
        buffer[1] = 'a';
        buffer[2] = 'K';
        buffer[3] = SegmentVersion.asByte(v);
        buffer[4] = 0; // reserved
        buffer[5] = 0; // refcount
        return buffer;
    }

    public void writeByte(byte value) {
        buffer[position++] = value;
    }

    public void writeShort(short value) {
        buffer[position++] = (byte) (value >> 8);
        buffer[position++] = (byte) value;
    }

    public void writeInt(int value) {
        buffer[position++] = (byte) (value >> 24);
        buffer[position++] = (byte) (value >> 16);
        buffer[position++] = (byte) (value >> 8);
        buffer[position++] = (byte) value;
    }

    public void writeLong(long value) {
        writeInt((int) (value >> 32));
        writeInt((int) value);
    }

    /**
     * Writes a record id and marks it as referenced (i.e. removes it from
     * the set of unreferenced root records).
     *
     * @param listId the record id
     */
    public void writeRecordId(RecordId listId) {
        checkNotNull(listId);
        roots.remove(listId);

        int offset = listId.getOffset();
        checkState(0 <= offset && offset < MAX_SEGMENT_SIZE);
        checkState(offset == align(offset, 1 << Segment.RECORD_ALIGN_BITS));

        buffer[position++] = (byte) getSegmentRef(listId.getSegmentId());
        buffer[position++] = (byte) (offset >> (8 + Segment.RECORD_ALIGN_BITS));
        buffer[position++] = (byte) (offset >> Segment.RECORD_ALIGN_BITS);
    }

    private int getSegmentRef(SegmentId segmentId) {
        int refCount = segment.getRefCount();
        if (refCount > SEGMENT_REFERENCE_LIMIT) {
            throw new SegmentOverflowException(
                    "Segment cannot have more than 255 references " + segment.getSegmentId());
        }
        for (int index = 0; index < refCount; index++) {
            if (segmentId.equals(segment.getRefId(index))) {
                return index;
            }
        }

        ByteBuffer.wrap(buffer, refCount * 16, 16)
                .putLong(segmentId.getMostSignificantBits())
                .putLong(segmentId.getLeastSignificantBits());
        buffer[Segment.REF_COUNT_OFFSET] = (byte) refCount;
        return refCount;
    }

    public void writeBytes(byte[] data, int offset, int length) {
        arraycopy(data, offset, buffer, position, length);
        position += length;
    }

    public void addBlobRef(RecordId blobId) {
        blobrefs.add(blobId);
    }
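    // A worked example of the record id encoding used by writeRecordId()
    // above, assuming Segment.RECORD_ALIGN_BITS is 2 (4-byte record
    // alignment): a record at offset 0x1fffc in the segment with reference
    // index 7 is serialized into the three bytes
    //
    //   0x07         segment reference index (from getSegmentRef)
    //   0x7f 0xff    0x1fffc >> 2 == 0x7fff, stored big endian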
    /**
     * Adds a segment header to the buffer and writes a segment to the segment
     * store. This is done automatically (called from prepare) when there is not
     * enough space for a record. It can also be called explicitly.
     */
    public void flush() throws IOException {
        if (length > 0) {
            int refcount = segment.getRefCount();

            int rootcount = roots.size();
            buffer[Segment.ROOT_COUNT_OFFSET] = (byte) (rootcount >> 8);
            buffer[Segment.ROOT_COUNT_OFFSET + 1] = (byte) rootcount;

            int blobrefcount = blobrefs.size();
            buffer[Segment.BLOBREF_COUNT_OFFSET] = (byte) (blobrefcount >> 8);
            buffer[Segment.BLOBREF_COUNT_OFFSET + 1] = (byte) blobrefcount;

            length = align(
                    refcount * 16 + rootcount * 3 + blobrefcount * 2 + length,
                    16);
            checkState(length <= buffer.length);

            int pos = refcount * 16;
            if (pos + length <= buffer.length) {
                // the whole segment fits into the space *after* the referenced
                // segment identifiers we've already written, so we can safely
                // copy those bits ahead even if concurrent code is still
                // reading from that part of the buffer
                arraycopy(buffer, 0, buffer, buffer.length - length, pos);
                pos += buffer.length - length;
            } else {
                // this might leave some empty space between the header and
                // the record data, but this case only occurs when the
                // segment is >252kB in size and the maximum overhead is <<4kB,
                // which is acceptable
                length = buffer.length;
            }

            for (Map.Entry<RecordId, RecordType> entry : roots.entrySet()) {
                int offset = entry.getKey().getOffset();
                buffer[pos++] = (byte) entry.getValue().ordinal();
                buffer[pos++] = (byte) (offset >> (8 + Segment.RECORD_ALIGN_BITS));
                buffer[pos++] = (byte) (offset >> Segment.RECORD_ALIGN_BITS);
            }

            for (RecordId blobref : blobrefs) {
                int offset = blobref.getOffset();
                buffer[pos++] = (byte) (offset >> (8 + Segment.RECORD_ALIGN_BITS));
                buffer[pos++] = (byte) (offset >> Segment.RECORD_ALIGN_BITS);
            }

            SegmentId segmentId = segment.getSegmentId();
            int segmentOffset = buffer.length - length;
            LOG.debug("Writing data segment {} ({} bytes)", segmentId, length);
            store.writeSegment(segmentId, buffer, segmentOffset, length);

            // Keep this segment in memory as it's likely to be accessed soon
            ByteBuffer data;
            if (segmentOffset > 4096) {
                data = ByteBuffer.allocate(length);
                data.put(buffer, segmentOffset, length);
                data.rewind();
            } else {
                data = ByteBuffer.wrap(buffer, segmentOffset, length);
            }

            // It is important to put the segment into the cache only *after* it has
            // been written to the store: as soon as it is in the cache it becomes
            // eligible for eviction, which might lead to SegmentNotFoundExceptions
            // when it is not yet in the store at that point.
            tracker.setSegment(segmentId, new Segment(tracker, segmentId, data));

            buffer = createNewBuffer(version);
            roots.clear();
            blobrefs.clear();
            length = 0;
            position = buffer.length;
            newSegment(wid);
        }
    }
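    // An informal sketch of the segment layout produced by flush() above
    // (sizes taken from the code; the exact header offsets are defined in
    // Segment):
    //
    //   +--------------------------+--------------+----------------+---------------+
    //   | header and segment refs  | root table,  | blobref table, | record data,  |
    //   | in 16-byte slots         | 3 bytes each | 2 bytes each   | back to front |
    //   +--------------------------+--------------+----------------+---------------+
    //
    // The total length is aligned to 16 bytes and the segment is written to
    // the store as a single (segmentOffset, length) slice of the buffer.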

    /**
     * Before writing a record (which are written backwards, from the end of the
     * file to the beginning), this method is called, to ensure there is enough
     * space. A new segment is also created if there is not enough space in the
     * segment lookup table or elsewhere.
     * <p>
     * This method does not actually write into the segment, just allocates the
     * space (flushing the segment if needed and starting a new one), and sets
     * the write position (records are written from the end to the beginning,
     * but within a record from left to right).
     *
     * @param type the record type (only used for root records)
     * @param size the size of the record, excluding the size used for the
     *             record ids
     * @param ids  the record ids
     * @return a new record id
     */
    public RecordId prepare(RecordType type, int size, Collection<RecordId> ids)
            throws IOException {
        checkArgument(size >= 0);
        checkNotNull(ids);

        int idCount = ids.size();
        int recordSize = align(size + idCount * RECORD_ID_BYTES, 1 << Segment.RECORD_ALIGN_BITS);

        // First compute the header and segment sizes based on the assumption
        // that *all* identifiers stored in this record point to previously
        // unreferenced segments.
        int refCount = segment.getRefCount() + idCount;
        int blobRefCount = blobrefs.size() + 1;
        int rootCount = roots.size() + 1;
        int headerSize = refCount * 16 + rootCount * 3 + blobRefCount * 2;
        int segmentSize = align(headerSize + recordSize + length, 16);

        // If the size estimate looks too big, recompute it with a more
        // accurate refCount value. We skip doing this when possible to
        // avoid the somewhat expensive list and set traversals.
        if (segmentSize > buffer.length - 1
                || refCount > Segment.SEGMENT_REFERENCE_LIMIT) {
            refCount -= idCount;

            Set<SegmentId> segmentIds = newHashSet();

            // The set of old record ids in this segment
            // that were previously root record ids, but will no longer be,
            // because the record to be written references them.
            // This needs to be a set, because the list of ids can
            // potentially reference the same record multiple times
            Set<RecordId> notRoots = new HashSet<RecordId>();

            for (RecordId recordId : ids) {
                SegmentId segmentId = recordId.getSegmentId();
                if (!(segmentId.equals(segment.getSegmentId()))) {
                    segmentIds.add(segmentId);
                } else if (roots.containsKey(recordId)) {
                    notRoots.add(recordId);
                }
            }
            rootCount -= notRoots.size();

            if (!segmentIds.isEmpty()) {
                for (int refid = 1; refid < refCount; refid++) {
                    segmentIds.remove(segment.getRefId(refid));
                }
                refCount += segmentIds.size();
            }

            headerSize = refCount * 16 + rootCount * 3 + blobRefCount * 2;
            segmentSize = align(headerSize + recordSize + length, 16);
        }

        if (segmentSize > buffer.length - 1
                || blobRefCount > 0xffff
                || rootCount > 0xffff
                || refCount > Segment.SEGMENT_REFERENCE_LIMIT) {
            flush();
        }

        length += recordSize;
        position = buffer.length - length;
        checkState(position >= 0);

        RecordId id = new RecordId(segment.getSegmentId(), position);
        roots.put(id, type);
        return id;
    }

}
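Putting it together, here is a minimal usage sketch. It is illustrative only: SegmentBufferWriter is package private (production code reaches it through SegmentWriter), the store below is a placeholder as in the class javadoc, and the chosen version and record type are assumptions made for the example.

// Illustrative sketch only: store wiring omitted, version and record type assumed.
SegmentStore store = ...;  // e.g. a FileStore
SegmentBufferWriter writer =
        new SegmentBufferWriter(store, SegmentVersion.V_11, "w-example");

// Reserve an aligned 8-byte record with no record id references ...
RecordId id = writer.prepare(RecordType.BLOCK, 8, Collections.<RecordId>emptyList());

// ... fill exactly the reserved bytes (overrunning them is undefined) ...
writer.writeLong(42L);

// ... and push the buffered segment to the store when done.
writer.flush();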




