org.apache.jackrabbit.oak.segment.data.SegmentData Maven / Gradle / Ivy

Go to download
Show more of this group Show more artifacts with this name
Show all versions of oak-segment-tar Show documentation
There is a newer version: 1.9.6
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.jackrabbit.oak.segment.data;

import java.io.IOException;
import java.io.OutputStream;
import java.nio.charset.StandardCharsets;

import org.apache.jackrabbit.oak.commons.Buffer;

/**
 * Access the data of a segment.
 * <p>
 * A segment is composed of a header and the proper data. The header has a
 * fixed- and a variable-length part.
 * <p>
 * The fixed-length part of the header contains a {@link #getSignature()
 * signature}, which is a string that uniquely identifies the rest of the
 * content as a segment; a {@link #getVersion() version}, which specifies the version of
 * the binary format used to serialize the content of the segment; a {@link
 * #getFullGeneration() full generation}, which describes the generation of the
 * segment with regards to full compaction; a {@link #getGeneration()
 * generation}, which identifies the generation of the segment with regards to
 * full or tail compaction; a {@link #isCompacted() compacted flag}, which
 * determines if the segment was written by a compaction operation; the {@link
 * #getRecordReferencesCount() number of record references}, which is the number
 * of record entries in the segment; the {@link #getSegmentReferencesCount()
 * number of segment references}, which is the number of identifiers of other
 * segments used by this segment.
 * <p>
 * The variable part of the header contains the list of segment references and
 * the list of record references. A segment reference is composed of the {@link
 * #getSegmentReferenceMsb(int) most significant bits} and the {@link
 * #getSegmentReferenceLsb(int) least significant bits} of the segment identifier. A record
 * reference is composed of a {@link #getRecordReferenceNumber(int) record
 * number}, a {@link #getRecordReferenceType(int) record type} and a {@link
 * #getRecordReferenceOffset(int) record offset}.
 * <p>
 * The most prominent use for a segment is to hold record data. Many methods of
 * this class allow access to the record data. These methods accept an integer
 * representing an absolute position pointing to the record data. The absolute
 * position, though, is virtual: it is computed on a virtual segment 256K long.
 * This offset is usually obtained by accessing the {@link
 * #getRecordReferenceOffset(int) record offset} of a record reference entry.
 * The class will normalize the offset for the actual size of the segment, which
 * can be smaller than 256K. It is acceptable to displace the offset of a record
 * reference entry by a positive amount. This can be useful to access a field of
 * a composite record saved at a specific offset.
 */
public interface SegmentData {

    /**
     * First length value that does not fit the one-byte length encoding
     * (128). Lengths below this value are stored in a single byte.
     */
    int MAX_SMALL_LENGTH_VALUE = 1 << 7;

    /**
     * First length value that does not fit the two-byte length encoding
     * (16512). Lengths from {@link #MAX_SMALL_LENGTH_VALUE} up to, but not
     * including, this value are stored in two bytes.
     */
    int MAX_MEDIUM_LENGTH_VALUE = (1 << 14) + MAX_SMALL_LENGTH_VALUE;

    /**
     * Creates a {@code SegmentData} for the given buffer by delegating to
     * {@code SegmentDataLoader.newSegmentData(Buffer)}.
     *
     * @param buffer the buffer holding the segment content.
     * @return the instance produced by the loader.
     */
    static SegmentData newSegmentData(Buffer buffer) {
        return SegmentDataLoader.newSegmentData(buffer);
    }

    /**
     * Creates a {@code SegmentData} for the given buffer by delegating to
     * {@code SegmentDataLoader.newRawSegmentData(Buffer)}.
     *
     * @param buffer the buffer holding the raw content.
     * @return the instance produced by the loader.
     */
    static SegmentData newRawSegmentData(Buffer buffer) {
        return SegmentDataLoader.newRawSegmentData(buffer);
    }

    /**
     * @return the version of the binary format used to serialize the content
     * of the segment.
     */
    byte getVersion();

    /**
     * @return the signature, a string identifying the content as a segment.
     */
    String getSignature();

    /**
     * @return the generation of the segment with regards to full compaction.
     */
    int getFullGeneration();

    /**
     * @return {@code true} if the segment was written by a compaction
     * operation.
     */
    boolean isCompacted();

    /**
     * @return the generation of the segment with regards to full or tail
     * compaction.
     */
    int getGeneration();

    /**
     * @return the number of identifiers of other segments used by this
     * segment.
     */
    int getSegmentReferencesCount();

    /**
     * @return the number of record entries in the segment.
     */
    int getRecordReferencesCount();

    /**
     * @param i index of the record reference entry.
     * @return the record number stored in that entry.
     */
    int getRecordReferenceNumber(int i);

    /**
     * @param i index of the record reference entry.
     * @return the record type stored in that entry.
     */
    byte getRecordReferenceType(int i);

    /**
     * @param i index of the record reference entry.
     * @return the record offset stored in that entry; this is the kind of
     * offset accepted by the {@code read*} methods of this interface.
     */
    int getRecordReferenceOffset(int i);

    /**
     * @param i index of the segment reference entry.
     * @return the most significant bits of the referenced segment identifier.
     */
    long getSegmentReferenceMsb(int i);

    /**
     * @param i index of the segment reference entry.
     * @return the least significant bits of the referenced segment
     * identifier.
     */
    long getSegmentReferenceLsb(int i);

    /**
     * Decodes the variable-length "length" field stored at the given offset.
     * The two highest bits of the first byte select the encoding:
     * <ul>
     * <li>{@code 0x}: the byte itself is the length (0 to 127);</li>
     * <li>{@code 10}: the low 14 bits of the short at the offset, plus
     * {@link #MAX_SMALL_LENGTH_VALUE};</li>
     * <li>{@code 11}: the low 62 bits of the long at the offset, plus
     * {@link #MAX_MEDIUM_LENGTH_VALUE}.</li>
     * </ul>
     *
     * @param recordReferenceOffset offset of the first byte of the length
     *                              field.
     * @return the decoded length.
     */
    default long readLength(int recordReferenceOffset) {
        int lead = readByte(recordReferenceOffset) & 0xff;

        // NOTE(review): masking the short/long read at the same offset
        // presumes the first byte holds their highest bits - confirm the byte
        // order used by implementations.
        switch (lead >>> 6) {
            case 0b10:
                return MAX_SMALL_LENGTH_VALUE + (readShort(recordReferenceOffset) & 0x3fff);
            case 0b11:
                return MAX_MEDIUM_LENGTH_VALUE + (readLong(recordReferenceOffset) & 0x3fffffffffffffffL);
            default:
                // Highest bit is clear: the byte is the length.
                return lead;
        }
    }

    /**
     * Reads the string record stored at the given offset. The record starts
     * with a length field as decoded by {@link #readLength(int)}. When the
     * length is below {@link #MAX_MEDIUM_LENGTH_VALUE} the UTF-8 bytes follow
     * the one- or two-byte length field and are decoded in place; otherwise
     * the eight-byte length field is followed by a record identifier, which is
     * returned in place of the string content.
     *
     * @param recordReferenceOffset offset of the string record.
     * @return the string data, carrying either the decoded string or a record
     * identifier, together with the length.
     * @throws IllegalStateException if the length is {@link Integer#MAX_VALUE}
     *                               or greater.
     */
    default StringData readString(int recordReferenceOffset) {
        long encodedLength = readLength(recordReferenceOffset);

        if (encodedLength >= Integer.MAX_VALUE) {
            throw new IllegalStateException("String is too long: " + encodedLength + "; possibly trying to read a "
                                            + "BLOB using getString; can not convert BLOB to String");
        }

        // The guard above makes the narrowing cast safe.
        int size = (int) encodedLength;

        if (encodedLength >= MAX_MEDIUM_LENGTH_VALUE) {
            RecordIdData reference = readRecordId(recordReferenceOffset + Long.BYTES);
            return new StringData(reference, size);
        }

        // Skip the length field: one byte for small lengths, two otherwise.
        int lengthFieldSize = encodedLength < MAX_SMALL_LENGTH_VALUE ? Byte.BYTES : Short.BYTES;
        String text = readBytes(recordReferenceOffset + lengthFieldSize, size)
                .decode(StandardCharsets.UTF_8)
                .toString();
        return new StringData(text, size);
    }

    /**
     * Reads a record identifier: an unsigned 16-bit segment reference followed
     * by a 32-bit record number.
     *
     * @param recordReferenceOffset offset of the record identifier.
     * @return the segment reference and record number read.
     */
    default RecordIdData readRecordId(int recordReferenceOffset) {
        return new RecordIdData(
                readShort(recordReferenceOffset) & 0xffff,
                readInt(recordReferenceOffset + Short.BYTES)
        );
    }

    /**
     * @param recordReferenceOffset position of the value in the record data.
     * @return the byte at that position.
     */
    byte readByte(int recordReferenceOffset);

    /**
     * @param recordReferenceOffset position of the value in the record data.
     * @return the int at that position.
     */
    int readInt(int recordReferenceOffset);

    /**
     * @param recordReferenceOffset position of the value in the record data.
     * @return the short at that position.
     */
    short readShort(int recordReferenceOffset);

    /**
     * @param recordReferenceOffset position of the value in the record data.
     * @return the long at that position.
     */
    long readLong(int recordReferenceOffset);

    /**
     * @param recordReferenceOffset position of the first byte in the record
     *                              data.
     * @param size                  number of bytes requested.
     * @return a buffer for the requested range.
     */
    Buffer readBytes(int recordReferenceOffset, int size);

    /**
     * @return the size of this segment data (presumably in bytes - confirm
     * against the implementations).
     */
    int size();

    /**
     * Dumps the segment to the given stream; judging by the name, in
     * hexadecimal form (confirm against the implementations).
     *
     * @param stream the destination stream.
     * @throws IOException if the dump cannot be written.
     */
    void hexDump(OutputStream stream) throws IOException;

    /**
     * Dumps the segment to the given stream; judging by the name, in binary
     * form (confirm against the implementations).
     *
     * @param stream the destination stream.
     * @throws IOException if the dump cannot be written.
     */
    void binDump(OutputStream stream) throws IOException;

    /**
     * @return an estimate of the memory used by this instance (unit not
     * visible here - confirm against the implementations).
     */
    int estimateMemoryUsage();

}