All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.sleepycat.je.tree.StorageSize Maven / Gradle / Ivy

The newest version!
/*-
 * Copyright (C) 2002, 2018, Oracle and/or its affiliates. All rights reserved.
 *
 * This file was distributed by Oracle as part of a version of Oracle Berkeley
 * DB Java Edition made available at:
 *
 * http://www.oracle.com/technetwork/database/database-technologies/berkeleydb/downloads/index.html
 *
 * Please see the LICENSE file included in the top-level directory of the
 * appropriate version of Oracle Berkeley DB Java Edition for a copy of the
 * license and additional information.
 */

package com.sleepycat.je.tree;

/**
 * Contains static methods for estimating record storage size.
 *
 * Currently this only applies to KVS because we assume that VLSNs are
 * preserved.
 */
public class StorageSize {

    /*
     * Estimated per-LN log overhead, in bytes.
     *
     * The real overhead varies, see LNLogEntry.getSize(). The cases that
     * were considered when choosing the value are:
     *
     *  25 -- cleaned and migrated LN (no txn info), no TTL:
     *      22: header (type, checksum, flags, prevOffset, size, vlsn)
     *       2: data length
     *       1: flags
     *
     *  43 -- insertion, with TTL:
     *      25: the case above
     *       2: expiration
     *       8: txnId
     *       8: lastLoggedLsn
     *
     *  53 -- update, with TTL:
     *      43: the case above
     *       8: abortLsn
     *       2: abortExpiration
     *
     * 50 is used as a conservative estimate; updates (the only case that
     * exceeds it) will be relatively infrequent.
     *
     * This constant is not referenced by the code in this class. It
     * documents the LN portion of the customer formula described in the
     * getStorageSize javadoc.
     */
    private static final int LN_OVERHEAD = 50;

    /*
     * Estimated per-slot (Btree BIN slot) overheads, in bytes.
     *
     * The real overhead varies, see IN.getLogSize. The cases that were
     * considered when choosing the values are:
     *
     *  11 -- minimum for all cases:
     *       8: lsn
     *       1: keySize
     *       1: state
     *       1: expiration
     *
     *  12 -- secondary DB, with TTL:
     *      11: minimum above
     *       1: data size
     *
     *  13 -- separate LN in primary DB, with TTL:
     *      11: minimum above
     *       2: lastLoggedSize
     *
     *  20 -- embedded LN in primary DB, with TTL:
     *      11: minimum above
     *       1: data size
     *       8: vlsn
     *
     * SEC_SLOT_OVERHEAD is 12, as a conservative estimate.
     *
     * PRI_SLOT_OVERHEAD is 14, and 14 is also the value used in the customer
     * formula for both the separate LN and embedded LN cases. The slot
     * overhead for the embedded case will be larger, but in that case there
     * are significant savings because the primary key is not duplicated.
     */
    private static final int SEC_SLOT_OVERHEAD = 12;
    private static final int PRI_SLOT_OVERHEAD = 14;
    private static final int PRI_EMBEDDED_LN_SLOT_OVERHEAD = 20;

    /* Not instantiable: this class only has static methods. */
    private StorageSize() {}

    /**
     * Returns the estimated disk storage size for the record in the given BIN
     * slot. This method does not fetch the LN.
     *
     * <p>For KVS, a formula that customers will use to predict the storage
     * for a given set of records, not including obsolete size (size available
     * for reclamation by the cleaner), is as follows.</p>
     *
     * <p>The storage overhead for a single Row (JE primary record) is:</p>
     * <pre>
     *  Serialized size of the Row, all fields (JE key + data size)
     *    +
     *  Serialized size of the PrimaryKey fields (JE key size)
     *    +
     *  Fixed per-Row internal overhead (64: LN_OVERHEAD + PRI_SLOT_OVERHEAD)
     * </pre>
     *
     * <p>The storage overhead for an Index record is:</p>
     * <pre>
     *  Serialized size of the IndexKey fields (JE key size)
     *    +
     *  Serialized size of the PrimaryKey fields (JE data size)
     *    +
     *  Fixed per-IndexKey internal overhead (12: SEC_SLOT_OVERHEAD)
     * </pre>
     *
     * <p>This method returns the size estimate for an actual record based on
     * the use of that formula, getting the key and data size (or
     * lastLoggedSize) from the BIN. The amount calculated using the formula
     * above will normally be larger than the size returned by this method,
     * for several reasons:</p>
     * <ul>
     *   <li>
     *   This method uses the key size after it is reduced by prefix
     *   compression.
     *   </li>
     *   <li>
     *   For a separate (non-embedded) LN, this method uses the lastLoggedSize
     *   rather than adding LN_OVERHEAD to the data size (this is why
     *   LN_OVERHEAD is not referenced in code here). This is more accurate
     *   since the actual LN overhead is reduced due to integer packing, etc.
     *   Also, this method cannot fetch the LN, so the data size is unknown.
     *   </li>
     *   <li>
     *   For an embedded LN in a primary DB, the returned size does not
     *   include the LN size, since the LN is always obsolete. This means the
     *   primary key size is not counted redundantly and the LN_OVERHEAD is
     *   not included in the return value, as they are in the formula. These
     *   are significant differences, but since embedded LNs require a data
     *   size LTE 16, this is not expected to be a common use case. If it
     *   becomes common, we should add a new case for this to the customer
     *   formula.
     *   </li>
     * </ul>
     *
     * <p>In addition, the size returned by this method will normally be
     * larger than the actual storage size on disk. This is because this
     * method uses the slot overhead constants (SEC_SLOT_OVERHEAD,
     * PRI_SLOT_OVERHEAD and PRI_EMBEDDED_LN_SLOT_OVERHEAD) to calculate the
     * Btree slot space, rather than using the serialized size of the slot.
     * These constant values are somewhat larger than the actual overheads,
     * since they do not take into account integer packing, etc. See the
     * comments above these constants. The serialized slot size was not used
     * here for simplicity and speed, plus this additional size compensates
     * for uncounted sizes such as per-BIN and UIN overhead.</p>
     *
     * @param bin the BIN containing the slot.
     *
     * @param idx the index of the slot within the BIN.
     *
     * @return the estimated storage size, or zero when the size is unknown
     * because a non-embedded LN is not resident and the LN was logged with a
     * JE version prior to 6.0.
     */
    public static int getStorageSize(final BIN bin, final int idx) {

        final int keyBytes = bin.getStoredKeySize(idx);
        final int estimate;

        if (bin.getDatabase().getSortedDuplicates()) {

            /*
             * JE secondary DB record (KVS Index record). The estimate is:
             *
             *   key-size + data-size + SEC_SLOT_OVERHEAD
             *
             *   where key-size is the serialized IndexKey size
             *     and data-size is the serialized PrimaryKey size.
             *
             * The stored key size covers key-size, data-size, and one extra
             * byte for the data (primary key) size. That byte is already
             * part of SEC_SLOT_OVERHEAD, so it is taken off the overhead
             * here to avoid counting it twice.
             */
            estimate = keyBytes + (SEC_SLOT_OVERHEAD - 1);

        } else if (bin.isEmbeddedLN(idx)) {

            /*
             * Embedded-LN JE primary DB record (KVS Row). The estimate is:
             *
             *   data-size + key-size + PRI_EMBEDDED_LN_SLOT_OVERHEAD
             *
             *   where (data-size + key-size) is the serialized Row size
             *     and key-size is the serialized PrimaryKey size.
             *
             * The stored key size covers key-size, data-size, and one extra
             * byte for the data size. That byte is already part of
             * PRI_EMBEDDED_LN_SLOT_OVERHEAD, so it is taken off the overhead
             * here to avoid counting it twice.
             */
            estimate = keyBytes + (PRI_EMBEDDED_LN_SLOT_OVERHEAD - 1);

        } else {

            /*
             * Separate (non-embedded) JE primary DB record (KVS Row). The
             * estimate is:
             *
             *   LN-log-size + key-size + PRI_SLOT_OVERHEAD
             *
             *   where LN-log-size is
             *           LN_OVERHEAD (or less) + data-size + key-size
             *     and (data-size + key-size) is the serialized Row size
             *     and key-size is the serialized PrimaryKey size.
             *
             * Here the stored key size is the key-size alone. A last logged
             * size of zero means the size is unknown, and zero is returned.
             */
            final int lnLogBytes = bin.getLastLoggedSize(idx);

            estimate = (lnLogBytes == 0) ?
                0 :
                lnLogBytes + keyBytes + PRI_SLOT_OVERHEAD;
        }

        return estimate;
    }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy