/*
 * Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
 * See LICENSE for license information.
 */

package ucar.nc2.iosp.hdf5;

import ucar.unidata.io.RandomAccessFile;
import ucar.unidata.util.SpecialMathFunction;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;

/**
 * HDF5 fractal heaps
 *
 * @author caron
 * @since 6/27/12
 */
public class FractalHeap {
  private static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(FractalHeap.class);

  // level 1E "Fractal Heap" used for both Global and Local heaps in 1.8.0+
  /*
   * 1) the root indirect block knows how many rows it has from the header, which i can divide into
   * direct and indirect using:
   * 
   * int maxrows_directBlocks = (log2(maxDirectBlockSize) - log2(startingBlockSize)) + 2;
   * 
   * in the example file i have, maxDirectBlockSize = 2^16, startingBlockSize = 2^10, tableWidth = 4, so
   * maxrows = 8. So I will see 8 rows, with direct sizes:
   * 2^10, 2^10, 2^11, 2^12, 2^13, 2^14, 2^15, 2^16
   * 
   * So if nrows > 8, I will see indirect rows of size
   * 2^17, 2^18, .....
   * 
   * (these sizes are the iblock_size values used in step 2 below.)
   * 
   * 2) now read a 1st level indirect block of size 2^17:
   * 
   * nrows = (lg2(iblock_size) - lg2(startingBlockSize * tableWidth)) + 1
   *
   *       = (17 - 10 - 2) + 1 = 6.
   * 
   * All indirect blocks of "size" 2^17 will have: (for the parameters above)
   * row 0: (direct blocks): 4 x 2^10 = 2^12
   * row 1: (direct blocks): 4 x 2^10 = 2^12
   * row 2: (direct blocks): 4 x 2^11 = 2^13
   * row 3: (direct blocks): 4 x 2^12 = 2^14
   * row 4: (direct blocks): 4 x 2^13 = 2^15
   * row 5: (direct blocks): 4 x 2^14 = 2^16
   * ===============
   * Total size: 2^17
   * 
   * Then there are 7 rows for an indirect block of size 2^18, 8 rows for an indirect block of size 2^19, etc.
   * An indirect block of size 2^20 will have nine rows, the last of which holds indirect blocks of size 2^17;
   * an indirect block of size 2^21 will have ten rows, the last two of which hold indirect blocks of size
   * 2^17 and 2^18, etc.
   * 
   * One still uses
   * 
   * int maxrows_directBlocks = (log2(maxDirectBlockSize) - log2(startingBlockSize)) + 2
   * 
   * Where startingBlockSize is from the header, ie the same for all indirect blocks.
   * 
   * 
   */
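  /*
   * Illustrative sketch of the doubling-table geometry described above (not part of the original class;
   * the helper name is hypothetical). Rows 0 and 1 both use the starting block size, and every later row
   * doubles; e.g. with startingBlockSize = 2^10, rows 0..7 give 2^10, 2^10, 2^11, ..., 2^16, matching the
   * example above.
   */
  private static long rowBlockSize(long startingBlockSize, int row) {
    // row 0 and row 1 use startingBlockSize; row r >= 2 uses startingBlockSize * 2^(r-1)
    return (row <= 1) ? startingBlockSize : startingBlockSize << (row - 1);
  }
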
  private java.io.PrintStream debugOut = System.out;
  static boolean debugDetail, debugFractalHeap, debugPos;

  private final H5headerIF h5;
  private final RandomAccessFile raf;

  int version;
  short heapIdLen;
  byte flags;
  int maxSizeOfObjects;
  long nextHugeObjectId, freeSpace, managedSpace, allocatedManagedSpace, offsetDirectBlock, nManagedObjects,
      sizeHugeObjects, nHugeObjects, sizeTinyObjects, nTinyObjects;
  long btreeAddressHugeObjects, freeSpaceTrackerAddress;

  short maxHeapSize, startingNumRows, currentNumRows;
  long maxDirectBlockSize;
  short tableWidth;
  long startingBlockSize;

  long rootBlockAddress;
  IndirectBlock rootBlock;

  // filters
  short ioFilterLen;
  long sizeFilteredRootDirectBlock;
  int ioFilterMask;
  byte[] ioFilterInfo;

  DoublingTable doublingTable;
  BTree2 btreeHugeObjects;


  public FractalHeap(H5headerIF h5, String forWho, long address, MemTracker memTracker) throws IOException {
    this.h5 = h5;
    this.raf = h5.getRandomAccessFile();

    // header information is in little-endian byte order
    raf.order(RandomAccessFile.LITTLE_ENDIAN);
    raf.seek(h5.getFileOffset(address));

    if (debugDetail)
      debugOut.println("-- readFractalHeap position=" + raf.getFilePointer());

    // header
    String magic = raf.readString(4);
    if (!magic.equals("FRHP"))
      throw new IllegalStateException(magic + " should equal FRHP");

    version = raf.readByte();
    heapIdLen = raf.readShort(); // bytes
    ioFilterLen = raf.readShort(); // bytes
    flags = raf.readByte();

    maxSizeOfObjects = raf.readInt(); // greater than this are huge objects
    nextHugeObjectId = h5.readLength(); // next id to use for a huge object
    btreeAddressHugeObjects = h5.readOffset(); // v2 btree to track huge objects
    freeSpace = h5.readLength(); // total free space in managed direct blocks
    freeSpaceTrackerAddress = h5.readOffset();
    managedSpace = h5.readLength(); // total amount of managed space in the heap
    allocatedManagedSpace = h5.readLength(); // total amount of managed space in the heap actually allocated
    offsetDirectBlock = h5.readLength(); // linear heap offset where next direct block should be allocated
    nManagedObjects = h5.readLength(); // number of managed objects in the heap
    sizeHugeObjects = h5.readLength(); // total size of huge objects in the heap (in bytes)
    nHugeObjects = h5.readLength(); // number huge objects in the heap
    sizeTinyObjects = h5.readLength(); // total size of tiny objects packed in heap Ids (in bytes)
    nTinyObjects = h5.readLength(); // number of tiny objects packed in heap Ids

    tableWidth = raf.readShort(); // number of columns in the doubling table for managed blocks, must be power of 2
    startingBlockSize = h5.readLength(); // starting direct block size in bytes, must be power of 2
    maxDirectBlockSize = h5.readLength(); // maximum direct block size in bytes, must be power of 2
    maxHeapSize = raf.readShort(); // log2 of the maximum size of heap's linear address space, in bytes
    startingNumRows = raf.readShort(); // starting number of rows of the root indirect block, 0 = maximum needed
    rootBlockAddress = h5.readOffset(); // This is the address of the root block for the heap.
                                        // It can be the undefined address if there is no data in the heap.
                                        // It either points to a direct block (if the Current # of Rows in the Root
                                        // Indirect Block value is 0), or an indirect block.
    currentNumRows = raf.readShort(); // current number of rows of the root indirect block, 0 = direct block

    boolean hasFilters = (ioFilterLen > 0);
    if (hasFilters) {
      sizeFilteredRootDirectBlock = h5.readLength();
      ioFilterMask = raf.readInt();
      ioFilterInfo = new byte[ioFilterLen];
      raf.readFully(ioFilterInfo);
    }
    int checksum = raf.readInt();

    if (debugDetail || debugFractalHeap) {
      debugOut.println("FractalHeap for " + forWho + " version=" + version + " heapIdLen=" + heapIdLen + " ioFilterLen="
          + ioFilterLen + " flags= " + flags);
      debugOut.println(" maxSizeOfObjects=" + maxSizeOfObjects + " nextHugeObjectId=" + nextHugeObjectId
          + " btreeAddress=" + btreeAddressHugeObjects + " managedSpace=" + managedSpace + " allocatedManagedSpace="
          + allocatedManagedSpace + " freeSpace=" + freeSpace);
      debugOut.println(" nManagedObjects=" + nManagedObjects + " nHugeObjects= " + nHugeObjects + " nTinyObjects="
          + nTinyObjects + " maxDirectBlockSize=" + maxDirectBlockSize + " maxHeapSize= 2^" + maxHeapSize);
      debugOut.println(" DoublingTable: tableWidth=" + tableWidth + " startingBlockSize=" + startingBlockSize);
      debugOut.println(" rootBlockAddress=" + rootBlockAddress + " startingNumRows=" + startingNumRows
          + " currentNumRows=" + currentNumRows);
    }
    if (debugPos)
      debugOut.println("    *now at position=" + raf.getFilePointer());

    long pos = raf.getFilePointer();
    if (debugDetail)
      debugOut.println("-- end FractalHeap position=" + raf.getFilePointer());
    int hsize = 8 + 2 * h5.getSizeLengths() + h5.getSizeOffsets();
    if (memTracker != null)
      memTracker.add("Group FractalHeap (" + forWho + ")", address, pos);

    doublingTable = new DoublingTable(tableWidth, startingBlockSize, allocatedManagedSpace, maxDirectBlockSize);

    // data
    rootBlock = new IndirectBlock(currentNumRows, startingBlockSize);

    if (currentNumRows == 0) {
      DataBlock dblock = new DataBlock();
      doublingTable.blockList.add(dblock);
      readDirectBlock(h5.getFileOffset(rootBlockAddress), address, dblock);
      dblock.size = startingBlockSize; // - dblock.extraBytes; // removed 10/1/2013
      rootBlock.add(dblock);

    } else {

      readIndirectBlock(rootBlock, h5.getFileOffset(rootBlockAddress), address, hasFilters);

      // read in the direct blocks
      for (DataBlock dblock : doublingTable.blockList) {
        if (dblock.address > 0) {
          readDirectBlock(h5.getFileOffset(dblock.address), address, dblock);
          // dblock.size -= dblock.extraBytes; // removed 10/1/2013
        }
      }
    }

  }

  public void showDetails(Formatter f) {
    f.format("FractalHeap version=" + version + " heapIdLen=" + heapIdLen + " ioFilterLen=" + ioFilterLen + " flags= "
        + flags + "%n");
    f.format(" maxSizeOfObjects=" + maxSizeOfObjects + " nextHugeObjectId=" + nextHugeObjectId + " btreeAddress="
        + btreeAddressHugeObjects + " managedSpace=" + managedSpace + " allocatedManagedSpace=" + allocatedManagedSpace
        + " freeSpace=" + freeSpace + "%n");
    f.format(" nManagedObjects=" + nManagedObjects + " nHugeObjects= " + nHugeObjects + " nTinyObjects=" + nTinyObjects
        + " maxDirectBlockSize=" + maxDirectBlockSize + " maxHeapSize= 2^" + maxHeapSize + "%n");
    f.format(" rootBlockAddress=" + rootBlockAddress + " startingNumRows=" + startingNumRows + " currentNumRows="
        + currentNumRows + "%n%n");
    rootBlock.showDetails(f);
    // doublingTable.showDetails(f);
  }


  public DHeapId getFractalHeapId(byte[] heapId) {
    return new DHeapId(heapId);
  }
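
  /*
   * Convenience sketch (hypothetical helper, not part of the original class): resolve a raw heap id, as read
   * from an HDF5 message, straight to the absolute file position of the object it references.
   */
  public long getHeapObjectPos(byte[] heapId) throws IOException {
    return getFractalHeapId(heapId).getPos();
  }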

  public class DHeapId {

    int type;
    int subtype; // 1 = indirect, no filter; 2 = indirect, filter; 3 = direct, no filter; 4 = direct, filter
    int n; // the offset field size
    int m; // the length field size
    int offset; // This field is the offset of the object in the heap.
    int size; // This field is the length of the object in the heap

    DHeapId(byte[] heapId) {
      type = (heapId[0] & 0x30) >> 4;

      if (type == 0) {
        n = maxHeapSize / 8; // size of the offset field: the minimum number of bytes necessary to encode the
        // Maximum Heap Size value
        m = h5.getNumBytesFromMax(maxDirectBlockSize - 1); // size of the length field: the minimum number of bytes
        // needed to encode the smaller of Maximum Direct Block Size and Maximum Size of Managed Objects
        // in the Fractal Heap Header

        offset = h5.makeIntFromBytes(heapId, 1, n);
        size = h5.makeIntFromBytes(heapId, 1 + n, m);
      } else if (type == 1) {
        // how fun to guess the subtype
        boolean hasBtree = (btreeAddressHugeObjects > 0);
        boolean hasFilters = (ioFilterLen > 0);
        if (hasBtree)
          subtype = hasFilters ? 2 : 1;
        else
          subtype = hasFilters ? 4 : 3;

        switch (subtype) {
          case 1:
          case 2:
            offset = h5.makeIntFromBytes(heapId, 1, (heapId.length - 1));
            break;
        }
      } else if (type == 2) {
        /*
         * The sub-type for tiny heap IDs depends on whether the heap ID is large enough to store objects greater than
         * 16 bytes or not. If the heap ID length is 18 bytes or smaller, the "normal" tiny heap ID form is used;
         * if it is greater than 18 bytes, the "extended" form is used.
         */
        subtype = (heapId.length <= 18) ? 1 : 2; // 1 = normal, 2 = extended
      } else {
        throw new UnsupportedOperationException(); // "DHeapId subtype ="+subtype);
      }
    }

    public long getPos() throws IOException {
      switch (type) {
        case 0:
          return doublingTable.getPos(offset);
        case 1: {
          switch (subtype) {
            case 1:
            case 2:
              if (btreeHugeObjects == null) {
                btreeHugeObjects = new BTree2(h5, "FractalHeap btreeHugeObjects", btreeAddressHugeObjects);
                assert btreeHugeObjects.btreeType == subtype;
              }
              BTree2.Record1 record1 = btreeHugeObjects.getEntry1(offset);
              if (record1 == null) {
                btreeHugeObjects.getEntry1(offset); // debug
                throw new RuntimeException("Cant find DHeapId=" + offset);
              }
              return record1.hugeObjectAddress;

            case 3:
            case 4:
              return offset; // guess
          }
        }
        default:
          throw new RuntimeException("Unknown DHeapId type =" + type);
      }
    }

    public String toString() {
      return type + "," + n + "," + m + "," + offset + "," + size;
    }

    public void show(Formatter f) throws IOException {
      f.format("   %2d %2d %2d %6d %4d %8d", type, n, m, offset, size, getPos());
    }

  }

  private class DoublingTable {
    int tableWidth;
    long startingBlockSize, managedSpace, maxDirectBlockSize;
    // int nrows, nDirectRows, nIndirectRows;
    List<DataBlock> blockList;

    DoublingTable(int tableWidth, long startingBlockSize, long managedSpace, long maxDirectBlockSize) {
      this.tableWidth = tableWidth;
      this.startingBlockSize = startingBlockSize;
      this.managedSpace = managedSpace;
      this.maxDirectBlockSize = maxDirectBlockSize;
      this.blockList = new ArrayList<>(tableWidth * currentNumRows);
    }

    private int calcNrows(long max) {
      int n = 0;
      long sizeInBytes = 0;
      long blockSize = startingBlockSize;
      while (sizeInBytes < max) {
        sizeInBytes += blockSize * tableWidth;
        n++;
        if (n > 1)
          blockSize *= 2;
      }
      return n;
    }

    private void assignSizes() {
      int block = 0;
      long blockSize = startingBlockSize;
      for (DataBlock db : blockList) {
        db.size = blockSize;
        block++;
        if ((block % tableWidth == 0) && (block / tableWidth > 1))
          blockSize *= 2;
      }
    }

    long getPos(long offset) {
      int block = 0;
      for (DataBlock db : blockList) {
        if (db.address < 0)
          continue;
        if ((offset >= db.offset) && (offset <= db.offset + db.size)) {
          long localOffset = offset - db.offset;
          return db.dataPos + localOffset;
        }
        block++;
      }

      log.error("DoublingTable: illegal offset=" + offset);
      // return -1; // temporary skip
      throw new IllegalStateException("offset=" + offset);
    }

    void showDetails(Formatter f) {
      f.format(" DoublingTable: tableWidth= %d startingBlockSize = %d managedSpace=%d maxDirectBlockSize=%d%n",
          tableWidth, startingBlockSize, managedSpace, maxDirectBlockSize);
      f.format(" DataBlocks:%n");
      f.format("  address            dataPos            offset size%n");
      for (DataBlock dblock : blockList) {
        f.format("  %#-18x %#-18x %5d  %4d%n", dblock.address, dblock.dataPos, dblock.offset, dblock.size);
      }
    }
  }

  private class IndirectBlock {
    long size;
    int nrows, directRows, indirectRows;
    List<DataBlock> directBlocks;
    List<IndirectBlock> indirectBlocks;

    IndirectBlock(int nrows, long iblock_size) {
      this.nrows = nrows;
      this.size = iblock_size;

      if (nrows < 0) {
        double n = SpecialMathFunction.log2(iblock_size) - SpecialMathFunction.log2(startingBlockSize * tableWidth) + 1;
        nrows = (int) n;
      }

      int maxrows_directBlocks =
          (int) (SpecialMathFunction.log2(maxDirectBlockSize) - SpecialMathFunction.log2(startingBlockSize)) + 2;
      if (nrows < maxrows_directBlocks) {
        directRows = nrows;
        indirectRows = 0;
      } else {
        directRows = maxrows_directBlocks;
        indirectRows = (nrows - maxrows_directBlocks);
      }
      if (debugFractalHeap)
        debugOut.println("  readIndirectBlock directChildren" + directRows + " indirectChildren= " + indirectRows);
    }

    void add(DataBlock dblock) {
      if (directBlocks == null)
        directBlocks = new ArrayList<>();
      directBlocks.add(dblock);
    }

    void add(IndirectBlock iblock) {
      if (indirectBlocks == null)
        indirectBlocks = new ArrayList<>();
      indirectBlocks.add(iblock);
    }

    void showDetails(Formatter f) {
      f.format("%n IndirectBlock: nrows= %d directRows = %d indirectRows=%d startingSize=%d%n", nrows, directRows,
          indirectRows, size);
      f.format(" DataBlocks:%n");
      f.format("  address            dataPos            offset size end%n");
      if (directBlocks != null)
        for (DataBlock dblock : directBlocks)
          f.format("  %#-18x %#-18x %5d  %4d %5d %n", dblock.address, dblock.dataPos, dblock.offset, dblock.size,
              (dblock.offset + dblock.size));
      if (indirectBlocks != null)
        for (IndirectBlock iblock : indirectBlocks)
          iblock.showDetails(f);
    }
  }

  private static class DataBlock {
    long address;
    long sizeFilteredDirectBlock;
    int filterMask;

    long dataPos;
    long offset;
    long size;
    int extraBytes;
    boolean wasRead; // when the block is empty, the object exists but its fields are not initialized; not yet sure where to use this

    @Override
    public String toString() {
      return "DataBlock{" + "offset=" + offset + ", size=" + size + ", dataPos=" + dataPos + '}';
    }
  }

  void readIndirectBlock(IndirectBlock iblock, long pos, long heapAddress, boolean hasFilter) throws IOException {
    raf.seek(pos);

    // header
    String magic = raf.readString(4);
    if (!magic.equals("FHIB"))
      throw new IllegalStateException(magic + " should equal FHIB");

    byte version = raf.readByte();
    long heapHeaderAddress = h5.readOffset();
    if (heapAddress != heapHeaderAddress)
      throw new IllegalStateException();

    int nbytes = maxHeapSize / 8;
    if (maxHeapSize % 8 != 0)
      nbytes++;
    long blockOffset = h5.readVariableSizeUnsigned(nbytes);

    if (debugDetail || debugFractalHeap) {
      debugOut.println(" -- FH IndirectBlock version=" + version + " blockOffset= " + blockOffset);
    }

    long npos = raf.getFilePointer();
    if (debugPos)
      debugOut.println("    *now at position=" + npos);

    // child direct blocks
    long blockSize = startingBlockSize;
    for (int row = 0; row < iblock.directRows; row++) {

      if (row > 1)
        blockSize *= 2;

      for (int i = 0; i < doublingTable.tableWidth; i++) {
        DataBlock directBlock = new DataBlock();
        iblock.add(directBlock);

        directBlock.address = h5.readOffset(); // This field is the address of the child direct block. The size of the
                                               // [uncompressed] direct block can be computed by its offset in the
                                               // heap's linear address space.
        if (hasFilter) {
          directBlock.sizeFilteredDirectBlock = h5.readLength();
          directBlock.filterMask = raf.readInt();
        }
        if (debugDetail || debugFractalHeap)
          debugOut.println("  DirectChild " + i + " address= " + directBlock.address);

        directBlock.size = blockSize;

        // if (directChild.address >= 0)
        doublingTable.blockList.add(directBlock);
      }
    }

    // child indirect blocks
    for (int row = 0; row < iblock.indirectRows; row++) {
      blockSize *= 2;
      for (int i = 0; i < doublingTable.tableWidth; i++) {
        IndirectBlock iblock2 = new IndirectBlock(-1, blockSize);
        iblock.add(iblock2);

        long childIndirectAddress = h5.readOffset();
        if (debugDetail || debugFractalHeap)
          debugOut.println("  InDirectChild " + row + " address= " + childIndirectAddress);
        if (childIndirectAddress >= 0)
          readIndirectBlock(iblock2, childIndirectAddress, heapAddress, hasFilter);
      }
    }

  }

  void readDirectBlock(long pos, long heapAddress, DataBlock dblock) throws IOException {
    if (pos < 0)
      return; // means it's empty
    raf.seek(pos);

    // header
    String magic = raf.readString(4);
    if (!magic.equals("FHDB"))
      throw new IllegalStateException(magic + " should equal FHDB");

    byte version = raf.readByte();
    long heapHeaderAddress = h5.readOffset(); // This is the address for the fractal heap header that this block belongs
                                              // to. This field is principally used for file integrity checking.
    if (heapAddress != heapHeaderAddress)
      throw new IllegalStateException();

    dblock.extraBytes = 5; // keep track of how much room is taken out of block size, that is, how much is left for the
                           // object
    dblock.extraBytes += h5.isOffsetLong() ? 8 : 4;

    int nbytes = maxHeapSize / 8;
    if (maxHeapSize % 8 != 0)
      nbytes++;
    dblock.offset = h5.readVariableSizeUnsigned(nbytes); // This is the offset of the block within the fractal heap's
                                                         // address space (in bytes).
    dblock.dataPos = pos; // raf.getFilePointer(); // offsets are from the start of the block

    dblock.extraBytes += nbytes;
    if ((flags & 2) != 0)
      dblock.extraBytes += 4; // ?? size of checksum
    // dblock.size -= size; // subtract space used by other fields

    dblock.wasRead = true;
    if (debugDetail || debugFractalHeap)
      debugOut.println("  DirectBlock offset= " + dblock.offset + " dataPos = " + dblock.dataPos);
  }

} // FractalHeap



