/*
 * Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
 * See LICENSE for license information.
 */

package ucar.nc2.iosp.hdf5;

import ucar.unidata.io.RandomAccessFile;
import ucar.unidata.util.SpecialMathFunction;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;

/**
 * HDF5 fractal heaps
 *
 * @author caron
 * @since 6/27/12
 */
public class FractalHeap {
  private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(FractalHeap.class);

  // level 1E "Fractal Heap" used for both Global and Local heaps in 1.8.0+
  /*
  1) the root indirect block knows how many rows it has from the header, which I can divide into
direct and indirect using:

 int maxrows_directBlocks = (log2(maxDirectBlockSize) - log2(startingBlockSize)) + 2;

in the example file I have, maxDirectBlockSize = 2^16, startingBlockSize = 2^10, tableWidth = 4, so
maxrows = 8. So I will see 8 rows, with direct sizes:
	2^10, 2^10, 2^11, 2^12, 2^13, 2^14, 2^15, 2^16

So if nrows > 8, I will see indirect rows of size
	2^17, 2^18, .....

this value is the indirect block size.

2) now read a 1st level indirect block of size 2^17:

 nrows = (log2(indirect block size) - log2(startingBlockSize * tableWidth)) + 1

       = 17 - 10 - 2 + 1 = 6.

 All indirect blocks of "size" 2^17 will have: (for the parameters above)
        row 0: (direct blocks): 4 x 2^10 = 2^12
        row 1: (direct blocks): 4 x 2^10 = 2^12
        row 2: (direct blocks): 4 x 2^11 = 2^13
        row 3: (direct blocks): 4 x 2^12 = 2^14
        row 4: (direct blocks): 4 x 2^13 = 2^15
        row 5: (direct blocks): 4 x 2^14 = 2^16
                    ===============
                       Total size: 2^17

Then there are 7 rows for an indirect block of size 2^18, 8 rows for an indirect block of size 2^19, etc.
An indirect block of size 2^20 will have nine rows, the last of which holds indirect blocks of size 2^17;
an indirect block of size 2^21 will have ten rows, the last two of which hold indirect blocks of size
2^17 and 2^18, etc.

One still uses

 int maxrows_directBlocks = (log2(maxDirectBlockSize) - log2(startingBlockSize)) + 2

Where startingBlockSize is from the header, i.e. the same for all indirect blocks. (An illustrative
sketch of this row-size computation follows this comment block.)


*/
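
  // Illustrative sketch, not part of the original class: it just evaluates the doubling-table
  // row-size computation described in the comment above. Given startingBlockSize and
  // maxDirectBlockSize (both assumed to be powers of 2, as the format requires), it returns the
  // direct block size of each of the maxrows_directBlocks rows. For the example parameters
  // (startingBlockSize = 2^10, maxDirectBlockSize = 2^16) it yields
  // {2^10, 2^10, 2^11, 2^12, 2^13, 2^14, 2^15, 2^16}, i.e. 8 rows.
  private static long[] sketchDirectRowSizes(long startingBlockSize, long maxDirectBlockSize) {
    int maxrowsDirectBlocks =
        (int) (SpecialMathFunction.log2(maxDirectBlockSize) - SpecialMathFunction.log2(startingBlockSize)) + 2;
    long[] rowSize = new long[maxrowsDirectBlocks];
    long blockSize = startingBlockSize;
    for (int row = 0; row < maxrowsDirectBlocks; row++) {
      rowSize[row] = blockSize;
      if (row > 0) blockSize *= 2; // rows 0 and 1 share the starting size, then each row doubles
    }
    return rowSize;
  }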


  private java.io.PrintStream debugOut = System.out;
  static boolean debugDetail, debugFractalHeap, debugPos;

  private final H5header h5;
  private final RandomAccessFile raf;

  int version;
  short heapIdLen;
  byte flags;
  int maxSizeOfObjects;
  long nextHugeObjectId, freeSpace, managedSpace, allocatedManagedSpace, offsetDirectBlock,
          nManagedObjects, sizeHugeObjects, nHugeObjects, sizeTinyObjects, nTinyObjects;
  long btreeAddressHugeObjects, freeSpaceTrackerAddress;

  short maxHeapSize, startingNumRows, currentNumRows;
  long maxDirectBlockSize;
  short tableWidth;
  long startingBlockSize;

  long rootBlockAddress;
  IndirectBlock rootBlock;

  // filters
  short ioFilterLen;
  long sizeFilteredRootDirectBlock;
  int ioFilterMask;
  byte[] ioFilterInfo;

  DoublingTable doublingTable;
  BTree2 btreeHugeObjects;


  FractalHeap(H5header h5, String forWho, long address, MemTracker memTracker) throws IOException {
    this.h5 = h5;
    this.raf = h5.raf;

    // header information is in le byte order
    raf.order(RandomAccessFile.LITTLE_ENDIAN);
    raf.seek(h5.getFileOffset(address));

    if (debugDetail) debugOut.println("-- readFractalHeap position=" + raf.getFilePointer());

    // header
    String magic = raf.readString(4);
    if (!magic.equals("FRHP"))
      throw new IllegalStateException(magic + " should equal FRHP");

    version = raf.readByte();
    heapIdLen = raf.readShort(); // bytes
    ioFilterLen = raf.readShort();  // bytes
    flags = raf.readByte();

    maxSizeOfObjects = raf.readInt(); // greater than this are huge objects
    nextHugeObjectId = h5.readLength(); // next id to use for a huge object
    btreeAddressHugeObjects = h5.readOffset(); // v2 btree to track huge objects
    freeSpace = h5.readLength();  // total free space in managed direct blocks
    freeSpaceTrackerAddress = h5.readOffset();
    managedSpace = h5.readLength(); // total amount of managed space in the heap
    allocatedManagedSpace = h5.readLength(); // total amount of managed space in the heap actually allocated
    offsetDirectBlock = h5.readLength(); // linear heap offset where next direct block should be allocated
    nManagedObjects = h5.readLength();  // number of managed objects in the heap
    sizeHugeObjects = h5.readLength(); // total size of huge objects in the heap (in bytes)
    nHugeObjects = h5.readLength(); // number huge objects in the heap
    sizeTinyObjects = h5.readLength(); // total size of tiny objects packed in heap Ids (in bytes)
    nTinyObjects = h5.readLength(); // number of tiny objects packed in heap Ids

    tableWidth = raf.readShort(); // number of columns in the doubling table for managed blocks, must be power of 2
    startingBlockSize = h5.readLength(); // starting direct block size in bytes, must be power of 2
    maxDirectBlockSize = h5.readLength(); // maximum direct block size in bytes, must be power of 2
    maxHeapSize = raf.readShort(); // log2 of the maximum size of heap's linear address space, in bytes
    startingNumRows = raf.readShort(); // starting number of rows of the root indirect block, 0 = maximum needed
    rootBlockAddress = h5.readOffset(); // This is the address of the root block for the heap.
                                        // It can be the undefined address if there is no data in the heap.
                                        // It either points to a direct block (if the Current # of Rows in the Root Indirect Block value is 0), or an indirect block.
    currentNumRows = raf.readShort(); // current number of rows of the root indirect block, 0 = direct block

    boolean hasFilters = (ioFilterLen > 0);
    if (hasFilters) {
      sizeFilteredRootDirectBlock = h5.readLength();
      ioFilterMask = raf.readInt();
      ioFilterInfo = new byte[ioFilterLen];
      raf.readFully(ioFilterInfo);
    }
    int checksum = raf.readInt();

    if (debugDetail || debugFractalHeap) {
      debugOut.println("FractalHeap for " + forWho + " version=" + version + " heapIdLen=" + heapIdLen + " ioFilterLen=" + ioFilterLen + " flags= " + flags);
      debugOut.println(" maxSizeOfObjects=" + maxSizeOfObjects + " nextHugeObjectId=" + nextHugeObjectId + " btreeAddress="
              + btreeAddressHugeObjects + " managedSpace=" + managedSpace + " allocatedManagedSpace=" + allocatedManagedSpace + " freeSpace=" + freeSpace);
      debugOut.println(" nManagedObjects=" + nManagedObjects + " nHugeObjects= " + nHugeObjects + " nTinyObjects=" + nTinyObjects +
              " maxDirectBlockSize=" + maxDirectBlockSize + " maxHeapSize= 2^" + maxHeapSize);
      debugOut.println(" DoublingTable: tableWidth=" + tableWidth + " startingBlockSize=" + startingBlockSize);
      debugOut.println(" rootBlockAddress=" + rootBlockAddress + " startingNumRows=" + startingNumRows + " currentNumRows=" + currentNumRows);
    }
    if (debugPos) debugOut.println("    *now at position=" + raf.getFilePointer());

    long pos = raf.getFilePointer();
    if (debugDetail) debugOut.println("-- end FractalHeap position=" + raf.getFilePointer());
    int hsize = 8 + 2 * h5.sizeLengths + h5.sizeOffsets;
    if (memTracker != null) memTracker.add("Group FractalHeap (" + forWho + ")", address, pos);

    doublingTable = new DoublingTable(tableWidth, startingBlockSize, allocatedManagedSpace, maxDirectBlockSize);

    // data
    rootBlock = new IndirectBlock(currentNumRows, startingBlockSize);

    if (currentNumRows == 0) {
      DataBlock dblock = new DataBlock();
      doublingTable.blockList.add(dblock);
      readDirectBlock(h5.getFileOffset(rootBlockAddress), address, dblock);
      dblock.size = startingBlockSize; // - dblock.extraBytes;  // removed 10/1/2013
      rootBlock.add(dblock);

    } else {

      readIndirectBlock(rootBlock, h5.getFileOffset(rootBlockAddress), address, hasFilters);

      // read in the direct blocks
      for (DataBlock dblock : doublingTable.blockList) {
        if (dblock.address > 0) {
          readDirectBlock(h5.getFileOffset(dblock.address), address, dblock);
          // dblock.size -= dblock.extraBytes;  // removed 10/1/2013
        }
      }
    }

  }

  void showDetails(Formatter f) {
    f.format("FractalHeap version=" + version + " heapIdLen=" + heapIdLen + " ioFilterLen=" + ioFilterLen + " flags= " + flags + "%n");
    f.format(" maxSizeOfObjects=" + maxSizeOfObjects + " nextHugeObjectId=" + nextHugeObjectId + " btreeAddress="
            + btreeAddressHugeObjects + " managedSpace=" + managedSpace + " allocatedManagedSpace=" + allocatedManagedSpace + " freeSpace=" + freeSpace + "%n");
    f.format(" nManagedObjects=" + nManagedObjects + " nHugeObjects= " + nHugeObjects + " nTinyObjects=" + nTinyObjects +
            " maxDirectBlockSize=" + maxDirectBlockSize + " maxHeapSize= 2^" + maxHeapSize + "%n");
    f.format(" rootBlockAddress=" + rootBlockAddress + " startingNumRows=" + startingNumRows + " currentNumRows=" + currentNumRows + "%n%n");
    rootBlock.showDetails(f);
    // doublingTable.showDetails(f);
  }


  DHeapId getFractalHeapId(byte[] heapId) throws IOException {
    return new DHeapId(heapId);
  }

  class DHeapId {
    int type;
    int subtype;  // 1 = indirect, no filter; 2 = indirect, filter; 3 = direct, no filter; 4 = direct, filter
    int n;        // the offset field size
    int m;
    int offset; // This field is the offset of the object in the heap.
    int size;   // This field is the length of the object in the heap

    DHeapId(byte[] heapId) throws IOException {
      type = (heapId[0] & 0x30) >> 4;

      if (type == 0) {
        n = maxHeapSize / 8;      // This field's size is the minimum number of bytes necessary to encode the Maximum Heap Size value
        m = h5.getNumBytesFromMax(maxDirectBlockSize - 1);  // size in bytes of the length field.
        // Its size is determined by taking the minimum of the Maximum Direct Block Size and the Maximum Size of
        // Managed Objects in the Fractal Heap Header; the minimum number of bytes needed to encode that value is used.

        offset = h5.makeIntFromBytes(heapId, 1, n);
        size = h5.makeIntFromBytes(heapId, 1 + n, m);
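        // Worked example (hypothetical header values): maxHeapSize = 32 gives n = 32/8 = 4, and
        // maxDirectBlockSize = 2^16 gives m = getNumBytesFromMax(65535) = 2, so bytes 1..4 of the
        // heap ID hold the heap offset and bytes 5..6 hold the object length.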
      }

      else if (type == 1) {
        // how fun to guess the subtype
        boolean hasBtree = (btreeAddressHugeObjects > 0);
        boolean hasFilters = (ioFilterLen > 0);
        if (hasBtree)
          subtype = hasFilters ? 2 : 1;
        else
          subtype = hasFilters ? 4 : 3;

        switch (subtype) {
          case 1:
          case 2:
            offset = h5.makeIntFromBytes(heapId, 1, (heapId.length-1));
            break;
        }
      } else if (type == 2) {
        /* The sub-type for tiny heap IDs depends on whether the heap ID is large enough to store objects greater than 16 bytes or not.
          If the heap ID length is 18 bytes or smaller, the "normal" tiny heap ID form is used. If the heap ID length is greater than 18 bytes,
          the "extended" form is used. */
        subtype = (heapId.length <= 18) ? 1 : 2; // 1 = normal, 2 = extended
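        // For example (hypothetical lengths): a 16-byte heap ID uses the normal form (subtype 1),
        // while a 32-byte heap ID would use the extended form (subtype 2).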
      }

      else  {
        throw new UnsupportedOperationException(); // "DHeapId subtype ="+subtype);
      }


    }

    long getPos() throws IOException {
      switch (type) {
        case 0:
          return doublingTable.getPos(offset);
        case 1: {
          switch (subtype) {
            case 1:
            case 2:
              if (btreeHugeObjects == null) {
                btreeHugeObjects = new BTree2(h5, "FractalHeap btreeHugeObjects", btreeAddressHugeObjects);
                assert btreeHugeObjects.btreeType == subtype;
              }
              BTree2.Record1 record1 = btreeHugeObjects.getEntry1(offset);
              if (record1 == null) {
                btreeHugeObjects.getEntry1(offset); // debug
                throw new RuntimeException("Cant find DHeapId="+offset);
              }
              return record1.hugeObjectAddress;

            case 3:
            case 4:
              return offset;     // guess
          }
        }
        default:
          throw new RuntimeException("Unknown DHeapId type ="+type);
      }
    }

    public String toString() {
      return type + "," + n + "," + m + "," + offset + "," + size;
    }
  }

  private class DoublingTable {
    int tableWidth;
    long startingBlockSize, managedSpace, maxDirectBlockSize;
    // int nrows, nDirectRows, nIndirectRows;
    List<DataBlock> blockList;

    DoublingTable(int tableWidth, long startingBlockSize, long managedSpace, long maxDirectBlockSize) {
      this.tableWidth = tableWidth;
      this.startingBlockSize = startingBlockSize;
      this.managedSpace = managedSpace;
      this.maxDirectBlockSize = maxDirectBlockSize;
      this.blockList = new ArrayList<>(tableWidth * currentNumRows);
    }

    private int calcNrows(long max) {
      int n = 0;
      long sizeInBytes = 0;
      long blockSize = startingBlockSize;
      while (sizeInBytes < max) {
        sizeInBytes += blockSize * tableWidth;
        n++;
        if (n > 1) blockSize *= 2;
      }
      return n;
    }

    private void assignSizes() {
      int block = 0;
      long blockSize = startingBlockSize;
      for (DataBlock db : blockList) {
        db.size = blockSize;
        block++;
        if ((block % tableWidth == 0) && (block / tableWidth > 1))
          blockSize *= 2;
      }
    }

    long getPos(long offset) {
      int block = 0;
      for (DataBlock db : blockList) {
        if (db.address < 0) continue;
        if ((offset >= db.offset) && (offset <= db.offset + db.size)) {
          long localOffset = offset - db.offset;
          //System.out.println("   heap ID find block= "+block+" db.dataPos " + db.dataPos+" localOffset= "+localOffset);
          return db.dataPos + localOffset;
        }
        block++;
      }

      log.error("DoublingTable: illegal offset=" + offset);
      //return -1; // temporary skip
      throw new IllegalStateException("offset=" + offset);
    }

    void showDetails(Formatter f) {
      f.format(" DoublingTable: tableWidth= %d startingBlockSize = %d managedSpace=%d maxDirectBlockSize=%d%n",
              tableWidth, startingBlockSize, managedSpace, maxDirectBlockSize);
      //sbuff.append(" nrows=" + nrows + " nDirectRows=" + nDirectRows + " nIndirectRows=" + nIndirectRows+"%n");
      f.format(" DataBlocks:%n");
      f.format("  address            dataPos            offset size%n");
      for (DataBlock dblock : blockList) {
        f.format("  %#-18x %#-18x %5d  %4d%n", dblock.address, dblock.dataPos, dblock.offset, dblock.size);
      }
    }
  }

  private class IndirectBlock {
    long size;
    int nrows, directRows, indirectRows;
    List<DataBlock> directBlocks;
    List<IndirectBlock> indirectBlocks;

    IndirectBlock(int nrows, long iblock_size) {
      this.nrows = nrows;
      this.size = iblock_size;

      if (nrows < 0) {
        double n = SpecialMathFunction.log2(iblock_size) - SpecialMathFunction.log2(startingBlockSize * tableWidth) + 1;
        nrows = (int) n;
      }

      int maxrows_directBlocks = (int) (SpecialMathFunction.log2(maxDirectBlockSize) - SpecialMathFunction.log2(startingBlockSize)) + 2;
      if (nrows < maxrows_directBlocks) {
        directRows = nrows;
        indirectRows = 0;
      } else {
        directRows = maxrows_directBlocks;
        indirectRows = (nrows - maxrows_directBlocks);
      }
      if (debugFractalHeap)
        debugOut.println("  readIndirectBlock directChildren" + directRows + " indirectChildren= " + indirectRows);
    }

    void add(DataBlock dblock) {
      if (directBlocks == null)
        directBlocks = new ArrayList<>();
      directBlocks.add(dblock);
    }

    void add(IndirectBlock iblock) {
      if (indirectBlocks == null)
        indirectBlocks = new ArrayList<>();
      indirectBlocks.add(iblock);
    }

    void showDetails(Formatter f) {
      f.format("%n IndirectBlock: nrows= %d directRows = %d indirectRows=%d startingSize=%d%n",
              nrows, directRows, indirectRows, size);
      //sbuff.append(" nrows=" + nrows + " nDirectRows=" + nDirectRows + " nIndirectRows=" + nIndirectRows+"%n");
      f.format(" DataBlocks:%n");
      f.format("  address            dataPos            offset size end%n");
      if (directBlocks != null)
        for (DataBlock dblock : directBlocks)
          f.format("  %#-18x %#-18x %5d  %4d %5d %n", dblock.address, dblock.dataPos, dblock.offset, dblock.size,
                  (dblock.offset + dblock.size));
      if (indirectBlocks != null)
        for (IndirectBlock iblock : indirectBlocks)
          iblock.showDetails(f);
    }
  }

  private static class DataBlock {
    long address;
    long sizeFilteredDirectBlock;
    int filterMask;

    long dataPos;
    long offset;
    long size;
    int extraBytes;
    boolean wasRead; // when empty, object exists, but fields are not init. not yet sure where to use.

    @Override
    public String toString() {
      return "DataBlock{" +
              "offset=" + offset +
              ", size=" + size +
              ", dataPos=" + dataPos +
              '}';
    }
  }

  void readIndirectBlock(IndirectBlock iblock, long pos, long heapAddress, boolean hasFilter) throws IOException {
    raf.seek(pos);

    // header
    String magic = raf.readString(4);
    if (!magic.equals("FHIB"))
      throw new IllegalStateException(magic + " should equal FHIB");

    byte version = raf.readByte();
    long heapHeaderAddress = h5.readOffset();
    if (heapAddress != heapHeaderAddress)
      throw new IllegalStateException();

    int nbytes = maxHeapSize / 8;
    if (maxHeapSize % 8 != 0) nbytes++;
    long blockOffset = h5.readVariableSizeUnsigned(nbytes);

    if (debugDetail || debugFractalHeap) {
      debugOut.println(" -- FH IndirectBlock version=" + version + " blockOffset= " + blockOffset);
    }

    long npos = raf.getFilePointer();
    if (debugPos) debugOut.println("    *now at position=" + npos);

    // child direct blocks
    long blockSize = startingBlockSize;
    for (int row = 0; row < iblock.directRows; row++) {

      if (row > 1)
        blockSize *= 2;

      for (int i = 0; i < doublingTable.tableWidth; i++) {
        DataBlock directBlock = new DataBlock();
        iblock.add(directBlock);

        directBlock.address = h5.readOffset();  // This field is the address of the child direct block. The size of the [uncompressed] direct block can be computed by its offset in the heap's linear address space.
        if (hasFilter) {
          directBlock.sizeFilteredDirectBlock = h5.readLength();
          directBlock.filterMask = raf.readInt();
        }
        if (debugDetail || debugFractalHeap)
          debugOut.println("  DirectChild " + i + " address= " + directBlock.address);

        directBlock.size = blockSize;

        //if (directChild.address >= 0)
        doublingTable.blockList.add(directBlock);
      }
    }

    // child indirect blocks
    for (int row = 0; row < iblock.indirectRows; row++) {
      blockSize *= 2;
      for (int i = 0; i < doublingTable.tableWidth; i++) {
        IndirectBlock iblock2 = new IndirectBlock(-1, blockSize);
        iblock.add(iblock2);

        long childIndirectAddress = h5.readOffset();
        if (debugDetail || debugFractalHeap)
          debugOut.println("  InDirectChild " + row + " address= " + childIndirectAddress);
        if (childIndirectAddress >= 0)
          readIndirectBlock(iblock2, childIndirectAddress, heapAddress, hasFilter);
      }
    }

  }

  void readDirectBlock(long pos, long heapAddress, DataBlock dblock) throws IOException {
    if (pos < 0) return; // means it's empty
    raf.seek(pos);

    // header
    String magic = raf.readString(4);
    if (!magic.equals("FHDB"))
      throw new IllegalStateException(magic + " should equal FHDB");

    byte version = raf.readByte();
    long heapHeaderAddress = h5.readOffset(); // This is the address for the fractal heap header that this block belongs to. This field is principally used for file integrity checking.
    if (heapAddress != heapHeaderAddress)
      throw new IllegalStateException();

    dblock.extraBytes = 5; // keep track of how much room is taken out of block size, that is, how much is left for the object
    dblock.extraBytes += h5.isOffsetLong ? 8 : 4;

    int nbytes = maxHeapSize / 8;
    if (maxHeapSize % 8 != 0) nbytes++;
    dblock.offset = h5.readVariableSizeUnsigned(nbytes); // This is the offset of the block within the fractal heap's address space (in bytes).
    dblock.dataPos = pos; // raf.getFilePointer();  // offsets are from the start of the block

    dblock.extraBytes += nbytes;
    if ((flags & 2) != 0) dblock.extraBytes += 4; // ?? size of checksum
    //dblock.size -= size; // subtract space used by other fields

    dblock.wasRead = true;
    if (debugDetail || debugFractalHeap)
      debugOut.println("  DirectBlock offset= " + dblock.offset + " dataPos = " + dblock.dataPos);
  }
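
  // Worked example of the header accounting above (hypothetical values): with 8-byte offsets
  // (h5.isOffsetLong), maxHeapSize = 32 (so nbytes = 4) and the checksum flag (bit 1) set,
  // extraBytes = 5 (signature + version) + 8 (heap header address) + 4 (block offset) + 4 (checksum) = 21,
  // leaving dblock.size - 21 bytes of the direct block for object data.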

} // FractalHeap



