ucar.nc2.iosp.hdf5.FractalHeap Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
* See LICENSE for license information.
*/
package ucar.nc2.iosp.hdf5;
import ucar.unidata.io.RandomAccessFile;
import ucar.unidata.util.SpecialMathFunction;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Formatter;
import java.util.List;
/**
* HDF5 fractal heaps
*
* @author caron
* @since 6/27/12
*/
public class FractalHeap {
private static org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(FractalHeap.class);
// level 1E "Fractal Heap" used for both Global and Local heaps in 1.8.0+
/*
* 1) the root indirect block knows how many rows it has from the header, which i can divide into
* direct and indirect using:
*
* int maxrows_directBlocks = (log2(maxDirectBlockSize) - log2(startingBlockSize)) + 2;
*
* in the example file i have, maxDirectBlockSize = 2^16, startingBlockSize = 2^10, tableWidth = 4, so
* maxrows = 8. So I will see 8 rows, with direct sizes:
* 2^10, 2^10, 2^11, 2^12, 2^13, 2^14, 2^15, 2^16
*
* So if nrows > 8, I will see indirect rows of size
* 2^17, 2^18, .....
*
* this value is the indirect block size.
*
* 2) now read a 1st level indirect block of size 2^17:
*
* iblock_nrows = lg2(indirect block size) - lg2(starting block size * doubling table width) + 1
*
* iblock_nrows = 17 - 10 - 2 + 1 = 6.
*
* All indirect blocks of "size" 2^17 will have: (for the parameters above)
* row 0: (direct blocks): 4 x 2^10 = 2^12
* row 1: (direct blocks): 4 x 2^10 = 2^12
* row 2: (direct blocks): 4 x 2^11 = 2^13
* row 3: (direct blocks): 4 x 2^12 = 2^14
* row 4: (direct blocks): 4 x 2^13 = 2^15
* row 5: (direct blocks): 4 x 2^14 = 2^16
* ===============
* Total size: 2^17
*
* Then there are 7 rows for indirect block of size 2^18, 8 rows for indirect block of size 2^19, etc.
* An indirect block of size 2^20 will have nine rows, the last one of which are indirect blocks that are size 2^17,
* an indirect block of size 2^21 will have ten rows, the last two rows of which are indirect blocks that are size
* 2^17 & 2^18, etc.
*
* One still uses
*
* int maxrows_directBlocks = (log2(maxDirectBlockSize) - log2(startingBlockSize)) + 2
*
* Where startingBlockSize is from the header, ie the same for all indirect blocks.
*
*
*/
// Destination for the debug output that is enabled by the static flags below.
private java.io.PrintStream debugOut = System.out;
// Debug switches: detailed trace, fractal heap summary, file position trace.
static boolean debugDetail, debugFractalHeap, debugPos;
// Header reader that supplies the file, the offset/length readers and the file offset translation.
private final H5headerIF h5;
// The file being read; obtained from h5 in the constructor.
private final RandomAccessFile raf;
// ---- values read from the heap header, in the order the constructor reads them ----
int version;
// length of a heap ID, in bytes
short heapIdLen;
byte flags;
// objects greater than this are huge objects
int maxSizeOfObjects;
// nextHugeObjectId: next id to use for a huge object
// freeSpace: total free space in managed direct blocks
// managedSpace: total amount of managed space in the heap
// allocatedManagedSpace: total amount of managed space in the heap actually allocated
// offsetDirectBlock: linear heap offset where the next direct block should be allocated
// nManagedObjects, nHugeObjects, nTinyObjects: object counts; sizeHugeObjects, sizeTinyObjects: total sizes in bytes
long nextHugeObjectId, freeSpace, managedSpace, allocatedManagedSpace, offsetDirectBlock, nManagedObjects,
sizeHugeObjects, nHugeObjects, sizeTinyObjects, nTinyObjects;
// btreeAddressHugeObjects: address of the v2 btree that tracks huge objects
long btreeAddressHugeObjects, freeSpaceTrackerAddress;
// maxHeapSize: log2 of the maximum size of the heap's linear address space, in bytes
// startingNumRows: starting number of rows of the root indirect block, 0 = maximum needed
// currentNumRows: current number of rows of the root indirect block, 0 = root is a direct block
short maxHeapSize, startingNumRows, currentNumRows;
// maximum direct block size in bytes, must be a power of 2
long maxDirectBlockSize;
// number of columns in the doubling table for managed blocks, must be a power of 2
short tableWidth;
// starting direct block size in bytes, must be a power of 2
long startingBlockSize;
// address of the root block of the heap (direct block when currentNumRows == 0, otherwise indirect block)
long rootBlockAddress;
// In-memory tree of blocks built by the constructor, rooted here.
IndirectBlock rootBlock;
// filters
// length of the I/O filter information in bytes; the remaining filter fields are only read when this is > 0
short ioFilterLen;
long sizeFilteredRootDirectBlock;
int ioFilterMask;
byte[] ioFilterInfo;
// Flat list of all direct blocks, used to translate heap offsets into file positions.
DoublingTable doublingTable;
// Huge-object btree, created lazily by DHeapId.getPos().
BTree2 btreeHugeObjects;
/**
 * Reads a fractal heap: the "FRHP" header at the given address, then the root block and every direct block
 * reachable from it.
 * <p>
 * The header fields are stored in the instance fields in the order they appear in the file. Afterwards the root
 * block is read either as a single direct block (when {@code currentNumRows == 0}) or as an indirect block whose
 * direct blocks are collected in {@code doublingTable.blockList} and then read one by one.
 *
 * @param h5 header object supplying the file, the offset/length readers and the file offset translation
 * @param forWho label used only in debug output and in the memTracker entry
 * @param address address of the heap header; translated with {@code h5.getFileOffset} before seeking
 * @param memTracker receives the extent of the header when not null
 * @throws IOException on read error
 * @throws IllegalStateException if the signature at {@code address} is not "FRHP"
 */
public FractalHeap(H5headerIF h5, String forWho, long address, MemTracker memTracker) throws IOException {
this.h5 = h5;
this.raf = h5.getRandomAccessFile();
// header information is in le byte order
raf.order(RandomAccessFile.LITTLE_ENDIAN);
raf.seek(h5.getFileOffset(address));
if (debugDetail)
debugOut.println("-- readFractalHeap position=" + raf.getFilePointer());
// header
String magic = raf.readString(4);
if (!magic.equals("FRHP"))
throw new IllegalStateException(magic + " should equal FRHP");
version = raf.readByte();
heapIdLen = raf.readShort(); // bytes
ioFilterLen = raf.readShort(); // bytes
flags = raf.readByte();
maxSizeOfObjects = raf.readInt(); // greater than this are huge objects
nextHugeObjectId = h5.readLength(); // next id to use for a huge object
btreeAddressHugeObjects = h5.readOffset(); // v2 btree to track huge objects
freeSpace = h5.readLength(); // total free space in managed direct blocks
freeSpaceTrackerAddress = h5.readOffset();
managedSpace = h5.readLength(); // total amount of managed space in the heap
allocatedManagedSpace = h5.readLength(); // total amount of managed space in the heap actually allocated
offsetDirectBlock = h5.readLength(); // linear heap offset where next direct block should be allocated
nManagedObjects = h5.readLength(); // number of managed objects in the heap
sizeHugeObjects = h5.readLength(); // total size of huge objects in the heap (in bytes)
nHugeObjects = h5.readLength(); // number huge objects in the heap
sizeTinyObjects = h5.readLength(); // total size of tiny objects packed in heap Ids (in bytes)
nTinyObjects = h5.readLength(); // number of tiny objects packed in heap Ids
tableWidth = raf.readShort(); // number of columns in the doubling table for managed blocks, must be power of 2
startingBlockSize = h5.readLength(); // starting direct block size in bytes, must be power of 2
maxDirectBlockSize = h5.readLength(); // maximum direct block size in bytes, must be power of 2
maxHeapSize = raf.readShort(); // log2 of the maximum size of heap's linear address space, in bytes
startingNumRows = raf.readShort(); // starting number of rows of the root indirect block, 0 = maximum needed
rootBlockAddress = h5.readOffset(); // This is the address of the root block for the heap.
// It can be the undefined address if there is no data in the heap.
// It either points to a direct block (if the Current # of Rows in the Root
// Indirect Block value is 0), or an indirect block.
currentNumRows = raf.readShort(); // current number of rows of the root indirect block, 0 = direct block
// the filter fields are only present in the header when the filter info length is non-zero
boolean hasFilters = (ioFilterLen > 0);
if (hasFilters) {
sizeFilteredRootDirectBlock = h5.readLength();
ioFilterMask = raf.readInt();
ioFilterInfo = new byte[ioFilterLen];
raf.readFully(ioFilterInfo);
}
// the checksum is read to consume it; it is not verified or used afterwards
int checksum = raf.readInt();
if (debugDetail || debugFractalHeap) {
debugOut.println("FractalHeap for " + forWho + " version=" + version + " heapIdLen=" + heapIdLen + " ioFilterLen="
+ ioFilterLen + " flags= " + flags);
debugOut.println(" maxSizeOfObjects=" + maxSizeOfObjects + " nextHugeObjectId=" + nextHugeObjectId
+ " btreeAddress=" + btreeAddressHugeObjects + " managedSpace=" + managedSpace + " allocatedManagedSpace="
+ allocatedManagedSpace + " freeSpace=" + freeSpace);
debugOut.println(" nManagedObjects=" + nManagedObjects + " nHugeObjects= " + nHugeObjects + " nTinyObjects="
+ nTinyObjects + " maxDirectBlockSize=" + maxDirectBlockSize + " maxHeapSize= 2^" + maxHeapSize);
debugOut.println(" DoublingTable: tableWidth=" + tableWidth + " startingBlockSize=" + startingBlockSize);
debugOut.println(" rootBlockAddress=" + rootBlockAddress + " startingNumRows=" + startingNumRows
+ " currentNumRows=" + currentNumRows);
}
if (debugPos)
debugOut.println(" *now at position=" + raf.getFilePointer());
// file position just past the header; reported to the memTracker below
long pos = raf.getFilePointer();
if (debugDetail)
debugOut.println("-- end FractalHeap position=" + raf.getFilePointer());
// NOTE(review): hsize is computed but never used in this constructor
int hsize = 8 + 2 * h5.getSizeLengths() + h5.getSizeOffsets();
if (memTracker != null)
memTracker.add("Group FractalHeap (" + forWho + ")", address, pos);
doublingTable = new DoublingTable(tableWidth, startingBlockSize, allocatedManagedSpace, maxDirectBlockSize);
// data
rootBlock = new IndirectBlock(currentNumRows, startingBlockSize);
if (currentNumRows == 0) {
// the root is a single direct block: read it and register it as the only block
DataBlock dblock = new DataBlock();
doublingTable.blockList.add(dblock);
readDirectBlock(h5.getFileOffset(rootBlockAddress), address, dblock);
dblock.size = startingBlockSize; // - dblock.extraBytes; // removed 10/1/2013
rootBlock.add(dblock);
} else {
// the root is an indirect block: reading it fills doublingTable.blockList with the direct block entries
readIndirectBlock(rootBlock, h5.getFileOffset(rootBlockAddress), address, hasFilters);
// read in the direct blocks
for (DataBlock dblock : doublingTable.blockList) {
// entries whose address is not positive are left unread
if (dblock.address > 0) {
readDirectBlock(h5.getFileOffset(dblock.address), address, dblock);
// dblock.size -= dblock.extraBytes; // removed 10/1/2013
}
}
}
}
/**
 * Appends a summary of the heap header values to the formatter, followed by the details of the root block.
 *
 * @param f formatter that receives the text
 */
public void showDetails(Formatter f) {
  final String identityLine = "FractalHeap version=" + version + " heapIdLen=" + heapIdLen + " ioFilterLen="
      + ioFilterLen + " flags= " + flags;
  f.format(identityLine + "%n");

  final String spaceLine = " maxSizeOfObjects=" + maxSizeOfObjects + " nextHugeObjectId=" + nextHugeObjectId
      + " btreeAddress=" + btreeAddressHugeObjects + " managedSpace=" + managedSpace + " allocatedManagedSpace="
      + allocatedManagedSpace + " freeSpace=" + freeSpace;
  f.format(spaceLine + "%n");

  final String countLine = " nManagedObjects=" + nManagedObjects + " nHugeObjects= " + nHugeObjects
      + " nTinyObjects=" + nTinyObjects + " maxDirectBlockSize=" + maxDirectBlockSize + " maxHeapSize= 2^"
      + maxHeapSize;
  f.format(countLine + "%n");

  final String rootLine = " rootBlockAddress=" + rootBlockAddress + " startingNumRows=" + startingNumRows
      + " currentNumRows=" + currentNumRows;
  f.format(rootLine + "%n%n");

  // the block tree prints itself recursively
  rootBlock.showDetails(f);
  // doublingTable.showDetails(f);
}
/**
 * Decodes raw heap ID bytes into a {@link DHeapId} that is bound to this heap.
 *
 * @param heapId the raw heap ID bytes
 * @return the decoded heap ID
 */
public DHeapId getFractalHeapId(byte[] heapId) {
  final DHeapId decoded = new DHeapId(heapId);
  return decoded;
}
/**
 * A decoded fractal heap ID, bound to the enclosing heap.
 * <p>
 * The ID type is taken from bits 4-5 of the first ID byte:
 * <ul>
 * <li>type 0: an offset and a length are decoded; the offset is resolved through the doubling table</li>
 * <li>type 1: the subtype is guessed from the heap header; subtypes 1 and 2 decode a key that is looked up in the
 * huge-object btree, subtypes 3 and 4 decode nothing</li>
 * <li>type 2 ("tiny"): only the subtype is set; {@link #getPos()} cannot locate these</li>
 * </ul>
 */
public class DHeapId {
  int type;
  int subtype; // type 1: 1 = indirect no filter, 2 = indirect, filter 3 = direct, no filter, 4 = direct, filter
  int n; // the offset field size
  int m; // the length field size
  int offset; // This field is the offset of the object in the heap.
  int size; // This field is the length of the object in the heap

  /**
   * Decodes the given heap ID bytes.
   *
   * @param heapId raw heap ID; the first byte carries the type
   * @throws UnsupportedOperationException if the type is not 0, 1 or 2
   */
  DHeapId(byte[] heapId) {
    type = (heapId[0] & 0x30) >> 4;

    if (type == 0) {
      // This field's size is the minimum number of bytes necessary to encode the Maximum Heap Size value.
      // maxHeapSize is a bit count, so round up to whole bytes exactly as readIndirectBlock/readDirectBlock do;
      // plain integer division would drop a partial byte.
      n = maxHeapSize / 8;
      if (maxHeapSize % 8 != 0)
        n++;

      // This field is the length of the object in the heap.
      // It is determined by taking the minimum value of Maximum Direct Block Size and Maximum Size of Managed Objects
      // in the Fractal Heap Header.
      // Again, the minimum number of bytes needed to encode that value is used for the size of this field.
      // NOTE(review): only maxDirectBlockSize is considered here - confirm against maxSizeOfObjects.
      m = h5.getNumBytesFromMax(maxDirectBlockSize - 1);

      offset = h5.makeIntFromBytes(heapId, 1, n);
      size = h5.makeIntFromBytes(heapId, 1 + n, m);

    } else if (type == 1) {
      // how fun to guess the subtype
      boolean hasBtree = (btreeAddressHugeObjects > 0);
      boolean hasFilters = (ioFilterLen > 0);
      if (hasBtree) {
        subtype = hasFilters ? 2 : 1;
        // everything after the first byte is the btree key
        offset = h5.makeIntFromBytes(heapId, 1, (heapId.length - 1));
      } else {
        subtype = hasFilters ? 4 : 3;
      }

    } else if (type == 2) {
      /*
       * The sub-type for tiny heap IDs depends on whether the heap ID is large enough to store objects greater than
       * 16 bytes or not.
       * If the heap ID length is 18 bytes or smaller, the "normal" tiny heap ID form is used. If the heap ID length
       * is greater than 18 bytes in length, the "extended" form is used.
       */
      subtype = (heapId.length <= 18) ? 1 : 2; // 1 = normal, 2 = extended

    } else {
      throw new UnsupportedOperationException("Unknown DHeapId type =" + type);
    }
  }

  /**
   * Resolves this ID to a position.
   *
   * @return for type 0 the position found by the doubling table; for type 1, subtypes 1 and 2, the address stored
   *         in the huge-object btree record; for type 1, subtypes 3 and 4, the offset field (a guess)
   * @throws IOException on read error while opening the huge-object btree
   * @throws RuntimeException if the btree has no record for the key, or the type cannot be resolved (type 2)
   */
  public long getPos() throws IOException {
    if (type == 0) {
      return doublingTable.getPos(offset);
    }

    if (type == 1) {
      if (subtype == 1 || subtype == 2) {
        // the btree is opened on first use and shared by all IDs of this heap
        if (btreeHugeObjects == null) {
          btreeHugeObjects = new BTree2(h5, "FractalHeap btreeHugeObjects", btreeAddressHugeObjects);
          assert btreeHugeObjects.btreeType == subtype;
        }
        BTree2.Record1 record1 = btreeHugeObjects.getEntry1(offset);
        if (record1 == null) {
          throw new RuntimeException("Cant find DHeapId=" + offset);
        }
        return record1.hugeObjectAddress;
      }
      if (subtype == 3 || subtype == 4) {
        return offset; // guess
      }
    }

    throw new RuntimeException("Unknown DHeapId type =" + type);
  }

  @Override
  public String toString() {
    return type + "," + n + "," + m + "," + offset + "," + size;
  }

  /**
   * Appends type, field sizes, offset, size and the resolved position to the formatter.
   *
   * @param f formatter that receives the text
   * @throws IOException if resolving the position fails
   */
  public void show(Formatter f) throws IOException {
    f.format(" %2d %2d %2d %6d %4d %8d", type, n, m, offset, size, getPos());
  }
}
/**
 * Flat list of the direct blocks of the heap, used to translate an offset in the heap's linear address space
 * into a file position.
 */
private class DoublingTable {
  int tableWidth;
  long startingBlockSize, managedSpace, maxDirectBlockSize;
  // int nrows, nDirectRows, nIndirectRows;
  // every direct block entry, in the order the block readers encounter them
  List<DataBlock> blockList;

  DoublingTable(int tableWidth, long startingBlockSize, long managedSpace, long maxDirectBlockSize) {
    this.tableWidth = tableWidth;
    this.startingBlockSize = startingBlockSize;
    this.managedSpace = managedSpace;
    this.maxDirectBlockSize = maxDirectBlockSize;
    // initial capacity only; the list grows as blocks are added
    this.blockList = new ArrayList<>(tableWidth * currentNumRows);
  }

  /** Counts the rows needed to hold at least {@code max} bytes; the first two rows use the starting block size. */
  private int calcNrows(long max) {
    int n = 0;
    long sizeInBytes = 0;
    long blockSize = startingBlockSize;
    while (sizeInBytes < max) {
      sizeInBytes += blockSize * tableWidth;
      n++;
      if (n > 1)
        blockSize *= 2;
    }
    return n;
  }

  /** Assigns block sizes by position in the list: rows 0 and 1 get the starting size, each later row doubles. */
  private void assignSizes() {
    int block = 0;
    long blockSize = startingBlockSize;
    for (DataBlock db : blockList) {
      db.size = blockSize;
      block++;
      if ((block % tableWidth == 0) && (block / tableWidth > 1))
        blockSize *= 2;
    }
  }

  /**
   * Translates a heap offset into a file position.
   *
   * @param offset offset in the heap's linear address space
   * @return {@code dataPos} of the containing block plus the offset within that block
   * @throws IllegalStateException if no block with a non-negative address contains the offset
   */
  long getPos(long offset) {
    for (DataBlock db : blockList) {
      if (db.address < 0)
        continue;
      // a block covers the half-open range [db.offset, db.offset + db.size): the end offset already belongs
      // to the next block, so it must not match here
      if ((offset >= db.offset) && (offset < db.offset + db.size)) {
        long localOffset = offset - db.offset;
        return db.dataPos + localOffset;
      }
    }

    log.error("DoublingTable: illegal offset=" + offset);
    // return -1; // temporary skip
    throw new IllegalStateException("offset=" + offset);
  }

  void showDetails(Formatter f) {
    f.format(" DoublingTable: tableWidth= %d startingBlockSize = %d managedSpace=%d maxDirectBlockSize=%d%n",
        tableWidth, startingBlockSize, managedSpace, maxDirectBlockSize);
    f.format(" DataBlocks:%n");
    f.format(" address dataPos offset size%n");
    for (DataBlock dblock : blockList) {
      f.format(" %#-18x %#-18x %5d %4d%n", dblock.address, dblock.dataPos, dblock.offset, dblock.size);
    }
  }
}
/**
 * In-memory node of the block tree: an indirect block with its child direct blocks and child indirect blocks.
 * The constructor only works out how many rows hold direct and indirect children; the children themselves are
 * added while the block is read.
 */
private class IndirectBlock {
  long size;
  int nrows, directRows, indirectRows;
  List<DataBlock> directBlocks; // null until the first direct child is added
  List<IndirectBlock> indirectBlocks; // null until the first indirect child is added

  /**
   * @param nrows number of rows of this block, or a negative value to derive it from {@code iblock_size}
   * @param iblock_size size of this block
   */
  IndirectBlock(int nrows, long iblock_size) {
    this.size = iblock_size;

    if (nrows < 0) {
      // nrows = lg2(indirect block size) - lg2(starting block size * table width) + 1
      double n = SpecialMathFunction.log2(iblock_size) - SpecialMathFunction.log2(startingBlockSize * tableWidth) + 1;
      nrows = (int) n;
    }
    // record the effective row count, not the negative "derive it" marker
    this.nrows = nrows;

    int maxrows_directBlocks =
        (int) (SpecialMathFunction.log2(maxDirectBlockSize) - SpecialMathFunction.log2(startingBlockSize)) + 2;
    if (nrows < maxrows_directBlocks) {
      directRows = nrows;
      indirectRows = 0;
    } else {
      directRows = maxrows_directBlocks;
      indirectRows = (nrows - maxrows_directBlocks);
    }
    if (debugFractalHeap)
      debugOut.println(" readIndirectBlock directChildren" + directRows + " indirectChildren= " + indirectRows);
  }

  void add(DataBlock dblock) {
    if (directBlocks == null)
      directBlocks = new ArrayList<>();
    directBlocks.add(dblock);
  }

  void add(IndirectBlock iblock) {
    if (indirectBlocks == null)
      indirectBlocks = new ArrayList<>();
    indirectBlocks.add(iblock);
  }

  /** Appends this block's row counts and direct children to the formatter, then recurses into indirect children. */
  void showDetails(Formatter f) {
    f.format("%n IndirectBlock: nrows= %d directRows = %d indirectRows=%d startingSize=%d%n", nrows, directRows,
        indirectRows, size);
    f.format(" DataBlocks:%n");
    f.format(" address dataPos offset size end%n");
    if (directBlocks != null)
      for (DataBlock dblock : directBlocks)
        f.format(" %#-18x %#-18x %5d %4d %5d %n", dblock.address, dblock.dataPos, dblock.offset, dblock.size,
            (dblock.offset + dblock.size));
    if (indirectBlocks != null)
      for (IndirectBlock iblock : indirectBlocks)
        iblock.showDetails(f);
  }
}
/** Plain holder for what is known about one direct block of the heap. */
private static class DataBlock {
  // address: address of the direct block as stored in its parent
  // sizeFilteredDirectBlock: only read when the heap has filters
  // dataPos: file position of the block; object offsets are relative to the start of the block
  // offset: offset of the block within the heap's address space (in bytes)
  // size: size of the block
  long address, sizeFilteredDirectBlock, dataPos, offset, size;
  // filterMask: only read when the heap has filters
  // extraBytes: how much room the block's own header fields take out of the block size
  int filterMask, extraBytes;
  boolean wasRead; // when empty, object exists, but fields are not init. not yet sure where to use.

  @Override
  public String toString() {
    StringBuilder text = new StringBuilder("DataBlock{");
    text.append("offset=").append(offset);
    text.append(", size=").append(size);
    text.append(", dataPos=").append(dataPos);
    return text.append('}').toString();
  }
}
/**
 * Reads the indirect block ("FHIB") at the given file position and fills {@code iblock} with its children.
 * <p>
 * Every direct child entry becomes a {@code DataBlock} that is added both to {@code iblock} and to
 * {@code doublingTable.blockList}; the direct blocks themselves are not read here. Child indirect blocks with a
 * non-negative address are read recursively.
 *
 * @param iblock block to fill; its directRows/indirectRows decide how many entries are read
 * @param pos file position of the indirect block
 * @param heapAddress address of the owning heap header, checked against the value stored in the block
 * @param hasFilter whether each direct child entry also carries a filtered size and a filter mask
 * @throws IOException on read error
 * @throws IllegalStateException if the signature is not "FHIB" or the block belongs to another heap
 */
void readIndirectBlock(IndirectBlock iblock, long pos, long heapAddress, boolean hasFilter) throws IOException {
  raf.seek(pos);

  // header
  String magic = raf.readString(4);
  if (!magic.equals("FHIB"))
    throw new IllegalStateException(magic + " should equal FHIB");

  byte version = raf.readByte();
  long heapHeaderAddress = h5.readOffset();
  if (heapAddress != heapHeaderAddress)
    throw new IllegalStateException(
        "FHIB heap header address=" + heapHeaderAddress + " should equal " + heapAddress);

  // the block offset is stored in the minimum number of whole bytes that holds maxHeapSize bits
  int nbytes = maxHeapSize / 8;
  if (maxHeapSize % 8 != 0)
    nbytes++;
  long blockOffset = h5.readVariableSizeUnsigned(nbytes);

  if (debugDetail || debugFractalHeap) {
    debugOut.println(" -- FH IndirectBlock version=" + version + " blockOffset= " + blockOffset);
  }

  if (debugPos)
    debugOut.println(" *now at position=" + raf.getFilePointer());

  // child direct blocks
  long blockSize = startingBlockSize;
  for (int row = 0; row < iblock.directRows; row++) {
    // rows 0 and 1 share the starting block size, every later row doubles it
    if (row > 1)
      blockSize *= 2;

    for (int i = 0; i < doublingTable.tableWidth; i++) {
      DataBlock directBlock = new DataBlock();
      iblock.add(directBlock);

      directBlock.address = h5.readOffset(); // This field is the address of the child direct block. The size of the
                                             // [uncompressed] direct block can be computed by its offset in the
                                             // heap's linear address space.
      if (hasFilter) {
        directBlock.sizeFilteredDirectBlock = h5.readLength();
        directBlock.filterMask = raf.readInt();
      }
      if (debugDetail || debugFractalHeap)
        debugOut.println(" DirectChild " + i + " address= " + directBlock.address);

      directBlock.size = blockSize;

      // if (directChild.address >= 0)
      doublingTable.blockList.add(directBlock);
    }
  }

  // child indirect blocks
  for (int row = 0; row < iblock.indirectRows; row++) {
    blockSize *= 2;
    for (int i = 0; i < doublingTable.tableWidth; i++) {
      IndirectBlock iblock2 = new IndirectBlock(-1, blockSize);
      iblock.add(iblock2);

      long childIndirectAddress = h5.readOffset();
      if (debugDetail || debugFractalHeap)
        debugOut.println(" InDirectChild " + row + " address= " + childIndirectAddress);
      if (childIndirectAddress >= 0) {
        // The recursive read seeks away from this block's entry table; remember where the next sibling entry
        // starts and come back to it afterwards, otherwise the next address is read from inside the child.
        long nextEntryPos = raf.getFilePointer();
        // translate the stored address to a file position, as is done for the root and the direct blocks
        readIndirectBlock(iblock2, h5.getFileOffset(childIndirectAddress), heapAddress, hasFilter);
        raf.seek(nextEntryPos);
      }
    }
  }
}
/**
 * Reads the header of the direct block ("FHDB") at the given file position into {@code dblock}.
 * Sets the block's offset, dataPos, extraBytes and wasRead; the object data itself is not read.
 *
 * @param pos file position of the direct block; a negative value means the block is empty and nothing is done
 * @param heapAddress address of the owning heap header, checked against the value stored in the block
 * @param dblock receives the values read
 * @throws IOException on read error
 * @throws IllegalStateException if the signature is not "FHDB" or the block belongs to another heap
 */
void readDirectBlock(long pos, long heapAddress, DataBlock dblock) throws IOException {
  if (pos < 0) {
    return; // means its empty
  }

  raf.seek(pos);

  // header
  final String signature = raf.readString(4);
  if (!signature.equals("FHDB")) {
    throw new IllegalStateException(signature + " should equal FHDB");
  }

  raf.readByte(); // version, not used

  // This is the address for the fractal heap header that this block belongs to.
  // This field is principally used for file integrity checking.
  final long owningHeapAddress = h5.readOffset();
  if (owningHeapAddress != heapAddress) {
    throw new IllegalStateException();
  }

  // keep track of how much room is taken out of block size, that is, how much is left for the object:
  // 5 bytes for signature and version, plus the heap header address just read
  dblock.extraBytes = 5 + (h5.isOffsetLong() ? 8 : 4);

  // whole bytes needed for maxHeapSize bits
  final int offsetFieldBytes = (maxHeapSize % 8 != 0) ? (maxHeapSize / 8) + 1 : maxHeapSize / 8;

  // This is the offset of the block within the fractal heap's address space (in bytes).
  dblock.offset = h5.readVariableSizeUnsigned(offsetFieldBytes);
  dblock.dataPos = pos; // raf.getFilePointer(); // offsets are from the start of the block

  final int checksumBytes = ((flags & 2) != 0) ? 4 : 0; // ?? size of checksum
  dblock.extraBytes += offsetFieldBytes + checksumBytes;

  // dblock.size -= size; // subtract space used by other fields

  dblock.wasRead = true;

  if (debugDetail || debugFractalHeap) {
    debugOut.println(" DirectBlock offset= " + dblock.offset + " dataPos = " + dblock.dataPos);
  }
}
} // FractalHeap
© 2015 - 2025 Weber Informatics LLC | Privacy Policy