ucar.nc2.iosp.hdf5.DataBTree Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
* See LICENSE for license information.
*/
package ucar.nc2.iosp.hdf5;
import ucar.ma2.Section;
import ucar.nc2.Variable;
import ucar.nc2.iosp.LayoutTiled;
import ucar.nc2.util.Misc;
import ucar.unidata.io.RandomAccessFile;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* This holds the chunked data storage.
* level 1A
* A B-tree, version 1, used for data (node type 1)
*
* Version 1 B-trees in HDF5 files are an implementation of the B-link tree, in which the sibling nodes at a
* particular level in the tree are stored in a doubly-linked list.
* The B-link trees implemented by the file format contain one more key than the number of children.
* In other words, each child pointer out of a B-tree node has a left key and a right key.
* The pointers out of internal nodes point to sub-trees while the pointers out of leaf nodes point to symbol nodes and
* raw data chunks. Aside from that difference, internal nodes and leaf nodes are identical.
*
* @see "http://www.hdfgroup.org/HDF5/doc/H5.format.html#Btrees"
* @author caron
* @since 6/27/12
*/
public class DataBTree {
private static final boolean debugDataBtree = false;
private static final boolean debugDataChunk = false;
private static final boolean debugChunkOrder = false;
private static java.io.PrintStream debugOut = System.out;
private final H5header h5;
private final MemTracker memTracker;
private final long rootNodeAddress;
private final Tiling tiling;
private final int ndimStorage, wantType;
private Variable owner;
/**
 * Set up access to the chunk B-tree of one variable.
 * No B-tree node is read here; nodes are read when an iterator is created.
 *
 * @param h5 header object giving access to the file
 * @param rootNodeAddress address of the root node of the B-tree
 * @param varShape shape of the variable, passed on to Tiling
 * @param storageSize storage (chunk) size, passed on to Tiling; its length is the number of storage dimensions
 * @param memTracker memory tracker, may be null
 * @throws IOException declared in the signature for callers
 */
DataBTree(H5header h5, long rootNodeAddress, int[] varShape, int[] storageSize, MemTracker memTracker) throws IOException {
  this.wantType = 1; // this class only reads node type 1 (data chunk) B-trees
  this.memTracker = memTracker;
  this.rootNodeAddress = rootNodeAddress;
  this.h5 = h5;

  // Tiling is built before storageSize.length is read, exactly as before.
  this.tiling = new Tiling(varShape, storageSize);
  this.ndimStorage = storageSize.length;
}
/**
 * Record the variable that this B-tree holds the chunks for.
 * Within this class the owner is only used to label debug output and memory-tracker entries.
 *
 * @param owner the owning variable
 */
void setOwner(Variable owner) {
this.owner = owner;
}
/**
 * Create an iterator that returns the full DataChunk records (size, filter mask, offsets, file position)
 * stored in the leaf nodes of this B-tree.
 * Used by H5tiledLayoutBB.
 *
 * @param want skip any chunks that are before the origin of this section; may be null, meaning skip nothing
 * @return a new iterator, already positioned at the first chunk that is not skipped
 * @throws IOException on error reading the B-tree nodes
 */
DataChunkIterator getDataChunkIteratorFilter(Section want) throws IOException {
return new DataChunkIterator(want);
}
/**
 * Create an iterator that returns the chunks of this B-tree as LayoutTiled.DataChunk objects
 * (offsets and file position only).
 * Used by H5tiledLayout.
 *
 * @param want skip any chunks that are before the origin of this section; may be null, meaning skip nothing
 * @param nChunkDim number of chunk dimensions to report; longer offset arrays are cut down to this length
 * @return a new iterator, already positioned at the first chunk that is not skipped
 * @throws IOException on error reading the B-tree nodes
 */
LayoutTiled.DataChunkIterator getDataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
/* Disabled debugging aid: walks every chunk and prints "HEY" when one is not in increasing tile order.
if (debugChunkOrder) {
DataChunkIteratorNoFilter iter = new DataChunkIteratorNoFilter(null, nChunkDim);
int count = 0;
int last = -1;
while (iter.hasNext()) {
LayoutTiled.DataChunk chunk = iter.next();
System.out.printf("%d : %d%n", count++, tiling.order(chunk.offset));
if (tiling.order(chunk.offset) <= last)
System.out.println("HEY");
last = tiling.order(chunk.offset);
}
}*/
return new DataChunkIteratorNoFilter(want, nChunkDim);
}
/**
 * An iterator over the data chunks held in the leaf (level 0) nodes of the B-tree.
 * Each chunk is handed back as a LayoutTiled.DataChunk carrying its offsets and file position.
 * Used by H5tiledLayout, when there are no filters.
 */
class DataChunkIteratorNoFilter implements LayoutTiled.DataChunkIterator {
  private final Node root;
  private final int nChunkDim;

  /**
   * Read the root node and position the tree on the first chunk of interest.
   *
   * @param want skip any nodes that are before this section; null means do not skip anything
   * @param nChunkDim number of chunk dimensions - may be less than the offset[] length
   * @throws IOException on error
   */
  DataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
    this.nChunkDim = nChunkDim;
    this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
    if (want == null) {
      root.first((long[]) null);
    } else {
      root.first(want.getOriginLong());
    }
  }

  /** @return true if the tree has another chunk to deliver */
  public boolean hasNext() {
    return root.hasNext(); // && !node.greaterThan(wantOrigin);
  }

  /**
   * Fetch the next chunk from the tree and repackage it for the tiled layout.
   *
   * @return the chunk offsets (cut down to nChunkDim entries if they are longer) and its file position
   * @throws IOException on error reading a further node
   */
  public LayoutTiled.DataChunk next() throws IOException {
    DataChunk chunk = root.next();

    // the stored offsets may carry an extra trailing dimension that the caller does not want
    boolean tooLong = chunk.offset.length > nChunkDim;
    int[] offset = tooLong ? java.util.Arrays.copyOf(chunk.offset, nChunkDim) : chunk.offset;
    long[] offsetLong = tooLong ? java.util.Arrays.copyOf(chunk.offsetLong, nChunkDim) : chunk.offsetLong;

    if (debugChunkOrder) {
      System.out.printf("LayoutTiled.DataChunk next order %d%n", tiling.order(chunk.offset));
    }
    return new LayoutTiled.DataChunk(offset, offsetLong, chunk.filePos);
  }
}
/**
 * An iterator over the data chunks in the B-tree.
 * Returns the data chunk info (the DataChunk records) from the B-tree leaf (level 0) nodes.
 * Used by H5tiledLayoutBB, when there are filters.
 */
class DataChunkIterator {
  private final Node root;
  private final long[] wantOrigin;

  /**
   * Read the root node and position the tree on the first chunk of interest.
   *
   * @param want skip any nodes that are before this section; null means do not skip anything
   * @throws IOException on error
   */
  DataChunkIterator(Section want) throws IOException {
    this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
    if (want == null) {
      this.wantOrigin = null;
    } else {
      this.wantOrigin = want.getOriginLong();
    }
    this.root.first(this.wantOrigin);
  }

  /** @return true if the tree has another chunk to deliver */
  public boolean hasNext() {
    return root.hasNext(); // && !node.greaterThan(wantOrigin);
  }

  /**
   * @return the next chunk record, as read from the leaf node
   * @throws IOException on error reading a further node
   */
  public DataChunk next() throws IOException {
    return root.next();
  }
}
/**
 * One B-tree node, read from the file when it is constructed.
 * <p>
 * A leaf node (level 0) keeps every entry as a DataChunk. A node at a higher level keeps only the
 * key offsets and the child node addresses, and reads the child nodes on demand while iterating.
 * In both cases nentries + 1 keys are read; the extra, final key has no child pointer.
 * <p>
 * The node also carries its own iteration state (currentEntry / currentNode): call one of the
 * first() methods to position it, then alternate hasNext() and next().
 */
class Node {
  private long address;
  private int level, nentries;
  private Node currentNode; // level > 0 only: the child node currently being iterated

  // level 0 only
  private List<DataChunk> myEntries;

  // level > 0 only
  private int[][] offset; // int[nentries][ndim]; // other levels
  private long[][] offsetLong;
  // "For raw data chunk nodes, the child pointer is the address of a single raw data chunk"
  private long[] childPointer; // long[nentries];

  private int currentEntry; // track iteration; LOOK this seems fishy - why not an iterator ??

  /**
   * Seek to the node and read its header and all of its keys.
   *
   * @param address address of the node; converted to a file position with h5.getFileOffset()
   * @param parent address of the parent node, or -1 for the root; only used in debug output
   * @throws IOException on read error
   * @throws IllegalStateException if the node does not start with "TREE" or is not of the wanted node type
   */
  Node(long address, long parent) throws IOException {
    if (debugDataBtree) {
      debugOut.println("\n--> DataBTree read tree at address=" + address + " parent= " + parent +
          " owner= " + owner.getNameAndDimensions());
    }

    h5.raf.order(RandomAccessFile.LITTLE_ENDIAN); // header information is in le byte order
    h5.raf.seek(h5.getFileOffset(address));
    this.address = address;

    String magic = h5.raf.readString(4);
    if (!magic.equals("TREE")) {
      throw new IllegalStateException("DataBTree doesnt start with TREE");
    }

    int type = h5.raf.readByte();
    level = h5.raf.readByte();
    nentries = h5.raf.readShort();
    if (type != wantType) {
      throw new IllegalStateException("DataBTree must be type " + wantType);
    }

    // NOTE(review): each key holds ndimStorage 8-byte offsets, yet the per-entry term adds ndimStorage,
    // not 8 * ndimStorage. The value is only reported to the memory tracker and debug output - confirm intent.
    long size = 8 + 2 * h5.getSizeOffsets() + ((long) nentries) * (8 + h5.getSizeOffsets() + 8 + ndimStorage);
    if (memTracker != null) {
      memTracker.addByLen("Data BTree (" + owner + ")", address, size);
    }
    if (debugDataBtree) {
      debugOut.println(" type=" + type + " level=" + level + " nentries=" + nentries + " size = " + size);
    }

    // sibling addresses; they have to be read to advance the file position, and are only used for debug output
    long leftAddress = h5.readOffset();
    long rightAddress = h5.readOffset();
    if (debugDataBtree) {
      debugOut.println(" leftAddress=" + leftAddress + " =0x" + Long.toHexString(leftAddress) +
          " rightAddress=" + rightAddress + " =0x" + Long.toHexString(rightAddress));
    }

    if (level == 0) {
      // read all entries as a DataChunk; the element type lets first() and next() use the entries without casts
      myEntries = new ArrayList<>();
      for (int i = 0; i <= nentries; i++) {
        DataChunk dc = new DataChunk(ndimStorage, (i == nentries));
        myEntries.add(dc);
        if (debugDataChunk) {
          debugOut.println(dc);
        }
      }

    } else { // just track the offsets and node addresses
      offset = new int[nentries + 1][ndimStorage];
      offsetLong = new long[nentries + 1][ndimStorage];
      childPointer = new long[nentries + 1];
      for (int i = 0; i <= nentries; i++) {
        h5.raf.skipBytes(8); // skip size, filterMask
        for (int j = 0; j < ndimStorage; j++) {
          long loffset = h5.raf.readLong();
          // assert loffset < Integer.MAX_VALUE;
          offset[i][j] = (int) loffset; // truncated copy; offsetLong keeps the full value
          offsetLong[i][j] = loffset;
        }
        // the final key has no child pointer
        this.childPointer[i] = (i == nentries) ? -1 : h5.readOffset();
        if (debugDataBtree) {
          debugOut.print(" childPointer=" + childPointer[i] + " =0x" + Long.toHexString(childPointer[i]));
          for (long anOffset : offsetLong[i]) {
            debugOut.print(" " + anOffset);
          }
          debugOut.println();
        }
      }
    }
  }

  /**
   * Position this node (and, recursively, the chosen child nodes) on the first entry we dont want to skip,
   * comparing with the int offsets.
   * <p>
   * Entry i goes from [offset(i),offset(i+1)). We want to skip any entries we dont need, namely those
   * where want >= offset(i+1), so keep skipping until want < offset(i+1).
   * This is the same algorithm as first(long[]), using the int copies of the offsets.
   *
   * @param wantOrigin origin of the wanted section; null means start at the first entry
   * @throws IOException on error reading a child node
   */
  void first(int[] wantOrigin) throws IOException {
    if (debugChunkOrder && wantOrigin != null) {
      System.out.printf("Level %d: Tile want %d%n", level, tiling.order(wantOrigin));
    }

    if (level == 0) {
      // note nentries-1 - assume dont skip the last one
      for (currentEntry = 0; currentEntry < nentries - 1; currentEntry++) {
        DataChunk entry = myEntries.get(currentEntry + 1); // look at the next one
        if (debugChunkOrder) {
          System.out.printf(" Entry=%d: Tile ending order= %d%n", currentEntry, tiling.order(entry.offset));
        }
        if ((wantOrigin == null) || tiling.compare(wantOrigin, entry.offset) < 0) {
          break;
        }
      }
      if (debugChunkOrder) {
        System.out.printf("Level %d use entry= %d%n", level, currentEntry);
      }

    } else {
      currentNode = null;
      for (currentEntry = 0; currentEntry < nentries; currentEntry++) {
        if (debugChunkOrder) {
          System.out.printf(" Entry=%3d offset [%-15s]: Tile order %d-%d%n", currentEntry,
              Misc.showInts(offset[currentEntry]),
              tiling.order(offset[currentEntry]), tiling.order(offset[currentEntry + 1]));
        }
        if ((wantOrigin == null) || tiling.compare(wantOrigin, offset[currentEntry + 1]) < 0) {
          currentNode = new Node(childPointer[currentEntry], this.address);
          if (debugChunkOrder) {
            System.out.printf("Level %d use entry= %d%n", level, currentEntry);
          }
          currentNode.first(wantOrigin);
          break;
        }
      }

      // heres the case where its the last entry we want; the tiling.compare() above may fail
      if (currentNode == null) {
        currentEntry = nentries - 1;
        currentNode = new Node(childPointer[currentEntry], this.address);
        currentNode.first(wantOrigin);
      }
    }

    assert (nentries == 0) || (currentEntry < nentries) : currentEntry + " >= " + nentries;
  }

  /**
   * Position this node (and, recursively, the chosen child nodes) on the first entry we dont want to skip,
   * comparing with the long offsets.
   * <p>
   * Entry i goes from [offset(i),offset(i+1)). We want to skip any entries we dont need, namely those
   * where want >= offset(i+1), so keep skipping until want < offset(i+1).
   *
   * @param wantOrigin origin of the wanted section; null means start at the first entry
   * @throws IOException on error reading a child node
   */
  void first(long[] wantOrigin) throws IOException {
    if (debugChunkOrder && wantOrigin != null) {
      System.out.printf("Level %d: Tile want %d%n", level, tiling.order(wantOrigin));
    }

    if (level == 0) {
      // note nentries-1 - assume dont skip the last one
      for (currentEntry = 0; currentEntry < nentries - 1; currentEntry++) {
        DataChunk entry = myEntries.get(currentEntry + 1); // look at the next one
        if (debugChunkOrder) {
          System.out.printf(" Entry=%d: Tile ending order= %d%n", currentEntry, tiling.order(entry.offsetLong));
        }
        if ((wantOrigin == null) || tiling.compare(wantOrigin, entry.offsetLong) < 0) {
          break;
        }
      }
      if (debugChunkOrder) {
        System.out.printf("Level %d use entry= %d%n", level, currentEntry);
      }

    } else {
      currentNode = null;
      for (currentEntry = 0; currentEntry < nentries; currentEntry++) {
        if (debugChunkOrder) {
          System.out.printf(" Entry=%3d offset [%-15s]: Tile order %d-%d%n", currentEntry,
              Misc.showLongs(offsetLong[currentEntry]),
              tiling.order(offsetLong[currentEntry]), tiling.order(offsetLong[currentEntry + 1]));
        }
        if ((wantOrigin == null) || tiling.compare(wantOrigin, offsetLong[currentEntry + 1]) < 0) {
          currentNode = new Node(childPointer[currentEntry], this.address);
          if (debugChunkOrder) {
            System.out.printf("Level %d use entry= %d%n", level, currentEntry);
          }
          currentNode.first(wantOrigin);
          break;
        }
      }

      // heres the case where its the last entry we want; the tiling.compare() above may fail
      if (currentNode == null) {
        currentEntry = nentries - 1;
        currentNode = new Node(childPointer[currentEntry], this.address);
        currentNode.first(wantOrigin);
      }
    }

    assert (nentries == 0) || (currentEntry < nentries) : currentEntry + " >= " + nentries;
  }

  /**
   * Whether another chunk can be delivered. One of the first() methods must have been called beforehand.
   * LOOK - wouldnt be a bad idea to terminate if possible instead of running through all subsequent entries
   *
   * @return for a leaf, true while entries remain; otherwise true while the current child has more
   *         or there are further children after it
   */
  boolean hasNext() {
    if (level == 0) {
      return currentEntry < nentries;
    }
    if (currentNode.hasNext()) {
      return true;
    }
    return currentEntry < nentries - 1;
  }

  /**
   * Deliver the next chunk and advance. For a non-leaf node this reads the next child node
   * once the current child is exhausted.
   *
   * @return the next chunk
   * @throws IOException on error reading a child node
   */
  DataChunk next() throws IOException {
    if (level == 0) {
      return myEntries.get(currentEntry++);
    }
    if (currentNode.hasNext()) {
      return currentNode.next();
    }
    // current child is used up: move on to the next child and start at its first entry
    currentEntry++;
    currentNode = new Node(childPointer[currentEntry], this.address);
    currentNode.first((long[]) null);
    return currentNode.next();
  }
}
/* private void dump(DataType dt, List entries) {
try {
for (DataChunk node : entries) {
if (dt == DataType.STRING) {
HeapIdentifier heapId = new HeapIdentifier(node.address);
GlobalHeap.HeapObject ho = heapId.getHeapObject();
byte[] pa = new byte[(int) ho.dataSize];
raf.seek(ho.dataPos);
raf.read(pa);
debugOut.println(" data at " + ho.dataPos + " = " + new String(pa));
}
}
}
catch (IOException e) {
e.printStackTrace();
}
} */
/**
 * One key, with its child pointer, of a leaf (level 0) node.
 * These are part of the level 1A data structure, type 1;
 * see http://www.hdfgroup.org/HDF5/doc/H5.format.html#V1Btrees, "Key" field (type 1) p 10.
 * The record is read from the current position of the file when it is constructed.
 */
class DataChunk {
  int size; // size of chunk in bytes; need storage layout dimensions to interpret
  int filterMask; // bitfield indicating which filters have been skipped for this chunk
  int[] offset; // offset index of this chunk, relative to entire array (values cast to int)
  long[] offsetLong; // offset index of this chunk, relative to entire array
  long filePos; // filePos of a single raw data chunk, already shifted by the offset if needed

  /**
   * Read one record: size, filter mask, ndim offsets and, unless this is the last key, the chunk address.
   *
   * @param ndim number of offsets to read
   * @param last true for the final key of the node; no address is read and filePos is set to -1
   * @throws IOException on read error
   */
  DataChunk(int ndim, boolean last) throws IOException {
    size = h5.raf.readInt();
    filterMask = h5.raf.readInt();

    offset = new int[ndim];
    offsetLong = new long[ndim];
    for (int dim = 0; dim < ndim; dim++) {
      long value = h5.raf.readLong();
      // assert value < Integer.MAX_VALUE;
      offsetLong[dim] = value;
      offset[dim] = (int) value;
    }

    if (last) {
      filePos = -1;
    } else {
      filePos = h5.readAddress();
    }

    if (memTracker != null) {
      memTracker.addByLen("Chunked Data (" + owner + ")", filePos, size);
    }
  }

  /** @return a one-line description: size, filter mask, file position and the long offsets */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(" ChunkedDataNode size=").append(size);
    text.append(" filterMask=").append(filterMask);
    text.append(" filePos=").append(filePos);
    text.append(" offsets= ");
    for (int i = 0; i < offsetLong.length; i++) {
      text.append(offsetLong[i]).append(" ");
    }
    return text.toString();
  }
}
}