ucar.nc2.iosp.hdf5.DataBTree Maven / Gradle / Ivy
The newest version!
/*
* Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
* See LICENSE for license information.
*/
package ucar.nc2.iosp.hdf5;
import java.util.Arrays;
import ucar.ma2.Section;
import ucar.nc2.iosp.LayoutTiled;
import ucar.nc2.util.Misc;
import ucar.unidata.io.RandomAccessFile;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* This holds the chunked data storage.
* level 1A
* A B-tree, version 1, used for data (node type 1)
*
* Version 1 B-trees in HDF5 files are an implementation of the B-link tree, in which the sibling nodes at a particular
* level
* in the tree are stored in a doubly-linked list.
* The B-link trees implemented by the file format contain one more key than the number of children.
* In other words, each child pointer out of a B-tree node has a left key and a right key.
* The pointers out of internal nodes point to sub-trees while the pointers out of leaf nodes point to symbol nodes and
* raw data chunks. Aside from that difference, internal nodes and leaf nodes are identical.
*
* @see "http://www.hdfgroup.org/HDF5/doc/H5.format.html#Btrees"
* @author caron
* @since 6/27/12
*/
public class DataBTree {
private static final boolean debugDataBtree = false;
private static final boolean debugDataChunk = false;
private static final boolean debugChunkOrder = false;
private static java.io.PrintStream debugOut = System.out;
private final H5headerIF h5;
private final MemTracker memTracker;
private final long rootNodeAddress;
private final Tiling tiling;
private final int ndimStorage, wantType;
private Object owner;
/**
 * Stores the arguments and builds the {@code Tiling} used to order chunk offsets.
 * The node type accepted when reading nodes is fixed to 1.
 *
 * @param h5 header object used for file access and for reading offsets/addresses
 * @param rootNodeAddress address of the root node of the tree
 * @param varShape passed to {@code Tiling} together with {@code storageSize}
 * @param storageSize passed to {@code Tiling}; its length becomes the number of storage dimensions
 * @param memTracker optional tracker, may be null
 */
public DataBTree(H5headerIF h5, long rootNodeAddress, int[] varShape, int[] storageSize, MemTracker memTracker) {
  this.h5 = h5;
  this.memTracker = memTracker;
  this.rootNodeAddress = rootNodeAddress;
  this.wantType = 1;
  // Tiling is built before storageSize.length is read, exactly as before.
  this.tiling = new Tiling(varShape, storageSize);
  this.ndimStorage = storageSize.length;
}
/**
 * Returns the file handle supplied by the header object.
 *
 * @return whatever {@code h5.getRandomAccessFile()} returns
 */
RandomAccessFile getRandomAccessFile() {
  final RandomAccessFile raf = h5.getRandomAccessFile();
  return raf;
}
/**
 * Records the owner object; it is only used in debug and memory-tracker labels.
 *
 * @param owner object whose string form is embedded in those labels
 */
public void setOwner(Object owner) {
  this.owner = owner;
}
/**
 * Creates an iterator over the DataChunks of this tree; used by H5tiledLayoutBB.
 *
 * @param want entries before this section's origin are skipped; may be null to skip nothing
 * @return a new iterator, already positioned on its first entry
 * @throws IOException on error reading the tree nodes
 */
public DataChunkIterator getDataChunkIteratorFilter(Section want) throws IOException {
  DataChunkIterator chunks = new DataChunkIterator(want);
  return chunks;
}
/**
 * Creates an iterator yielding {@code LayoutTiled.DataChunk} objects; used by H5tiledLayout.
 *
 * @param want entries before this section's origin are skipped; may be null to skip nothing
 * @param nChunkDim number of offset dimensions to keep in each returned chunk
 * @return a new iterator, already positioned on its first entry
 * @throws IOException on error reading the tree nodes
 */
public LayoutTiled.DataChunkIterator getDataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
  LayoutTiled.DataChunkIterator chunks = new DataChunkIteratorNoFilter(want, nChunkDim);
  return chunks;
}
/**
 * An iterator over the DataChunks in the btree, as read from the leaf (level 0) nodes.
 * Each chunk is handed out as a {@code LayoutTiled.DataChunk} carrying only offset and file position.
 * Used by H5tiledLayout, when there are no filters.
 */
class DataChunkIteratorNoFilter implements LayoutTiled.DataChunkIterator {
  private final int nChunkDim;
  private final Node root;

  /**
   * Reads the root node and positions it on the first entry that is not skipped.
   *
   * @param want skip any nodes that are before this section; null means skip nothing
   * @param nChunkDim number of chunk dimensions - may be less than the offset[] length
   * @throws IOException on error
   */
  DataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
    this.nChunkDim = nChunkDim;
    this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
    if (want == null) {
      this.root.first(null);
    } else {
      this.root.first(want.getOrigin());
    }
  }

  /** @return true if the root node reports more chunks */
  public boolean hasNext() {
    return root.hasNext(); // && !node.greaterThan(wantOrigin);
  }

  /**
   * Returns the next chunk, with its offset cut down to {@code nChunkDim} entries when it is longer.
   *
   * @return offset and file position of the next chunk
   * @throws IOException on error reading a further tree node
   */
  public LayoutTiled.DataChunk next() throws IOException {
    DataChunk chunk = root.next();
    // may have to eliminate trailing offsets; otherwise the chunk's own array is passed on
    int[] chunkOffset = (chunk.offset.length > nChunkDim) ? Arrays.copyOf(chunk.offset, nChunkDim) : chunk.offset;
    if (debugChunkOrder)
      System.out.printf("LayoutTiled.DataChunk next order %d%n", tiling.order(chunk.offset));
    return new LayoutTiled.DataChunk(chunkOffset, chunk.filePos);
  }
}
/**
 * An iterator over the DataChunks in the btree.
 * Returns the data chunk info from the btree leaf (level 0) nodes.
 * Used by H5tiledLayoutBB, when there are filters.
 */
public class DataChunkIterator {
  private final Node root;
  private final int[] wantOrigin;

  /**
   * Reads the root node and positions it on the first entry that is not skipped.
   *
   * @param want skip any nodes that are before this section; null means skip nothing
   * @throws IOException on error
   */
  DataChunkIterator(Section want) throws IOException {
    this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
    if (want == null) {
      this.wantOrigin = null;
    } else {
      this.wantOrigin = want.getOrigin();
    }
    this.root.first(this.wantOrigin);
  }

  /** @return true if the root node reports more chunks */
  public boolean hasNext() {
    return root.hasNext(); // && !node.greaterThan(wantOrigin);
  }

  /**
   * @return the next DataChunk, exactly as produced by the root node
   * @throws IOException on error reading a further tree node
   */
  public DataChunk next() throws IOException {
    return root.next();
  }
}
/**
 * A Btree node, read from the file when it is constructed.
 *
 * A level 0 node keeps every entry as a DataChunk. A node at any other level keeps only the
 * offsets and child addresses, and reads one child Node at a time while iterating.
 * Either way nentries + 1 keys are read; the extra trailing key has no child pointer.
 *
 * Iteration protocol: call first(wantOrigin) once, then alternate hasNext() / next().
 */
class Node {
  private long address;
  private int level, nentries;
  // level > 0 only: the child node currently being iterated
  private Node currentNode;

  // level 0 only: nentries + 1 chunks, the last one being the trailing key (filePos == -1)
  private List<DataChunk> myEntries;

  // level > 0 only
  private int[][] offset; // int[nentries + 1][ndimStorage]
  // "For raw data chunk nodes, the child pointer is the address of a single raw data chunk"
  private long[] childPointer; // long[nentries + 1]; the last slot is set to -1

  private int currentEntry; // track iteration; LOOK this seems fishy - why not an iterator ??

  /**
   * Reads the node stored at the given address.
   *
   * @param address address of the node; converted with h5.getFileOffset() before seeking
   * @param parent address of the parent node, or -1 for the root; only used in debug output
   * @throws IOException on read error
   * @throws IllegalStateException if the node does not start with "TREE" or is not of type wantType
   */
  Node(long address, long parent) throws IOException {
    if (debugDataBtree)
      debugOut.println("\n--> DataBTree read tree at address=" + address + " parent= " + parent + " owner= " + owner);

    getRandomAccessFile().order(RandomAccessFile.LITTLE_ENDIAN); // header information is in le byte order
    getRandomAccessFile().seek(h5.getFileOffset(address));
    this.address = address;

    String magic = getRandomAccessFile().readString(4);
    if (!magic.equals("TREE"))
      throw new IllegalStateException("DataBTree doesnt start with TREE");

    int type = getRandomAccessFile().readByte();
    level = getRandomAccessFile().readByte();
    nentries = getRandomAccessFile().readShort();
    if (type != wantType)
      throw new IllegalStateException("DataBTree must be type " + wantType);

    // Only reported to the memory tracker and the debug output.
    // NOTE(review): the per-entry term adds ndimStorage, although each offset below is read as an
    // 8-byte long - confirm whether this size is meant to be approximate.
    long size = 8 + 2 * h5.getSizeOffsets() + ((long) nentries) * (8 + h5.getSizeOffsets() + 8 + ndimStorage);
    if (memTracker != null)
      memTracker.addByLen("Data BTree (" + owner + ")", address, size);
    if (debugDataBtree)
      debugOut.println(" type=" + type + " level=" + level + " nentries=" + nentries + " size = " + size);

    // The sibling addresses are only printed, but they must be read to advance the file position.
    long leftAddress = h5.readOffset();
    long rightAddress = h5.readOffset();
    if (debugDataBtree)
      debugOut.println(" leftAddress=" + leftAddress + " =0x" + Long.toHexString(leftAddress) + " rightAddress="
          + rightAddress + " =0x" + Long.toHexString(rightAddress));

    if (level == 0) {
      // read all entries as a DataChunk
      myEntries = new ArrayList<>();
      for (int i = 0; i <= nentries; i++) {
        DataChunk dc = new DataChunk(ndimStorage, (i == nentries));
        myEntries.add(dc);
        if (debugDataChunk)
          debugOut.println(dc);
      }
    } else { // just track the offsets and node addresses
      offset = new int[nentries + 1][ndimStorage];
      childPointer = new long[nentries + 1];
      for (int i = 0; i <= nentries; i++) {
        getRandomAccessFile().skipBytes(8); // skip size, filterMask
        for (int j = 0; j < ndimStorage; j++) {
          long loffset = getRandomAccessFile().readLong();
          assert loffset < Integer.MAX_VALUE;
          offset[i][j] = (int) loffset;
        }
        // the trailing key is not followed by a child pointer
        this.childPointer[i] = (i == nentries) ? -1 : h5.readOffset();
        if (debugDataBtree) {
          debugOut.print(" childPointer=" + childPointer[i] + " =0x" + Long.toHexString(childPointer[i]));
          for (long anOffset : offset[i])
            debugOut.print(" " + anOffset);
          debugOut.println();
        }
      }
    }
  }

  /**
   * Positions the iteration on the first entry we dont want to skip.
   * Entry i goes from [offset(i),offset(i+1)).
   * We want to skip any entries we dont need, namely those where want >= offset(i+1),
   * so keep skipping until want < offset(i+1).
   * On a node above level 0 the selected child is read and positioned recursively.
   *
   * @param wantOrigin origin of the wanted section; null means start at the first entry
   * @throws IOException on error reading a child node
   */
  void first(int[] wantOrigin) throws IOException {
    if (debugChunkOrder && wantOrigin != null)
      System.out.printf("Level %d: Tile want %d%n", level, tiling.order(wantOrigin));

    if (level == 0) {
      // note nentries-1 - assume dont skip the last one
      for (currentEntry = 0; currentEntry < nentries - 1; currentEntry++) {
        DataChunk entry = myEntries.get(currentEntry + 1); // look at the next one
        if (debugChunkOrder)
          System.out.printf(" Entry=%d: Tile ending order= %d%n", currentEntry, tiling.order(entry.offset));
        if ((wantOrigin == null) || tiling.compare(wantOrigin, entry.offset) < 0)
          break;
      }
      if (debugChunkOrder)
        System.out.printf("Level %d use entry= %d%n", level, currentEntry);

    } else {
      currentNode = null;
      for (currentEntry = 0; currentEntry < nentries; currentEntry++) {
        if (debugChunkOrder)
          System.out.printf(" Entry=%3d offset [%-15s]: Tile order %d-%d%n", currentEntry,
              Arrays.toString(offset[currentEntry]), tiling.order(offset[currentEntry]),
              tiling.order(offset[currentEntry + 1]));
        if ((wantOrigin == null) || tiling.compare(wantOrigin, offset[currentEntry + 1]) < 0) {
          currentNode = new Node(childPointer[currentEntry], this.address);
          if (debugChunkOrder)
            System.out.printf("Level %d use entry= %d%n", level, currentEntry);
          currentNode.first(wantOrigin);
          break;
        }
      }

      // heres the case where its the last entry we want; the tiling.compare() above may fail
      if (currentNode == null) {
        currentEntry = nentries - 1;
        currentNode = new Node(childPointer[currentEntry], this.address);
        currentNode.first(wantOrigin);
      }
    }

    assert (nentries == 0) || (currentEntry < nentries) : currentEntry + " >= " + nentries;
  }

  /**
   * Tells whether next() can be called.
   * LOOK - wouldnt be a bad idea to terminate if possible instead of running through all subsequent entries
   *
   * @return at level 0, whether unread entries remain; above level 0, whether the current child
   *         has more or further children remain after it
   */
  boolean hasNext() {
    if (level == 0) {
      return currentEntry < nentries;
    } else {
      if (currentNode.hasNext())
        return true;
      return currentEntry < nentries - 1;
    }
  }

  /**
   * Returns the next chunk. Above level 0, once the current child is exhausted the next child
   * is read and started from its first entry.
   *
   * @return the next DataChunk
   * @throws IOException on error reading the next child node
   */
  DataChunk next() throws IOException {
    if (level == 0) {
      return myEntries.get(currentEntry++);
    } else {
      if (currentNode.hasNext())
        return currentNode.next();

      currentEntry++;
      currentNode = new Node(childPointer[currentEntry], this.address);
      currentNode.first(null); // nothing is skipped in later children
      return currentNode.next();
    }
  }
}
/**
 * One key of a leaf (level 0) node, read from the current file position.
 * These are part of the level 1A data structure, type 1;
 * see http://www.hdfgroup.org/HDF5/doc/H5.format.html#V1Btrees, "Key" field (type 1) p 10.
 */
public class DataChunk {
  /** size of chunk in bytes; need storage layout dimensions to interpret */
  public final int size;
  /** bitfield indicating which filters have been skipped for this chunk */
  public final int filterMask;
  /** offset index of this chunk, relative to entire array */
  public final int[] offset;
  /** filePos of a single raw data chunk, already shifted by the offset if needed; -1 for the trailing key */
  public final long filePos;

  /**
   * Reads size, filter mask, {@code ndim} offsets and, unless this is the trailing key, the chunk address.
   *
   * @param ndim number of 8-byte offsets to read
   * @param last true for the trailing key, which has no address; filePos is then -1
   * @throws IOException on read error
   */
  DataChunk(int ndim, boolean last) throws IOException {
    size = getRandomAccessFile().readInt();
    filterMask = getRandomAccessFile().readInt();

    int[] chunkOffset = new int[ndim];
    for (int dim = 0; dim < chunkOffset.length; dim++) {
      long wide = getRandomAccessFile().readLong();
      assert wide < Integer.MAX_VALUE;
      chunkOffset[dim] = (int) wide;
    }
    offset = chunkOffset;

    if (last) {
      filePos = -1;
    } else {
      filePos = h5.readAddress();
    }

    if (memTracker != null)
      memTracker.addByLen("Chunked Data (" + owner + ")", filePos, size);
  }

  /** @return size, filter mask, file position and all offsets on one line */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(" ChunkedDataNode size=").append(size);
    text.append(" filterMask=").append(filterMask);
    text.append(" filePos=").append(filePos);
    text.append(" offsets= ");
    for (int value : offset) {
      text.append(value).append(" ");
    }
    return text.toString();
  }
}
}
© 2015 - 2025 Weber Informatics LLC | Privacy Policy