ucar.nc2.iosp.hdf5.DataBTree Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of cdm Show documentation
Show all versions of cdm Show documentation
The NetCDF-Java Library is a Java interface to NetCDF files,
as well as to many other types of scientific data formats.
The newest version!
package ucar.nc2.iosp.hdf5;
import ucar.ma2.Section;
import ucar.nc2.Variable;
import ucar.nc2.iosp.LayoutTiled;
import ucar.unidata.io.RandomAccessFile;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
/**
* This holds the chunked data storage.
* level 1A
* A B-tree, version 1, used for data (node type 1)
*
* Version 1 B-trees in HDF5 files are an implementation of the B-link tree, in which the sibling nodes at a particular
* level in the tree are stored in a doubly-linked list.
* The B-link trees implemented by the file format contain one more key than the number of children.
* In other words, each child pointer out of a B-tree node has a left key and a right key.
* The pointers out of internal nodes point to sub-trees while the pointers out of leaf nodes point to symbol nodes and
* raw data chunks. Aside from that difference, internal nodes and leaf nodes are identical.
*
* @see "http://www.hdfgroup.org/HDF5/doc/H5.format.html#Btrees"
* @author caron
* @since 6/27/12
*/
public class DataBTree {
// Compile-time debug switches.
// debugDataBtree: print each node header and its keys/child pointers as the node is read.
private static final boolean debugDataBtree = false;
// debugDataChunk: print every leaf-level DataChunk as it is read.
private static final boolean debugDataChunk = false;
// debugChunkOrder: print tile-order information while positioning and iterating (written to System.out directly).
private static final boolean debugChunkOrder = false;
// Destination for the debugDataBtree / debugDataChunk output.
private static java.io.PrintStream debugOut = System.out;
// Header object; used here for file-offset translation, offset/address reads and the offset size.
private final H5header h5;
// The file being read; taken from h5.raf in the constructor.
private final RandomAccessFile raf;
// Optional tracker of file regions; every use is null-checked.
private final MemTracker memTracker;
// Address of the B-tree root node; every iterator starts by reading a Node from here.
private final long rootNodeAddress;
// Built from the variable shape and storage size; used to order and compare chunk offsets.
private final Tiling tiling;
// ndimStorage: length of the storageSize array, i.e. number of offsets read per key.
// wantType: the B-tree node type that every node must have (set to 1 in the constructor).
private final int ndimStorage, wantType;
// Variable this tree belongs to; set through setOwner() and used only in debug and memTracker labels.
private Variable owner;
/**
 * Sets up access to a chunked-data B-tree; no node is read until an iterator is requested.
 *
 * @param h5 header object, which also supplies the RandomAccessFile
 * @param rootNodeAddress address of the root node of the B-tree
 * @param varShape shape of the variable, handed to the Tiling
 * @param storageSize storage (chunk) size, handed to the Tiling; its length is the number of offsets per key
 * @param memTracker optional memory tracker, may be null
 * @throws IOException declared for callers; nothing in this constructor reads the file
 */
DataBTree(H5header h5, long rootNodeAddress, int[] varShape, int[] storageSize, MemTracker memTracker) throws IOException {
  this.h5 = h5;
  this.raf = h5.raf;
  this.memTracker = memTracker;
  this.rootNodeAddress = rootNodeAddress;

  this.tiling = new Tiling(varShape, storageSize);
  this.ndimStorage = storageSize.length;

  // only B-tree nodes of type 1 are accepted
  this.wantType = 1;
}
/**
 * Records the variable this B-tree belongs to; it is used in debug and memTracker labels.
 *
 * @param owner the owning variable
 */
void setOwner(final Variable owner) {
  this.owner = owner;
}
// used by H5tiledLayoutBB
/**
 * Creates an iterator that returns the leaf DataChunk records themselves.
 *
 * @param want wanted section, or null; chunks before its origin are skipped by the iterator
 * @return a new DataChunkIterator, already positioned on its first chunk
 * @throws IOException on read error
 */
DataChunkIterator getDataChunkIteratorFilter(Section want) throws IOException {
  DataChunkIterator chunks = new DataChunkIterator(want);
  return chunks;
}
// used by H5tiledLayout
/**
 * Creates an iterator that returns LayoutTiled.DataChunk objects (offset and file position only).
 *
 * @param want wanted section, or null; chunks before its origin are skipped by the iterator
 * @param nChunkDim number of chunk dimensions; returned offsets are truncated to this length
 * @return a new DataChunkIteratorNoFilter, already positioned on its first chunk
 * @throws IOException on read error
 */
LayoutTiled.DataChunkIterator getDataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
  // Disabled debugging aid: walks all chunks and reports any that is not in increasing tile order.
  //
  // if (debugChunkOrder) {
  //   DataChunkIteratorNoFilter iter = new DataChunkIteratorNoFilter(null, nChunkDim);
  //   int count = 0;
  //   int last = -1;
  //   while (iter.hasNext()) {
  //     LayoutTiled.DataChunk chunk = iter.next();
  //     System.out.printf("%d : %d%n", count++, tiling.order(chunk.offset));
  //     if (tiling.order(chunk.offset) <= last)
  //       System.out.println("HEY");
  //     last = tiling.order(chunk.offset);
  //   }
  // }
  LayoutTiled.DataChunkIterator chunks = new DataChunkIteratorNoFilter(want, nChunkDim);
  return chunks;
}
// An Iterator over the DataChunks in the btree.
// returns the actual data from the btree leaf (level 0) nodes.
// used by H5tiledLayout, when there are no filters
/**
 * Iterates over the leaf chunks of the B-tree, returning only each chunk's offset and file position.
 */
class DataChunkIteratorNoFilter implements LayoutTiled.DataChunkIterator {
  private Node root;
  private int nChunkDim;

  /**
   * Reads the root node and positions it on the first chunk that is not skipped.
   *
   * @param want skip any nodes that are before this section; null means start at the beginning
   * @param nChunkDim number of chunk dimensions - may be less than the offset[] length
   * @throws IOException on error
   */
  DataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
    this.nChunkDim = nChunkDim;
    this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
    if (want == null) {
      root.first(null);
    } else {
      root.first(want.getOrigin());
    }
  }

  /** @return true if the root node reports another chunk */
  public boolean hasNext() {
    return root.hasNext(); // && !node.greaterThan(wantOrigin);
  }

  /**
   * Returns the next chunk, with its offset cut down to nChunkDim elements when it is longer.
   *
   * @return offset and file position of the next chunk
   * @throws IOException on read error
   */
  public LayoutTiled.DataChunk next() throws IOException {
    DataChunk leaf = root.next();

    int[] chunkOffset = leaf.offset;
    if (chunkOffset.length > nChunkDim) { // may have to eliminate last offset
      int[] trimmed = new int[nChunkDim];
      System.arraycopy(chunkOffset, 0, trimmed, 0, nChunkDim);
      chunkOffset = trimmed;
    }

    if (debugChunkOrder) {
      System.out.printf("LayoutTiled.DataChunk next order %d%n", tiling.order(leaf.offset));
    }
    return new LayoutTiled.DataChunk(chunkOffset, leaf.filePos);
  }
}
// An Iterator over the DataChunks in the btree.
// returns the data chunk info from the btree leaf (level 0) nodes
// used by H5tiledLayoutBB, when there are filters
/**
 * Iterates over the leaf chunks of the B-tree, returning the full DataChunk records.
 */
class DataChunkIterator {
  private Node root;
  private int[] wantOrigin;

  /**
   * Reads the root node and positions it on the first chunk that is not skipped.
   *
   * @param want skip any nodes that are before this section; null means start at the beginning
   * @throws IOException on error
   */
  DataChunkIterator(Section want) throws IOException {
    this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
    if (want != null) {
      this.wantOrigin = want.getOrigin();
    } else {
      this.wantOrigin = null;
    }
    this.root.first(this.wantOrigin);
  }

  /** @return true if the root node reports another chunk */
  public boolean hasNext() {
    return root.hasNext(); // && !node.greaterThan(wantOrigin);
  }

  /**
   * @return the next leaf DataChunk
   * @throws IOException on read error
   */
  public DataChunk next() throws IOException {
    DataChunk chunk = root.next();
    return chunk;
  }
}
// Btree nodes
/**
 * One node of the data B-tree, read from the file when constructed.
 *
 * A level 0 (leaf) node keeps all of its entries as DataChunk objects. A node at a higher level keeps
 * only the key offsets and child addresses, and reads child nodes as the iteration reaches them.
 * The node also carries the iteration state (currentEntry / currentNode) used by first(), hasNext() and next().
 */
class Node {
  private long address;
  private int level, nentries;
  private Node currentNode; // level > 0: the child node currently being iterated

  // level 0 only
  private List<DataChunk> myEntries;

  // level > 0 only
  private int[][] offset; // int[nentries][ndim]; // other levels
  // "For raw data chunk nodes, the child pointer is the address of a single raw data chunk"
  private long[] childPointer; // long[nentries];

  private int currentEntry; // track iteration; LOOK this seems fishy - why not an iterator ??

  /**
   * Reads the node header and all of its keys from the file.
   *
   * @param address address of this node, translated with h5.getFileOffset() before seeking
   * @param parent address of the parent node (-1 is passed for the root); used only in debug output
   * @throws IOException on read error
   * @throws IllegalStateException if the node does not start with "TREE" or is not of type wantType
   */
  Node(long address, long parent) throws IOException {
    if (debugDataBtree) debugOut.println("\n--> DataBTree read tree at address=" + address + " parent= " + parent +
        " owner= " + owner.getNameAndDimensions());

    raf.order(RandomAccessFile.LITTLE_ENDIAN); // header information is in le byte order
    raf.seek( h5.getFileOffset(address));
    this.address = address;

    String magic = raf.readString(4);
    if (!magic.equals("TREE"))
      throw new IllegalStateException("DataBTree doesnt start with TREE");

    int type = raf.readByte();
    level = raf.readByte();
    nentries = raf.readShort();
    if (type != wantType)
      throw new IllegalStateException("DataBTree must be type " + wantType);

    long size = 8 + 2 * h5.getSizeOffsets() + ((long)nentries) * (8 + h5.getSizeOffsets() + 8 + ndimStorage);
    if (memTracker != null) memTracker.addByLen("Data BTree (" + owner + ")", address, size);
    if (debugDataBtree) debugOut.println(" type=" + type + " level=" + level + " nentries=" + nentries + " size = " + size);

    // sibling addresses are read to advance the file position; they are only used for debug output
    long leftAddress = h5.readOffset();
    long rightAddress = h5.readOffset();
    if (debugDataBtree) debugOut.println(" leftAddress=" + leftAddress + " =0x" + Long.toHexString(leftAddress) +
        " rightAddress=" + rightAddress + " =0x" + Long.toHexString(rightAddress));

    if (level == 0) {
      // read all entries as a DataChunk; there is one more key than children, so the
      // last DataChunk (i == nentries) carries only a key and no file position
      myEntries = new ArrayList<DataChunk>();
      for (int i = 0; i <= nentries; i++) {
        DataChunk dc = new DataChunk(ndimStorage, (i == nentries));
        myEntries.add(dc);
        if (debugDataChunk) debugOut.println(dc);
      }
    } else { // just track the offsets and node addresses
      offset = new int[nentries + 1][ndimStorage];
      childPointer = new long[nentries + 1];
      for (int i = 0; i <= nentries; i++) {
        raf.skipBytes(8); // skip size, filterMask
        for (int j = 0; j < ndimStorage; j++) {
          long loffset = raf.readLong();
          assert loffset < Integer.MAX_VALUE;
          offset[i][j] = (int) loffset;
        }
        // the final key has no child pointer after it
        this.childPointer[i] = (i == nentries) ? -1 : h5.readOffset();
        if (debugDataBtree) {
          debugOut.print(" childPointer=" + childPointer[i] + " =0x" + Long.toHexString(childPointer[i]));
          for (long anOffset : offset[i]) debugOut.print(" " + anOffset);
          debugOut.println();
        }
      }
    }
  }

  /**
   * Positions the iteration on the first entry that must not be skipped.
   *
   * Entry i goes from [offset(i),offset(i+1)). Entries where want >= offset(i+1) are not needed,
   * so keep skipping until want < offset(i+1).
   *
   * @param wantOrigin origin of the wanted section, or null to start at the first entry
   * @throws IOException on error reading a child node
   */
  void first(int[] wantOrigin) throws IOException {
    if (debugChunkOrder && wantOrigin != null) System.out.printf("Level %d: Tile want %d%n", level, tiling.order(wantOrigin));

    if (level == 0) {
      // note nentries-1 - assume dont skip the last one
      for (currentEntry = 0; currentEntry < nentries-1; currentEntry++) {
        DataChunk entry = myEntries.get(currentEntry + 1); // look at the next one
        if (debugChunkOrder) System.out.printf(" Entry=%d: Tile ending order= %d%n", currentEntry, tiling.order(entry.offset));
        if ((wantOrigin == null) || tiling.compare(wantOrigin, entry.offset) < 0) break;
      }
      if (debugChunkOrder) System.out.printf("Level %d use entry= %d%n", level, currentEntry);

    } else {
      currentNode = null;
      for (currentEntry = 0; currentEntry < nentries; currentEntry++) {
        if (debugChunkOrder) System.out.printf(" Entry=%d: Tile order %d-%d%n", currentEntry, tiling.order(offset[currentEntry]), tiling.order(offset[currentEntry + 1]));
        if ((wantOrigin == null) || tiling.compare(wantOrigin, offset[currentEntry + 1]) < 0) {
          currentNode = new Node(childPointer[currentEntry], this.address);
          if (debugChunkOrder) System.out.printf("Level %d use entry= %d%n", level, currentEntry);
          currentNode.first(wantOrigin);
          break;
        }
      }

      // heres the case where its the last entry we want; the tiling.compare() above may fail
      if (currentNode == null) {
        currentEntry = nentries - 1;
        currentNode = new Node(childPointer[currentEntry], this.address);
        currentNode.first(wantOrigin);
      }
    }

    //if (currentEntry >= nentries)
    //  System.out.println("hah");
    assert (nentries == 0) || (currentEntry < nentries) : currentEntry + " >= " + nentries;
  }

  /**
   * LOOK - wouldnt be a bad idea to terminate if possible instead of running through all subsequent entries
   *
   * @return true if this node, or a later child of it, still has a chunk to return
   */
  boolean hasNext() {
    if (level == 0) {
      return currentEntry < nentries;
    } else {
      if (currentNode.hasNext()) return true;
      return currentEntry < nentries - 1;
    }
  }

  /**
   * Returns the next chunk, reading the next child node when the current one is used up.
   *
   * @return the next leaf DataChunk
   * @throws IOException on error reading a child node
   */
  DataChunk next() throws IOException {
    if (level == 0) {
      return myEntries.get(currentEntry++);
    } else {
      if (currentNode.hasNext())
        return currentNode.next();

      // current child is used up: move to the next child and start at its first entry
      currentEntry++;
      currentNode = new Node(childPointer[currentEntry], this.address);
      currentNode.first(null);
      return currentNode.next();
    }
  }
}
/* private void dump(DataType dt, List entries) {
try {
for (DataChunk node : entries) {
if (dt == DataType.STRING) {
HeapIdentifier heapId = new HeapIdentifier(node.address);
GlobalHeap.HeapObject ho = heapId.getHeapObject();
byte[] pa = new byte[(int) ho.dataSize];
raf.seek(ho.dataPos);
raf.read(pa);
debugOut.println(" data at " + ho.dataPos + " = " + new String(pa));
}
}
}
catch (IOException e) {
e.printStackTrace();
}
} */
// these are part of the level 1A data structure, type 1
// see http://www.hdfgroup.org/HDF5/doc/H5.format.html#V1Btrees,
// see "Key" field (type 1) p 10
// this is only for leaf nodes (level 0)
/**
 * A leaf (level 0) entry of the data B-tree, read from the current position of the file.
 */
class DataChunk {
  int size; // size of chunk in bytes; need storage layout dimensions to interpret
  int filterMask; // bitfield indicating which filters have been skipped for this chunk
  int[] offset; // offset index of this chunk, relative to entire array
  long filePos; // filePos of a single raw data chunk, already shifted by the offset if needed

  /**
   * Reads size, filter mask, ndim offsets and (unless this is the last key) the chunk address.
   *
   * @param ndim number of offsets to read
   * @param last true for the final key of a node, which has no chunk address; filePos is then -1
   * @throws IOException on read error
   */
  DataChunk(int ndim, boolean last) throws IOException {
    size = raf.readInt();
    filterMask = raf.readInt();

    offset = new int[ndim];
    int dim = 0;
    while (dim < ndim) {
      long raw = raf.readLong();
      assert raw < Integer.MAX_VALUE;
      offset[dim] = (int) raw;
      dim++;
    }

    if (last) {
      filePos = -1;
    } else {
      filePos = h5.readAddress();
    }

    if (memTracker != null) {
      memTracker.addByLen("Chunked Data (" + owner + ")", filePos, size);
    }
  }

  /** @return size, filter mask, file position and offsets on one line */
  @Override
  public String toString() {
    StringBuilder text = new StringBuilder();
    text.append(" ChunkedDataNode size=").append(size);
    text.append(" filterMask=").append(filterMask);
    text.append(" filePos=").append(filePos);
    text.append(" offsets= ");
    for (int k = 0; k < offset.length; k++) {
      text.append(offset[k]).append(" ");
    }
    return text.toString();
  }
}
}
© 2015 - 2024 Weber Informatics LLC | Privacy Policy