// All Downloads are FREE. Search and download functionalities use the official Maven repository.

// ucar.nc2.iosp.hdf5.DataBTree Maven / Gradle / Ivy

// The newest version!
/*
 * Copyright (c) 1998-2018 John Caron and University Corporation for Atmospheric Research/Unidata
 * See LICENSE for license information.
 */

package ucar.nc2.iosp.hdf5;

import java.util.Arrays;
import ucar.ma2.Section;
import ucar.nc2.iosp.LayoutTiled;
import ucar.nc2.util.Misc;
import ucar.unidata.io.RandomAccessFile;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;

/**
 * This holds the chunked data storage.
 * level 1A
 * A B-tree, version 1, used for data (node type 1)
 *
 * Version 1 B-trees in HDF5 files are an implementation of the B-link tree, in which the sibling nodes at a
 * particular level in the tree are stored in a doubly-linked list.
 * The B-link trees implemented by the file format contain one more key than the number of children.
 * In other words, each child pointer out of a B-tree node has a left key and a right key.
 * The pointers out of internal nodes point to sub-trees while the pointers out of leaf nodes point to symbol nodes and
 * raw data chunks. Aside from that difference, internal nodes and leaf nodes are identical.
 *
 * @see "http://www.hdfgroup.org/HDF5/doc/H5.format.html#Btrees"
 * @author caron
 * @since 6/27/12
 */
public class DataBTree {
  // Compile-time debug switches; each gates a group of diagnostic prints in this class.
  private static final boolean debugDataBtree = false; // node header and child pointer dumps while reading a Node
  private static final boolean debugDataChunk = false; // print each DataChunk read from a leaf node
  private static final boolean debugChunkOrder = false; // print tile ordering while positioning / iterating
  // stream used by the debugDataBtree / debugDataChunk prints
  private static java.io.PrintStream debugOut = System.out;

  // supplies the RandomAccessFile and the offset/address readers used while parsing nodes
  private final H5headerIF h5;
  // optional (may be null); when present, every node and chunk read is registered with it
  private final MemTracker memTracker;

  // address handed to the root Node each time an iterator is created
  private final long rootNodeAddress;
  // built from the variable shape and storage size; used to order and compare chunk offsets
  private final Tiling tiling;
  // ndimStorage: length of the storageSize array, i.e. number of offsets read per key
  // wantType: the B-tree node type a Node must have, otherwise reading it fails
  private final int ndimStorage, wantType;

  // only used to label debug output and memTracker entries
  private Object owner;

  /**
   * Set up access to a chunked-data B-tree. Nothing is read from the file here; nodes are read when an
   * iterator is requested.
   *
   * @param h5 source of the file handle and of the offset/address readers
   * @param rootNodeAddress address of the root node of the tree
   * @param varShape shape of the variable, passed to the {@code Tiling}
   * @param storageSize chunk storage size, passed to the {@code Tiling}; its length is the number of offsets per key
   * @param memTracker optional tracker notified of every node and chunk read, may be null
   */
  public DataBTree(H5headerIF h5, long rootNodeAddress, int[] varShape, int[] storageSize, MemTracker memTracker) {
    // this class only reads node type 1 (raw data chunk) trees
    this.wantType = 1;

    this.h5 = h5;
    this.rootNodeAddress = rootNodeAddress;
    this.memTracker = memTracker;

    this.tiling = new Tiling(varShape, storageSize);
    this.ndimStorage = storageSize.length;
  }

  /**
   * Fetch the file handle from the header object.
   *
   * @return whatever {@code h5.getRandomAccessFile()} returns
   */
  RandomAccessFile getRandomAccessFile() {
    final RandomAccessFile raf = h5.getRandomAccessFile();
    return raf;
  }

  /**
   * Remember which object this tree belongs to. The value is only concatenated into debug messages and
   * memTracker labels.
   *
   * @param owner the owning object, printed via string concatenation
   */
  public void setOwner(Object owner) {
    this.owner = owner;
  }

  /**
   * Create an iterator over the {@link DataChunk} entries of this tree; used by H5tiledLayoutBB.
   *
   * @param want chunks lying before the origin of this section are skipped; may be null to start at the beginning
   * @return a new iterator, already positioned on the first wanted chunk
   * @throws IOException on read error
   */
  public DataChunkIterator getDataChunkIteratorFilter(Section want) throws IOException {
    DataChunkIterator chunks = new DataChunkIterator(want);
    return chunks;
  }

  /**
   * Create an iterator yielding {@code LayoutTiled.DataChunk} objects; used by H5tiledLayout.
   *
   * @param want chunks lying before the origin of this section are skipped; may be null to start at the beginning
   * @param nChunkDim number of chunk dimensions to report; longer offsets are truncated to this length
   * @return a new iterator, already positioned on the first wanted chunk
   * @throws IOException on read error
   */
  public LayoutTiled.DataChunkIterator getDataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
    LayoutTiled.DataChunkIterator chunks = new DataChunkIteratorNoFilter(want, nChunkDim);
    return chunks;
  }

  /**
   * Iterator over the data chunks stored in the leaf (level 0) nodes of the btree, delivering them as
   * {@code LayoutTiled.DataChunk} (offset plus file position). Used by H5tiledLayout, when there are no filters.
   */
  class DataChunkIteratorNoFilter implements LayoutTiled.DataChunkIterator {
    private Node root;
    private int nChunkDim;

    /**
     * Reads the root node and positions it on the first chunk of interest.
     *
     * @param want skip any nodes that are before this section; null means start at the first chunk
     * @param nChunkDim number of chunk dimensions - may be less than the offset[] length
     * @throws IOException on error
     */
    DataChunkIteratorNoFilter(Section want, int nChunkDim) throws IOException {
      this.nChunkDim = nChunkDim;
      this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
      if (want == null) {
        this.root.first(null);
      } else {
        this.root.first(want.getOrigin());
      }
    }

    /** @return true while the root node reports more chunks */
    public boolean hasNext() {
      final boolean more = root.hasNext(); // && !node.greaterThan(wantOrigin);
      return more;
    }

    /**
     * Fetch the next chunk and convert it to the layout's chunk type.
     *
     * @return the chunk's offset (cut down to nChunkDim entries when it is longer) together with its file position
     * @throws IOException on read error
     */
    public LayoutTiled.DataChunk next() throws IOException {
      final DataChunk chunk = root.next();

      // may have to eliminate last offset
      final int[] chunkOffset =
          (chunk.offset.length > nChunkDim) ? Arrays.copyOf(chunk.offset, nChunkDim) : chunk.offset;

      if (debugChunkOrder) {
        System.out.printf("LayoutTiled.DataChunk next order %d%n", tiling.order(chunk.offset));
      }

      return new LayoutTiled.DataChunk(chunkOffset, chunk.filePos);
    }
  }

  /**
   * Iterator over the data chunks stored in the leaf (level 0) nodes of the btree, delivering the
   * {@link DataChunk} info objects themselves. Used by H5tiledLayoutBB, when there are filters.
   */
  public class DataChunkIterator {
    private Node root;
    private int[] wantOrigin;

    /**
     * Reads the root node and positions it on the first chunk of interest.
     *
     * @param want skip any nodes that are before this section; null means start at the first chunk
     * @throws IOException on error
     */
    DataChunkIterator(Section want) throws IOException {
      this.root = new Node(rootNodeAddress, -1); // should we cache the nodes ???
      if (want == null) {
        this.wantOrigin = null;
      } else {
        this.wantOrigin = want.getOrigin();
      }
      this.root.first(this.wantOrigin);
    }

    /** @return true while the root node reports more chunks */
    public boolean hasNext() {
      final boolean more = root.hasNext(); // && !node.greaterThan(wantOrigin);
      return more;
    }

    /**
     * @return the next chunk as delivered by the root node
     * @throws IOException on read error
     */
    public DataChunk next() throws IOException {
      final DataChunk chunk = root.next();
      return chunk;
    }
  }

  /**
   * A single btree node, read from the file when constructed.
   * <p>
   * A leaf ({@code level == 0}) keeps its keys as {@link DataChunk} objects in {@code myEntries}. An internal node
   * ({@code level > 0}) keeps only the key offsets and the child addresses, and opens one child {@code Node} at a
   * time while iterating. The iteration state ({@code currentEntry}, {@code currentNode}) lives in the node itself:
   * call {@link #first(int[])} once, then alternate {@link #hasNext()} and {@link #next()}.
   */
  class Node {
    private final long address;
    private final int level, nentries;
    private Node currentNode; // level > 0 only: the child currently being iterated, null until first() opens one

    // level 0 only; holds nentries + 1 chunks, the extra one being the closing key
    private List<DataChunk> myEntries;
    // level > 0 only
    private int[][] offset; // int[nentries + 1][ndimStorage]

    // "For raw data chunk nodes, the child pointer is the address of a single raw data chunk"
    private long[] childPointer; // long[nentries + 1]; the last slot is -1 (closing key has no child)

    private int currentEntry; // track iteration; LOOK this seems fishy - why not an iterator ??

    /**
     * Read the node stored at the given address.
     *
     * @param address address of the node; translated with {@code h5.getFileOffset} before seeking
     * @param parent address of the parent node, only used in the debug message
     * @throws IOException on read error
     * @throws IllegalStateException if the node does not start with "TREE" or is not of type {@code wantType}
     */
    Node(long address, long parent) throws IOException {
      if (debugDataBtree)
        debugOut.println("\n--> DataBTree read tree at address=" + address + " parent= " + parent + " owner= " + owner);

      getRandomAccessFile().order(RandomAccessFile.LITTLE_ENDIAN); // header information is in le byte order
      getRandomAccessFile().seek(h5.getFileOffset(address));
      this.address = address;

      String magic = getRandomAccessFile().readString(4);
      if (!magic.equals("TREE"))
        throw new IllegalStateException("DataBTree doesnt start with TREE");

      int type = getRandomAccessFile().readByte();
      level = getRandomAccessFile().readByte();
      nentries = getRandomAccessFile().readShort();
      if (type != wantType)
        throw new IllegalStateException("DataBTree must be type " + wantType);

      // NOTE(review): the per-entry term adds ndimStorage, although each of the ndimStorage offsets is read as an
      // 8-byte long below. The value is only reported to memTracker; confirm whether 8 * ndimStorage was intended.
      long size = 8 + 2 * h5.getSizeOffsets() + ((long) nentries) * (8 + h5.getSizeOffsets() + 8 + ndimStorage);
      if (memTracker != null)
        memTracker.addByLen("Data BTree (" + owner + ")", address, size);
      if (debugDataBtree)
        debugOut.println("    type=" + type + " level=" + level + " nentries=" + nentries + " size = " + size);

      // sibling addresses are read to advance the file position; they are only used for debug output
      long leftAddress = h5.readOffset();
      long rightAddress = h5.readOffset();
      if (debugDataBtree)
        debugOut.println("    leftAddress=" + leftAddress + " =0x" + Long.toHexString(leftAddress) + " rightAddress="
            + rightAddress + " =0x" + Long.toHexString(rightAddress));

      if (level == 0) {
        // read all entries as a DataChunk; there is one more key than children, so the final
        // DataChunk is constructed with last=true and reads no child address
        myEntries = new ArrayList<>();
        for (int i = 0; i <= nentries; i++) {
          DataChunk dc = new DataChunk(ndimStorage, (i == nentries));
          myEntries.add(dc);
          if (debugDataChunk)
            debugOut.println(dc);
        }
      } else { // just track the offsets and node addresses
        offset = new int[nentries + 1][ndimStorage];
        childPointer = new long[nentries + 1];
        for (int i = 0; i <= nentries; i++) {
          getRandomAccessFile().skipBytes(8); // skip size, filterMask
          for (int j = 0; j < ndimStorage; j++) {
            long loffset = getRandomAccessFile().readLong();
            assert loffset < Integer.MAX_VALUE;
            offset[i][j] = (int) loffset;
          }
          // the closing key has no child pointer behind it
          this.childPointer[i] = (i == nentries) ? -1 : h5.readOffset();
          if (debugDataBtree) {
            debugOut.print("    childPointer=" + childPointer[i] + " =0x" + Long.toHexString(childPointer[i]));
            for (long anOffset : offset[i])
              debugOut.print(" " + anOffset);
            debugOut.println();
          }
        }
      }
    }

    /**
     * Position the iteration on the first entry we dont want to skip.
     * Entry i goes from [offset(i),offset(i+1)). We want to skip any entries we dont need, namely those where
     * want >= offset(i+1), so keep skipping until want < offset(i+1).
     * On an internal node this reads the selected child node and positions it recursively. An internal node
     * without entries opens no child and subsequently reports no chunks.
     *
     * @param wantOrigin origin of the wanted section, or null to start at the first entry
     * @throws IOException on error reading a child node
     */
    void first(int[] wantOrigin) throws IOException {
      if (debugChunkOrder && wantOrigin != null)
        System.out.printf("Level %d: Tile want %d%n", level, tiling.order(wantOrigin));
      if (level == 0) {
        currentEntry = 0;
        // note nentries-1 - assume dont skip the last one
        for (currentEntry = 0; currentEntry < nentries - 1; currentEntry++) {
          DataChunk entry = myEntries.get(currentEntry + 1); // look at the next one
          if (debugChunkOrder)
            System.out.printf(" Entry=%d: Tile ending order= %d%n", currentEntry, tiling.order(entry.offset));
          if ((wantOrigin == null) || tiling.compare(wantOrigin, entry.offset) < 0)
            break;
        }
        if (debugChunkOrder)
          System.out.printf("Level %d use entry= %d%n", level, currentEntry);

      } else {
        currentNode = null;
        for (currentEntry = 0; currentEntry < nentries; currentEntry++) {
          if (debugChunkOrder)
            System.out.printf(" Entry=%3d offset [%-15s]: Tile order %d-%d%n", currentEntry,
                Arrays.toString(offset[currentEntry]), tiling.order(offset[currentEntry]),
                tiling.order(offset[currentEntry + 1]));
          if ((wantOrigin == null) || tiling.compare(wantOrigin, offset[currentEntry + 1]) < 0) {
            currentNode = new Node(childPointer[currentEntry], this.address);
            if (debugChunkOrder)
              System.out.printf("Level %d use entry= %d%n", level, currentEntry);
            currentNode.first(wantOrigin);
            break;
          }
        }

        // heres the case where its the last entry we want; the tiling.compare() above may fail.
        // Only possible when there is at least one child; with nentries == 0 there is nothing to open
        // and childPointer[nentries - 1] would be out of bounds.
        if (currentNode == null && nentries > 0) {
          currentEntry = nentries - 1;
          currentNode = new Node(childPointer[currentEntry], this.address);
          currentNode.first(wantOrigin);
        }
      }

      assert (nentries == 0) || (currentEntry < nentries) : currentEntry + " >= " + nentries;
    }

    /**
     * Tell whether {@link #next()} can deliver another chunk.
     * LOOK - wouldnt be a bad idea to terminate if possible instead of running through all subsequent entries
     *
     * @return on a leaf, whether entries remain; on an internal node, whether the open child has more chunks or
     *         further children remain. False when no child has been opened.
     */
    boolean hasNext() {
      if (level == 0) {
        return currentEntry < nentries;

      } else {
        if (currentNode == null) // no child opened: empty internal node, or first() not called
          return false;
        if (currentNode.hasNext())
          return true;
        return currentEntry < nentries - 1;
      }
    }

    /**
     * Deliver the next chunk. Callers are expected to check {@link #hasNext()} first.
     * On an internal node, when the open child is exhausted the next child is read and started from its
     * first entry.
     *
     * @return the next chunk
     * @throws IOException on error reading a child node
     * @throws IllegalStateException on an internal node for which no child has been opened
     */
    DataChunk next() throws IOException {
      if (level == 0) {
        return myEntries.get(currentEntry++);

      } else {
        if (currentNode == null)
          throw new IllegalStateException("DataBTree node at address " + address + " has no current child");
        if (currentNode.hasNext())
          return currentNode.next();

        currentEntry++;
        currentNode = new Node(childPointer[currentEntry], this.address);
        currentNode.first(null);
        return currentNode.next();
      }
    }
  }

  /**
   * One key of a leaf (level 0) node, read from the current file position.
   * These are part of the level 1A data structure, type 1;
   * see http://www.hdfgroup.org/HDF5/doc/H5.format.html#V1Btrees, "Key" field (type 1) p 10.
   */
  public class DataChunk {
    /** size of chunk in bytes; need storage layout dimensions to interpret */
    public final int size;
    /** bitfield indicating which filters have been skipped for this chunk */
    public final int filterMask;
    /** offset index of this chunk, relative to entire array */
    public final int[] offset;
    /** filePos of a single raw data chunk, already shifted by the offset if needed; -1 for the closing key */
    public final long filePos;

    /**
     * Read size, filter mask, {@code ndim} offsets and (unless this is the closing key) the chunk address,
     * in that order, from the file; then report the chunk to memTracker when one is set.
     *
     * @param ndim number of 8-byte offsets to read
     * @param last true for the closing key of the node, which has no address behind it
     * @throws IOException on read error
     */
    DataChunk(int ndim, boolean last) throws IOException {
      this.size = getRandomAccessFile().readInt();
      this.filterMask = getRandomAccessFile().readInt();

      final int[] chunkOffset = new int[ndim];
      int dim = 0;
      while (dim < ndim) {
        final long wide = getRandomAccessFile().readLong();
        assert wide < Integer.MAX_VALUE;
        chunkOffset[dim] = (int) wide;
        dim++;
      }
      this.offset = chunkOffset;

      if (last) {
        this.filePos = -1;
      } else {
        this.filePos = h5.readAddress();
      }

      if (memTracker != null) {
        memTracker.addByLen("Chunked Data (" + owner + ")", filePos, size);
      }
    }

    /** @return a one-line description listing size, filter mask, file position and all offsets */
    @Override
    public String toString() {
      final StringBuilder text = new StringBuilder("  ChunkedDataNode size=");
      text.append(size);
      text.append(" filterMask=").append(filterMask);
      text.append(" filePos=").append(filePos);
      text.append(" offsets= ");
      for (int i = 0; i < offset.length; i++) {
        text.append(offset[i]).append(" ");
      }
      return text.toString();
    }
  }

}




// © 2015 - 2025 Weber Informatics LLC | Privacy Policy