All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.dianahep.sparkroot.ast.ast.scala Maven / Gradle / Ivy

There is a newer version: 0.1.16
Show newest version
package org.dianahep.sparkroot

import org.dianahep.sparkroot.ast._
import org.dianahep.root4j.interfaces._
import org.dianahep.root4j.core._
import org.dianahep.root4j._
import org.apache.spark.sql._
import org.apache.spark.sql.types._

package object ast
{
  // Debug printing is enabled whenever the JVM system property "debug" is set, whatever its value.
  private val debug: Boolean = Option(System.getProperty("debug")).isDefined

  /*
   * LeafInfo - simple TLeaf info
   * LeafElementInfo - TLeafElement info
   */
  /** Holds the name, class name and nElements value recorded for a leaf. */
  class LeafInfo(val name: String, val className: String, val nElements: Int)
  {
    // same text as the Tuple3 (name, className, nElements) would render
    override def toString: String = s"($name,$className,$nElements)"
  }
  /** LeafInfo extended with an additional integer type code (`myTypeCode`). */
  class LeafElementInfo(
    override val name: String,
    override val className: String,
    override val nElements: Int,
    val myTypeCode: Int)
    extends LeafInfo(name, className, nElements)
  {
    // same text as the Tuple4 (name, className, nElements, myTypeCode) would render
    override def toString: String = s"($name,$className,$nElements,$myTypeCode)"
  }

  /*
   * NodeInfo - simple TBranch info
   * NodeElementInfo - TBranchElement info
   */
  /** Holds the name, title, class name and SRType recorded for a branch. */
  class NodeInfo(
    val name: String,
    val title: String,
    val className: String,
    val myType: SRType)
  {
    // same text as the Tuple4 (name, title, className, myType) would render
    override def toString: String = s"($name,$title,$className,$myType)"
  }
  /**
   * NodeInfo extended with the parent name, the streamer and own type codes,
   * the object class name and an integer id.
   */
  class NodeElementInfo(
    override val name: String,
    override val title: String,
    override val className: String,
    override val myType: SRType,
    val parentName: String,
    val streamerTypeCode: Int,
    val myTypeCode: Int,
    val objClassName: String,
    val id: Int)
    extends NodeInfo(name, title, className, myType)
  {
    // same text as the Tuple9 of all nine fields would render
    override def toString: String =
      s"($name,$title,$className,$myType,$parentName," +
        s"$streamerTypeCode,$myTypeCode,$objClassName,$id)"
  }

  /** Common parent of every schema-tree node declared below. */
  abstract class AbstractSchemaTree

  /** Root of a schema tree: a name plus its child nodes. */
  case class RootNode(
    name: String,
    nodes: Seq[AbstractSchemaTree]
  ) extends AbstractSchemaTree

  /** Root that carries no child nodes; `entries` is a mutable field. */
  case class EmptyRootNode(
    name: String,
    var entries: Long
  ) extends AbstractSchemaTree

  // Simple TBranch/TLeaf representations

  /** A leaf together with its LeafInfo. */
  class Leaf(val info: LeafInfo) extends AbstractSchemaTree

  /** Terminal node holding a single leaf, read through a BasicBranchIterator. */
  case class TerminalNode(
    leaf: Leaf,
    info: NodeInfo,
    iter: BasicBranchIterator
  ) extends AbstractSchemaTree

  /** Terminal node holding several leaves, read through a StructBranchIterator. */
  case class TerminalMultiLeafNode(
    leaves: Seq[Leaf],
    info: NodeInfo,
    iter: StructBranchIterator
  ) extends AbstractSchemaTree

  /** Inner node: its sub-nodes plus the NodeInfo of the node itself. */
  case class Node(
    subnodes: Seq[AbstractSchemaTree],
    info: NodeInfo
  ) extends AbstractSchemaTree

  // TBranchElement/TLeafElement representations

  /** Leaf whose info is narrowed to a LeafElementInfo. */
  class LeafElement(override val info: LeafElementInfo) extends Leaf(info)

  /** Represents a splittable collection branch. */
  case class SplittableCollectionNodeElement(
    subnodes: Seq[AbstractSchemaTree],
    leaf: LeafElement,
    info: NodeElementInfo
  ) extends AbstractSchemaTree

  /** Represents a splittable object sitting in a branch. */
  case class SplittableObjectNodeElement(
    subnodes: Seq[AbstractSchemaTree],
    info: NodeElementInfo
  ) extends AbstractSchemaTree

  /**
   * Represents an unsplittable collection branch: either nestedness > 1 or
   * a collection of simple types.
   */
  case class UnSplittableCollectionNodeElement(
    leaf: LeafElement,
    info: NodeElementInfo,
    streamer: TStreamerElement,
    iter: BranchIterator[Any]
  ) extends AbstractSchemaTree

  /** Unsplittable object sitting in a branch - not a collection. */
  case class UnSplittableObjectNodeElement(
    leaf: LeafElement,
    info: NodeElementInfo,
    streamers: Seq[TStreamerElement],
    iter: BranchIterator[Any]
  ) extends AbstractSchemaTree

  /** A terminal node of the tree - not in a collection of nestedness 1. */
  case class TerminalSimpleNodeElement(
    leaf: LeafElement,
    info: NodeElementInfo,
    iter: BranchIterator[Any]
  ) extends AbstractSchemaTree

  /**
   * A terminal node of the tree - identical to the unsplittable object.
   * TODO: Do we need this guy???
   */
  case class TerminalObjectNodeElement(
    leaf: LeafElement,
    info: NodeElementInfo,
    iter: BranchIterator[Any]
  ) extends AbstractSchemaTree

  /** Terminal collection member - for a nestedness level of 1, members are split. */
  case class TerminalCollectionMemberNodeElement(
    leaf: LeafElement,
    info: NodeElementInfo,
    iter: BranchIterator[Any]
  ) extends AbstractSchemaTree

  // Nodes that could not be classified

  /** Unclassified node that still has sub-nodes. */
  case class UnknownNode(
    subnodes: Seq[AbstractSchemaTree],
    leaf: LeafElement,
    info: NodeElementInfo
  ) extends AbstractSchemaTree

  /** Unclassified node without sub-nodes, read through a BranchIterator. */
  case class UnknownTerminalNode(
    leaf: Leaf,
    info: NodeElementInfo,
    iter: BranchIterator[Any]
  ) extends AbstractSchemaTree

  /**
   * For simple branches: maps a leaf onto our internal type system, keyed on
   * the last character of the leaf's ROOT class name.
   *
   * @param leaf the leaf to classify
   * @return the type selected by that character, SRNull when it is not recognised
   */
  def assignLeafTypeByLeafClass(leaf: TLeaf): SRType = {
    val typeSuffix = leaf.getRootClass.getClassName.last
    typeSuffix match {
      case 'C' => SRStringType
      case 'B' | 'b' => SRByteType
      case 'S' | 's' => SRShortType
      case 'I' | 'i' => SRIntegerType
      case 'F' => SRFloatType
      case 'D' => SRDoubleType
      case 'L' | 'l' => SRLongType
      case 'O' => SRBooleanType
      case _ => SRNull
    }
  }

  /**
   * Computes the simple SR data type of a terminal branch.
   *
   * @param branch the branch whose leaves are inspected
   * @return a struct of (leaf name, leaf type) pairs when the branch has more
   *         than one leaf, otherwise the type of the leaf at index 0
   */
  def assignBranchType(branch: TBranch): SRType = 
  {
    val leaves = branch.getLeaves
    def leafAt(idx: Int): TLeaf = leaves.get(idx).asInstanceOf[TLeaf]

    if (leaves.size <= 1) assignLeafType(leafAt(0))
    else SRStructType(
      (0 until leaves.size).map { idx =>
        val leaf = leafAt(idx)
        (leaf.getName, assignLeafType(leaf))
      }
    )
  }

  /**
   * Type of a single leaf: the class-based type itself, or that type wrapped
   * in an SRArrayType (together with the leaf's array dimension) when the
   * dimension is positive.
   */
  def assignLeafType(leaf: TLeaf): SRType = 
  {
    if (leaf.getArrayDim <= 0) // not an array
      assignLeafTypeByLeafClass(leaf)
    else
      SRArrayType(assignLeafTypeByLeafClass(leaf), leaf.getArrayDim)
  }

  /**
   * Prints the Abstractly Typed Tree to standard output.
   *
   * Every non-root node is written on its own line, prefixed by `sep`
   * repeated `level` times; children are printed one level deeper. The two
   * root variants are written without indentation.
   *
   * @param att   node to print
   * @param level nesting depth of `att`
   * @param sep   indentation unit; forwarded to every nested call so that a
   *              caller-supplied separator applies to the whole tree
   */
  def printATT(att: core.SRType, level: Int = 0, sep: String = "  "): Unit = {
    val indent = sep * level
    att match {
      case core.SRNull => println(indent + "Null")
      case core.SRRoot(name, entries, types) => {
        println(s"Root: $name with $entries Entries")
        for (t <- types) printATT(t, level + 1, sep)
      }
      case core.SREmptyRoot(name, entries) =>
        println(s"Empty Root: $name with $entries Entries")
      case core.SRInt(name, _, _) => println(indent + s"$name: Integer")
      case core.SRString(name, _, _) => println(indent + s"$name: String")
      case core.SRLong(name, _, _) => println(indent + s"$name: Long")
      case core.SRDouble(name, _, _) => println(indent + s"$name: Double")
      case core.SRByte(name, _, _) => println(indent + s"$name: Byte")
      case core.SRBoolean(name, _, _) => println(indent + s"$name: Boolean")
      case core.SRFloat(name, _, _) => println(indent + s"$name: Float")
      case core.SRShort(name, _, _) => println(indent + s"$name: Short")
      case core.SRArray(name, _, _, t, n) => {
        println(indent + s"$name: Array[$n]")
        printATT(t, level + 1, sep)
      }
      case core.SRVector(name, _, t, split, isTop) => {
        println(indent + s"$name: STL Vector. split=$split and isTop=$isTop")
        printATT(t, level + 1, sep)
      }
      case core.SRMap(name, _, keyType, valueType, split, isTop) => {
        println(indent + s"$name: Map ${keyType.name} => ${valueType.name}. split=$split and isTop=$isTop")
        // key and value get a caption one level down and their own subtree two levels down
        println(sep * (level + 1) + "Key Type:")
        printATT(keyType, level + 2, sep)
        println(sep * (level + 1) + "Value Type:")
        printATT(valueType, level + 2, sep)
      }
      case core.SRSTLString(name, _, isTop) => {
        println(indent + s"$name: STL String isTop=$isTop")
      }
      case core.SRComposite(name, _, members, split, isTop, isBase) => {
        println(indent + s"${name}: Composite split=$split isTop=$isTop isBase=$isBase")
        for (t <- members) printATT(t, level + 1, sep)
      }
      // anything else is rendered as an empty line
      case _ => println("")
    }
  }

  /** Spark schema of the tree: the node's Spark type, cast to StructType. */
  def buildSparkSchema(att: core.SRType) = {
    val sparkType = att.toSparkType
    sparkType.asInstanceOf[StructType]
  }
  /** Reads from the tree via `att.read` and casts the result to a Spark Row. */
  def readSparkRow(att: core.SRType): Row = {
    val record = att.read
    record.asInstanceOf[Row]
  }
  /** Forwards to `att.hasNext`. */
  def containsNext(att: core.SRType) = {
    att.hasNext
  }

  /**
   * Build ATT 
   *
   * @return ATT
   */
  def buildATT(
    tree: TTree,
    streamers: Map[String, TStreamerInfo],
    requiredColumns: Array[String]
  ): core.SRType = {

    // Wraps a leaf in the core type selected by the last character of its
    // ROOT class name; an unrecognised character yields SRNull.
    def synthesizeLeafType(b: TBranch, leaf: TLeaf): core.SRType = {
      val typeSuffix = leaf.getRootClass.getClassName.last
      typeSuffix match {
        case 'C' => core.SRString(leaf.getName, b, leaf)
        case 'B' | 'b' => core.SRByte(leaf.getName, b, leaf)
        case 'S' | 's' => core.SRShort(leaf.getName, b, leaf)
        case 'I' | 'i' => core.SRInt(leaf.getName, b, leaf)
        case 'F' => core.SRFloat(leaf.getName, b, leaf)
        case 'D' => core.SRDouble(leaf.getName, b, leaf)
        case 'L' | 'l' => core.SRLong(leaf.getName, b, leaf)
        case 'O' => core.SRBoolean(leaf.getName, b, leaf)
        case _ => core.SRNull
      }
    }

    // Builds the type for one leaf of branch b:
    //  - a TLeafElement is handed to synthesizeLeafElement
    //  - a leaf with array dimension 0 becomes its plain leaf type
    //  - otherwise one SRArray is nested per dimension
    def synthesizeLeaf(b: TBranch, leaf: TLeaf): core.SRType = {
      // Wraps dimsToGo array levels; the size of each level is read from
      // getMaxIndex at position (getArrayDim - dimsToGo), and the last level
      // holds the plain leaf type.
      def iterate(dimsToGo: Int): core.SRType =
        core.SRArray(leaf.getName, b, leaf,
          if (dimsToGo != 1) iterate(dimsToGo-1)
          else synthesizeLeafType(b, leaf),
          leaf.getMaxIndex()(leaf.getArrayDim-dimsToGo))

      leaf match {
        case leafElement: TLeafElement => synthesizeLeafElement(b, leafElement)
        case _ if leaf.getArrayDim==0 => synthesizeLeafType(b, leaf)
        case _ => iterate(leaf.getArrayDim)
      }
    }

    // Always yields SRNull; neither argument is inspected.
    def synthesizeLeafElement(b: TBranch, leaf: TLeafElement): core.SRType =
      core.SRNull

    /**
     * The top branch is special: it is synthesized with SRRootType as the
     * parent tag.
     *
     * A TBranchElement is resolved through the TStreamerInfo registered for
     * its class name, or through the class name itself when none is
     * registered. Any other branch is resolved from its leaves.
     */
    def synthesizeTopBranch(b: TBranch): core.SRType = b match {
      case be: TBranchElement =>
        Option(streamers.getOrElse(be.getClassName, null)) match {
          // majority should be present
          case Some(streamerInfo) =>
            synthesizeStreamerInfo(be, streamerInfo, null, core.SRRootType)
          // a splitted vector doesn't show up in the TStreamerInfo
          // start to analyze the class name
          case None =>
            synthesizeClassName(be.getClassName, be, core.SRRootType)
        }
      case _ => { // simple TBranch case
        val leaves = b.getLeaves
        def leafAt(idx: Int): TLeaf = leaves.get(idx).asInstanceOf[TLeaf]

        if (leaves.size <= 1) // a single leaf branch
          synthesizeLeaf(b, leafAt(0))
        else // multi leaf branch
          new core.SRComposite(b.getName, b,
            (0 until leaves.size).map(idx => synthesizeLeaf(b, leafAt(idx))),
            true, true)
      }
    }
    
    /*
     * Basic type that sits nested inside something else: built from the
     * numeric type code alone, with an empty name and no branch or leaf.
     * Codes outside 1..19 yield SRNull.
     */
    def synthesizeBasicStreamerType(typeCode: Int): core.SRType = typeCode match {
      case 1 | 10 | 11 => core.SRByte("", null, null)
      case 2 | 12 | 19 => core.SRShort("", null, null)
      case 3 | 6 | 13 | 15 => core.SRInt("", null, null)
      case 4 | 14 | 16 | 17 => core.SRLong("", null, null)
      case 5 | 9 => core.SRFloat("", null, null)
      case 7 => core.SRString("", null, null)
      case 8 => core.SRDouble("", null, null)
      case 18 => core.SRBoolean("", null, null)
      case _ => core.SRNull
    }

    /*
     * Format the name if it's a pointer to the class name: a single trailing
     * '*' is removed, anything else is returned unchanged. An empty name is
     * returned as is instead of failing.
     */
    def formatNameForPointer(className: String): String =
      className.stripSuffix("*")

    // Builds the type for a single TStreamerElement, dispatching on the
    // element's numeric type code:
    //   0              base class, resolved through the streamer registered under the element name
    //   1..19, 65      basic types and TString, bound to the first leaf of b (if any)
    //   21..39         arrays of basic types (code - 20 is the basic code)
    //   61, 62, 69     resolved through the streamer registered under the (pointer-stripped) type name
    //   66, 67         resolved through the streamer registered under the element name
    //   500            STL, delegated to synthesizeStreamerSTL
    // Everything else, and every failed streamer lookup, yields SRUnknown.
    def synthesizeStreamerElement(
      b: TBranchElement, 
      streamerElement: TStreamerElement,
      parentType: core.SRTypeTag
      ): core.SRType = {

      // leaf at index 0 of the branch, or null when there is no branch
      def firstLeaf: TLeafElement =
        if (b != null) b.getLeaves.get(0).asInstanceOf[TLeafElement]
        else null

      // resolve this element through the TStreamerInfo registered under key
      def viaStreamerInfo(key: String): core.SRType = {
        val info = streamers.applyOrElse(key, (x: String) => null)
        if (info != null) synthesizeStreamerInfo(b, info, streamerElement, parentType)
        else core.SRUnknown(streamerElement.getName)
      }

      // when you have an array of something simple kOffsetL by ROOT convention
      // one SRArray per dimension; the level size comes from getMaxIndex at
      // position (getArrayDim - dimsToGo)
      def iterateArray(dimsToGo: Int): core.SRType =
        core.SRArray(streamerElement.getName, b, firstLeaf,
          if (dimsToGo != 1) iterateArray(dimsToGo-1)
          else synthesizeBasicStreamerType(streamerElement.getType-20),
          streamerElement.getMaxIndex()(streamerElement.getArrayDim-dimsToGo))

      streamerElement.getType match {
        case 0 => { // BASE CLASS
          // assume for now that the inheritance is from composite classes
          // NOTE: get the name instead of type name for the super class
          val baseInfo = streamers.applyOrElse(streamerElement.getName,
            (x: String) => null)
          if (baseInfo != null) {
            // in principle there must be the TStreamerInfo for all the bases
            // used
            if (debug) println(s"There is a class name for: ${streamerElement.getName}")
            if (baseInfo.getElements.size != 0)
              synthesizeStreamerInfo(b, baseInfo, streamerElement, parentType)
            else
              // empty BASE CLASS, create an empty composite
              // splitting does not matter - it's empty
              // BUT it's the base class!!!
              core.SRComposite(streamerElement.getName, b, Seq(), false, false, true)
          }
          else core.SRUnknown(streamerElement.getName)
        }

        // basic types
        case 1 | 10 | 11 => core.SRByte(streamerElement.getName, b, firstLeaf)
        case 2 | 12 | 19 => core.SRShort(streamerElement.getName, b, firstLeaf)
        case 3 | 6 | 13 | 15 => core.SRInt(streamerElement.getName, b, firstLeaf)
        case 4 | 14 | 16 | 17 => core.SRLong(streamerElement.getName, b, firstLeaf)
        case 5 | 9 => core.SRFloat(streamerElement.getName, b, firstLeaf)
        // 65 is TString
        case 7 | 65 => core.SRString(streamerElement.getName, b, firstLeaf)
        case 8 => core.SRDouble(streamerElement.getName, b, firstLeaf)
        case 18 => core.SRBoolean(streamerElement.getName, b, firstLeaf)

        // arrays of basic types
        case code if code >= 21 && code < 40 =>
          iterateArray(streamerElement.getArrayDim)

        // NOTE: looked up by type name
        case 61 | 62 =>
          viaStreamerInfo(formatNameForPointer(streamerElement.getTypeName))

        // TODO: Retrieving of TObject derived classes is not supported yet
        // TObject
        // NOTE: looked up by element name
        case 66 | 67 =>
          viaStreamerInfo(streamerElement.getName)

        case 500 =>
          synthesizeStreamerSTL(b, streamerElement.asInstanceOf[TStreamerSTL], parentType)

        case 69 => {
          if (debug) {
            println(s"typeName=${streamerElement.getTypeName} name=${streamerElement.getName} strippedName=${streamerElement.getTypeName.take(streamerElement.getTypeName.length-1)}")
          }
          // this is a pointer
          viaStreamerInfo(formatNameForPointer(streamerElement.getTypeName))
        }

        case _ => core.SRUnknown("unidentified STreamerElement type")
      }
    }

    /**
     * Full type definition for a basic type, selected by its type name.
     * The result has an empty name and no branch or leaf attached; a name
     * that is not listed yields SRNull.
     *
     * Array is also a basic type - leave it out for now
     */
    def synthesizeBasicTypeName(
      typeName: String // basic type name
    ): core.SRType = typeName match {
      case "int" | "unsigned int" => core.SRInt("", null, null)
      // "Double32_t" is one of the ROOT names ending with _t
      case "float" | "Double32_t" => core.SRFloat("", null, null)
      case "double" => core.SRDouble("", null, null)
      case "char" | "unsigned char" => core.SRByte("", null, null)
      case "long" | "unsigned long" => core.SRLong("", null, null)
      case "short" | "unsigned short" => core.SRShort("", null, null)
      case "bool" => core.SRBoolean("", null, null)
      case _ => core.SRNull
    }

    /**
     * @return the full type definition from known types
     *
     * assumptions: no TStreamerInfo for the class itself
     * 1. Either this is contained within some STL and should be STL or pair itself
     * 2. This is STL for top branch or std::pair
     *
     * assume for now that this is one of the following:
     * 1) STL Collection
     * 2) std::array ???
     * 3) pair
     * 4) enum
     *
     * we basically assume that if something is not present, it must be something of STL
     *
     * Resolution order, as implemented below:
     * 1. the two known string class names give an SRSTLString
     * 2. a class name registered in customStreamers gives that entry
     * 3. a name that does not have the shape `name<arguments>` gives SRUnknown
     * 4. bitset is re-synthesized as `vector<bool>`
     * 5. linear collections give an SRVector, associative ones an SRMap,
     *    pair gives an SRComposite of its two members
     * 6. any other template name gives SRNull
     *
     * Map and pair arguments are split at the first comma that is not nested
     * inside `<...>`; when there is no such comma the result is SRNull.
     */
    def synthesizeClassName(
      className: String, // c++ standard class type declaration (w/ spaces for templ.)
      b: TBranchElement, // if the branch is split, we still need it
      parentType: core.SRTypeTag // the tag for what our parent is
    ): core.SRType = {
      val stlLinear = Seq("vector", "list", "deque", "set", "multiset",
        "forward_list", "unordered_set", "unordered_multiset")
      val stlAssociative = Seq("map", "unordered_map", "multimap", "unordered_multimap")
      val stlPair = "pair"
      val stlBitset = "bitset"
      val stlStrings = Seq("string", "__basic_string_common")

      // true when our parent is a collection, i.e. this is not the top collection
      val insideCollection = parentType match {
        case core.SRCollectionType => true
        case _ => false
      }

      // TStreamerInfo registered under typeName, or null when there is none
      def lookupStreamerInfo(typeName: String): TStreamerInfo =
        streamers.applyOrElse(typeName, (x: String) => null)

      // Splits "A,B" at the first comma that is not nested inside <...> and
      // trims both halves; (null, null) when there is no such comma.
      def splitTemplatePair(arguments: String): (String, String) = {
        // depthBefore(i) is the <...> nesting depth in front of character i
        val depthBefore = arguments.scanLeft(0) { (depth, c) =>
          c match {
            case '<' => depth + 1
            case '>' => depth - 1
            case _ => depth
          }
        }
        arguments.indices.find(i => arguments(i) == ',' && depthBefore(i) == 0) match {
          case Some(i) => (arguments.take(i).trim, arguments.drop(i + 1).trim)
          case None => (null, null)
        }
      }

      // Element type of a linear collection / value type of a map.
      def synthesizeElementType(typeName: String): core.SRType = {
        val streamerInfo = lookupStreamerInfo(typeName)
        if (streamerInfo == null) {
          // no streamer info for the type
          // is it a basic type
          // else synthesize the name again
          val basicType = synthesizeBasicTypeName(typeName)
          if (basicType == core.SRNull)
            // not a basic type
            // can not be composite class - must have a TStreamerInfo
            // should be some STL - nested => no subbranching
            synthesizeClassName(typeName, null, core.SRCollectionType)
          else basicType
        }
        else
          // there is a TStreamerInfo
          // NOTE: this applies to a top STL node as well for which b != null
          synthesizeStreamerInfo(b, streamerInfo, null, core.SRCollectionType)
      }

      // Type of the pair member at the given position (0 = first, 1 = second);
      // the position selects the sub-branch handed down when b has sub-branches.
      def synthesizePairMember(typeName: String, position: Int): core.SRType = {
        val streamerInfo = lookupStreamerInfo(typeName)
        if (streamerInfo == null) {
          // no streamer info
          // is basic type
          // else synthesize the name again
          val basicType = synthesizeBasicTypeName(typeName)
          if (basicType == core.SRNull)
            // not a basic type
            synthesizeClassName(typeName,
              if (b==null) null
              else if (b.getBranches.size==0) null
              else b.getBranches.get(position).asInstanceOf[TBranchElement],
              core.SRCompositeType)
          else basicType
        }
        else
          // there is a TStreamerInfo
          synthesizeStreamerInfo(null, streamerInfo, null, core.SRCompositeType)
      }

      // quickly parse the class type and template arguments
      val classTypeRE = "(.*?)<(.*?)>".r
      if (debug) println(s"classType being synthesized: ${className} ${className.trim.length} ${className.length}")
      val (classTypeString, argumentsTypeString) = className match {
        case classTypeRE(aaa,bbb) => (aaa,bbb.trim)
        case _ => (null, null)
      }

      if (debug) println(s"Parsed classType=$classTypeString and argumentsType=$argumentsTypeString")

      if (stlStrings contains className) {
        // this is a string; it is the top one unless our parent is a collection
        if (b == null) core.SRSTLString("", null, !insideCollection)
        else core.SRSTLString(b.getName, b, !insideCollection)
      }
      else {
        //
        // check if it's among custom streamers
        //
        val isCustom = customStreamers.applyOrElse(className,
          (x: String) => core.SRNull)

        if (isCustom != core.SRNull) isCustom
        else if (classTypeString == null || argumentsTypeString == null)
          // parsing was unsuccessful
          core.SRUnknown(className)
        else {
          // based on the class type name
          classTypeString match {
            case it if stlBitset == it => {
              // remap this guy to vector of bool
              // the full template name is needed: a bare "vector" would not
              // parse as name<arguments> and would end up as SRUnknown
              synthesizeClassName("vector<bool>", b, parentType)
            }
            case it if stlLinear contains it => {
              // we have something that is vector like
              // arguments must be a single typename
              val valueType = synthesizeElementType(argumentsTypeString)

              // TODO: we need to do each collection separately???
              // that is only the case when we have version for each STL separately read in
              if (b == null)
                // top collection unless the parent is a collection
                core.SRVector("", b, valueType, false, !insideCollection)
              else if (insideCollection)
                // branch is not null for the vector
                // parent is collection - could not happen in principle...
                core.SRVector(b.getName, b, valueType, false, false)
              else
                // top STL node will be here...
                core.SRVector(b.getName, b, valueType, b.getBranches.size != 0, true)
            }
            case it if stlAssociative contains it => {
              // we have something that is map like
              // extract the key/value type names
              if (debug) println(s"Synthesizing the current class arguments: ${classTypeString}")
              val (keyTypeString, valueTypeString) = splitTemplatePair(argumentsTypeString)

              // if there is a matching issue - assign null
              if (keyTypeString == null || valueTypeString == null) core.SRNull
              else {
                // check first if it is a basic type
                // if not send to be synthesized
                // TODO: For now, assume either custom streamers or basic type
                // therefore we send it with null for the branch!
                val basicKeyType = synthesizeBasicTypeName(keyTypeString)
                val keyType =
                  if (basicKeyType == core.SRNull)
                    synthesizeClassName(keyTypeString, null, core.SRCollectionType)
                  else basicKeyType

                // value type
                val valueType = synthesizeElementType(valueTypeString)

                // TODO: we need to do each collection separately???
                // that is only the case when we have version for each STL separately read in
                if (b == null)
                  // top collection unless the parent is a collection
                  new core.SRMap("", b, keyType, valueType, false, !insideCollection)
                else if (insideCollection)
                  new core.SRMap(b.getName, b, keyType, valueType, false, false)
                else
                  // if there are multiple sub branches of this guy - it's split
                  new core.SRMap(b.getName, b, keyType, valueType,
                    b.getBranches.size != 0, true)
              }
            }
            case it if it == stlPair => {
              // pair is considered to be the composite
              val (firstTypeString, secondTypeString) = splitTemplatePair(argumentsTypeString)

              if (debug) println(s"We got a pair: first=$firstTypeString second=$secondTypeString")

              // if there is a matching issue - assign null
              if (firstTypeString == null || secondTypeString == null) core.SRNull
              else {
                val firstType = synthesizePairMember(firstTypeString, 0)
                val secondType = synthesizePairMember(secondTypeString, 1)

                // TODO: Do we need a special type for pair???
                // TODO: Can pair be split???
                if (b == null)
                  // branch is null for this type: never split, never top
                  core.SRComposite("", b, Seq(firstType, secondType), false, false)
                else {
                  // only a pair sitting directly under the root is a top one
                  val isTop = parentType match {
                    case core.SRRootType => true
                    case _ => false
                  }
                  core.SRComposite(b.getName, b, Seq(firstType, secondType),
                    b.getBranches.size != 0, isTop)
                }
              }
            }
            case _ => core.SRNull
          }
        }
      }
    }

    /**
     * @return the SparkROOT type for the STL collection
     */
    def synthesizeStreamerSTL(
      b: TBranchElement,
      streamerSTL: TStreamerSTL,
      parentType: core.SRTypeTag
    ): core.SRType = {
      // Maps one TStreamerSTL element onto a SparkROOT type by dispatching on
      // its STL type code: 1 = std::vector, 4 = std::map, 8 = std::bitset,
      // 365 = std::string. Every other code yields core.SRNull.
      //
      // b          - branch backing the collection; null for a nested collection
      //              or one that has no separate branch
      // parentType - tag of the enclosing type; a collection parent means this
      //              is not the top collection

      // debugging...
      if (debug) println(s"TStreamer STL for type: ${streamerSTL.getTypeName}")

      val typeName = streamerSTL.getTypeName
      // if there are sub branches of this guy - it's split (only call with b != null)
      def isSplit = b.getBranches.size != 0

      // parse by the stl type
      streamerSTL.getSTLtype match {
        case 1 => { // std::vector

          // probe the member type
          val ctype = streamerSTL.getCtype
          val memberType =
            if (ctype < 61) {
              // this is some basic type - synthesize
              // BOOl shows up as ctype 21 - TODO: this needs to be fixed!
              // until then synthesize it as 18
              if (typeName.contains("bool") && ctype == 21) synthesizeBasicStreamerType(18)
              else synthesizeBasicStreamerType(ctype)
            }
            else {
              // class name of the member = text between the first '<' and the
              // last character of the type name
              val memberClassName =
                typeName.slice(typeName.indexOf('<') + 1, typeName.length - 1).trim
              val streamerInfo = streamers.applyOrElse(memberClassName,
                (x: String) => null)

              if (streamerInfo == null)
                // no streamer info
                // right away - for nested STL no splitting!
                synthesizeClassName(memberClassName, null, core.SRCollectionType)
              else synthesizeStreamerInfo(
                // hand the branch down only if it actually has sub branches
                if (b == null || b.getBranches.size == 0) null else b,
                streamerInfo, null, core.SRCollectionType)
            }

          if (b == null)
            // nested vector or a vector does not have a separate branch
            parentType match {
              // this is not the top collection
              case core.SRCollectionType => core.SRVector("", b, memberType, false, false)
              // this is the top collection
              case _ => core.SRVector(streamerSTL.getName, b, memberType, false, true)
            }
          else
            parentType match {
              case core.SRCollectionType =>
                core.SRVector(b.getName, b, memberType, false, false)
              case _ => core.SRVector(b.getName, b, memberType, isSplit, true)
            }
        }
        case 4 => { // std::map
          // Maps are delegated to class-name synthesis. An earlier approach
          // that built core.SRMap from the pair<key,value> streamer info (or
          // from a synthesized pair class name) is disabled here.
          synthesizeClassName(typeName, b, parentType)
        }
        case 8 => {
          // std::bitset - map it to vector of bool
          // (the template argument is required: a bare "vector" names no
          // member type)
          synthesizeClassName("vector<bool>", b, parentType)
        }
        case 365 => { // std::string
          if (b == null)
            parentType match {
              case core.SRCollectionType => core.SRSTLString("", null, false)
              case _ => core.SRSTLString(streamerSTL.getName, null, true)
            }
          else
            parentType match {
              case core.SRCollectionType => core.SRSTLString(b.getName, b, false)
              case _ => core.SRSTLString(streamerSTL.getName, b, true)
            }
        }
        case _ => core.SRNull
      }
    }

    /**
     * Synthesize the SparkROOT type of a class described by a TStreamerInfo.
     *
     * @param b branch of the object; null when the object is read from its
     *          parent's buffer instead of a branch of its own
     * @param streamerInfo streamer info of the class
     * @param streamerElement element declaring this object in its parent;
     *          may be null (several callers pass null)
     * @param parentType tag of the enclosing type
     * @param flattenable is this branch flattenable (not read by this method)
     * @return the synthesized type; core.SRNull for a TClonesArray without a
     *         branch
     */
    def synthesizeStreamerInfo(
      b: TBranchElement, 
      streamerInfo: TStreamerInfo,
      streamerElement: TStreamerElement, 
      parentType: core.SRTypeTag,
      flattenable: Boolean = false // is this branch flattenable
    ): core.SRType = {
      val elements = streamerInfo.getElements
      def elementAt(i: Int) = elements.get(i).asInstanceOf[TStreamerElement]

      // streamer element type 0 marks a BASE class; no element => not a base
      def isBase = streamerElement != null && streamerElement.getType == 0
      def isTop = parentType match {
        case core.SRRootType => true
        case _ => false
      }
      // members of an object that is not split: they do not need a branch
      // for reading - the buffer will be passed to them
      def bufferedMembers =
        for (i <- 0 until elements.size)
          yield synthesizeStreamerElement(null, elementAt(i), core.SRCompositeType)

      if (elements.size == 0) {
        // that is some empty class
        // streamerElement can be null here, so fall back to the branch name
        // (or no name at all) instead of dereferencing it
        val name =
          if (streamerElement != null) streamerElement.getName
          else if (b != null) b.getName
          else ""
        core.SRComposite(name, b, Seq(), false, false, isBase)
      }
      else if (elementAt(0).getName == "This")
        // the first element stands for the type itself - synthesize that
        synthesizeStreamerElement(b, elementAt(0), parentType)
      else if (streamerInfo.getName == "TClonesArray") {
        // only for clones that occupy a branch
        if (b == null) core.SRNull
        else
          // map the array to a vector of the cloned class; this will
          // eventually call back to synthesizeStreamerInfo and get unwrapped
          synthesizeClassName(s"vector<${b.getClonesName}>", b, parentType)
      }
      else if (b == null)
        core.SRComposite(
          if (streamerElement == null) "" else streamerElement.getName,
          null, bufferedMembers, false, false, isBase)
      else if (b.getBranches.size == 0)
        // unsplittable branch
        core.SRComposite(b.getName, b, bufferedMembers, false, isTop)
      else if (b.getType == 1 || b.getType == 2 || b.getType == 3 || b.getType == 4)
        // splittable and flattenable:
        // this is either a BASE/Object inside some leaf
        // or an STL Collection or TClonesArray
        synthesizeFlattenable(b, streamerInfo)
      else
        // splittable, non-flattenable branch: one member per sub branch,
        // matched to its streamer element through the sub branch id
        core.SRComposite(
          b.getName, b,
          for (i <- 0 until b.getBranches.size;
            sub = b.getBranches.get(i).asInstanceOf[TBranchElement])
            yield synthesizeStreamerElement(sub, elementAt(sub.getID),
              core.SRCompositeType),
          true, isTop)
    }

    /**
     * Synthesize a branch whose sub branches are flattened
     * @return SRType
     */
    def synthesizeFlattenable(
      b: TBranchElement, // branch whose subs are flattened
      streamerInfo: TStreamerInfo // streamer Info of this branch
    ) = {
      import scala.util.control.NonFatal

      // streamer info registered under a class name; null when there is none
      def streamerFor(className: String) =
        streamers.applyOrElse(className, (x: String) => null)

      // Finds the direct sub branch of b that holds member objectName, where
      // history lists the enclosing member names walked so far.
      // Throws NoSuchElementException when no sub branch carries that name;
      // iterate relies on this to detect members that are not flattened.
      def findBranch(objectName: String, 
        history: Seq[String]
      ): TBranchElement = {
        // build the full name of the branch: a branch of type 1 contributes
        // its own dotted name minus the last component, any other branch
        // contributes its whole name
        val nameParts = b.getName.split('.')
        val prefix = if (b.getType == 1) nameParts.dropRight(1) else nameParts
        val fullName = (prefix.toSeq ++ history ++ Seq(objectName)).mkString(".")

        // debug 
        if (debug){
          println(s"History: $history")
          println(s"object Name: $objectName")
          println(s"fullName: $fullName")
        }

        // when we have arrays, the square brackets are reflected in the name
        // of the branch - strip them!
        def bareName(sub: TBranchElement) = {
          val name = sub.getName
          val bracket = name.indexOf('[')
          if (bracket > 0) name.substring(0, bracket) else name
        }

        // take the first sub branch with a matching name
        (0 until b.getBranches.size).iterator
          .map(i => b.getBranches.get(i).asInstanceOf[TBranchElement])
          .find(sub => bareName(sub) == fullName)
          .getOrElse(throw new NoSuchElementException(
            s"no sub branch named '$fullName' under branch '${b.getName}'"))
      }

      // Builds the member types for every element of info.
      def iterate(info: TStreamerInfo, history: Seq[String]): Seq[core.SRType] = {
        // right away we have composite
        for (i <- 0 until info.getElements.size; 
          streamerElement=info.getElements.get(i).asInstanceOf[TStreamerElement]
          // skip if TObject is -1 => don't create a composite for it
          if streamerElement.getType >= 0) yield {
          if (debug) {
            println(s"StreamerElement: type=${streamerElement.getType} name=${streamerElement.getName} typeName=${streamerElement.getTypeName}")
          }

          val memberName = streamerElement.getName
          streamerElement.getType match {
            // this is the BASE class
            case 0 if b.getType == 4 || b.getType == 3 =>
              // STL node - everything is flattened
              // TClonesArray node - everything is flattened
              // => create a composite and recursively iterate the sub branches
              core.SRComposite(memberName, null,
                iterate(streamerFor(memberName), history), true, false)
            case 0 =>
              // not an STL node: find the sub branch and the TStreamerInfo
              // for this object and synthesize this guy
              synthesizeStreamerInfo(findBranch(memberName, history),
                streamerFor(memberName), streamerElement,
                core.SRCompositeType, true)
            case code if code < 61 || code == 500 =>
              // basic type or anything that is of STL type goes into 
              // element synthesis - find the branch for it and send it on
              synthesizeStreamerElement(findBranch(memberName, history),
                streamerElement, core.SRCompositeType)
            case _ =>
              // this typically would be some composite class
              // which we send recursively to iterate over again if it's not
              // split or we get the streamerInfo and send to synthesize
              val sInfo = streamerFor(
                formatNameForPointer(streamerElement.getTypeName))

              // if there is no sInfo
              if (sInfo == null) core.SRUnknown(memberName)
              else
                // TODO: This try/catch is not the best
                // create the composite and recursively iterate over all the
                // members; if that fails, check then that this branch of
                // type 2 is not actually flattened.
                // Only non-fatal failures trigger the fallback so that
                // errors such as OutOfMemoryError still propagate.
                try {
                  core.SRComposite(memberName, null,
                    iterate(sInfo, history :+ memberName), true, false)
                } catch {
                  case NonFatal(_) =>
                    synthesizeStreamerInfo(findBranch(memberName, history),
                      sInfo, streamerElement, core.SRCompositeType, false)
                }
          }
        }
      }

      if (debug) println(s"Starting synthesize of Flattenable branch: ${b.getName}")

      // create a composite by iterating recursively over the members
      core.SRComposite(b.getName, b,
        iterate(streamerInfo, Seq()), true, false)
    }

    /**
     * Map the branch => SRType
     */
    def synthesizeBranchElement(b: TBranchElement, // top branch or sub
      streamerElement: TStreamerElement, // streamer Element for a subbranch
      parentType: core.SRTypeTag
      ): core.SRType = {
      // Without a streamer element this would be a top branch, which should
      // not be the case here => core.SRNull. Otherwise the work is delegated
      // to synthesizeStreamerElement. The branch itself is not inspected.
      if (streamerElement == null) core.SRNull
      else synthesizeStreamerElement(b, streamerElement, parentType)
    }

    // Result: the typed root, built from the requested top level branches
    requiredColumns match {
      // initialization stage - every column gets mapped
      case null =>
        new core.SRRoot(tree.getName, tree.getEntries,
          (0 until tree.getNBranches)
            .map(i => tree.getBranch(i))
            .map(branch => synthesizeTopBranch(branch)))
      // no columns at all (e.g. count) - only the number of entries matters
      case Array() =>
        new core.SREmptyRoot(tree.getName, tree.getEntries)
      // a query that needs a non-empty list of columns: keep the branches
      // whose name, with '.' replaced by '_', is among them
      case _ =>
        new core.SRRoot(tree.getName, tree.getEntries,
          (0 until tree.getNBranches)
            .map(i => tree.getBranch(i))
            .withFilter(branch =>
              requiredColumns.contains(branch.getName().replace(".", "_")))
            .map(branch => synthesizeTopBranch(branch)))
    }
  }

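  /**
   * Build the AbstractSchemaTree of a TTree.
   * @return a RootNode with one child per selected branch, or an
   *         EmptyRootNode when requiredColumns is an empty array
   */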
  def buildAST(tree: TTree, 
    streamers: Map[String, TStreamerInfo], // map of streamers
    requiredColumns: Array[String] // list of column names that must be preserved
    ): AbstractSchemaTree = 
  {
    // Plain TBranch => terminal node. A branch that has sub branches is not
    // handled and gives null.
    // TODO: Do we have these cases or TBranch has only Leaves???
    def synthesizeBranch(branch: TBranch): AbstractSchemaTree = 
      if (branch.getBranches.size > 0) null
      else {
        // simple node:
        //  1. extract the information needed (name, title, classname, datatype)
        //  2. assign the iterator
        val mytype = assignBranchType(branch)
        val leaves = branch.getLeaves
        def leafAt(i: Int) = {
          val l = leaves.get(i).asInstanceOf[TLeaf]
          new Leaf(new LeafInfo(l.getName, l.getRootClass.getClassName, l.getLen))
        }
        def nodeInfo = new NodeInfo(
          branch.getName, branch.getTitle, branch.getRootClass.getClassName, mytype)

        if (leaves.size != 1)
          // several leaves (or none) => struct-like node
          new TerminalMultiLeafNode(
            (0 until leaves.size).map(i => leafAt(i)),
            nodeInfo,
            mytype.getIterator(branch).asInstanceOf[StructBranchIterator])
        else
          new TerminalNode(leafAt(0), nodeInfo,
            mytype.getIterator(branch).asInstanceOf[BasicBranchIterator])
      }

    // synthesize the BranchElement like branches, dispatching on the type
    // code of the branch element
    def synthesizeBranchElement(branchElement: TBranchElement): AbstractSchemaTree = 
    {
      val subs = branchElement.getBranches

      // leaf at index 0 of this branch element
      def firstLeaf = branchElement.getLeaves.get(0).asInstanceOf[TLeafElement]
      // wrap a leaf together with its descriptive info
      def describe(leaf: TLeafElement) = new LeafElement(new LeafElementInfo(
        leaf.getName, leaf.getRootClass.getClassName, leaf.getLen, leaf.getType))
      // descriptive info of this branch element (its type is left as SRNull)
      def nodeInfo = new NodeElementInfo(
        branchElement.getName, branchElement.getTitle,
        branchElement.getRootClass.getClassName, SRNull,
        branchElement.getParentName, branchElement.getStreamerType,
        branchElement.getType, branchElement.getClassName,
        branchElement.getID)
      // nodes of all sub branches, in branch order
      def children = (0 until subs.size).map(i =>
        synthesizeBranchElement(subs.get(i).asInstanceOf[TBranchElement]))

      branchElement.getType match {
        case -1 => // TODO: mark as unknown for now
          val leaf = firstLeaf
          if (subs.size == 0) new UnknownTerminalNode(describe(leaf), nodeInfo, null)
          else new UnknownNode(children, describe(leaf), nodeInfo)

        // LeafNode with id>=0 - simple data member of split object
        case 0 if branchElement.getID >= 0 =>
          new TerminalSimpleNodeElement(describe(firstLeaf), nodeInfo, null)

        // id<0 and there are subs - splittable object
        case 0 if subs.size > 0 =>
          // the leaf is still fetched here although this node does not store it
          val leaf = firstLeaf
          new SplittableObjectNodeElement(children, nodeInfo)

        // type=0 and id<0 without subs - unsplit object with default streamer
        // at the time of writing
        case 0 =>
          val leaf = firstLeaf
          // need to decide if it's a collection or an object:
          // check the streamer info for this class name
          streamers.applyOrElse(branchElement.getClassName,
              (x: String) => null) match {
            case null =>
              new UnknownTerminalNode(describe(leaf), nodeInfo, null)
            case myStreamer =>
              val elems = myStreamer.getElements
              val wrapsOwnType = elems.size == 1 &&
                elems.get(0).asInstanceOf[TStreamerElement].getName == "This"
              if (wrapsOwnType) {
                // this streamerInfo is a singleton with the first element as
                // its type
                val streamerElement = elems.get(0).asInstanceOf[TStreamerElement]
                if (streamerElement.getRootClass.getClassName == "TStreamerSTL")
                  new UnSplittableCollectionNodeElement(
                    describe(leaf), nodeInfo,
                    streamerElement.asInstanceOf[TStreamerSTL], null)
                else null
              }
              else
                // this streamerInfo is for a composite object
                new UnSplittableObjectNodeElement(
                  describe(leaf), nodeInfo,
                  (0 until elems.size).map(i =>
                    elems.get(i).asInstanceOf[TStreamerElement]),
                  null)
          }

        case 1 => // Base Class of a split object
          // are we inheriting from a simple type? => null
          if (subs.size == 0) null
          else new SplittableObjectNodeElement(children, nodeInfo)

        case 2 => // class-typed data member of a split object
          // TODO: assume that this object is splittable for now
          new SplittableObjectNodeElement(children, nodeInfo)

        case 4 =>
          val leaf = firstLeaf
          new SplittableCollectionNodeElement(children, describe(leaf), nodeInfo)

        case 41 =>
          new TerminalCollectionMemberNodeElement(describe(firstLeaf), nodeInfo, null)

        case _ => null
      }
    }

    // pick the synthesis that matches the runtime kind of the branch
    def synthesize(branch: TBranch): AbstractSchemaTree = branch match {
      case element: TBranchElement => synthesizeBranchElement(element)
      case plain => synthesizeBranch(plain)
    }

    requiredColumns match {
      // initialization stage - every column gets mapped
      case null =>
        new RootNode(tree.getName,
          (0 until tree.getNBranches)
            .map(i => tree.getBranch(i))
            .map(branch => synthesize(branch)))
      // no columns at all (e.g. count)
      case Array() => new EmptyRootNode(tree.getName, tree.getEntries)
      // a query that needs a non-empty list of columns
      case _ =>
        new RootNode(tree.getName,
          (0 until tree.getNBranches)
            .map(i => tree.getBranch(i))
            .withFilter(branch => requiredColumns.contains(branch.getName()))
            .map(branch => synthesize(branch)))
    }
  }

  /**
   * @return Spark DataFrame Schema
   */
  def buildSparkSchema(ast: AbstractSchemaTree): StructType =
  {
    // Schema field of one node. Inner nodes become nested structs, terminal
    // nodes take the Spark type of their branch type; every other node kind
    // (including the element-style nodes) has no mapping and gives null.
    def toField(node: AbstractSchemaTree): StructField = node match {
      case Node(children, info) =>
        StructField(info.name, StructType(children.map(child => toField(child))))
      case TerminalNode(_, info, _) =>
        StructField(info.name, info.myType.toSparkType)
      case TerminalMultiLeafNode(_, info, _) =>
        StructField(info.name, info.myType.toSparkType)
      case _ => null
    }

    ast match {
      // one field per top level node
      case RootNode(_, topNodes) => StructType(topNodes.map(top => toField(top)))
      // nothing selected => schema without fields
      case EmptyRootNode(_, _) => StructType(Seq())
      // not a root => no schema
      case _ => null
    }
  }

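  /**
   * Build one Spark Row by taking the next value from the iterators of the
   * tree's terminal nodes.
   * @return a single Spark DataFrame Row
   */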
  def buildSparkRow(ast: AbstractSchemaTree): Row = 
  {
    // Value of one node for the current entry: inner nodes give a nested Row,
    // terminal nodes advance their iterator; every other node kind gives null.
    def valueOf(node: AbstractSchemaTree): Any = node match {
      case Node(children, _) => Row.fromSeq(children.map(child => valueOf(child)))
      case TerminalNode(_, _, iter) => iter.next
      case TerminalMultiLeafNode(_, _, iter) => Row.fromSeq(iter.next)
      case _ => null
    }

    ast match {
      case RootNode(_, topNodes) => Row.fromSeq(topNodes.map(top => valueOf(top)))
      // no columns: consume one entry and hand back an empty Row
      case emptyRoot @ EmptyRootNode(_, _) =>
        emptyRoot.entries -= 1
        Row()
    }
  }

  /**
   * Prints the tree to standard output, one node per line, with children
   * indented below their parent.
   */
  def printAST(ast: AbstractSchemaTree): Unit = 
  {
    // Prints node and, recursively, its children. level counts two-space
    // indents; children are printed two levels deeper than their parent.
    def show(node: AbstractSchemaTree, level: Int): Unit = 
    {
      val pad = "  " * level
      val deeper = level + 2

      node match {
        case RootNode(name, nodes) =>
          println(name)
          nodes.foreach(n => show(n, deeper))
        case EmptyRootNode(name, entries) =>
          println(s"$name Entries=$entries")
        case Node(subnodes, info) =>
          println(s"$pad$info")
          subnodes.foreach(n => show(n, deeper))
        case TerminalNode(leaf, info, _) => 
          println(s"$pad$info ---> ${leaf.info}")
        case TerminalMultiLeafNode(leaves, info, _) =>
          println(s"$pad$info ---> ${leaves.map(_.info.toString).mkString(" :: ")}")
        // Splittable Collection Node - 1 level of nestedness
        case SplittableCollectionNodeElement(subnodes, leaf, info) =>
          println(s"${pad}Splittable Collection :: $info ---> ${leaf.info}")
          subnodes.foreach(n => show(n, deeper))
        case TerminalCollectionMemberNodeElement(leaf, info, _) =>
          println(s"${pad}Collection Member :: $info ---> ${leaf.info}")
        case SplittableObjectNodeElement(subnodes, info) =>
          println(s"${pad}Splittable Object :: $info ---> no leaf")
          subnodes.foreach(n => show(n, deeper))
        case UnSplittableCollectionNodeElement(leaf, info, _, _) =>
          println(s"${pad}UnSplittable Collection :: $info ---> ${leaf.info}")
        case UnSplittableObjectNodeElement(leaf, info, _, _) =>
          println(s"${pad}UnSplittable Object :: $info ---> ${leaf.info}")
        case TerminalSimpleNodeElement(leaf, info, _) => 
          println(s"${pad}Terminal Simple :: $info ---> ${leaf.info}")
        case TerminalObjectNodeElement(leaf, info, _) => 
          println(s"${pad}Terminal Object :: $info ---> ${leaf.info}")
        case UnknownNode(subnodes, leaf, info) =>
          println(s"$pad$info ---> ${leaf.info}")
          subnodes.foreach(n => show(n, deeper))
        case UnknownTerminalNode(leaf, info, _) => 
          println(s"$pad$info ---> ${leaf.info}")
        // anything else (including a null node) prints as "null"
        case _ => println(null)
      }
    }

    show(ast, 0)
  }

  /**
   * Tells whether the tree can still produce another entry.
   *
   * For a RootNode or a Node only the first child is consulted; a node
   * without children reports false instead of failing on an empty sequence.
   * An EmptyRootNode reports true while its entry counter is positive.
   * Terminal nodes delegate to their iterator. Every other node kind
   * reports false.
   *
   * @param ast tree (or subtree) to inspect
   * @return true if another entry is available, false otherwise
   */
  def containsNext(ast: AbstractSchemaTree): Boolean = ast match {
    case RootNode(_, nodes) => nodes.headOption.exists(first => containsNext(first))
    case EmptyRootNode(_, entries) => entries > 0
    case Node(subnodes, _) => subnodes.headOption.exists(first => containsNext(first))
    case TerminalNode(_, _, iter) => iter.hasNext
    case TerminalMultiLeafNode(_, _, iter) => iter.hasNext
    case _ => false
  }

  /*
   * Section for some utils
   */
  /**
   * Searches a directory, depth first and in key order, for the first TTree.
   *
   * Keys whose class name is "TDirectory" are searched recursively; if such
   * a directory holds no tree the search continues with the next key rather
   * than giving up.
   *
   * @param dir directory to search
   * @return the first tree found, or null when there is none
   */
  def findTree(dir: TDirectory): TTree =
  {
    // Resolves the key at `index` to a tree, looking inside sub-directories.
    def treeAt(index: Int): Option[TTree] = {
      val obj = dir.getKey(index).getObject.asInstanceOf[AbstractRootObject]
      obj.getRootClass.getClassName match {
        // Option(...) turns the "not found" null of the recursive call into None
        case "TDirectory" => Option(findTree(obj.asInstanceOf[TDirectory]))
        case "TTree" => Some(obj.asInstanceOf[TTree])
        case _ => None
      }
    }

    // The iterator is lazy, so keys after the first hit are never read.
    (0 until dir.nKeys).iterator
      .map(treeAt)
      .collectFirst { case Some(tree) => tree }
      .orNull
  }


  /**
   * Indexes the reader's streamer infos by name.
   *
   * Entries of `reader.streamerInfo` that are not TStreamerInfo instances
   * are ignored; when two entries share a name the later one wins.
   *
   * @param reader file reader providing the streamer info list
   * @return map from streamer name to its TStreamerInfo
   */
  def arrangeStreamers(reader: RootFileReader): Map[String, TStreamerInfo] = 
  {
    val infos = reader.streamerInfo
    (0 until infos.size)
      .map(idx => infos.get(idx))
      .collect { case info: TStreamerInfo => info.getName -> info }
      .toMap
  }

  /**
   * Fixed table from class name to the SRType used for it.
   * Every listed name is paired with its own `core.SRInt("", null, null)`.
   * NOTE(review): presumably consulted for classes without usable streamer
   * info - confirm against the callers.
   */
  val customStreamers: Map[String, core.SRType] = {
    val intClassNames = Seq(
      "trigger::TriggerObjectType",
      "reco::Muon::MuonTrackType",
      "pat::IsolationKeys",
      "reco::IsoDeposit"
    )
    intClassNames
      .map(className => className -> core.SRInt("", null, null))
      .toMap[String, core.SRType]
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy