All Downloads are FREE. Search and download functionalities are using the official Maven repository.

scales.xml.parser.pull.PullIteratees.scala Maven / Gradle / Ivy

The newest version!
package scales.xml.parser.pull

import scales.utils._
import scales.xml.{Elem, EndElem, PullType, QName, ScalesXml, XmlBuilder, XmlItem, XmlPath, addAndFocus, addChild, noXmlPath, impl => ximpl}
import collection.FlatMapIterator
import scalaz.Free.Trampoline
import scalaz.Id.Id
import scalaz.Scalaz.ToEqualOps
import scalaz.effect.IO
import scalaz.{Equal, Monad}
import scalaz.iteratee.Input
import scalaz.iteratee.Input.{Empty, Eof}
import scalaz.iteratee.Iteratee.iterateeT
import scalaz.iteratee.{IterateeT, StepT}
import scalaz.iteratee.StepT.{Cont, Done}

import scales.utils.iteratee.functions._

/**
 * Facade exposing the iteratees of the `PullIteratees` object for one fixed effect type `F`.
 *
 * Each method forwards to the function of the same name on the `PullIteratees` object.
 * Every method takes its own implicit `Monad[F]`; inside the method body that parameter
 * shadows the `F` member of this class.
 *
 * @param F the Monad instance supplied when the facade is created
 */
class PullIterateeFunctions[F[_]](val F: Monad[F]) {
  import scales.xml.{PeekMatch, QNamesMatch}

  /**
   * Resumable iteratee collecting every element found at the path described by qnames,
   * using the QName equality that `PullIteratees.onQNames` selects.
   */
  def onQNames(qnames: List[QName])(implicit F: Monad[F]): ResumableIter[PullType, F, QNamesMatch] = {
    PullIteratees.onQNames[F](qnames)(F)
  }

  /**
   * Collects all data belonging to each element whose chain of QNames matches qnames,
   * handing every match back as a path.  Same as onQNames but the QName equality is
   * supplied by the caller through qe.
   */
  def onQNamesI(qnames: List[QName])(implicit qe: Equal[QName], F: Monad[F]): ResumableIter[PullType, F, QNamesMatch] = {
    PullIteratees.onQNamesI[F](qnames)(qe, F)
  }

  /**
   * Varargs flavour of skip: skipv(2, 1) forwards the indexes 2 and 1.
   */
  def skipv(downTo: Int*)(implicit F: Monad[F]): IterateeT[PullType, F, PeekMatch] = {
    PullIteratees.skipv[F](downTo: _*)(F)
  }

  /**
   * Skips all events until the element addressed by the indexes in downTo is reached.
   * This can be used, for example, to identify qnames within a message and combined with capture to allow replaying.
   * Identifying a soap doc-lit request would be skip(List(2,1)).
   * It returns the XmlPath to the skipped position, for soap /Envelope/Body/Request, but does not collect the contents of that node.
   * An empty list will simply return the first Element found.
   */
  def skip(downTo: => List[Int])(implicit F: Monad[F]): IterateeT[PullType, F, PeekMatch] = {
    PullIteratees.skip[F](downTo)(F)
  }

}

/**
 * Iteratees related to pull parsing.
 *
 * Mixing this trait in provides ready made PullIterateeFunctions instances for IO,
 * Trampoline and Id, the PeekMatch / QNamesMatch type aliases and the iterate helpers.
 */
trait PullIteratees {

  /**
   * Creates the iteratee facade for the effect type F.
   *
   * @param F the Monad instance handed to the new PullIterateeFunctions
   */
  def pullIterateesOf[F[_]](implicit F: Monad[F]): PullIterateeFunctions[F] = new PullIterateeFunctions[F](F)

  // The implicit instances carry explicit types: an implicit definition without a declared
  // type is fragile for implicit resolution in Scala 2 and rejected by Scala 3.
  implicit val ioPullIteratees: PullIterateeFunctions[IO] = pullIterateesOf[IO]
  implicit val trampolinePullIteratees: PullIterateeFunctions[Trampoline] = pullIterateesOf[Trampoline]
  // not recommended but may help migrations
  implicit val idPullIteratees: PullIterateeFunctions[Id] = pullIterateesOf[Id]

  /** Alias of PullIteratees.PeekMatch so users of the trait need no extra import. */
  type PeekMatch = PullIteratees.PeekMatch

  /** Alias of PullIteratees.QNamesMatch so users of the trait need no extra import. */
  type QNamesMatch = PullIteratees.QNamesMatch

  /**
   * Wraps XmlPull: forwards to the Iterator[PullType] based iterate using xml.it.
   */
  def iterate(path: List[QName], xml: XmlPull)(implicit qe: Equal[QName]): FlatMapIterator[XmlPath] = iterate(path, xml.it)

  /**
   * A wrapping around withIter(onDone(List(onQNames(path))))(enumXml(xml, _))
   * it unwraps the data providing an Iterator[XmlPath], implemented by the Iterate class.
   *
   * @param path the QNames leading to the elements of interest
   * @param xml the pull events to search
   * @param qe equality used to compare QNames
   */
  def iterate(path: List[QName], xml: Iterator[PullType])(implicit qe: Equal[QName]): FlatMapIterator[XmlPath] =
    new Iterate(path, xml)

}

object PullIteratees {

  /**
   * Result of the onQNames iteratees: the QNames that were searched for paired with the
   * collected path (Some) or None when the input ended.
   */
  type QNamesMatch = (List[QName], Option[XmlPath])

  /**
   * Collects all data belonging to each element whose chain of QNames matches qnames
   * and returns every match as a path.
   *
   * QNames are compared with ScalesXml.qnameEqual; use onQNamesI to supply another Equal.
   */
  def onQNames[F[_]: Monad](qnames: List[QName]): ResumableIter[PullType, F, QNamesMatch] = {
    val monad = implicitly[Monad[F]]
    onQNamesI[F](qnames)(ScalesXml.qnameEqual, monad)
  }

  /**
   * Collects all data belonging to each element whose chain of QNames, counted from the
   * first element of the stream, matches qnames.  Every match is handed back as an XmlPath;
   * elements that are not being collected are removed from the path again when they close.
   *
   * The iteratee is resumable: a match yields Done(((qnames, Some(path)), continuation), Empty)
   * and the continuation carries on with the remaining events.  On Eof the result is
   * (qnames, None) paired with an iteratee that starts again from the initial state.
   *
   * This version of onQNamesI allows a custom QName Equal to be passed.
   *
   * @param qnames the QNames leading to the element to collect; an empty list is rejected via error(...)
   * @param qe equality used to compare the QNames of the stream against qnames
   * @param F the Monad the steps are lifted into
   */
  def onQNamesI[F[_]](qnames: List[QName])(implicit qe: Equal[QName], F: Monad[F]): ResumableIter[PullType, F, QNamesMatch] = {

    // fail fast, nothing below has been evaluated at this point
    if (qnames.isEmpty) error("Qnames is empty")

    /*
     * Previous implementation, kept for reference only.
     *
     * The pairs allow the depth of each element to be followed.  In particular this stops both descent and ascent problems in the
     * pushing and popping on the stack.  I.e. it covers the case where you have nested repeating QNames, both when you are looking for them
     * and when you are not.  Don't pop too early and don't incorrectly force a done.

    lazy val starter = Cont(step(Nil, (qnames.head, 0), qnames.tail.map((_, 0)), noXmlPath, false))

    def step(before: List[(QName, Int)], focus: (QName, Int), toGo: List[(QName, Int)], path: XmlPath, collecting: Boolean)(s: Input[PullType]): ResumableIter[PullType, F, QNamesMatch] = {
      iterateeT( Monad[F].point(
        s(el = {
            case Left(elem@Elem(q, a, n)) => {
              val nfocus =
                if (q === focus._1)
                  (focus._1, focus._2 + 1)
                else
                  focus

              val npath = addAndFocus(path, elem)

              val shouldCollect = collecting || (toGo.isEmpty && q === focus._1)

              Cont(
                // is it our head?
                if ((!toGo.isEmpty) && q === focus._1)
                  // move down
                  step(before :+ focus, toGo.head, toGo.tail, npath, false)
                else
                  // wait for a down
                  step(before, nfocus, toGo, npath, shouldCollect))
            }

            case Left(x: XmlItem) =>
              if (collecting) // collect
                Cont(step(before, focus, toGo, addChild(path, x), true))
              else
                Cont(step(before, focus, toGo, path, false)) // don't collect

            case Right(EndElem(q, n)) =>

              if (q === focus._1) {
                val ncfocus = (focus._1, focus._2 - 1)

                if (toGo.isEmpty && ncfocus._2 == 0) // we are popping to the selected level
                  Done(((qnames, Some(path)),
                    iterateeT( Monad[F].point( Cont(step(before, ncfocus, toGo,
                      // remove all children on the next iteration
                      path.removeAndUp.getOrElse(noXmlPath), false))))), Empty[PullType])
                else {
                  if (before.isEmpty)
                    starter // only when the root is asked for, could just refuse that of course?
                  else {
                    if (collecting)
                      // we are collecting but we still have more than 0 repeated qnames deep
                      Cont(step(before, ncfocus, toGo, path.zipUp, true))
                    else {
                      // we aren't collecting but we are moving up, we just have repeated names
                      val nfocus = before.last
                      val nbefore = before.dropRight(1)
                      Cont(step(nbefore, nfocus, focus :: toGo,
                        path.removeAndUp.getOrElse(noXmlPath), false // we have NOT been collecting
                        ))
                    }
                  }
                }
              } else {
                Cont(step(before, focus, toGo,
                  if (collecting) // empty is not enough, it should also be definitely collecting
                    path.zipUp
                  else
                    path.removeAndUp.getOrElse(noXmlPath), collecting))
              }

          },
          empty = Cont(step(before, focus, toGo, path, false)),
          eof = Done(((qnames, None), iterateeT( Monad[F].point(starter) )), Eof[PullType])
        )
      ))
    }
     */

    // initial state: no element open, empty path, not collecting
    lazy val starter = Cont(step(Nil, noXmlPath, false))

    /*
     * One step of the state machine.
     *   before     - QNames of the currently open elements, outermost first
     *   path       - the path built so far
     *   collecting - true while inside an element whose QName chain matched qnames
     */
    def step(before: List[QName], path: XmlPath, collecting: Boolean)(s: Input[PullType]): ResumableIter[PullType, F, QNamesMatch] = {
      iterateeT( Monad[F].point(
        s(el = {
          case Left(elem@Elem(q, a, n)) => {
            val npath = addAndFocus(path, elem)

            // start collecting when this element completes the requested chain of QNames
            val shouldCollect = collecting || (
              before.size == (qnames.size - 1) &&
              // eval only when needed
              (before :+ q).zip(qnames).forall(p => p._1 === p._2)
            )

            Cont( step(before :+ q, npath, shouldCollect) )
          }

          case Left(x: XmlItem) =>
            // items are only kept while collecting
            Cont(step(before,
              if (collecting)
                addChild(path, x)
              else
                path, collecting))

          case Right(EndElem(q, n)) =>
            // is this the end path ?  We have to re-verify the path, do so only when needed but stops same height (but not same path)
            // and nested repeat issues
            val haveCollected = collecting && before.size == qnames.size && before.zip(qnames).forall(p => p._1 === p._2)

            if (haveCollected)
              // done with path: hand it out as is, collecting stops for the continuation
              Done(((qnames, Some(path)),
                iterateeT( Monad[F].point( Cont(step(before.dropRight(1),
                  // remove all children on the next iteration
                  path.removeAndUp.getOrElse(noXmlPath), false))))), Empty[PullType])
            else {
              val npath =
                if (collecting) {
                  // closing a nested element inside the collected one, keep it and move up
                  path.zipUp()
                } else {
                  // not of interest, drop the element while moving up
                  path.removeAndUp().getOrElse(noXmlPath)
                }

              Cont(step(before.dropRight(1), npath, collecting))
            }

          },
          // An Empty input carries no event, so the complete state is kept.  Resetting the
          // collecting flag here would throw away a match that is in progress.
          empty = Cont(step(before, path, collecting)),
          eof = Done(((qnames, None), iterateeT( Monad[F].point(starter) )), Eof[PullType])
        )
      ))
    }

    iterateeT( Monad[F].point(starter ))
  }

  /** Result of skip / skipv: the path to the element that was reached, or None. */
  type PeekMatch = Option[XmlPath]

  /**
   * Varargs flavour of skip: the indexes are gathered into a List and handed to skip.
   */
  def skipv[F[_]](downTo: Int*)(implicit F: Monad[F]): IterateeT[PullType, F, PeekMatch] = {
    skip[F](downTo.toList)(F)
  }

  /**
   * Skips events until the element addressed by downTo is opened.  Each entry of downTo is
   * the position of an element among the elements of its parent, counted from 1; the
   * first element of the stream is always taken as position 1 of the top level, so
   * skip(List(b, c)) stops at the c-th element inside the b-th element inside the root.
   *
   * This can be used, for example, to identify qnames within a message and combined with capture to allow replaying.
   * Identifying a soap doc-lit request would be skip(List(2,1)).
   *
   * It returns the XmlPath to the skipped position, for soap /Envelope/Body/Request, but does not
   * collect the contents of that node.  None is returned (with Eof) when the input ends first or
   * when the element count on a tracked level has passed the requested index.
   * An empty list will simply return the first Element found.
   */
  def skip[F[_]](downTo: => List[Int])(implicit F: Monad[F]): IterateeT[PullType, F, PeekMatch] = {

    // shared "nothing found" result, used for Eof and for overshooting the wanted index
    lazy val notFound: StepT[PullType, F, PeekMatch] = Done(None, Eof[PullType])

    /*
     *   matched   - indexes already matched, outermost first
     *   counters  - element counters per open level, innermost first
     *   remaining - indexes still to be matched, next one first
     *   at        - path to the currently open element
     */
    def walk(matched: List[Int], counters: List[Int], remaining: List[Int], at: XmlPath)(in: Input[PullType]): IterateeT[PullType, F, PeekMatch] =
      iterateeT(F.point(
        in(el = {

            case Left(elem@Elem(_, _, _)) =>
              // only built when a branch actually needs the path
              lazy val opened = addAndFocus(at, elem)
              val counted = (counters.head + 1) :: counters.tail
              val wanted = remaining.head
              val isWanted = wanted == counted.head
              // true when this element sits on the level the next index refers to
              val trackedLevel = counters.size == matched.size + 1

              if (trackedLevel && remaining.size == 1 && isWanted)
                Done(Some(opened), Empty[PullType])
              else if (trackedLevel && counted.head > wanted)
                notFound
              else if (trackedLevel && isWanted)
                // one index consumed, descend
                Cont(walk(matched :+ wanted, 0 :: counted, remaining.tail, opened))
              else
                Cont(walk(matched, 0 :: counted, remaining, opened))

            // items never change the position
            case Left(_: XmlItem) =>
              Cont(walk(matched, counters, remaining, at))

            // moving up again, nothing is collected so the closed element is dropped from the path
            case Right(EndElem(_, _)) =>
              def parent = at.removeAndUp().getOrElse(noXmlPath)

              if (counters.nonEmpty && counters.size == matched.size + 1)
                // leaving a level that consumed an index, give the index back
                Cont(walk(matched.dropRight(1), counters.tail, matched.last :: remaining, parent))
              else
                Cont(walk(matched, counters.tail, remaining, parent))

          },
          empty = Cont(walk(matched, counters, remaining, at)),
          eof = notFound
          )
      ))

    iterateeT(F.point(Cont(walk(Nil, List(0), 1 :: downTo, noXmlPath))))
  }

}

/**
 * Iterates over a path of QNames producing XmlPaths for a given Iterator[PullType].
 *
 * Mutable, Iterator based counterpart of PullIteratees.onQNamesI: an element is collected
 * when the chain of QNames of the open elements equals path, and each collected element is
 * returned from next.  The first result is already searched for during construction, which
 * consumes events from xml.
 *
 * @param path the QNames leading to the elements to collect; an empty list is rejected via error(...)
 * @param xml the pull events to search
 * @param qe equality used to compare QNames (the default ScalesXml.qnameEqual is masked by the import below)
 */
class Iterate(path: List[QName], xml: Iterator[PullType])(implicit qe: Equal[QName]) extends FlatMapIterator[XmlPath] {
  import ScalesXml.{qnameEqual => _, _}
  import ScalesUtils._
  import ximpl.TreeProxies
  val qnames: List[QName] = path

  if (qnames.isEmpty) error("QNames is empty")

  /* see onQName for implementation basis */

  // QNames of the currently open elements, outermost first
  var before: List[QName] = _
  // builder holding the tree under construction
  var proxies: TreeProxies = new TreeProxies()
  // true while inside an element whose QName chain matched qnames
  var collecting: Boolean = _

  /** Puts the iterator state back to: no open elements, proxies.reuse, not collecting. */
  def reset: Unit = {
    set(Nil, proxies.reuse, false)
  }

  reset

  /** Replaces the complete mutable state in one go. */
  def set(before: List[QName], proxies: TreeProxies, collecting: Boolean): Unit = {
    this.before = before
    this.proxies = proxies
    this.collecting = collecting
  }

  def getNext: XmlPath = step
  // look-ahead element, null once xml is exhausted without a further match
  var cur: XmlPath = getNext

  def hasNext: Boolean = cur ne null
  def next: XmlPath = {
    val t = cur
    cur = getNext
    t
  }

  /**
   * Consumes events until the next matching element has been closed.
   *
   * @return the path of the collected element, or null when xml ran out of events first
   */
  def step : XmlPath = {
    var res : XmlPath = null.asInstanceOf[XmlPath]
    while(xml.hasNext && res == null) {
      val e = xml.next
      e match {

        case Left(elem@Elem(q, a, n)) =>
          proxies.beginSub(elem, XmlBuilder())

          // The full chain including q has to equal qnames.  Zipping against a shortened
          // qnames would leave q unchecked, so any child of a matching parent would start a
          // collection that is never ended by the close check below.
          val shouldCollect = collecting || (
            before.size == (qnames.size - 1) &&
            // eval only when needed
            (before :+ q).zip(qnames).forall(p => p._1 === p._2)
          )
          set(before :+ q, proxies, shouldCollect)

        case Left(x: XmlItem) =>
          // items are only kept while collecting
          if (collecting) {
            proxies.addChild(x)
          }
          set(before, proxies, collecting)

        case Right(EndElem(q, n)) =>

          // is this the end path ?  We have to re-verify the path, do so only when needed but stops same height (but not same path)
          // and nested repeat issues
          val haveCollected = collecting && before.size == qnames.size && before.zip(qnames).forall(p => p._1 === p._2)
          if (haveCollected) {
            // NOTE(review): no explicit move up follows here, presumably proxyPath deals
            // with that - confirm against TreeProxies
            res = proxies.proxyPath
          } else
            if (collecting) {
              // closing a nested element inside the collected one
              proxies.elementEnd()
            } else {
              // not of interest, drop it while moving up
              proxies.proxyRemoveAndUp()
            }

          set(before.dropRight(1), proxies, collecting && !haveCollected)
      }
    }

    res
  }



  /*
   Previous iteratee based implementation, kept for reference only.

   val orig = withIter(xml)(onQNames(path))
   def getNext = {
   if (orig.hasNext) {
   val t = orig.next
   if (t._2.isDefined)
   (true, t._2)
   else (false, None)
   } else (false, None)
   }
   var cur = getNext
   def hasNext = cur._1 && cur._2.isDefined
   def next = {
   val t = cur._2
   cur = getNext
   t.get
   }
   */
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy