All Downloads are FREE. Search and download functionalities are using the official Maven repository.

fs2.data.pfsa.TreeQueryPipe.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2024 fs2-data Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs2
package data
package pfsa

import cats.effect.Concurrent
import cats.effect.std.Queue
import cats.syntax.all._
import cats.data.NonEmptyList

/** A pipe that allows abstract implementation of recursive queries on tree like structures.
  *
  * The pipe assumes a structure that has open and close tokens to describe the tree structure.
  * Every opening token is turned into a matchable element (see [[makeMatchingElement]]) which is
  * used to step through the provided deterministic automaton. Whenever a final state is reached,
  * a new sub stream is emitted, which receives the tokens of the matching sub tree.
  *
  * It is appropriate for implementing query languages such as XPath or JsonPath.
  *
  * @tparam F the effect type
  * @tparam T the type of tokens in the input stream
  * @tparam O the type of opening tokens
  * @tparam Matcher the type of matchers labelling the automaton transitions
  * @tparam Matchable the type of elements the automaton is stepped with
  * @param dfa the automaton representing the query
  */
private[data] abstract class TreeQueryPipe[F[_]: Concurrent, T, O <: T, Matcher, Matchable](
    dfa: PDFA[Matcher, Matchable]) {

  /** Whether to emit open and close tags on new match. */
  val emitOpenAndClose: Boolean = true

  /** Creates the element to match given an opening token. */
  def makeMatchingElement(open: O): Matchable

  /** Specializes the token type to the opening ones. */
  def isOpen(tok: T): Option[O]

  /** Specializes the token type to the closing ones. */
  def isClose(tok: T): Boolean

  /** Extractor for opening tokens, based on [[isOpen]]. */
  private object Open {
    def unapply(tok: T): Option[O] = isOpen(tok)
  }

  /** Extractor for closing tokens, based on [[isClose]]. */
  private object Close {
    def unapply(tok: T): Boolean = isClose(tok)
  }

  /** Consumes the input token by token, emitting one sub stream per new match.
    *
    * @param chunk the chunk currently being processed
    * @param idx the index of the next token to read in `chunk`
    * @param rest the input remaining after `chunk`
    * @param maxMatch the number of matches that can still be emitted
    * @param maxNest the nesting budget: decremented for each new match, and
    *                given back when the matching sub stream is closed. A new match
    *                is only emitted if it is non negative
    * @param depth the current depth in the tree (number of currently open elements)
    * @param queues the queues feeding the currently open sub streams, most recent first,
    *               together with the depth at which they were created
    * @param resetting whether an ancestor of the current position was a mismatch,
    *                  in which case no new match is emitted
    * @param q the stack of automaton states, one per open element (plus the initial one),
    *          each paired with the value `resetting` had when the element was opened
    */
  private def go(chunk: Chunk[T],
                 idx: Int,
                 rest: Stream[F, T],
                 maxMatch: Int,
                 maxNest: Int,
                 depth: Int,
                 queues: List[(Int, Queue[F, Option[T]])],
                 resetting: Boolean,
                 q: NonEmptyList[(Int, Boolean)]): Pull[F, Stream[F, T], Unit] =
    if (idx >= chunk.size) {
      rest.pull.uncons.flatMap {
        case Some((hd, tl)) => go(hd, 0, tl, maxMatch, maxNest, depth, queues, resetting, q)
        case None           =>
          // end of input: if some elements were never closed (e.g. truncated input),
          // terminate the corresponding sub streams, otherwise their consumers would
          // wait forever for the terminating `None`
          Pull.eval(queues.traverse_(_._2.offer(none)))
      }
    } else {
      chunk(idx) match {
        case tok @ Close() =>
          // upon reading a closing tag, close every sub stream that matches this depth
          // return to the state corresponding to the previous depth, and reset to the
          // previous resetting state
          // queues created by the element being closed were registered at `depth - 1`
          val (currentDepth, ancestors) = queues.span(_._1 == depth - 1)
          Pull.eval((if (emitOpenAndClose) queues else ancestors).traverse_(_._2.offer(tok.some))) >> Pull.eval(
            currentDepth.traverse_(_._2.offer(none))) >> go(
            chunk,
            idx + 1,
            rest,
            maxMatch,
            maxNest + currentDepth.size,
            depth - 1,
            ancestors,
            // the head of the stack was pushed when the element being closed was opened,
            // and carries the resetting state that was in effect right before that
            q.head._2,
            NonEmptyList.fromList(q.tail).getOrElse(NonEmptyList.one((dfa.init, false)))
          )
        case Open(tok) =>
          // on an opening token, check if we can transition from it in the current state
          dfa.step(q.head._1, makeMatchingElement(tok)) match {
            case Some(q1) =>
              // there is a transition, which means this is a match
              // however, if we are currently resetting (i.e. an ancestor was a mismatch)
              // this is not a new match, and we just forward the tokens to the currently
              // open down streams but we do not create a new one
              val updateQueues =
                if (!resetting && maxMatch > 0 && maxNest >= 0 && dfa.finals.contains(q1)) {
                  // this is a new match, spawn a new down stream
                  Pull.eval(Queue.unbounded[F, Option[T]]).flatMap { queue =>
                    Pull
                      .output1(Stream.fromQueueNoneTerminated(queue, 1))
                      .as(((depth, queue) :: queues, maxMatch - 1, maxNest - 1))
                  }
                } else {
                  Pull.pure((queues, maxMatch, maxNest))
                }
              // in the end, push the new state corresponding to this depth, together with the current resetting state
              updateQueues
                .evalMap { case (newQueues, newMaxMatch, newMaxNest) =>
                  // when open and close tokens are not emitted, skip the queue(s) just created at this depth
                  (if (emitOpenAndClose) newQueues else newQueues.dropWhile(_._1 == depth))
                    .traverse_(_._2.offer(tok.some))
                    .as((newQueues, newMaxMatch, newMaxNest))
                }
                .flatMap { case (newQueues, newMaxMatch, newMaxNest) =>
                  go(chunk,
                     idx + 1,
                     rest,
                     newMaxMatch,
                     newMaxNest,
                     depth + 1,
                     newQueues,
                     resetting,
                     (q1, resetting) :: q)
                }
            case None =>
              // the opening token is a mismatch, no transition exists for it
              // enter in resetting mode for descendants
              Pull.eval(queues.traverse_(_._2.offer(tok.some))) >>
                go(chunk, idx + 1, rest, maxMatch, maxNest, depth + 1, queues, true, (q.head._1, resetting) :: q)
          }
        case tok =>
          // internal tokens are just forwarded to current match down streams
          Pull.eval(queues.traverse_(_._2.offer(tok.some))) >>
            go(chunk, idx + 1, rest, maxMatch, maxNest, depth, queues, resetting, q)
      }
    }

  /** Emits one sub stream of tokens per match of the query in the input.
    *
    * @param maxMatch the maximum number of matches to emit
    * @param maxNest the maximum number of matches that can be nested within an open match
    * @param s the input token stream
    */
  final def raw(maxMatch: Int, maxNest: Int)(s: Stream[F, T]): Stream[F, Stream[F, T]] =
    go(Chunk.empty, 0, s, maxMatch, maxNest, 0, Nil, false, NonEmptyList.one((dfa.init, false))).stream

  /** Emits the tokens of at most one match, without nested matches. */
  final def first(s: Stream[F, T]): Stream[F, T] =
    raw(1, 0)(s).parJoinUnbounded

  /** Emits the tokens of all the matches that are not nested within another open match. */
  final def topmost(s: Stream[F, T]): Stream[F, T] =
    raw(Int.MaxValue, 0)(s).parJoinUnbounded

  /** Applies `f` to each matching sub stream and emits the results.
    *
    * @param s the input token stream
    * @param f the function evaluated on each matching sub stream
    * @param deterministic whether to evaluate with `parEvalMapUnbounded` (when `true`)
    *                      or with `parEvalMapUnordered` (when `false`)
    * @param maxMatch the maximum number of matches to process
    * @param maxNest the maximum number of matches that can be nested within an open match
    */
  final def aggregate[U](s: Stream[F, T],
                         f: Stream[F, T] => F[U],
                         deterministic: Boolean,
                         maxMatch: Int,
                         maxNest: Int): Stream[F, U] =
    if (deterministic)
      s.through(raw(maxMatch, maxNest)).parEvalMapUnbounded(f)
    else
      s.through(raw(maxMatch, maxNest)).parEvalMapUnordered(Int.MaxValue)(f)

}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy