All downloads are free. The search and download functionality uses the official Maven repository.

fs2.data.xml.xpath.package.scala Maven / Gradle / Ivy

/*
 * Copyright 2024 fs2-data Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs2
package data
package xml

import dom.ElementBuilder
import xpath.internals._
import pfsa.{PDFA, PNFA, Pred}
import Pred.syntax._

import cats.effect.Concurrent
import cats.syntax.all._
import scala.annotation.nowarn

package object xpath {

  /** Entry point to the XPath filtering pipes, partially applied to the effect type `F`. */
  def filter[F[_]]: PartiallyAppliedFilter[F] =
    new PartiallyAppliedFilter[F](dummy = true)

  /** Groups the XPath filtering pipes available for the effect type `F`.
    *
    * Instances are obtained through [[filter]]; the `dummy` constructor parameter
    * carries no information and only exists so that this can be a value class.
    */
  @nowarn
  final class PartiallyAppliedFilter[F[_]] private[xpath] (val dummy: Boolean) extends AnyVal {

    /** Selects all matching elements in the input stream, emitting each match as its own stream.
      * Matching happens in a streaming fashion and events are emitted as early as possible.
      * The match streams come out in the order in which they are encountered in the input,
      * i.e. in the order of the opening tags matching the query.
      *
      * `maxMatch` bounds the number of matches that are emitted.
      * Any further match is not emitted.
      *
      * `maxNest` bounds the level of match nesting that is emitted.
      * E.g., set it to `0` to only get the top most matches.
      *
      * '''Warning''': make sure you actually consume all the emitted streams, otherwise
      * this can lead to memory problems.
      */
    def raw(path: XPath, maxMatch: Int = Int.MaxValue, maxNest: Int = Int.MaxValue)(implicit
        F: Concurrent[F]): Pipe[F, XmlEvent, Stream[F, XmlEvent]] = { events =>
      val automaton = compileXPath(path)
      new XmlQueryPipe(automaton).raw(maxMatch, maxNest)(events)
    }

    /** Selects the first match only, where first means: the opening tag appears first in the
      * input, no matter the depth.
      * Tokens of that match are emitted as they are read from the input.
      *
      * Other results are gently discarded.
      */
    def first(path: XPath)(implicit F: Concurrent[F]): Pipe[F, XmlEvent, XmlEvent] = { events =>
      val automaton = compileXPath(path)
      new XmlQueryPipe(automaton).first(events)
    }

    /** Selects all matching elements in the input stream and builds an element DOM for each.
      *
      * When `deterministic` is `true` (the default), elements are emitted in the order they
      * appear in the input stream, i.e. first opening tag first.
      * When `deterministic` is `false`, built elements are emitted as soon
      * as possible (i.e. when the value is entirely built).
      *
      * `maxMatch` bounds the number of matches that are emitted.
      * Any further match is not emitted.
      *
      * `maxNest` bounds the level of match nesting that is emitted.
      * E.g., set it to `0` to only get the top most matches.
      */
    def dom[T](path: XPath, deterministic: Boolean = true, maxMatch: Int = Int.MaxValue, maxNest: Int = Int.MaxValue)(
        implicit
        F: Concurrent[F],
        builder: ElementBuilder.Aux[T]): Pipe[F, XmlEvent, T] = { events =>
      val automaton = compileXPath(path)
      new XmlQueryPipe(automaton)
        .aggregate(events,
                   matched => matched.through(xml.dom.elements).compile.toList,
                   deterministic,
                   maxMatch,
                   maxNest)
        // each match yields a list of built elements, which is flattened into the output
        .flatMap(built => Stream.emits(built))
    }

    /** Selects all matching elements in the input stream and feeds each match to the given [[fs2.Collector]].
      *
      * When `deterministic` is `true` (the default), results are emitted in the order the matches
      * appear in the input stream, i.e. first opening tag first.
      * When `deterministic` is `false`, results are emitted as soon
      * as possible (i.e. when the value is entirely built).
      *
      * `maxMatch` bounds the number of matches that are emitted.
      * Any further match is not emitted.
      *
      * `maxNest` bounds the level of match nesting that is emitted.
      * E.g., set it to `0` to only get the top most matches.
      */
    def collect[T](path: XPath,
                   collector: Collector.Aux[XmlEvent, T],
                   deterministic: Boolean = true,
                   maxMatch: Int = Int.MaxValue,
                   maxNest: Int = Int.MaxValue)(implicit F: Concurrent[F]): Pipe[F, XmlEvent, T] = { events =>
      val automaton = compileXPath(path)
      new XmlQueryPipe(automaton)
        .aggregate(events, matched => matched.compile.to(collector), deterministic, maxMatch, maxNest)
    }

  }

  /** Compiles an XPath into a deterministic automaton whose transitions are
    * guarded by [[LocationMatch]] predicates.
    *
    * A non-deterministic automaton is built first and then determinized. Each
    * alternative of the path (each element of `path.locations`) becomes a chain
    * of states starting at the shared initial state `0`.
    *
    * Every alternative is given its own fresh states. Sharing state numbers
    * between alternatives would let steps of different alternatives be mixed:
    * the union of `a` then `b` with `c` then `d` would also accept `a` then `d`,
    * and the union of `a` with `b` then `c` would also accept `b` alone.
    */
  private[data] def compileXPath(path: XPath): PDFA[LocationMatch, StartElement] = {
    // Translates an attribute predicate into the corresponding location matcher.
    def makePredicate(p: Predicate): LocationMatch =
      p match {
        case Predicate.True             => LocationMatch.True
        case Predicate.False            => LocationMatch.False
        case Predicate.Exists(attr)     => LocationMatch.AttrExists(attr)
        case Predicate.Eq(attr, value)  => LocationMatch.AttrEq(attr, value)
        case Predicate.Neq(attr, value) => LocationMatch.AttrNeq(attr, value)
        case Predicate.And(left, right) => makePredicate(left) && makePredicate(right)
        case Predicate.Or(left, right)  => makePredicate(left) || makePredicate(right)
        case Predicate.Not(inner)       => !makePredicate(inner)
      }

    // Builds the matcher of a single step: the node test combined with the optional predicate.
    def makeLocation(l: Location): LocationMatch =
      l match {
        case Location(_, n, p) =>
          val node: LocationMatch =
            n match {
              // no constraint on the node at all: anything matches
              case Node(None, None) => LocationMatch.True
              case _                => LocationMatch.Element(n)
            }
          node && p.map(makePredicate(_)).getOrElse(LocationMatch.True)
      }

    val initial = 0

    // The outer accumulator threads the next unused state number through all the
    // alternatives, together with the transitions and the accepting states built so far.
    val (_, transitions, finals) =
      path.locations.foldLeft((initial + 1, Map.empty[Int, List[(Option[LocationMatch], Int)]], Set.empty[Int])) {
        case ((firstFree, built, accepting), alternative) =>
          val (last, nextFree, extended) =
            alternative.foldLeft((initial, firstFree, built)) { case ((q, free, acc), l @ Location(axis, _, _)) =>
              val matches: Option[LocationMatch] = Some(makeLocation(l))
              val anything: Option[LocationMatch] = Some(LocationMatch.True)
              axis match {
                case Axis.Child =>
                  (free, free + 1, acc.combine(Map(q -> List((matches, free)))))
                case Axis.Descendant =>
                  if (q == initial) {
                    // The initial state is shared by all the alternatives, so a self-loop
                    // on it would make the other alternatives match at any depth as well.
                    // The loop is put on a skip state owned by this alternative instead.
                    val skip = free
                    val target = free + 1
                    val edges = List((matches, target), (anything, skip))
                    (target, target + 1, acc.combine(Map(initial -> edges, skip -> edges)))
                  } else {
                    // `q` belongs to this alternative only, it can safely loop on itself
                    (free, free + 1, acc.combine(Map(q -> List((matches, free), (anything, q)))))
                  }
              }
            }
          // the state reached after the last step of the alternative is accepting
          (nextFree, extended, accepting + last)
      }
    new PNFA(initial, finals, transitions).determinize
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy