All downloads are free. Search and download functionality uses the official Maven repository.

fs2.data.xml.dom.TreeParser.scala Maven / Gradle / Ivy

/*
 * Copyright 2024 fs2-data Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package fs2
package data
package xml
package dom

import cats.syntax.all._

import scala.collection.mutable.ListBuffer

/** Error raised by [[TreeParser]] when the sequence of XML events cannot be
  * assembled into a tree: an event appears where it is not allowed, a closing
  * tag does not match the currently open element, a second doctype is
  * encountered, or the input ends before the current construct is complete.
  *
  * @param msg human readable description of the problem
  */
class XmlTreeException(msg: String) extends Exception(msg)

/** Assembles DOM nodes of type `Node` out of a stream of [[XmlEvent]]s.
  *
  * The private helpers all thread the same cursor through the input:
  *  - `chunk`: the chunk of events currently being read,
  *  - `idx`: index in `chunk` of the next event to read,
  *  - `rest`: the part of the stream that follows `chunk`.
  *
  * Each helper returns the updated cursor alongside its result so that the
  * caller can continue from where the helper stopped. Any event sequence that
  * does not fit the expected structure fails the pull with an
  * [[XmlTreeException]].
  *
  * @tparam F the effect type; the `RaiseThrowable` instance is required to raise errors
  * @tparam Node the type of the nodes emitted by [[pipe]] and [[elements]]
  */
class TreeParser[F[_], Node](implicit F: RaiseThrowable[F]) {

  // Only for bincompat: set exclusively by the secondary constructor below and
  // read exclusively by the deprecated parameterless `pipe`. It stays unset
  // (null) when the instance is created through the primary constructor.
  private var legacyBuilder: DocumentBuilder[Node] = _

  /** Legacy constructor, kept so that code compiled against the older signature
    * (which received the builder at construction time) keeps linking.
    */
  private[dom] def this(F: RaiseThrowable[F], builder: DocumentBuilder[Node]) = {
    this()(F)
    legacyBuilder = builder
  }

  /** Reads the event at the cursor and moves the cursor past it.
    *
    * Fails with an [[XmlTreeException]] if the input is exhausted.
    *
    * @return the event together with the cursor pointing right after it
    */
  private def next(chunk: Chunk[XmlEvent],
                   idx: Int,
                   rest: Stream[F, XmlEvent]): Pull[F, Nothing, (XmlEvent, Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
    peek(chunk, idx, rest).map { case (evt, chunk, idx, rest) => (evt, chunk, idx + 1, rest) }

  /** Reads the event at the cursor without consuming it.
    *
    * When `idx` is past the end of `chunk`, the next chunk is pulled from
    * `rest` and reading restarts at its first element. Fails with an
    * [[XmlTreeException]] if `rest` has no more chunks.
    *
    * @return the event together with the cursor pointing '''at''' that event
    *         (callers that decide to consume it must use `idx + 1`)
    */
  private def peek(chunk: Chunk[XmlEvent],
                   idx: Int,
                   rest: Stream[F, XmlEvent]): Pull[F, Nothing, (XmlEvent, Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
    if (idx >= chunk.size) {
      rest.pull.uncons.flatMap {
        case Some((hd, tl)) => peek(hd, 0, tl)
        case None           => Pull.raiseError(new XmlTreeException("unexpected end of input"))
      }
    } else {
      Pull.pure((chunk(idx), chunk, idx, rest))
    }

  /** Consumes the next event and checks that it is equal to `evt`.
    *
    * Fails with an [[XmlTreeException]] naming the offending event otherwise.
    *
    * @param evt the event that must come next
    * @return the cursor pointing right after the consumed event
    */
  private def expect(evt: XmlEvent,
                     chunk: Chunk[XmlEvent],
                     idx: Int,
                     rest: Stream[F, XmlEvent]): Pull[F, Nothing, (Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
    next(chunk, idx, rest).flatMap {
      case (`evt`, chunk, idx, rest) => Pull.pure((chunk, idx, rest))
      case (other, _, _, _)          => Pull.raiseError(new XmlTreeException(s"unexpected event '$other'"))
    }

  /** Reads everything that may precede the root element of a document.
    *
    * An XML declaration is consumed if it is the very first event. After that,
    * doctype, comment and processing instruction events are consumed in any
    * order until some other event shows up; that event is left unconsumed.
    * A second doctype fails the pull with an [[XmlTreeException]].
    *
    * @return the optional XML declaration, the optional doctype, the
    *         miscellaneous nodes in encounter order, and the cursor pointing
    *         at the first event that is not part of the prolog
    */
  private def prolog(chunk: Chunk[XmlEvent], idx: Int, rest: Stream[F, XmlEvent])(implicit
      builder: DocumentBuilder[Node]): Pull[F,
                                            Nothing,
                                            (Option[XmlEvent.XmlDecl],
                                             Option[XmlEvent.XmlDoctype],
                                             List[builder.Misc],
                                             Chunk[XmlEvent],
                                             Int,
                                             Stream[F, XmlEvent])] =
    peek(chunk, idx, rest)
      .map {
        // the declaration is only recognized in first position
        case (decl @ XmlEvent.XmlDecl(_, _, _), chunk, idx, rest) =>
          (Some(decl), chunk, idx + 1, rest)
        case (_, chunk, idx, rest) =>
          (None, chunk, idx, rest)
      }
      .flatMap { case (decl, chunk, idx, rest) =>
        // loop state: cursor, doctype seen so far, accumulated misc nodes;
        // `asLeft` iterates once more, `asRight` ends the loop with the result
        (chunk, idx, rest, none[XmlEvent.XmlDoctype], new ListBuffer[builder.Misc]).tailRecM {
          case (chunk, idx, rest, doctype, misc) =>
            peek(chunk, idx, rest).flatMap {
              case (dt @ XmlEvent.XmlDoctype(_, _, _), chunk, idx, rest) =>
                doctype match {
                  case Some(_) => Pull.raiseError(new XmlTreeException("duplicate doctype"))
                  case None    => Pull.pure((chunk, idx + 1, rest, Some(dt), misc).asLeft)
                }
              case (XmlEvent.Comment(comment), chunk, idx, rest) =>
                // `++=` appends whatever the builder produced for the comment
                // (presumably an optional node, letting builders drop comments)
                Pull.pure((chunk, idx + 1, rest, doctype, misc ++= builder.makeComment(comment)).asLeft)
              case (XmlEvent.XmlPI(target, content), chunk, idx, rest) =>
                Pull.pure((chunk, idx + 1, rest, doctype, misc += builder.makePI(target, content)).asLeft)
              case (_, chunk, idx, rest) =>
                // not a prolog event: stop here without consuming it
                Pull.pure((decl, doctype, misc.result(), chunk, idx, rest).asRight)
            }
        }
      }

  /** Reads one complete element, from its start tag to the matching end tag.
    *
    * The event at the cursor must be a start tag. Children are then collected
    * until an end tag with the same name is found: nested elements are read
    * recursively, text-like events, comments and processing instructions are
    * turned into content through `builder`. Fails with an [[XmlTreeException]]
    * if the first event is not a start tag, if an end tag with a different
    * name is met, or if any other kind of event appears inside the element.
    *
    * @return the built element and the cursor pointing right after its end tag
    */
  private def element(chunk: Chunk[XmlEvent], idx: Int, rest: Stream[F, XmlEvent])(implicit
      builder: ElementBuilder): Pull[F, Nothing, (builder.Elem, Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
    next(chunk, idx, rest).flatMap {
      case (XmlEvent.StartTag(name, attrs, isEmpty), chunk, idx, rest) =>
        // loop state: cursor and children accumulated so far;
        // `asLeft` iterates once more, `asRight` ends the loop with the element
        (chunk, idx, rest, new ListBuffer[builder.Content]).tailRecM { case (chunk, idx, rest, children) =>
          peek(chunk, idx, rest).flatMap {
            case (XmlEvent.EndTag(`name`), chunk, idx, rest) =>
              Pull.pure((builder.makeElement(name, attrs, isEmpty, children.result()), chunk, idx + 1, rest).asRight)
            case (XmlEvent.EndTag(other), _, _, _) =>
              Pull.raiseError(new XmlTreeException(s"unexpected closing tag '$other'"))
            case (XmlEvent.StartTag(_, _, _), chunk, idx, rest) =>
              // the start tag was only peeked, so the recursive call consumes it itself
              element(chunk, idx, rest).map { case (node, chunk, idx, rest) =>
                (chunk, idx, rest, children += node).asLeft
              }
            case (texty: XmlEvent.XmlTexty, chunk, idx, rest) =>
              Pull.pure((chunk, idx + 1, rest, children += builder.makeText(texty)).asLeft)
            case (XmlEvent.Comment(comment), chunk, idx, rest) =>
              Pull.pure((chunk, idx + 1, rest, children ++= builder.makeComment(comment)).asLeft)
            case (XmlEvent.XmlPI(target, content), chunk, idx, rest) =>
              Pull.pure((chunk, idx + 1, rest, children += builder.makePI(target, content)).asLeft)
            case (evt, _, _, _) =>
              Pull.raiseError(new XmlTreeException(s"unexpected event '$evt'"))
          }
        }
      case (evt, _, _, _) =>
        Pull.raiseError(new XmlTreeException(s"unexpected event '$evt'"))
    }

  /** Reads the comments and processing instructions that follow the root
    * element, stopping at (and not consuming) the first event of another kind.
    *
    * @return the miscellaneous nodes in encounter order and the cursor
    *         pointing at the first event that is neither a comment nor a
    *         processing instruction
    */
  private def postlog(chunk: Chunk[XmlEvent], idx: Int, rest: Stream[F, XmlEvent])(implicit
      builder: DocumentBuilder[Node])
      : Pull[F, Nothing, (List[builder.Misc], Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
    (chunk, idx, rest, new ListBuffer[builder.Misc]).tailRecM { case (chunk, idx, rest, misc) =>
      peek(chunk, idx, rest).flatMap {
        case (XmlEvent.Comment(comment), chunk, idx, rest) =>
          Pull.pure((chunk, idx + 1, rest, misc ++= builder.makeComment(comment)).asLeft)
        case (XmlEvent.XmlPI(target, content), chunk, idx, rest) =>
          Pull.pure((chunk, idx + 1, rest, misc += builder.makePI(target, content)).asLeft)
        case (_, chunk, idx, rest) =>
          Pull.pure((misc.result(), chunk, idx, rest).asRight)
      }
    }

  /** Reads one complete document and emits the node built for it.
    *
    * The expected event sequence is: `StartDocument`, the prolog, the root
    * element, the trailing comments / processing instructions, `EndDocument`.
    * Anything else fails the pull with an [[XmlTreeException]].
    *
    * @return the cursor pointing right after the `EndDocument` event
    */
  private def document(chunk: Chunk[XmlEvent], idx: Int, rest: Stream[F, XmlEvent])(implicit
      builder: DocumentBuilder[Node]): Pull[F, Node, (Chunk[XmlEvent], Int, Stream[F, XmlEvent])] =
    next(chunk, idx, rest).flatMap {
      case (XmlEvent.StartDocument, chunk, idx, rest) =>
        prolog(chunk, idx, rest).flatMap { case (decl, doctype, prolog, chunk, idx, rest) =>
          element(chunk, idx, rest).flatMap { case (node, chunk, idx, rest) =>
            postlog(chunk, idx, rest).flatMap { case (postlog, chunk, idx, rest) =>
              expect(XmlEvent.EndDocument, chunk, idx, rest).flatMap { case (chunk, idx, rest) =>
                Pull
                  .output1(
                    builder.makeDocument(decl.map(_.version),
                                         decl.flatMap(_.encoding),
                                         decl.flatMap(_.standalone),
                                         doctype,
                                         prolog,
                                         node,
                                         postlog))
                  .as((chunk, idx, rest))
              }
            }
          }
        }
      case (evt, _, _, _) => Pull.raiseError(new XmlTreeException(s"unexpected event '$evt'"))
    }

  /** Legacy variant of [[pipe]] using the builder received by the secondary
    * constructor.
    */
  @deprecated("only retained for bincompat", "1.6.0")
  private[dom] def pipe: Pipe[F, XmlEvent, Node] = pipe(legacyBuilder)

  /** Turns a stream of XML events into a stream of documents.
    *
    * Documents are read one after the other until the input is exhausted; one
    * `Node` is emitted per document. An empty input yields an empty stream. The
    * resulting stream fails with an [[XmlTreeException]] as soon as the events
    * do not form a valid document.
    *
    * @param builder used to create the document and the nodes it contains
    */
  def pipe(implicit builder: DocumentBuilder[Node]): Pipe[F, XmlEvent, Node] = {
    def go(chunk: Chunk[XmlEvent], idx: Int, rest: Stream[F, XmlEvent]): Pull[F, Node, Unit] =
      if (idx >= chunk.size) {
        // current chunk exhausted: running out of input between two documents is a normal end
        rest.pull.uncons.flatMap {
          case Some((hd, tl)) => go(hd, 0, tl)
          case None           => Pull.done
        }
      } else {
        document(chunk, idx, rest).flatMap { case (chunk, idx, rest) => go(chunk, idx, rest) }
      }
    s => go(Chunk.empty, 0, s).stream
  }

  /** Turns a stream of XML events into a stream of elements.
    *
    * Elements are read one after the other until the input is exhausted; one
    * `Node` is emitted per top-level element (nested elements become part of
    * their parent). An empty input yields an empty stream. The resulting stream
    * fails with an [[XmlTreeException]] as soon as the events do not form a
    * valid element, which includes any event between two top-level elements
    * that is not a start tag.
    *
    * @param builder used to create the elements and their content
    */
  def elements(implicit builder: ElementBuilder.Aux[Node]): Pipe[F, XmlEvent, Node] = {
    def go(chunk: Chunk[XmlEvent], idx: Int, rest: Stream[F, XmlEvent]): Pull[F, Node, Unit] =
      if (idx >= chunk.size) {
        // current chunk exhausted: running out of input between two elements is a normal end
        rest.pull.uncons.flatMap {
          case Some((hd, tl)) => go(hd, 0, tl)
          case None           => Pull.done
        }
      } else {
        element(chunk, idx, rest).flatMap { case (elem, chunk, idx, rest) =>
          Pull.output1(elem) >> go(chunk, idx, rest)
        }
      }
    s => go(Chunk.empty, 0, s).stream
  }

}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy