All downloads are free. The search and download features use the official Maven repository.

net.ruippeixotog.scalascraper.dsl.ScrapingOps.scala Maven / Gradle / Ivy

The newest version!
package net.ruippeixotog.scalascraper.dsl

import net.ruippeixotog.scalascraper.model.{Element, ElementQuery}
import net.ruippeixotog.scalascraper.scraper.{HtmlExtractor, HtmlValidator}
import net.ruippeixotog.scalascraper.util._
import scala.util.Try

import scalaz._
import scalaz.syntax.FunctorSyntax

/** DSL operations for running extractors and validators over content that can be turned into an
  * `ElementQuery`, possibly wrapped in a functor.
  *
  * The operations become available on a value through the implicit [[deepFunctorOps]] conversion.
  */
trait ScrapingOps extends syntax.ToIdOps with std.AllInstances with IdInstances {

  /** Scraping operations on a value `self` of type `F[A]`.
    *
    * Every operation is applied through `F`'s `map`, so results are always wrapped in `F` again.
    *
    * @tparam F the functor wrapping the content
    * @tparam A the content type; it is converted with `toQuery` whenever an extractor or validator needs a query
    * @tparam E the element type of the query obtained from `A`
    * @param self the wrapped content to operate on
    * @param toQuery conversion from `A` to `ElementQuery[E]`
    */
  class ElementsScrapingOps[F[_]: Functor, A, E <: Element](val self: F[A])(implicit toQuery: ToQuery.Aux[A, E])
      extends FunctorSyntax[F] {

    /** The functor instance required by `FunctorSyntax`, taken from the context bound on `F`. */
    override def F: Functor[F] = Functor[F]

    // Lets the content (`doc`) be passed directly wherever an `ElementQuery[E]` is expected below.
    @inline implicit private[this] def aToQuery(a: A): ElementQuery[E] = toQuery(a)

    /** Result of the first validator in `errors` that matches `doc`, or `None` if none matches.
      *
      * Validators are tried in order and the search stops at the first match. `result.get` is called
      * unconditionally on the matching validator, so this assumes a matching validator carries a result.
      */
    private[this] def firstError[R](doc: A, errors: Seq[HtmlValidator[E, R]]): Option[R] =
      errors.find(_.matches(doc)).map(_.result.get)

    /** Applies `extractor` to the content inside `self`.
      *
      * @param extractor the extractor to run
      * @return the extracted value, wrapped in `F`
      */
    def extract[B](extractor: HtmlExtractor[E, B]): F[B] = self.map(extractor.extract(_))

    /** Alias for [[extract]]. */
    @inline final def apply[B](extractor: HtmlExtractor[E, B]): F[B] = extract(extractor)

    /** Symbolic alias for [[extract]]. */
    @inline final def >>[B](extractor: HtmlExtractor[E, B]): F[B] = extract(extractor)

    /** Applies two extractors to the same content and returns both results as a pair. */
    def >>[B, C](extractor1: HtmlExtractor[E, B], extractor2: HtmlExtractor[E, C]): F[(B, C)] =
      self.map { doc => (extractor1.extract(doc), extractor2.extract(doc)) }

    /** Applies three extractors to the same content and returns the results as a triple. */
    def >>[B, C, D](
        extractor1: HtmlExtractor[E, B],
        extractor2: HtmlExtractor[E, C],
        extractor3: HtmlExtractor[E, D]
    ): F[(B, C, D)] =
      self.map { doc => (extractor1.extract(doc), extractor2.extract(doc), extractor3.extract(doc)) }

    /** Applies `extractor` to the content inside `self`, turning a failed extraction into `None`.
      *
      * The extraction runs inside `scala.util.Try`; any exception captured by `Try` yields `None`.
      *
      * @param extractor the extractor to run
      * @return `Some` with the extracted value or `None` if the extraction threw, wrapped in `F`
      */
    def tryExtract[B](extractor: HtmlExtractor[E, B]): F[Option[B]] =
      self.map { doc => Try(extractor.extract(doc)).toOption }

    /** Alias for [[tryExtract]]. */
    @inline final def tryApply[B](extractor: HtmlExtractor[E, B]): F[Option[B]] = tryExtract(extractor)

    /** Symbolic alias for [[tryExtract]]. */
    @inline final def >?>[B](extractor: HtmlExtractor[E, B]): F[Option[B]] = tryExtract(extractor)

    /** Applies two extractors to the same content; each one independently yields `None` if it throws. */
    def >?>[B, C](extractor1: HtmlExtractor[E, B], extractor2: HtmlExtractor[E, C]): F[(Option[B], Option[C])] =
      self.map { doc => (Try(extractor1.extract(doc)).toOption, Try(extractor2.extract(doc)).toOption) }

    /** Applies three extractors to the same content; each one independently yields `None` if it throws. */
    def >?>[B, C, D](
        extractor1: HtmlExtractor[E, B],
        extractor2: HtmlExtractor[E, C],
        extractor3: HtmlExtractor[E, D]
    ): F[(Option[B], Option[C], Option[D])] =
      self.map { doc =>
        val e1 = Try(extractor1.extract(doc)).toOption
        val e2 = Try(extractor2.extract(doc)).toOption
        val e3 = Try(extractor3.extract(doc)).toOption
        (e1, e2, e3)
      }

    /** Keeps the content as a `Right` if `success` matches it, and yields `Left(())` otherwise.
      *
      * The type parameter `R` is unused; it is kept so existing call sites keep compiling.
      *
      * @param success the validator that must match for the content to be accepted
      */
    def successIf[R](success: HtmlValidator[E, _]): F[Either[Unit, A]] =
      self.map { doc => if (success.matches(doc)) Right(doc) else Left(()) }

    /** Yields `Left` with the result of `error` if it matches the content, and `Right(content)` otherwise.
      *
      * `error.result.get` is called when the validator matches, so this assumes a matching validator
      * carries a result.
      *
      * @param error the validator describing the error condition
      */
    def errorIf[R](error: HtmlValidator[E, R]): F[Either[R, A]] =
      self.map { doc => if (error.matches(doc)) Left(error.result.get) else Right(doc) }

    /** Yields `Left` with the result of the first validator in `errors` that matches the content, and
      * `Right(content)` if none matches.
      *
      * Validators are tried in order; later ones are not consulted once one has matched.
      *
      * @param errors the validators describing error conditions, in priority order
      */
    def errorIf[R](errors: Seq[HtmlValidator[E, R]]): F[Either[R, A]] =
      self.map { doc => firstError(doc, errors).toLeft(doc) }

    /** Validates the content against a success condition and a list of known error conditions.
      *
      * If `success` matches, the content is returned as a `Right`. Otherwise the result is always a
      * `Left`: the result of the first matching validator in `errors`, or `default` when none matches.
      *
      * @param success the validator that must match for the content to be accepted
      * @param errors the validators describing error conditions, in priority order
      * @param default the error value used when neither `success` nor any of `errors` matches; it is
      *   evaluated only in that case and by default throws a `ValidationException`
      */
    def validateWith[R](
        success: HtmlValidator[E, _],
        errors: Seq[HtmlValidator[E, R]],
        default: => R = throw new ValidationException
    ): F[Either[R, A]] =
      self.map { doc =>
        if (success.matches(doc)) Right(doc)
        else Left(firstError(doc, errors).getOrElse(default))
      }

    /** Symbolic alias for [[successIf]]. */
    @inline final def >/~[R](success: HtmlValidator[E, _]): F[Either[Unit, A]] = successIf(success)

    /** Symbolic alias for [[validateWith]] with a single error validator and the default error value. */
    @inline final def >/~[R](success: HtmlValidator[E, _], error: HtmlValidator[E, R]): F[Either[R, A]] =
      validateWith(success, error :: Nil)

    /** Symbolic alias for [[validateWith]] with the default error value. */
    @inline final def >/~[R](success: HtmlValidator[E, _], errors: Seq[HtmlValidator[E, R]]): F[Either[R, A]] =
      validateWith(success, errors)

    /** Symbolic alias for [[validateWith]] with a single error validator and an explicit default. */
    @inline final def >/~[R](
        success: HtmlValidator[E, _],
        error: HtmlValidator[E, R],
        default: R
    ): F[Either[R, A]] =
      validateWith(success, error :: Nil, default)

    /** Symbolic alias for [[validateWith]] with an explicit default. */
    @inline final def >/~[R](
        success: HtmlValidator[E, _],
        errors: Seq[HtmlValidator[E, R]],
        default: R
    ): F[Either[R, A]] =
      validateWith(success, errors, default)
  }

  /** Implicitly wraps `self` in an [[ElementsScrapingOps]].
    *
    * The `DeepFunctor` evidence supplies the functor `df.F`, its instance `df.f` and the view of `self`
    * as `df.F[A]` (`df.asF`); `conv` supplies the conversion from `A` to a query.
    *
    * @param self the value to enrich with scraping operations
    * @param df evidence decomposing `FA` into a functor applied to `A`
    * @param conv conversion from `A` to `ElementQuery[E]`
    */
  implicit def deepFunctorOps[FA, A, E <: Element](
      self: FA
  )(implicit df: DeepFunctor.AuxA[FA, A], conv: ToQuery.Aux[A, E]): ElementsScrapingOps[df.F, A, E] =
    new ElementsScrapingOps[df.F, A, E](df.asF(self))(df.f, conv)
}

/** Object form of the [[ScrapingOps]] trait, so its members can be imported instead of mixing the trait in. */
object ScrapingOps extends ScrapingOps




© 2015 - 2025 Weber Informatics LLC | Privacy Policy