All Downloads are FREE. Search and download functionalities are using the official Maven repository.

net.ruippeixotog.scalascraper.scraper.PolyHtmlExtractor.scala Maven / Gradle / Ivy

The newest version!
package net.ruippeixotog.scalascraper.scraper

import net.ruippeixotog.scalascraper.model.Element

/**
  * A polymorphic counterpart of [[HtmlExtractor]]: instead of having a fixed result type, the type of the extracted
  * content is computed from the type of the input [[net.ruippeixotog.scalascraper.model.Element]]s. CSS queries can be
  * attached to a `PolyHtmlExtractor` directly, and fixing the input `Element` type as `E` through `apply[E]` yields a
  * regular `HtmlExtractor`.
  */
trait PolyHtmlExtractor { self =>

  /**
    * The extracted content type, expressed as a type-level function of the input element type.
    *
    * @tparam E the type of the input elements
    */
  type Out[E <: Element]

  /**
    * Specializes this extractor to a concrete type of input elements.
    *
    * @tparam E the type of the input elements
    * @return the `HtmlExtractor` that results from fixing the input element type of this extractor as `E`.
    */
  def apply[E <: Element]: HtmlExtractor[E, Out[E]]

  /**
    * Builds an extractor that runs a CSS query on its `ElementQuery` inputs and hands the result over to this
    * extractor.
    *
    * @param cssQuery the CSS query to apply to input `ElementQuery` instances
    * @return a `PolyHtmlExtractor` with the same `Out` as this one, whose specializations are the specializations of
    *         this extractor with `cssQuery` applied to them through `HtmlExtractor#mapQuery`.
    */
  def mapQuery(cssQuery: String): PolyHtmlExtractor.Aux[Out] = new PolyHtmlExtractor {
    // The output type function is forwarded untouched to the enclosing extractor.
    type Out[T <: Element] = self.Out[T]

    // Specialize the enclosing extractor first, then attach the query to the resulting `HtmlExtractor`.
    def apply[T <: Element]: HtmlExtractor[T, Out[T]] = self.apply[T].mapQuery(cssQuery)
  }

  /**
    * DSL-style alias of `mapQuery`: applies a CSS query to `ElementQuery` inputs before passing them to this
    * extractor.
    *
    * @param cssQuery the CSS query to apply to input `ElementQuery` instances
    * @return the same `PolyHtmlExtractor` that `mapQuery(cssQuery)` returns.
    */
  // TODO try to find a way to make this a DSL extension method. If that really can't be done, consider deprecating
  // this DSL construction
  def apply(cssQuery: String): PolyHtmlExtractor.Aux[Out] = this.mapQuery(cssQuery)
}

/**
  * Companion of [[PolyHtmlExtractor]], holding the `Aux` type alias and the implicit conversion to `HtmlExtractor`.
  */
object PolyHtmlExtractor {

  /**
    * A `PolyHtmlExtractor` whose `Out` type member is fixed to `Out0`.
    *
    * @tparam Out0 the type of the extracted content as a function of the input elements
    */
  type Aux[Out0[E <: Element]] = PolyHtmlExtractor {
    type Out[E <: Element] = Out0[E]
  }

  /**
    * Implicitly converts a `PolyHtmlExtractor` to an `HtmlExtractor` by calling its `apply[E]` method.
    *
    * @param polyExtractor the polymorphic extractor to convert
    * @tparam E the type of the input elements
    * @return the `HtmlExtractor` obtained from `polyExtractor` by fixing the type of the input elements as `E`.
    */
  implicit def polyHtmlExtractorAsExtractor[E <: Element](
    polyExtractor: PolyHtmlExtractor
  ): HtmlExtractor[E, polyExtractor.Out[E]] =
    polyExtractor.apply[E]
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy