All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.themillhousegroup.scoup.Scoup.scala Maven / Gradle / Ivy

There is a newer version: 1.0.0
Show newest version
package com.themillhousegroup.scoup

import org.jsoup.{ Connection, Jsoup }
import org.jsoup.nodes.Document
import scala.collection.JavaConverters._
import scala.concurrent.Future
import scala.concurrent.ExecutionContext.Implicits.global
import com.themillhousegroup.scoup.options.ScoupOptions
import org.jsoup.Connection.{ Response, Method }
import scala.collection.mutable
import java.net.URL

/**
 * Ready-to-use Scoup instance, wired to the real JSoup implementation
 * and a default-constructed set of ScoupOptions.
 */
object Scoup extends Scoup(impl = new RealJsoup(), scoupOptions = ScoupOptions())

/**
 * Future-based facade over the JSoup operations exposed by a [[JSoupProvider]].
 *
 * Instantiate a Scoup of your own if you want to set custom
 * options to be applied to all of its operations.
 *
 * @param impl         provider of the underlying `connect` / `parse` operations
 * @param scoupOptions options applied to each operation unless the caller passes its own
 */
class Scoup(impl: JSoupProvider = new RealJsoup(), scoupOptions: ScoupOptions = ScoupOptions()) {

  /**
   * Builds a fully-configured Connection for the given request. Nothing is sent here;
   * the caller decides when to execute it.
   */
  private def basicJsoup(url: String, options: ScoupOptions, withCookies: Map[String, String], method: Method, data: Map[String, String]): Connection = {
    impl
      .connect(url)
      .userAgent(options.userAgent)
      // NOTE(review): `.toInt` narrows the millisecond value; presumably timeouts never
      // approach Int.MaxValue ms - confirm against ScoupOptions.
      .timeout(options.timeout.toMillis.toInt)
      .cookies(withCookies.asJava)
      .ignoreContentType(options.ignoreContentType)
      .followRedirects(options.followRedirects)
      .ignoreHttpErrors(options.ignoreHttpErrors)
      .data(data.asJava)
      .method(method)
  }

  /**
   * Executes the request on the implicit execution context and yields the raw Response.
   *
   * The `execute` call is wrapped in `scala.concurrent.blocking` so that the execution
   * context is told the thread is about to block and can compensate, rather than having
   * its worker threads silently tied up by in-flight requests.
   */
  private def executeAsync(url: String, options: ScoupOptions, withCookies: Map[String, String], method: Method = Method.GET, data: Map[String, String] = Map()): Future[Response] = {
    Future {
      scala.concurrent.blocking {
        basicJsoup(url, options, withCookies, method, data).execute()
      }
    }
  }

  /** Pairs the parsed Document of a Response with an immutable copy of its cookies. */
  private def documentAndCookies(resp: Response): (Document, Map[String, String]) = {
    (resp.parse, resp.cookies.asScala.toMap)
  }

  /** Perform a GET on the URL, parsing the resulting Document */
  def parse(url: String, options: ScoupOptions = scoupOptions, withCookies: Map[String, String] = Map()): Future[Document] = {
    executeAsync(url, options, withCookies).map(_.parse)
  }

  /** Perform a GET on the URL, parsing the resulting Document and any cookies into the returned tuple */
  def parseWithCookies(url: String, options: ScoupOptions = scoupOptions, withCookies: Map[String, String] = Map()): Future[(Document, Map[String, String])] = {
    executeAsync(url, options, withCookies).map(documentAndCookies)
  }

  /** Perform a POST on the URL, sending `data`, and parsing the resulting Document */
  def parsePost(url: String, data: Map[String, String], options: ScoupOptions = scoupOptions, withCookies: Map[String, String] = Map()): Future[Document] = {
    executeAsync(url, options, withCookies, Method.POST, data).map(_.parse)
  }

  /** Perform a POST on the URL, sending `data`, and parsing the resulting Document and any cookies into the returned tuple */
  def parsePostWithCookies(url: String, data: Map[String, String], options: ScoupOptions = scoupOptions, withCookies: Map[String, String] = Map()): Future[(Document, Map[String, String])] = {
    executeAsync(url, options, withCookies, Method.POST, data).map(documentAndCookies)
  }

  /**
   * Perform a GET on the URL, and return the response body. I.E. you are just using Scoup/JSoup as a HTTP client :-)
   *
   * The supplied options are copied with `ignoreContentType = true` before the request is made.
   */
  def get(url: String, options: ScoupOptions = scoupOptions, withCookies: Map[String, String] = Map()): Future[String] = {
    val acceptNonHtmlOptions = options.copy(ignoreContentType = true)
    executeAsync(url, acceptNonHtmlOptions, withCookies).map(_.body)
  }

  /**
   * Perform a POST on the URL, and return the response body. I.E. you are just using Scoup/JSoup as a HTTP client :-)
   *
   * The supplied options are copied with `ignoreContentType = true` before the request is made.
   */
  def post(url: String, data: Map[String, String], options: ScoupOptions = scoupOptions, withCookies: Map[String, String] = Map()): Future[String] = {
    val acceptNonHtmlOptions = options.copy(ignoreContentType = true)
    executeAsync(url, acceptNonHtmlOptions, withCookies, Method.POST, data).map(_.body)
  }

  /** Parse the given HTML string into a Document, synchronously, via the configured provider. */
  def parseHTML(html: String): Document = {
    impl.parse(html)
  }
}

/**
 * Indirection to allow JSoup's static API to be mocked.
 *
 * Scoup talks to JSoup exclusively through this trait, so an alternative
 * implementation can be supplied through the Scoup constructor.
 */
private[scoup] trait JSoupProvider {

  /** Obtain a Connection targeting the given URL. */
  def connect(url: String): Connection

  /** Turn the given HTML text into a Document. */
  def parse(html: String): Document
}

/** JSoupProvider that forwards each call straight to the static methods on `Jsoup`. */
private[scoup] class RealJsoup extends JSoupProvider {

  /** Delegates to `Jsoup.connect`. */
  override def connect(url: String): Connection = {
    Jsoup.connect(url)
  }

  /** Delegates to `Jsoup.parse`. */
  override def parse(html: String): Document = {
    Jsoup.parse(html)
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy