All downloads are free. The search and download features use the official Maven repository.

com.marekkadek.scraper.htmlunit.browser.scala Maven / Gradle / Ivy

The newest version!
package com.marekkadek.scraper.htmlunit

import java.net.URL
import java.util.UUID

import com.gargoylesoftware.htmlunit._
import com.marekkadek.scraper.proxy._
import com.marekkadek.scraper._
import fs2.util._

// todo: very(!!!) experimental
/** [[Browser]] implementation that loads pages through HtmlUnit.
  *
  * Every call to `fromUrl` builds its own `WebClient`, routed through `proxySettings` when one is
  * set. Instances are created through the companion object; the constructor is private.
  *
  * @param proxySettings  optional HTTP proxy whose host and port are handed to each `WebClient`
  * @param browserVersion HtmlUnit browser profile handed to each `WebClient`
  * @param FI             effect instance whose `delay` wraps the page load
  */
sealed class HtmlUnitBrowser[F[_]] private (val proxySettings: Option[HttpProxy],
                                            browserVersion: BrowserVersion)(implicit FI: Effect[F])
    extends Browser[F] {

  /** Loads `url` with a GET request and returns the window it was loaded into as a [[Document]].
    *
    * The whole load, including parsing `url` into a `java.net.URL`, happens inside `FI.delay`.
    *
    * NOTE(review): the `WebClient` created here is never closed in this method; whether
    * `HtmlUnitDocument` releases it is not visible from this file -- confirm.
    *
    * @param url address of the page to load
    * @return the loaded window wrapped in an `HtmlUnitDocument`, inside `F`
    */
  override def fromUrl(url: String): F[Document] = FI.delay {
    // Direct client when no proxy is configured, proxied client otherwise.
    val webClient = proxySettings.fold(new WebClient(browserVersion)) { proxy =>
      new WebClient(browserVersion, proxy.host, proxy.port)
    }

    // JavaScript errors on the page must not abort the load.
    webClient.getOptions.setThrowExceptionOnScriptError(false)

    // No explicit window name is passed (null); a random UUID is supplied as the default name.
    val targetWindow =
      webClient.openTargetWindow(webClient.getCurrentWindow, null, UUID.randomUUID().toString)

    val pageRequest = new WebRequest(new URL(url), HttpMethod.GET)
    pageRequest.setAdditionalHeader("Accept", "text/html,application/xhtml+xml,application/xml")
    pageRequest.setAdditionalHeader("Accept-Charset", "utf-8")

    // The returned page is discarded; the document wraps the window it was loaded into.
    webClient.getPage(targetWindow, pageRequest)
    HtmlUnitDocument(targetWindow)
  }
}

/** Factory methods for [[HtmlUnitBrowser]], whose constructor is private. */
object HtmlUnitBrowser {

  /** Creates a browser with no proxy configured.
    *
    * @param browserVersion HtmlUnit browser profile passed on to the browser
    */
  def apply[F[_]: Effect](browserVersion: BrowserVersion): HtmlUnitBrowser[F] =
    new HtmlUnitBrowser[F](proxySettings = None, browserVersion = browserVersion)

  /** Creates a browser configured with the given HTTP proxy.
    *
    * @param browserVersion HtmlUnit browser profile passed on to the browser
    * @param proxy          proxy stored as the browser's `proxySettings`
    */
  def apply[F[_]: Effect](browserVersion: BrowserVersion, proxy: HttpProxy): HtmlUnitBrowser[F] =
    new HtmlUnitBrowser[F](proxySettings = Some(proxy), browserVersion = browserVersion)
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy