All Downloads are FREE. Search and download functionalities are using the official Maven repository.

io.cequence.jinaapi.model.CrawlerSettings.scala Maven / Gradle / Ivy

package io.cequence.jinaapi.model

import io.cequence.wsclient.domain.EnumValue

case class CrawlerSettings(
  /**
   * Content Format You can control the level of detail in the response to prevent
   * over-filtering. The default pipeline is optimized for most websites and LLM input. enum:
   * markdown, html, text, screenshot, pageshot
   */
  contentFormat: Option[ContentFormat] = None,

  /**
   * Timeout Maximum time to wait for the webpage to load. Note that this is NOT the total time
   * for the whole end-to-end request.
   */
  timeout: Option[Int] = None,

  /**
   * Target Selector Provide a CSS selector to focus on a more specific part of the page.
   * Useful when your desired content doesn't show under the default settings.
   */
  targetSelector: Option[String] = None,

  /**
   * Wait For Selector Wait for a specific element to appear before returning. Useful when your
   * desired content doesn't show under the default settings.
   */
  waitForSelector: Option[String] = None,

  /**
   * Forward Cookie Our API server can forward your custom cookie settings when accessing the
   * URL, which is useful for pages requiring extra authentication. Note that requests with
   * cookies will not be cached.
   */
  forwardCookie: Option[String] = None,

  /**
   * Use a Proxy Server Our API server can utilize your proxy to access URLs, which is helpful
   * for pages accessible only through specific proxies.
   */
  proxyURL: Option[String] = None,

  /**
   * No Cache Our API server caches both Read and Search mode contents for a certain amount of
   * time. To bypass this cache, set this header to true.
   */
  noCache: Option[Boolean] = None,

  /**
   * Stream Mode Stream mode is beneficial for large target pages, allowing more time for the
   * page to fully render. If standard mode results in incomplete content, consider using
   * Stream mode.
   */
  streamMode: Option[Boolean] = None,

  /**
   * Browser Locale Control the browser locale to render the page. Lots of websites serve
   * different content based on the locale.
   */
  browserLocale: Option[String] = None
)

sealed trait ContentFormat extends EnumValue

object ContentFormat {
  case object markdown extends ContentFormat
  case object html extends ContentFormat
  case object text extends ContentFormat
  case object screenshot extends ContentFormat
  case object pageshot extends ContentFormat
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy