All Downloads are FREE. Search and download functionalities are using the official Maven repository.

spice.net.URLParser.scala Maven / Gradle / Ivy

package spice.net

import scala.util.Try

/**
  * Splits a textual URL into protocol, host, port, path, query parameters and fragment and
  * assembles the pieces into a `URL`, reporting rejected input as a `URLParseFailure`.
  */
object URLParser {
  /**
    * Parses `s` into a `URL`.
    *
    * @param s               the text to parse
    * @param validateTLD     when true, a top-level domain reported by the parsed URL must be accepted by
    *                        `TopLevelDomains.isValid`
    * @param defaultProtocol protocol used when `s` carries no explicit protocol
    * @return `Right` with the parsed URL, or `Left` describing why `s` was rejected
    */
  def apply(s: String,
            validateTLD: Boolean = true,
            defaultProtocol: Protocol = Protocol.Https): Either[URLParseFailure, URL] = {
    // Cheap pre-filter: a candidate needs a '.' or ':' somewhere and may not start with ':' or end with '.'.
    if ((s.contains('.') || s.contains(":")) && !s.startsWith(":") && !s.endsWith(".")) {
      val (protocolOption, stage1) = extractProtocol(s)
      val (hostSection, pathSection) = separateHostAndPath(stage1)
      val (host, port) = separateHostAndPort(hostSection)
      // The fragment is removed before the query so that a '?' inside the fragment is not read as a query.
      val (stage2, fragment) = extractFragment(pathSection)
      val (stage3, parameters) = extractParameters(stage2)
      val path = URLPath.parse(stage3)

      if (host.contains("..")) {
        Left(URLParseFailure(s"$s has an invalid host", URLParseFailure.InvalidHost))
      } else if (protocolOption.isEmpty && host.contains('@') && !host.contains(':')) {
        // No protocol, an '@' and no ':' in the host section: treated as an email address, not a URL.
        Left(URLParseFailure(s"$s appears to be an email address", URLParseFailure.EmailAddress, None))
      } else {
        val protocol = protocolOption.getOrElse(defaultProtocol)
        val url = URL(
          protocol = protocol,
          host = host,
          // Explicit port first, then the protocol's default, then -1 when neither is available.
          port = port.orElse(protocol.defaultPort).getOrElse(-1),
          path = path,
          parameters = parameters,
          fragment = fragment
        )

        // NOTE(review): `url.ip` presumably recognises IP literals (e.g. IPv6) - confirm against URL.
        if (url.ip.isEmpty && url.host.count(_ == ':') > 1) {
          Left(URLParseFailure(s"Invalid host: ${url.host}", URLParseFailure.InvalidHost))
        } else if (validateTLD) {
          url.tld match {
            case Some(tld) if !TopLevelDomains.isValid(tld) => Left(URLParseFailure(s"Invalid top-level domain: [$tld]", URLParseFailure.InvalidTopLevelDomain))
            case _ => Right(url)
          }
        } else {
          Right(url)
        }
      }
    } else {
      Left(URLParseFailure(s"$s is not a valid URL", URLParseFailure.QuickFail))
    }
  }

  /** True for the characters that end the host section of a URL: '/', '?' and '#'. */
  private def isAuthorityTerminator(c: Char): Boolean = c == '/' || c == '?' || c == '#'

  /**
    * Splits a leading protocol off `s`.
    *
    * A "://" only counts as the protocol delimiter when no '/', '?' or '#' appears before it, so a URL
    * embedded in a path, query or fragment (e.g. `example.com/go?u=http://other`) is not mistaken for
    * the protocol of the outer URL. A leading "//" (protocol-relative form) yields `Protocol.Https`.
    *
    * @return the protocol, if one was found, and the remainder of `s` after it
    */
  def extractProtocol(s: String): (Option[Protocol], String) = {
    val index = s.indexOf("://")
    // For a genuine protocol prefix the first terminator is the '/' directly after the ':'.
    if (index != -1 && s.indexWhere(isAuthorityTerminator) > index) {
      val content = s.substring(0, index)
      val protocol = Protocol(content)
      (Some(protocol), s.substring(index + 3))
    } else if (s.startsWith("//")) {
      (Some(Protocol.Https), s.substring(2))
    } else {
      (None, s)
    }
  }

  /**
    * Splits `s` at the first '/', '?' or '#', whichever occurs earliest.
    *
    * @return the host section and the remainder; the remainder keeps its leading delimiter and is
    *         empty when `s` contains none of the three characters
    */
  def separateHostAndPath(s: String): (String, String) = s.indexWhere(isAuthorityTerminator) match {
    case -1 => (s, "")
    case index => s.splitAt(index)
  }

  /**
    * Splits a trailing `:port` off `s`.
    *
    * The text after the last ':' must parse as an `Int`; otherwise `s` is returned unchanged with no
    * port, leaving any ':' in the host for the caller to judge.
    *
    * @return the host and the port, if one was found
    */
  def separateHostAndPort(s: String): (String, Option[Int]) = s.lastIndexOf(':') match {
    case -1 => (s, None)
    case index =>
      Try(s.substring(index + 1).toInt).toOption match {
        case Some(port) => (s.substring(0, index), Some(port))
        case None => (s, None)
      }
  }

  /**
    * Splits `s` at its first '#'.
    *
    * @return the text before the '#' and the fragment after it (without the '#'), or `s` and `None`
    *         when there is no '#'
    */
  def extractFragment(s: String): (String, Option[String]) = s.indexOf('#') match {
    case -1 => (s, None)
    case index => (s.substring(0, index), Some(s.substring(index + 1)))
  }

  /**
    * Splits `s` at its first '?' and parses what follows with `Parameters.parse`.
    *
    * @return the text before the '?' and the parsed parameters, or `s` and `Parameters.empty` when
    *         there is no '?'
    */
  def extractParameters(s: String): (String, Parameters) = s.indexOf('?') match {
    case -1 => (s, Parameters.empty)
    case index => (s.substring(0, index), Parameters.parse(s.substring(index + 1)))
  }
}




© 2015 - 2025 Weber Informatics LLC | Privacy Policy