All downloads are free. The search and download features use the official Maven repository.

io.gatling.http.fetch.CssParser.scala Maven / Gradle / Ivy

The newest version!
/*
 * Copyright 2011-2024 GatlingCorp (https://gatling.io)
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *  http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package io.gatling.http.fetch

import scala.annotation.{ switch, tailrec }
import scala.util.matching.Regex

import io.gatling.http.client.uri.Uri
import io.gatling.http.util.HttpHelper

import com.typesafe.scalalogging.StrictLogging

/**
 * Best-effort extraction of URLs from CSS text: `url(...)` tokens found in inline styles and
 * `@import url(...)` rules found in stylesheets.
 *
 * This is a lightweight scanner, not a full CSS parser: string-form imports
 * (`@import "foo.css"`) are not extracted and escape sequences are not decoded.
 */
private[fetch] object CssParser extends StrictLogging {
  // Body of a `url(...)` token, captured as group 1. Alternatives are tried in order:
  //  1. a double-quoted string, 2. a single-quoted string (both may contain parentheses),
  //  3. anything up to the first ')' on the line.
  // A greedy `.*` would instead run up to the LAST ')' on the line and merge several
  // declarations, e.g. "url(a.png);transform:rotate(5deg)".
  private val UrlBody = """([ \t]*"[^"\r\n]*"[ \t]*|[ \t]*'[^'\r\n]*'[ \t]*|[^)\r\n]*)"""
  private val InlineStyleImageUrls = ("""url\(""" + UrlBody + """\)""").r
  private val StyleImportsUrls = ("""@import url\(""" + UrlBody + """\)""").r

  /** Extracts the target of every `url(...)` token found in `string`. */
  def extractInlineStyleImageUrls(string: CharSequence): Iterator[String] =
    extractUrls(string, InlineStyleImageUrls)

  /** Extracts the target of every `@import url(...)` rule found in `string`. */
  def extractStyleImportsUrls(string: CharSequence): Iterator[String] =
    extractUrls(string, StyleImportsUrls)

  // Runs `regex` over `string` and cleans up group 1 of each match with extractUrl;
  // matches whose body is not a usable URL are dropped.
  private def extractUrls(string: CharSequence, regex: Regex): Iterator[String] =
    regex.findAllIn(string).matchData.flatMap { m =>
      val raw = m.group(1)
      extractUrl(raw, 0, raw.length).toList
    }

  private val SingleQuoteEscapeChar = Some('\'')
  private val DoubleQuoteEscapeChar = Some('"')
  private val AtImportChars = "@import".toCharArray
  private val UrlStartChars = "url(".toCharArray

  /**
   * Extracts a URL from the `[start, end)` range of `string`, stripping surrounding whitespace
   * (space, tab, CR, LF) and one kind of surrounding quotes.
   *
   * @param string
   *   the text containing the candidate URL
   * @param start
   *   index of the first character of the candidate (inclusive)
   * @param end
   *   index just past the last character of the candidate (exclusive)
   * @return
   *   the cleaned URL, or `None` when the range is empty, contains only whitespace/quotes,
   *   starts with `#` (an anchor), or has inconsistent quotes (a second opening quote, or a
   *   trailing quote that does not match the opening one). A missing closing quote is tolerated.
   */
  def extractUrl(string: String, start: Int, end: Int): Option[String] =
    if (string.isEmpty || start >= end) {
      // nothing to extract; also prevents trimLeft from walking past `end`
      None
    } else {
      // the quote character that opened the url, if any
      var protectChar: Option[Char] = None
      var broken = false

      // skips leading whitespace and at most one opening quote
      @tailrec
      def trimLeft(cur: Int): Int =
        if (cur == end)
          cur
        else
          (string.charAt(cur): @switch) match {
            case ' ' | '\t' | '\r' | '\n' => trimLeft(cur + 1)
            case '\'' =>
              protectChar match {
                case None =>
                  protectChar = SingleQuoteEscapeChar
                  trimLeft(cur + 1)
                case _ =>
                  // a quote was already opened
                  broken = true
                  cur
              }
            case '"' =>
              protectChar match {
                case None =>
                  protectChar = DoubleQuoteEscapeChar
                  trimLeft(cur + 1)
                case _ =>
                  // a quote was already opened
                  broken = true
                  cur
              }
            case _ => cur
          }

      // skips trailing whitespace and closing quotes; a trailing quote is only legal
      // when it is the same character as the opening one
      @tailrec
      def trimRight(cur: Int, leftLimit: Int): Int =
        if (cur == leftLimit)
          cur
        else
          (string.charAt(cur - 1): @switch) match {
            case ' ' | '\t' | '\r' | '\n' => trimRight(cur - 1, leftLimit)
            case '\'' =>
              protectChar match {
                case `SingleQuoteEscapeChar` =>
                  trimRight(cur - 1, leftLimit)
                case _ =>
                  broken = true
                  cur
              }
            case '"' =>
              protectChar match {
                case `DoubleQuoteEscapeChar` =>
                  trimRight(cur - 1, leftLimit)
                case _ =>
                  broken = true
                  cur
              }
            case _ => cur
          }

      val trimmedStart = trimLeft(start)
      val trimmedEnd = trimRight(end, trimmedStart)

      if (!broken && trimmedStart != trimmedEnd) {
        if (string.charAt(trimmedStart) == '#')
          // anchors are not real urls
          None
        else
          Some(string.substring(trimmedStart, trimmedEnd))
      } else {
        // only log the candidate, `string` might be a whole stylesheet
        logger.debug(s"css url ${string.substring(start, end)} broken")
        None
      }
    }

  /**
   * Scans a stylesheet for `@import url(...)` rules and resolves their targets against the
   * stylesheet's own URI. Text inside `/* ... */` comments is ignored.
   *
   * @param cssURI
   *   the URI of the stylesheet, used as the base for resolving each imported URL
   * @param cssContent
   *   the stylesheet text
   * @return
   *   one [[CssResource]] per import whose URL could be extracted and resolved, in order of
   *   appearance
   */
  def extractResources(cssURI: Uri, cssContent: String): List[ConcurrentResource] = {
    val resources = collection.mutable.ArrayBuffer.empty[ConcurrentResource]

    var withinComment = false
    var withinImport = false
    var withinUrl = false
    var urlStart = 0

    // Checks that `chars` occurs in cssContent at index `i`. The first char is not
    // re-checked: the caller has already matched it.
    def charsMatch(i: Int, chars: Array[Char]): Boolean = {
      @tailrec
      def charsMatchRec(j: Int): Boolean =
        if (j == chars.length)
          true
        else if (cssContent.charAt(i + j) != chars(j))
          false
        else
          charsMatchRec(j + 1)

      i + chars.length <= cssContent.length && charsMatchRec(1)
    }

    var i = 0
    while (i < cssContent.length) {
      (cssContent.charAt(i): @switch) match {
        case '/' =>
          if (
            i < cssContent.length - 1 &&
            cssContent.charAt(i + 1) == '*'
          ) {
            // comment opening, skip the '*'
            withinComment = true
            i += 1
          } else if (
            i > 0 &&
            cssContent.charAt(i - 1) == '*'
          ) {
            // comment closing
            withinComment = false
          }

        case '@' if !withinComment && charsMatch(i, AtImportChars) =>
          withinImport = true
          i = i + AtImportChars.length

        case ';' if !withinComment && !withinUrl =>
          // end of statement: url(...) tokens of following rules are not imports
          withinImport = false

        case 'u' if !withinComment && withinImport && charsMatch(i, UrlStartChars) =>
          urlStart = i + UrlStartChars.length
          withinUrl = true
          // the increment at the end of the loop moves i onto urlStart itself,
          // so that the ')' of an empty "url()" is not skipped
          i = urlStart - 1

        case ')' if !withinComment && withinUrl =>
          for {
            url <- extractUrl(cssContent, urlStart, i)
            absoluteUri <- HttpHelper.resolveFromUriSilently(cssURI, url)
          } {
            resources += CssResource(absoluteUri)
          }
          // the url token is over whether or not it was usable; otherwise a later,
          // unrelated ')' would be extracted from the stale urlStart
          withinUrl = false
          withinImport = false

        case _ =>
      }

      i += 1
    }

    resources.toList
  }
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy