// com.xceptance.xlt.api.htmlunit.LightWeightPage (Maven / Gradle / Ivy)

/*
 * Copyright (c) 2005-2022 Xceptance Software Technologies GmbH
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.xceptance.xlt.api.htmlunit;

import java.nio.charset.Charset;
import java.nio.charset.StandardCharsets;

import org.apache.commons.lang3.StringUtils;

import com.gargoylesoftware.htmlunit.WebRequest;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.xceptance.common.util.RegExUtils;

/**
 * A simple page object for light-weight operations.
 * 
 * @author René Schwietzke (Xceptance Software Technologies GmbH)
 */
public class LightWeightPage
{
    /**
     * The web response handed to the constructor; returned as-is by {@link #getWebResponse()}.
     */
    private final WebResponse response;

    /**
     * The timer name handed to the constructor; returned by {@link #getTimerName()}.
     */
    private final String name;

    /**
     * The content character set, determined once in the constructor.
     */
    private final Charset charset;

    /**
     * Creates a light-weight page for the given web response and determines the content character set right away.
     * 
     * @param webResponse
     *            the web response backing this page
     * @param timerName
     *            the timer name to associate with this page
     */
    public LightWeightPage(final WebResponse webResponse, final String timerName)
    {
        this.response = webResponse;
        this.name = timerName;

        // must come last: the charset detection reads the response field assigned above
        this.charset = determineContentCharset();
    }

    /**
     * Returns the content of the web response as a string, requested with the character set reported by
     * {@link #getCharset()}.
     * 
     * @return the page content
     */
    public String getContent()
    {
        final Charset contentCharset = getCharset();

        return response.getContentAsString(contentCharset);
    }

    /**
     * Returns the status code reported by the underlying web response.
     * 
     * @return the status code of the response
     */
    public int getHttpResponseCode()
    {
        final int statusCode = response.getStatusCode();

        return statusCode;
    }

    /**
     * Returns the web response this page was created with.
     * 
     * @return the web response
     */
    public WebResponse getWebResponse()
    {
        return this.response;
    }

    /**
     * Returns the timer name this page was created with.
     * 
     * @return the timer name
     */
    public String getTimerName()
    {
        return this.name;
    }

    /**
     * Returns the name of the content character set.
     * 
     * @return the name of the content character set
     * @see #getCharset()
     */
    public String getContentCharset()
    {
        final String charsetName = this.charset.name();

        return charsetName;
    }

    /**
     * Returns the content character set as a {@link Charset} object.
     * 
     * @return the content character set
     * @see #getContentCharset()
     */
    public Charset getCharset()
    {
        return this.charset;
    }

    /**
     * Determines the content character set.
     * 
     * @return content character set
     */
    private Charset determineContentCharset()
    {
        if (response != null)
        {
            /*
             * TODO: I would love to replace all this code with a simple "response.getContentCharset()" as it is much
             * more elaborate and robust. Unfortunately, that method behaves (slightly) different. Maybe we can do this
             * with the next major version.
             */

            // 1st: get value of content-type response header
            String charsetName = getCharsetNameFromContentTypeHeader(response);
            if (StringUtils.isBlank(charsetName))
            {
                final String content = response.getContentAsString(StandardCharsets.ISO_8859_1);
                if (StringUtils.isNotBlank(content))
                {
                    // 2nd: get the encoding attribute from a potential  header (in case of XHTML)
                    charsetName = RegExUtils.getFirstMatch(content, "<\\?xml\\s[^>]*?encoding=\"([^\"]+)", 1);
                    if (StringUtils.isBlank(charsetName))
                    {
                        // 3rd: get declared charset from a content-type meta tag
                        charsetName = RegExUtils.getFirstMatch(content, "]*?content=\"[^\"]*?charset=([^\";]+)", 1);
                        if (StringUtils.isBlank(charsetName))
                        {
                            // 4th: get declared charset from a charset meta tag
                            charsetName = RegExUtils.getFirstMatch(content, "