All Downloads are FREE. Search and download functionalities are using the official Maven repository.

com.gargoylesoftware.htmlunit.DefaultPageCreator Maven / Gradle / Ivy

There is a newer version: 2.70.0
Show newest version
/*
 * Copyright (c) 2002-2011 Gargoyle Software Inc.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package com.gargoylesoftware.htmlunit;

import java.io.IOException;
import java.io.InputStream;
import java.io.Serializable;
import java.util.Locale;

import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.ArrayUtils;
import org.apache.commons.lang.StringUtils;

import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.HTMLParser;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
import com.gargoylesoftware.htmlunit.html.XHtmlPage;
import com.gargoylesoftware.htmlunit.xml.XmlPage;

/**
 * The default implementation of {@link PageCreator}. Designed to be extended for easier handling of new content
 * types. Just check the content type in createPage() and call super(createPage()) if your custom
 * type isn't found. There are also protected createXXXXPage() methods for creating the {@link Page} types
 * which HtmlUnit already knows about for your custom content types.
 *
 * 

* The following table shows the type of {@link Page} created depending on the content type:
*
*

* * * * * * * * * * * * * * * * * * * * * * * * *
Content typeType of page
text/html{@link HtmlPage}
text/xml
* application/xml
* text/vnd.wap.wml
* *+xml *
{@link XmlPage}, or an {@link XHtmlPage} if an XHTML namespace is used
text/javascript
* application/x-javascript *
{@link JavaScriptPage}
text/*{@link TextPage}
Anything else{@link UnexpectedPage}
* * @version $Revision: 6357 $ * @author Mike Bowler * @author Christian Sell * @author Brad Clarke * @author Marc Guillemot * @author Ahmed Ashour * @author Daniel Gredler */ public class DefaultPageCreator implements PageCreator, Serializable { /** * Creates an instance. */ public DefaultPageCreator() { // Empty. } /** * Create a Page object for the specified web response. * * @param webResponse the response from the server * @param webWindow the window that this page will be loaded into * @exception IOException if an IO problem occurs * @return the new page object */ public Page createPage(final WebResponse webResponse, final WebWindow webWindow) throws IOException { final String contentType = determineContentType(webResponse.getContentType().toLowerCase(), webResponse.getContentAsStream()); final Page newPage; final String pageType = determinePageType(contentType); if ("html".equals(pageType)) { newPage = createHtmlPage(webResponse, webWindow); } else if ("javascript".equals(pageType)) { newPage = createJavaScriptPage(webResponse, webWindow); } else if ("xml".equals(pageType)) { final XmlPage xml = createXmlPage(webResponse, webWindow); final DomElement doc = xml.getDocumentElement(); if (doc != null && HTMLParser.XHTML_NAMESPACE.equals(doc.getNamespaceURI())) { newPage = createXHtmlPage(webResponse, webWindow); } else { newPage = xml; } } else if ("text".equals(pageType)) { newPage = createTextPage(webResponse, webWindow); } else { newPage = createUnexpectedPage(webResponse, webWindow); } return newPage; } /** * Tries to determine the content type. * TODO: implement a content type sniffer based on the * Content-Type Processing Model * @param contentType the contentType header if any * @param contentAsStream stream allowing to read the downloaded content * @return the sniffed mime type * @exception IOException if an IO problem occurs */ protected String determineContentType(final String contentType, final InputStream contentAsStream) throws IOException { final byte[] markerUTF8 = {(byte) 0xef, (byte) 0xbb, (byte) 0xbf}; final byte[] markerUTF16BE = {(byte) 0xfe, (byte) 0xff}; final byte[] markerUTF16LE = {(byte) 0xff, (byte) 0xfe}; try { if (!StringUtils.isEmpty(contentType)) { return contentType; } final byte[] bytes = read(contentAsStream, 500); if (bytes.length == 0) { return "text/plain"; } final String asAsciiString = new String(bytes, "ASCII").toUpperCase(Locale.ENGLISH); if (asAsciiString.contains("= 0x0E && b <= 0x1A) || (b >= 0x1C && b <= 0x1F)) { return true; } } return false; } private boolean startsWith(final byte[] bytes, final byte[] lookFor) { if (bytes.length < lookFor.length) { return false; } for (int i = 0; i < lookFor.length; ++i) { if (bytes[i] != lookFor[i]) { return false; } } return true; } private byte[] read(final InputStream stream, final int maxNb) throws IOException { final byte[] buffer = new byte[maxNb]; final int nbRead = stream.read(buffer); if (nbRead == buffer.length) { return buffer; } return ArrayUtils.subarray(buffer, 0, nbRead); } /** * Creates an HtmlPage for this WebResponse. * * @param webResponse the page's source * @param webWindow the WebWindow to place the HtmlPage in * @return the newly created HtmlPage * @throws IOException if the page could not be created */ protected HtmlPage createHtmlPage(final WebResponse webResponse, final WebWindow webWindow) throws IOException { return HTMLParser.parseHtml(webResponse, webWindow); } /** * Creates an XHtmlPage for this WebResponse. * * @param webResponse the page's source * @param webWindow the WebWindow to place the HtmlPage in * @return the newly created XHtmlPage * @throws IOException if the page could not be created */ protected XHtmlPage createXHtmlPage(final WebResponse webResponse, final WebWindow webWindow) throws IOException { return HTMLParser.parseXHtml(webResponse, webWindow); } /** * Creates a JavaScriptPage for this WebResponse. * * @param webResponse the page's source * @param webWindow the WebWindow to place the JavaScriptPage in * @return the newly created JavaScriptPage */ protected JavaScriptPage createJavaScriptPage(final WebResponse webResponse, final WebWindow webWindow) { final JavaScriptPage newPage = new JavaScriptPage(webResponse, webWindow); webWindow.setEnclosedPage(newPage); return newPage; } /** * Creates a TextPage for this WebResponse. * * @param webResponse the page's source * @param webWindow the WebWindow to place the TextPage in * @return the newly created TextPage */ protected TextPage createTextPage(final WebResponse webResponse, final WebWindow webWindow) { final TextPage newPage = new TextPage(webResponse, webWindow); webWindow.setEnclosedPage(newPage); return newPage; } /** * Creates an UnexpectedPage for this WebResponse. * * @param webResponse the page's source * @param webWindow the WebWindow to place the UnexpectedPage in * @return the newly created UnexpectedPage */ protected UnexpectedPage createUnexpectedPage(final WebResponse webResponse, final WebWindow webWindow) { final UnexpectedPage newPage = new UnexpectedPage(webResponse, webWindow); webWindow.setEnclosedPage(newPage); return newPage; } /** * Creates an XmlPage for this WebResponse. * * @param webResponse the page's source * @param webWindow the WebWindow to place the TextPage in * @return the newly created TextPage * @throws IOException if the page could not be created */ protected XmlPage createXmlPage(final WebResponse webResponse, final WebWindow webWindow) throws IOException { final XmlPage newPage = new XmlPage(webResponse, webWindow); webWindow.setEnclosedPage(newPage); return newPage; } /** * Determines the kind of page to create from the content type. * @param contentType the content type to evaluate * @return "xml", "html", "javascript", "text" or "unknown" */ protected String determinePageType(final String contentType) { if ("text/html".equals(contentType)) { // || contentType.equals("")) { return "html"; } else if ("text/javascript".equals(contentType) || "application/x-javascript".equals(contentType)) { return "javascript"; } else if ("text/xml".equals(contentType) || "application/xml".equals(contentType) || "text/vnd.wap.wml".equals(contentType) || contentType.matches(".*\\+xml")) { return "xml"; } else if (contentType.startsWith("text/")) { return "text"; } else { return "unknown"; } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy