com.gargoylesoftware.htmlunit.html.XmlSerializer Maven / Gradle / Ivy
Go to download
Show more of this group Show more artifacts with this name
Show all versions of vaadin-client-compiler-deps Show documentation
Show all versions of vaadin-client-compiler-deps Show documentation
Vaadin is a web application framework for Rich Internet Applications (RIA).
Vaadin enables easy development and maintenance of fast and secure rich web
applications with a stunning look and feel and wide browser support.
It features a server-side architecture with the majority of the logic running
on the server. Ajax technology is used on the browser side to ensure a rich
and interactive user experience.
/*
* Copyright (c) 2002-2011 Gargoyle Software Inc.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package com.gargoylesoftware.htmlunit.html;
import java.io.File;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
import org.apache.commons.io.FileUtils;
import org.apache.commons.io.IOUtils;
import org.apache.commons.lang.StringUtils;
import com.gargoylesoftware.htmlunit.Page;
import com.gargoylesoftware.htmlunit.WebResponse;
import com.gargoylesoftware.htmlunit.util.MimeType;
/**
* Utility to handle conversion from HTML code to XML string.
* @version $Revision: 6471 $
* @author Ahmed Ashour
* @author Ronald Brill
* @author Marc Guillemot
*/
class XmlSerializer {

    /** Separator used when building the relative references written into the saved document. */
    private static final String FILE_SEPARATOR = "/";

    /** Matches everything up to and including the last '/', so only the final path segment remains. */
    private static final Pattern CREATE_FILE_PATTERN = Pattern.compile(".*/");

    /** Indentation added for each nesting level; pushed and popped as one unit in {@link #printXml}. */
    private static final String INDENT_STEP = "  ";

    /** Accumulates the serialized output of the current {@link #asXml(HtmlElement)} call. */
    private final StringBuilder buffer_ = new StringBuilder();

    /** Current indentation prefix; grows and shrinks by {@link #INDENT_STEP} while walking the tree. */
    private final StringBuilder indent_ = new StringBuilder();

    /** Directory receiving the resources (images, stylesheets, frames) referenced by the saved page. */
    private File outputDir_;

    /**
     * Saves the page as XML to the given file and stores the referenced resources
     * in a sibling directory named after the file (without its extension).
     * If the file name does not end with ".htm" or ".html", ".html" is appended.
     * @param page the page to save
     * @param file the file to save to
     * @throws IOException if the target file already exists or something goes wrong while writing
     */
    public void save(final HtmlPage page, final File file) throws IOException {
        String fileName = file.getName();
        if (!fileName.endsWith(".htm") && !fileName.endsWith(".html")) {
            fileName += ".html";
        }
        final File outputFile = new File(file.getParentFile(), fileName);
        if (outputFile.exists()) {
            throw new IOException("File already exists: " + outputFile);
        }
        // the resource directory has the same name as the output file, minus the extension
        fileName = fileName.substring(0, fileName.lastIndexOf('.'));
        outputDir_ = new File(file.getParentFile(), fileName);
        // NOTE(review): the file is written with the platform default charset, whereas the XML
        // declaration produced by asXml names the page encoding -- confirm this is intended.
        FileUtils.writeStringToFile(outputFile, asXml(page.getDocumentElement()));
    }

    /**
     * Converts an HTML element to XML.
     * @param node a node
     * @return the text representation according to the setting of this serializer
     * @throws IOException in case of problem saving resources
     */
    public String asXml(final HtmlElement node) throws IOException {
        buffer_.setLength(0);
        indent_.setLength(0);
        String charsetName = null;
        if (node.getPage() instanceof HtmlPage) {
            charsetName = node.getPage().getPageEncoding();
        }
        // only a complete document (root <html> element with a known encoding) gets an XML declaration
        if (charsetName != null && node instanceof HtmlHtml) {
            buffer_.append("<?xml version=\"1.0\" encoding=\"").append(charsetName).append("\"?>").append('\n');
        }
        printXml(node);
        final String response = buffer_.toString();
        buffer_.setLength(0);
        return response;
    }

    /**
     * Appends the XML representation of the element and, recursively, of its children
     * to the internal buffer. Elements rejected by {@link #isExcluded(DomElement)} are skipped
     * together with their content.
     * @param node the element to serialize
     * @throws IOException in case of problem saving resources
     */
    protected void printXml(final DomElement node) throws IOException {
        if (isExcluded(node)) {
            return;
        }

        final boolean hasChildren = node.getFirstChild() != null;
        buffer_.append(indent_).append('<');
        printOpeningTag(node);

        if (!hasChildren && !node.isEmptyXmlTagExpanded()) {
            buffer_.append("/>").append('\n');
            return;
        }

        buffer_.append(">").append('\n');
        for (DomNode child = node.getFirstChild(); child != null; child = child.getNextSibling()) {
            // push exactly what is popped below, otherwise the indent length would underflow
            indent_.append(INDENT_STEP);
            if (child instanceof DomElement) {
                printXml((DomElement) child);
            }
            else {
                // non-element children are written using their own string representation
                buffer_.append(child);
            }
            indent_.setLength(indent_.length() - INDENT_STEP.length());
        }
        buffer_.append(indent_).append("</").append(node.getTagName()).append('>').append('\n');
    }

    /**
     * Prints the content between "&lt;" and "&gt;" (or "/&gt;") in the output of the tag name
     * and its attributes in XML format.
     * @param node the node whose opening tag is to be printed
     * @throws IOException in case of problem saving resources
     */
    protected void printOpeningTag(final DomElement node) throws IOException {
        buffer_.append(node.getTagName());
        final Map<String, DomAttr> attributes = readAttributes(node);
        for (final Map.Entry<String, DomAttr> entry : attributes.entrySet()) {
            buffer_.append(" ");
            buffer_.append(entry.getKey());
            buffer_.append("=\"");
            final String value = entry.getValue().getNodeValue();
            buffer_.append(com.gargoylesoftware.htmlunit.util.StringUtils.escapeXmlAttributeValue(value));
            buffer_.append('"');
        }
    }

    /**
     * Returns the attributes to serialize for the element. Images, links and frames are
     * delegated to the matching {@code getAttributesFor} variant, which also saves the
     * referenced resource; for options the "selected" attribute is aligned with
     * {@link HtmlOption#isSelected()}.
     * @param node the element being serialized
     * @return the attributes to print, keyed by attribute name
     * @throws IOException in case of problem saving resources
     */
    private Map<String, DomAttr> readAttributes(final DomElement node) throws IOException {
        if (node instanceof HtmlImage) {
            return getAttributesFor((HtmlImage) node);
        }
        else if (node instanceof HtmlLink) {
            return getAttributesFor((HtmlLink) node);
        }
        else if (node instanceof BaseFrame) {
            return getAttributesFor((BaseFrame) node);
        }

        Map<String, DomAttr> attributes = node.getAttributesMap();
        if (node instanceof HtmlOption) {
            // work on a copy so that the attributes of the element itself stay untouched
            attributes = new HashMap<String, DomAttr>(attributes);
            final HtmlOption option = (HtmlOption) node;
            if (option.isSelected()) {
                if (!attributes.containsKey("selected")) {
                    attributes.put("selected", new DomAttr(node.getPage(), null, "selected", "selected", false));
                }
            }
            else {
                attributes.remove("selected");
            }
        }
        return attributes;
    }

    /**
     * Saves the page enclosed in the frame into the output directory and returns the
     * frame's attributes with "src" pointing to the saved copy.
     * @param frame the frame being serialized
     * @return a copy of the frame's attributes; unchanged if the frame has no "src" attribute
     * @throws IOException in case of problem saving the enclosed page
     */
    private Map<String, DomAttr> getAttributesFor(final BaseFrame frame) throws IOException {
        final Map<String, DomAttr> map = createAttributesCopyWithClonedAttribute(frame, "src");
        final DomAttr srcAttr = map.get("src");
        if (srcAttr == null) {
            return map;
        }

        final Page enclosedPage = frame.getEnclosedPage();
        final String suffix = getFileExtension(enclosedPage);
        final File file = createFile(srcAttr.getValue(), "." + suffix);

        if (enclosedPage instanceof HtmlPage) {
            // TODO: refactor; createFile has just created an empty file, which has to be removed
            // again because saving a page refuses to overwrite an existing file
            file.delete();
            ((HtmlPage) enclosedPage).save(file);
        }
        else {
            // close both streams even when opening the target or copying fails
            final InputStream is = enclosedPage.getWebResponse().getContentAsStream();
            try {
                final FileOutputStream fos = new FileOutputStream(file);
                try {
                    IOUtils.copyLarge(is, fos);
                }
                finally {
                    IOUtils.closeQuietly(fos);
                }
            }
            finally {
                IOUtils.closeQuietly(is);
            }
        }

        srcAttr.setValue(file.getParentFile().getName() + FILE_SEPARATOR + file.getName());
        return map;
    }

    /**
     * Determines the file extension to use when saving the given page.
     * @param enclosedPage the page to save
     * @return the extension without leading dot: "html" for HTML pages, otherwise the text after
     *         the last dot of the URL's path, or "unknown" if the path contains no dot
     */
    private String getFileExtension(final Page enclosedPage) {
        if (enclosedPage instanceof HtmlPage) {
            return "html";
        }

        final URL url = enclosedPage.getUrl();
        if (url.getPath().contains(".")) {
            return StringUtils.substringAfterLast(url.getPath(), ".");
        }
        // no leading dot here: the caller adds it when building the file name
        return "unknown";
    }

    /**
     * Saves the resource referenced by the link into the output directory and returns the
     * link's attributes with "href" pointing to the saved copy.
     * @param link the link being serialized
     * @return a copy of the link's attributes; unchanged if "href" is missing or blank
     * @throws IOException in case of problem saving the resource
     */
    protected Map<String, DomAttr> getAttributesFor(final HtmlLink link) throws IOException {
        final Map<String, DomAttr> map = createAttributesCopyWithClonedAttribute(link, "href");
        final DomAttr hrefAttr = map.get("href");
        if ((null != hrefAttr) && StringUtils.isNotBlank(hrefAttr.getValue())) {
            final File file = createFile(hrefAttr.getValue(), ".css");
            FileUtils.writeStringToFile(file, link.getWebResponse(true).getContentAsString());
            hrefAttr.setValue(outputDir_.getName() + FILE_SEPARATOR + file.getName());
        }
        return map;
    }

    /**
     * Saves the image into the output directory and returns the image's attributes with
     * "src" pointing to the saved copy.
     * @param image the image being serialized
     * @return a copy of the image's attributes; unchanged if "src" is missing or blank
     * @throws IOException in case of problem saving the image
     */
    protected Map<String, DomAttr> getAttributesFor(final HtmlImage image) throws IOException {
        final Map<String, DomAttr> map = createAttributesCopyWithClonedAttribute(image, "src");
        final DomAttr srcAttr = map.get("src");
        if ((null != srcAttr) && StringUtils.isNotBlank(srcAttr.getValue())) {
            final WebResponse response = image.getWebResponse(true);
            final File file = createFile(srcAttr.getValue(), "." + getSuffix(response));
            FileUtils.copyInputStreamToFile(response.getContentAsStream(), file);
            final String valueOnFileSystem = outputDir_.getName() + FILE_SEPARATOR + file.getName();
            srcAttr.setValue(valueOnFileSystem); // this is the clone attribute node, not the original one of the page
        }
        return map;
    }

    /**
     * Chooses the file suffix for a downloaded response.
     * @param response the response that will be saved
     * @return the suffix of the requested file if it is 2 to 4 characters long,
     *         otherwise the extension derived from the response's content type
     */
    private String getSuffix(final WebResponse response) {
        // first try to take the one from the requested file
        final String url = response.getWebRequest().getUrl().toString();
        final String fileName = StringUtils.substringAfterLast(StringUtils.substringBefore(url, "?"), "/");
        // if there is a suffix with 2-4 letters, then take it
        final String suffix = StringUtils.substringAfterLast(fileName, ".");
        if (suffix.length() > 1 && suffix.length() < 5) {
            return suffix;
        }

        // use content type
        return MimeType.getFileExtension(response.getContentType());
    }

    /**
     * Copies the attribute map of the element and replaces the named attribute, if present,
     * with a clone, so that changing its value does not affect the element itself.
     * @param elt the element whose attributes are copied
     * @param attrName the name of the attribute to clone
     * @return the new map
     */
    private Map<String, DomAttr> createAttributesCopyWithClonedAttribute(final HtmlElement elt,
            final String attrName) {
        final Map<String, DomAttr> newMap = new HashMap<String, DomAttr>(elt.getAttributesMap());

        // clone the specified element, if possible
        final DomAttr attr = newMap.get(attrName);
        if (null == attr) {
            return newMap;
        }

        final DomAttr clonedAttr = new DomAttr(attr.getPage(), attr.getNamespaceURI(),
            attr.getQualifiedName(), attr.getValue(), attr.getSpecified());
        newMap.put(attrName, clonedAttr);
        return newMap;
    }

    /**
     * Indicates whether the element has to be left out of the output.
     * @param element the element to check
     * @return {@code true} for script elements
     */
    protected boolean isExcluded(final DomElement element) {
        return element instanceof HtmlScript;
    }

    /**
     * Computes the best file to save the response to the given URL.
     * The name is the last path segment of the URL, without query or ";" part, completed with
     * the extension if needed; "_1", "_2", ... is inserted before the extension until a file
     * that does not exist yet could be created.
     * @param url the requested URL
     * @param extension the preferred extension
     * @return the file to create
     * @throws IOException if a problem occurs creating the file
     */
    private File createFile(final String url, final String extension) throws IOException {
        String name = url.replaceFirst("/$", "");
        name = CREATE_FILE_PATTERN.matcher(name).replaceAll("");
        name = StringUtils.substringBefore(name, "?"); // remove query
        name = StringUtils.substringBefore(name, ";"); // remove additional info
        if (!name.endsWith(extension)) {
            name += extension;
        }

        // the directory is the same for every candidate, no need to create it on each attempt
        outputDir_.mkdirs();

        int counter = 0;
        while (true) {
            final String fileName;
            if (counter != 0) {
                fileName = StringUtils.substringBeforeLast(name, ".")
                    + "_" + counter + "." + StringUtils.substringAfterLast(name, ".");
            }
            else {
                fileName = name;
            }
            final File f = new File(outputDir_, fileName);
            // createNewFile returns false when the file already exists: try the next candidate
            if (f.createNewFile()) {
                return f;
            }
            counter++;
        }
    }
}