All downloads are free. Search and download functionality uses the official Maven repository.

cat.inspiracio.html.DocumentWriter Maven / Gradle / Ivy

Go to download

HTML-parser provides a parser for HTML 5 that produces an HTML 5 document object model. It aims to be a Java implementation of http://www.w3.org/TR/html5/. It is intended for use on the server, so it does not implement features that are only relevant in the client, such as event handling. It is designed to be used from JavaScript, via Java's scripting library.

The newest version!
/*
Copyright 2016 Alexander Bunkenburg 

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
package cat.inspiracio.html;

import java.io.IOException;
import java.io.Writer;

import org.w3c.dom.DocumentType;
import org.w3c.dom.Element;

/** Writes a document to a writer. 
 * 
 * Usage:
 * 
 *  DocumentWriter w=new DocumentWriter(writer);
 *  w.document(d);
 * 
* A DocumentWriter can be used only once. * */ public class DocumentWriter extends DocumentRecurser { private static final String NL = "\n"; private Writer w; /** @param writer Writes to this writer. */ public DocumentWriter(Writer writer){w=writer;} /** Writes a character directly to the writer, no escaping. * @param c the char * @return Returns this for fluent style. * @throws IOException something wrong */ protected DocumentWriter write(char c) throws IOException{ w.write(c); return this; } /** Writes a string directly to the writer, no escaping. * @param s the string * @return Returns this for fluent style. * @throws IOException something wrong */ public DocumentWriter write(String s) throws IOException{ w.write(s); return this; } /** Writes a string and new-line to the writer, no escaping. * * This is a separate method so that subclasses can override it, * for modifications like minimising or indenting. * * @param s the string * @return Returns this for fluent style. * @throws IOException something wrong */ public DocumentWriter writeln(String s) throws IOException{ w.write(s); writeln(); return this; } /** Writes a string and new-line to the writer, no escaping. * @param c the char * @return Returns this for fluent style. * @throws IOException something wrong */ public DocumentWriter writeln(char c) throws IOException{ w.write(c); writeln(); return this; } /** Writes a new-line to the writer. * @return Returns this for fluent style. * @throws IOException something wrong */ protected DocumentWriter writeln() throws IOException{ w.write(NL); return this; } // overriding DocumentRecurser ------------------------------------ /** Always writes the HTML5 doctype "<!DOCTYPE html>". * @param type Ignored. * @return Returns this for fluent style. * @throws IOException something wrong */ @Override protected DocumentWriter doctype(DocumentType type) throws IOException { return write(""); } /** Processes an element. 
* If it's a script element, calls script(e), * otherwise super.element(e). * @param e the element * @return Returns this for fluent style. * @throws Exception something wrong */ @Override protected DocumentWriter element(Element e) throws Exception{ String tag=e.getTagName(); if("script".equals(tag)) script(e); else super.element(e); return this; } /** Writes a script element. * * Script elements are special: * They have no child elements except for text, * and the text should not be escaped. In that way, * the javascript program in there can have < > &. * * @param element Must be script and must have no children * other than text. * @throws Exception something wrong */ protected void script(Element element) throws Exception{ open(element); //Maybe could recurse over children rather than get text content. String s=element.getTextContent();//correct if precondition holds write(s);//no escaping at all close(element); } /** Writes opening tag and the attributes. * If the element has no child nodes, the final ">" of the opening tag * is not written, so that close(e) can write "/>" --- unless the element * is one of the few elements that need a separate closing tag even if they * have no children. * @param e the element * @return Returns this for fluent style. * @throws Exception something wrong */ @Override protected DocumentWriter open(Element e) throws Exception { String tag=e.getTagName(); boolean b=e.hasChildNodes() || needClosingTag(tag); write("<"); write(tag); attributes(e); if(b) write(">"); return this; } /** Writes " key=\"value\". * In value, " is escaped to "&". * If the value is null or empty, writes just the key. * @param key the key * @param value the value * @return Returns this for fluent style. 
* @throws Exception something wrong */ @Override protected DocumentWriter attribute(String key, String value) throws Exception { write(" "); write(key); if(value!=null && 0"); }else{ write("/>"); } return this; } /** Escapes a string for inclusion in cdata and writes it as CDATA section. * May write several CDATA section. For each "]]>" in the string, adds another cdata section. * @param s the CData string * @return Returns this for fluent style. * @throws IOException something wrong */ @Override protected DocumentWriter cdata(String s) throws Exception{ s=escapeCData(s); write(""); return this; } /** Escape ]]> to ]]>. * * https://www.w3.org/TR/html5/syntax.html#cdata-sections * The text must not contain "]]>". * */ private String escapeCData(String s){ if(s==null)return s; if(!s.contains("]]>")) return s; //Wikipedia recommends to make two cdata sections: return s.replaceAll( "]]>", "]]]]>"); } /** Escape a string for inclusion in a comment and writes it. * @param s the comment as string * @return Returns this for fluent style. * @throws Exception something wrong */ @Override protected DocumentWriter comment(String s)throws Exception{ s=escapeComment(s); write(""); } /** Escapes text for inclusion in a comment. * * https://www.w3.org/TR/html5/syntax.html#comments * * The text must not start with ">", * nor start with "->", * nor contain "--", * nor end in "-", * * Here, only escape "--" to something else. * * @param s */ private String escapeComment(String s){ if(s==null)return s; if(s.startsWith(">") || s.startsWith("->") || s.endsWith("-")) s="[" + s + "]";// improvised fix if(!s.contains("--")) return s; String replacement="-->"; replacement="[escaped double -]"; String t=s.replaceAll("--", replacement); return t; } // methods about details of html output ---------------------------- /** Escapes a String for inserting in HTML elements. * Assumes that the encoding is UTF-8 and * does not escape accents and so on. * * In detail, does: * * * * * *
icescapeElement(c)length(escapeElement(c))
38&&amp;5
60<&lt;4
escaping
* @param s The string to escape. * @return the document writer * @throws IOException something wrong */ protected DocumentWriter writeEscapeElement(String s) throws IOException{ //Often, there will be nothing to escape. Optimise that case. if(s.indexOf('&')==-1 && s.indexOf('<')==-1 && s.indexOf('>')==-1) return write(s); for (int i=0; i': builder.append(">"); break; default: write(c); } } return this; } /** In html5 for common browsers, * does this tag need a separate closing tag, * even if the element is empty? * * This methods return true for a, script, title, textarea. * * @param tag the tag * @return Does it need a closing tag? */ protected boolean needClosingTag(String tag){ switch(tag){ case "a": case "div": case "iframe": case "nav": case "p": case "script": case "textarea": case "title": return true; } return false; } /** Quotes a value so that it can be an attribute's value. * Escapes " by " and encloses in ". * @param value string to quote * @return quoted string */ protected String quote(String value){ //optimise usual case if(contains(value, '"')) value=value.replaceAll("\"", """); return '"' + value + '"'; } /** Does the string contain this character? */ private boolean contains(String value, char c){return 0<=value.indexOf(c);} // debug ---------------------------------------------------------- @Override public String toString(){return w.toString();} }




© 2015 - 2025 Weber Informatics LLC | Privacy Policy