All downloads are free. Search and download functionality uses the official Maven repository.

src.au.id.jericho.lib.html.OutputDocument Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a simple but powerful java library allowing analysis and manipulation of parts of an HTML document, including some common server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.

There is a newer version: 2.3
Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 1.5
// Copyright (C) 2004 Martin Jericho
// http://jerichohtml.sourceforge.net/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package au.id.jericho.lib.html;

import java.io.*;
import java.util.*;

/**
 * Represents a modified version of an original source text.
 * 

* An OutputDocument represents an original source text that * has been modified by substituting segments of it with other text. * Each of these substitutions is registered by adding an {@link IOutputSegment} to the OutputDocument. * After all of the substitutions have been added, the modified text can be retrieved using the * {@link #output(Writer)} or {@link #toString()} methods. *

* The registered OutputSegments must not overlap each other, but may be adjacent. *

* The following example converts all externally referenced style sheets to internal style sheets: *

 *  OutputDocument outputDocument=new OutputDocument(htmlText);
 *  Source source=new Source(htmlText);
 *  StringBuffer sb=new StringBuffer();
 *  List linkStartTags=source.findAllStartTags(Tag.LINK);
 *  for (Iterator i=linkStartTags.iterator(); i.hasNext();) {
 *    StartTag startTag=(StartTag)i.next();
 *    Attributes attributes=startTag.getAttributes();
 *    String rel=attributes.getValue("rel");
 *    if (!"stylesheet".equalsIgnoreCase(rel)) continue;
 *    String href=attributes.getValue("href");
 *    if (href==null) continue;
 *    String styleSheetContent;
 *    try {
 *      styleSheetContent=CommonTools.getString(new URL(href).openStream()); // note CommonTools.getString method is not defined here
 *    } catch (Exception ex) {
 *      continue; // don't convert if URL is invalid
 *    }
 *    sb.setLength(0);
 *    sb.append("<style");
 *    Attribute typeAttribute=attributes.get("type");
 *    if (typeAttribute!=null) sb.append(' ').append(typeAttribute);
 *    sb.append(">\n").append(styleSheetContent).append("\n</style>");
 *    outputDocument.add(new StringOutputSegment(startTag,sb.toString()));
 *  }
 *  String convertedHtmlText=outputDocument.toString();
 * 
* * @see IOutputSegment * @see StringOutputSegment */ public final class OutputDocument { private CharSequence sourceText; private ArrayList outputSegments=new ArrayList(); /** * Constructs a new OutputDocument based on the specified source text. *

* Note that a {@link Source} object can be passed directly as an argument to this constructor * as it implements the CharSequence interface. * * @param sourceText the source text. */ public OutputDocument(CharSequence sourceText) { if (sourceText==null) throw new IllegalArgumentException(); this.sourceText=sourceText; } /** * Returns the original source text upon which this OutputDocument is based. * @return the original source text upon which this OutputDocument is based. */ public CharSequence getSourceText() { return sourceText; } /** * Adds the specified {@linkplain IOutputSegment output segment} to this OutputDocument. *

* Note that for efficiency reasons no exception is thrown if the added output segment overlaps another, * however in this case an {@link OverlappingOutputSegmentsException} will be thrown when the output is generated. */ public void add(IOutputSegment outputSegment) { outputSegments.add(outputSegment); } /** * *************************** */ public void add(FormControl formControl) { formControl.addToOutputDocument(this); } /** * *************************** */ public void add(FormFields formFields) { formFields.addToOutputDocument(this); } /** * Outputs the final content of this OutputDocument to the specified Writer. *

* An {@link OverlappingOutputSegmentsException} is thrown if any of the output segments overlap. * For efficiency reasons this condition is not caught when the offending output segment is added. * * @throws IOException if an I/O exception occurs. * @throws OverlappingOutputSegmentsException if any of the output segments overlap. */ public void output(Writer writer) throws IOException { if (outputSegments.isEmpty()) { Util.appendTo(writer,sourceText); return; } int pos=0; Collections.sort(outputSegments,IOutputSegment.COMPARATOR); IOutputSegment lastOutputSegment=null; for (Iterator i=outputSegments.iterator(); i.hasNext();) { IOutputSegment outputSegment=(IOutputSegment)i.next(); if (outputSegment==lastOutputSegment) continue; // silently ignore duplicate output segment if (outputSegment.getBegin()pos) Util.appendTo(writer,sourceText,pos,outputSegment.getBegin()); outputSegment.output(writer); lastOutputSegment=outputSegment; pos=outputSegment.getEnd(); } if (posOutputDocument as a String. * @return the final content of this OutputDocument as a String. * @throws OverlappingOutputSegmentsException if any of the output segments overlap. */ public String toString() { StringWriter writer=new StringWriter((int)(sourceText.length()*1.5)); try { output(writer); } catch (IOException ex) {throw new RuntimeException(ex);} // should never happen with StringWriter return writer.toString(); } }





© 2015 - 2024 Weber Informatics LLC | Privacy Policy