All downloads are free. The search and download functionality uses the official Maven repository.

au.id.jericho.lib.html.Indent Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a Java library that allows analysis and manipulation of parts of an HTML document, including server-side tags, while reproducing verbatim any unrecognised or invalid HTML.

There is a newer version: 3.4
Show newest version
// Jericho HTML Parser - Java based library for analysing and manipulating HTML
// Version 2.3
// Copyright (C) 2006 Martin Jericho
// http://sourceforge.net/projects/jerichohtml/
//
// This library is free software; you can redistribute it and/or
// modify it under the terms of the GNU Lesser General Public
// License as published by the Free Software Foundation; either
// version 2.1 of the License, or (at your option) any later version.
// http://www.gnu.org/copyleft/lesser.html
//
// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
// Lesser General Public License for more details.
//
// You should have received a copy of the GNU Lesser General Public
// License along with this library; if not, write to the Free Software
// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA

package au.id.jericho.lib.html;

import java.util.*;
import java.io.*;
import java.net.*;

/**
 * This is an internal class for encapsulating the HTML indenting functionality.
 */
final class Indent implements CharStreamSource {
	private final Segment segment;
	private final CharSequence sourceText;
	private final String indentText;
	private final boolean tidyTags;
	private final boolean collapseWhiteSpace;
	private final boolean indentAllElements;
	private final boolean indentScriptElements;
	private Writer writer;
	
	private Tag nextTag;
	private int index;

	/**
	 * Creates an indenter for the given segment.
	 *
	 * @param segment the segment whose content is to be written out indented
	 * @param indentText the text written once per indent level
	 * @param tidyTags whether tags are written in their tidied form rather than verbatim
	 * @param collapseWhiteSpace whether white space in ordinary text is collapsed when written
	 * @param indentAllElements whether every element is indented, instead of only those selected by {@link #indent(Element)}
	 */
	public Indent(final Segment segment, final String indentText, final boolean tidyTags, final boolean collapseWhiteSpace, final boolean indentAllElements) {
		this.segment=segment;
		// Snapshot of the complete source text; all index arithmetic in this class is relative to it.
		this.sourceText=segment.source.toString();

		// Formatting options, stored exactly as supplied.
		this.indentText=indentText;
		this.tidyTags=tidyTags;
		this.collapseWhiteSpace=collapseWhiteSpace;
		this.indentAllElements=indentAllElements;

		// SCRIPT elements need to be inline to keep functional equivalency of output,
		// so they are only indented when the caller asked for all elements to be indented.
		this.indentScriptElements=indentAllElements;
	}

	/**
	 * Writes the indented form of the segment to the given writer and flushes it.
	 *
	 * @param writer the destination for the indented output
	 * @throws IOException if the writer fails
	 */
	public void writeTo(final Writer writer) throws IOException {
		this.writer=writer;
		// Reset the traversal state to the start of the segment.
		final int segmentBegin=segment.begin;
		nextTag=segment.source.findNextTag(segmentBegin);
		index=segmentBegin;
		// Top-level content is written at depth 0.
		final int segmentEnd=segment.end;
		final List topLevelElements=segment.getChildElements();
		writeContent(segmentEnd,topLevelElements,0);
		writer.flush();
	}

	/**
	 * Returns an estimate of the maximum output length, taken to be twice the length of the source text.
	 * <p>
	 * The multiplication is carried out in <code>long</code> arithmetic. Multiplying two
	 * <code>int</code> values and widening afterwards would overflow, and yield a negative
	 * estimate, for source texts longer than 2^30 characters.
	 *
	 * @return twice the length of the source text
	 */
	public long getEstimatedMaximumOutputLength() {
		return sourceText.length()*2L;
	}

	/**
	 * Writes the content between the current index and <code>end</code>, made up of plain text
	 * interleaved with the given child elements. Sets index to end.
	 *
	 * @param end the source position at which the content ends
	 * @param childElements the child {@link Element} objects to consider, in iteration order
	 * @param depth the indent depth at which the content is written
	 * @throws IOException if the writer fails
	 */
	private void writeContent(final int end, final List childElements, final int depth) throws IOException {
		final Iterator children=childElements.iterator();
		while (children.hasNext()) {
			final Element child=(Element)children.next();
			final int childBegin=child.begin;
			// Elements starting at or beyond the end of this content are not written.
			if (childBegin>=end) break;

			String childName=null;
			if (!indentAllElements) {
				childName=child.getName();
				// Elements that are not indented are left to be written as part of the surrounding text.
				if (!indent(child)) continue;
			}

			// Flush the text that precedes the element.
			writeText(childBegin,depth,false,false,false,collapseWhiteSpace);

			final boolean preformatted;
			final boolean renderContentInline;
			if (indentAllElements) {
				preformatted=false;
				renderContentInline=false;
			} else if (childName==HTMLElementName.PRE || childName==HTMLElementName.TEXTAREA) {
				preformatted=true;
				renderContentInline=true;
			} else if (childName==HTMLElementName.SCRIPT) {
				preformatted=true;
				renderContentInline=false;
			} else {
				preformatted=false;
				renderContentInline=!containsNonInlineLevelChildElements(child);
			}
			writeElement(child,depth,end,preformatted,renderContentInline);
		}
		// Whatever text remains after the last element written.
		writeText(end,depth,false,false,false,collapseWhiteSpace);
	}

	/**
	 * Decides whether the given element is indented when not all elements are being indented.
	 * <p>
	 * Document type declarations are always indented and every other non-normal tag type never is.
	 * SCRIPT elements follow the <code>indentScriptElements</code> setting. Any other normal element
	 * is indented unless it is an inline-level element with no non-inline-level child elements.
	 *
	 * @param element the element to test
	 * @return <code>true</code> if the element is to be indented
	 */
	private boolean indent(final Element element) {
		final StartTagType tagType=element.getStartTag().getStartTagType();
		if (tagType!=StartTagType.NORMAL) {
			return tagType==StartTagType.DOCTYPE_DECLARATION;
		}
		final String name=element.getName();
		if (name==HTMLElementName.SCRIPT) {
			return indentScriptElements;
		}
		final boolean inlineLevel=HTMLElements.getInlineLevelElementNames().contains(name);
		return !inlineLevel || containsNonInlineLevelChildElements(element);
	}

	/**
	 * Writes the source text between the current index and <code>end</code>, skipping leading
	 * white space. Nothing is written if the text is empty or consists only of white space.
	 * Sets index to end.
	 *
	 * @param end the source position at which the text ends
	 * @param depth the indent depth of the text
	 * @param beginInline if <code>true</code>, no indent is written before the text
	 * @param endInline if <code>true</code>, no line break is written after the text
	 * @param increaseIndentAfterFirstLineBreak passed through to <code>writeTextInline</code>
	 * @param collapseWhiteSpace passed through to <code>writeTextInline</code>
	 * @throws IOException if the writer fails
	 */
	private void writeText(final int end, int depth, final boolean beginInline, final boolean endInline, final boolean increaseIndentAfterFirstLineBreak, final boolean collapseWhiteSpace) throws IOException {
		if (index==end) return;
		// Trim leading white space; give up if that consumes all of the text.
		while (Segment.isWhiteSpace(sourceText.charAt(index))) {
			index++;
			if (index==end) return;
		}
		if (!beginInline) {
			writeIndent(depth);
		}
		writeTextInline(end,depth,increaseIndentAfterFirstLineBreak,collapseWhiteSpace);
		if (!endInline) {
			writer.write('\n');
		}
	}

	/**
	 * Writes the given element at the given indent depth: the indent and start tag first, then
	 * (in the visible code) the end tag and trailing line breaks.
	 * <p>
	 * NOTE(review): this copy of the method is corrupted. The condition <code>endendTag.begin</code>
	 * below is not valid, and the <code>preformatted</code> parameter and <code>contentEnd</code> local are
	 * never used in the visible code. Text between a less-than and a greater-than character appears
	 * to have been lost, presumably including the statements that write the element content.
	 * Restore from the original source before relying on this method.
	 *
	 * @param element the element to write
	 * @param depth the indent depth of the element
	 * @param end the source position beyond which nothing is to be written
	 * @param preformatted not used in the visible code; presumably selects preformatted content handling - confirm against the original source
	 * @param renderContentInline if <code>true</code>, no line break or indent is written between the tags and the content
	 * @throws IOException if the writer fails
	 */
	private void writeElement(final Element element, final int depth, final int end, final boolean preformatted, boolean renderContentInline) throws IOException {
		// sets index to minimum of element.end or end
		final StartTag startTag=element.getStartTag();
		final EndTag endTag=element.getEndTag();
		writeIndent(depth);
		writeTag(startTag,depth,end);
		// Writing the start tag reached the end of the range: terminate the line and stop.
		if (index==end) {
			writer.write('\n');
			return;
		}
		if (!renderContentInline) writer.write('\n');
		int contentEnd=element.getContentEnd();
		// NOTE(review): the next condition is corrupted; code is missing between the previous statement and this one.
		if (endendTag.begin) {
			// The end tag goes on its own indented line unless the content was rendered inline.
			if (!renderContentInline) writeIndent(depth);
			// assert index=endTag.begin
			writeTag(endTag,depth,end);
			writer.write('\n');
		} else if (renderContentInline) {
			// No end tag written: still terminate the line that holds the inline content.
			writer.write('\n');
		}
	}

	/**
	 * Brings <code>nextTag</code> up to date by advancing it past every tag that begins before the
	 * current index. Afterwards <code>nextTag</code> is either <code>null</code> or begins at or
	 * after the index.
	 */
	private void updateNextTag() {
		while (nextTag!=null && nextTag.begin<index) {
			nextTag=nextTag.findNextTag();
		}
	}

	/**
	 * Writes script content between the current index and <code>end</code>, re-indented to the
	 * given depth, followed by a line break.
	 * <p>
	 * The difference between the index and the start-of-line position reported by
	 * <code>getStartOfLinePos</code> is passed on as the original indent length.
	 * NOTE(review): <code>getStartOfLinePos</code> and <code>writeLineKeepWhiteSpace</code> are not visible here.
	 * The repeated <code>index==end</code> checks imply that both can advance the index - confirm.
	 *
	 * @param end the source position at which the script content ends
	 * @param depth the indent depth at which the content is written
	 * @throws IOException if the writer fails
	 */
	private void writeIndentedScriptContent(final int end, final int depth) throws IOException {
		// sets index to end
		if (index==end) return;
		int startOfLinePos=getStartOfLinePos(end,false);
		// Nothing left to write after locating the start of the line.
		if (index==end) return;
		if (startOfLinePos==-1) {
			// Script started on same line as start tag.  Use the start of the next line to determine the original indent.
			// The first line is written on its own at the target depth.
			writeIndent(depth);
			writeLineKeepWhiteSpace(end,depth);
			writer.write('\n');
			if (index==end) return;
			startOfLinePos=getStartOfLinePos(end,true);
			if (index==end) return;
		}
		// index-startOfLinePos is the original indent length handed to the three-argument overload.
		writeTextPreserveIndenting(end,depth,index-startOfLinePos);
		writer.write('\n');
	}

	/**
	 * Writes the text between the current index and <code>end</code>, keeping its white space.
	 * The first line is written as is; any further lines are written one level deeper, using the
	 * start of the second line to determine the original indent. Sets index to end.
	 * <p>
	 * On entry the index is expected to be at the beginning of a tag. <code>end</code> is normally
	 * the end of that tag, but in rare cases may be less than it.
	 *
	 * @param end the source position at which the text ends
	 * @param depth the indent depth of the first line
	 * @return <code>true</code> if all text was on one line, otherwise <code>false</code>
	 * @throws IOException if the writer fails
	 */
	private boolean writeTextPreserveIndenting(final int end, final int depth) throws IOException {
		writeLineKeepWhiteSpace(end,depth);
		if (index!=end) {
			final int lineStart=getStartOfLinePos(end,true);
			if (index!=end) {
				// More text follows the first line: continue on a new line, one level deeper.
				writer.write('\n');
				final int originalIndentLength=index-lineStart;
				writeTextPreserveIndenting(end,depth+1,originalIndentLength);
				return false;
			}
		}
		return true;
	}

	/**
	 * Writes text at the given depth, given the length of the original indent to skip on each
	 * subsequent line.
	 * <p>
	 * NOTE(review): this copy of the method is corrupted. Partway through the <code>for</code>
	 * statement the text runs into fragments of other methods: <code>textLength</code>, <code>text</code>,
	 * <code>i</code> and <code>subsequentLineDepth</code> are not declared here, and the closing braces do
	 * not balance. Text between a less-than and a greater-than character appears to have been lost.
	 * Restore from the original source.
	 *
	 * @param end the source position at which the text ends
	 * @param depth the indent depth at which the text is written
	 * @param originalIndentLength the original indent length; callers pass the index minus a start-of-line position
	 * @throws IOException if the writer fails
	 */
	private void writeTextPreserveIndenting(final int end, final int depth, final int originalIndentLength) throws IOException {
		// sets index to end
		writeIndent(depth);
		writeLineKeepWhiteSpace(end,depth);
		while (index!=end) {
			// Skip over the original indent:
			// NOTE(review): the next line is corrupted; everything from here to the end of this span belongs to other, partially lost methods.
			for (int x=0; x=textLength) return; // trim whitespace.
				writer.write('\n');
				writeIndent(subsequentLineDepth);
				i=writeSpecifiedLine(text,i);
			} while (i=textLength) return i;
		}
	}

	/**
	 * Writes the text between the current index and <code>end</code>, starting on the current line.
	 * The first line is written with <code>writeLine</code>. Each later line is started with a line
	 * break and an indent, after skipping its leading white space.
	 * <p>
	 * NOTE(review): this copy is corrupted. The <code>do</code>/<code>while</code> condition below
	 * runs straight into the body of a different method that works on an undeclared <code>tag</code>
	 * variable and calls <code>writeTag</code> recursively; it is presumably the tail of <code>writeTag</code>.
	 * The <code>contentEnd</code> line further down is damaged in the same way. Text between a
	 * less-than and a greater-than character appears to have been lost. Restore from the original source.
	 *
	 * @param end the source position at which the text ends
	 * @param depth the indent depth of the first line
	 * @param increaseIndentAfterFirstLineBreak if <code>true</code>, lines after the first are indented one level deeper
	 * @param collapseWhiteSpace passed through to <code>writeLine</code>
	 * @return <code>true</code> if all text was on one line, otherwise <code>false</code>
	 * @throws IOException if the writer fails
	 */
	private boolean writeTextInline(final int end, int depth, final boolean increaseIndentAfterFirstLineBreak, final boolean collapseWhiteSpace) throws IOException {
		// returns true if all text was on one line, otherwise false
		// sets index to end
		if (index==end) return true;
		writeLine(end,depth,collapseWhiteSpace);
		if (index==end) return true;
		final int subsequentLineDepth=increaseIndentAfterFirstLineBreak ? depth+1 : depth;
		do {
			while (Segment.isWhiteSpace(sourceText.charAt(index))) if (++index==end) return false; // trim whitespace.
			writer.write('\n');
			writeIndent(subsequentLineDepth);
			writeLine(end,subsequentLineDepth,collapseWhiteSpace);
		// NOTE(review): the next line is corrupted; the loop condition is fused with code from another method (tag and tagEnd are not declared in this method).
		} while (indextag.end) ? tag.end : end;
		// Comments and CDATA sections are written with their original relative indenting preserved.
		if (tag.getTagType()==StartTagType.COMMENT || tag.getTagType()==StartTagType.CDATA_SECTION) {
			writeTextPreserveIndenting(tagEnd,depth);
		} else if (tidyTags) {
			final String tidyTag=tag.tidy();
			// The tidied text is written directly for start tags that have attributes, otherwise via writeSpecifiedTextInline.
			if ((tag instanceof StartTag) && ((StartTag)tag).getAttributes()!=null)
				writer.write(tidyTag);
			else
				writeSpecifiedTextInline(tidyTag,depth);
			index=tagEnd;
		} else {
			writeTextInline(tagEnd,depth,true,false);
		}
		if (end<=tag.end || !(tag instanceof StartTag)) return;
		if ((tag.name==HTMLElementName.SCRIPT && !indentScriptElements) || tag.getTagType().isServerTag()) {
			// this is a server start tag, we may need to write the whole server element:
			final Element element=tag.getElement();
			final EndTag endTag=element.getEndTag();
			if (endTag==null) return;
			// NOTE(review): the next line is corrupted; singleLineContent used below is never declared in the visible code.
			final int contentEnd=(end=end) return;
			if (!singleLineContent) {
				writer.write('\n');
				writeIndent(depth);
			}
			// assert index==endTag.begin
			writeTag(endTag,depth,end);
		}
	}
	
  private void writeIndent(final int depth) throws IOException {
		for (int x=0; x




© 2015 - 2025 Weber Informatics LLC | Privacy Policy