All downloads are free. The search and download functionality uses the official Maven repository.

samples.src.SplitLongLines Maven / Gradle / Ivy

Go to download

Jericho HTML Parser is a simple but powerful Java library allowing analysis and manipulation of parts of an HTML document, including some common server-side tags, while reproducing verbatim any unrecognised or invalid HTML. It also provides high-level HTML form manipulation functions.

There is a newer version: 2.3
Show newest version
import au.id.jericho.lib.html.*;
import java.util.*;
import java.io.*;
import java.net.*;

public class SplitLongLines {
	private static final int MAX_LENGTH=70;

	private static int col;

	public static void main(String[] args) throws Exception {
		String sourceUrlString="../doc/index.html";
		if (args.length==0)
		  System.err.println("Using default argument of \""+sourceUrlString+'"');
		else
			sourceUrlString=args[0];
		if (sourceUrlString.indexOf(':')==-1) sourceUrlString="file:"+sourceUrlString;
		URL sourceUrl=new URL(sourceUrlString);
		InputStream in=null;
		try {
			in=sourceUrl.openStream();
			BufferedReader reader=new BufferedReader(new InputStreamReader(in));
			String line;
			while ((line=reader.readLine())!=null) {
				if (line.length()<=MAX_LENGTH) {
					println(line);
					continue;
				}
				line=line.trim();
				if (line.length()<=MAX_LENGTH) {
					println(line);
					continue;
				}
				Source source=new Source(line);
				int pos=0;
				Iterator i=source.getNextTagIterator(0);
				while (i.hasNext()) {
					Tag tag=(Tag)i.next();
					if (pos!=tag.getBegin()) print(line.subSequence(pos,tag.getBegin())); // print the text between this tag and the last
					printTag(tag,line);
					pos=tag.getEnd();
				}
				if (pos!=line.length()) print(line.subSequence(pos,line.length())); // print the text between the last tag and the end of line
				println();
			}
		} finally {
			if (in!=null) in.close();
		}
  }

	private static void println() {
		System.out.println();
		col=0;
	}

	private static void println(CharSequence text) {
		System.out.println(text);
		col=0;
	}

	private static void print(CharSequence text) {
		print(text,true);
	}

	private static void print(CharSequence text, boolean splitLongText) {
		if (splitLongText && text.length()>MAX_LENGTH) {
			String[] words=text.toString().split("\\s");
			for (int i=0; i0 && col+text.length()>MAX_LENGTH) println();
		System.out.print(text);
		col+=text.length();
	}

	private static void printTag(Tag tag, String line) {
		if (tag.length()<=MAX_LENGTH || tag instanceof EndTag) {
			print(tag);
			return;
		}
		StartTag startTag=(StartTag)tag;
		Attributes attributes=startTag.getAttributes();
		if (attributes!=null) {
			print(line.substring(startTag.getBegin(),attributes.getBegin()));
			for (Iterator j=attributes.iterator(); j.hasNext();) {
				Attribute attribute=(Attribute)j.next();
				print(" ");
				print(attribute);
			}
			print(line.substring(attributes.getEnd(),startTag.getEnd()));
		} else {
			print(startTag);
		}
	}
}




© 2015 - 2024 Weber Informatics LLC | Privacy Policy