All Downloads are FREE. Search and download functionalities are using the official Maven repository.

org.openbp.common.string.TextUtil Maven / Gradle / Ivy

The newest version!
/*
 *   Licensed under the Apache License, Version 2.0 (the "License");
 *   you may not use this file except in compliance with the License.
 *   You may obtain a copy of the License at
 *
 *       http://www.apache.org/licenses/LICENSE-2.0
 *
 *   Unless required by applicable law or agreed to in writing, software
 *   distributed under the License is distributed on an "AS IS" BASIS,
 *   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 *   See the License for the specific language governing permissions and
 *   limitations under the License.
 */
package org.openbp.common.string;

import java.text.BreakIterator;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;

/**
 * Text utilities.
 *
 * @author Heiko Erhardt
 */
public final class TextUtil
{
	/**
	 * Private constructor prevents instantiation.
	 * The class only offers static utility methods.
	 */
	private TextUtil()
	{
	}

	//////////////////////////////////////////////////
	// @@ Utility methods
	//////////////////////////////////////////////////

	/**
	 * Extracts a summary from an object description.
	 * The summary is the text up to and including the first period that is followed by
	 * white space (or that ends the text), or the text up to the first new line in the
	 * string, whichever comes first.
	 * Any run of white space inside the summary is compressed to a single space.
	 * The summary never starts or ends with white space.
	 *
	 * @param text Description text
	 * @return The summary string or null if the text is null or contains white space only
	 */
	public static String extractSummary(String text)
	{
		if (text == null)
			return null;
		text = text.trim();
		int len = text.length();
		if (len == 0)
			return null;

		StringBuffer sb = new StringBuffer();

		// A run of white space is not written immediately. It is emitted as a single space
		// only when more summary text follows, so that white space in front of the
		// terminating new line (e.g. the '\r' of a "\r\n" sequence) does not end up as
		// a trailing space of the summary.
		boolean pendingSpace = false;
		for (int i = 0; i < len; ++i)
		{
			char c = text.charAt(i);

			// The summary never extends beyond the first line
			if (c == '\n')
				break;

			if (Character.isWhitespace(c))
			{
				pendingSpace = true;
				continue;
			}

			if (pendingSpace)
			{
				// String.trim () does not remove all characters Character.isWhitespace ()
				// accepts, so the text may still begin with white space; skip it.
				if (sb.length() != 0)
					sb.append(' ');
				pendingSpace = false;
			}

			sb.append(c);

			if (c == '.')
			{
				// Break at the dot if we are at the end of the string or it is followed by a white space.
				// A dot inside a token (e.g. "v1.2") does not terminate the summary.
				if (i + 1 >= len || Character.isWhitespace(text.charAt(i + 1)))
					break;
			}
		}

		return sb.length() != 0 ? sb.toString() : null;
	}

	//////////////////////////////////////////////////
	// @@ Line processing
	//////////////////////////////////////////////////

	/** Trim characters for {@link #breakIntoLines} */
	private static final char [] TRIM_CHARACTERS = new char [] { ' ', '\t', '\r' };

	/**
	 * Breaks a single string into a list of lines.
	 * Every line is limited to a particular length.
	 * Internally, a java.text.BreakIterator is used to break the lines,
	 * maintaining punctaction and hyphenated words.
	 * Newline characters are considered line breaks also (they will be removed
	 * from the resulting lines).
	 *
	 * @param s The string to break
	 * @param trim
	 *  true    Remove leading and trailing spaces and tabulators from each line
* false Do not trim the lines * @param maxLength Maximum length per line or -1 for unlimited line length * @return A list of line strings */ public static List breakIntoLines(final String s, boolean trim, int maxLength) { // List of lines to return; List lines = new ArrayList(); // Check for valid arguments if (s == null) return lines; String restString = s; while (restString != null) { int lineEnd = restString.indexOf('\n'); String line = null; if (lineEnd == -1) { line = restString; restString = null; } else { line = restString.substring(0, lineEnd); if (restString.length() > (lineEnd + 1)) restString = restString.substring(lineEnd + 1); else restString = null; } if (trim) line = StringUtil.trim(line, TRIM_CHARACTERS); // Check if the line is longer than allowed if (maxLength >= 0 && line.length() > maxLength) { BreakIterator iterator = null; // Line is too long, break it into pieces for (;;) { line = line.trim(); if (line.length() == 0) break; if (iterator == null) iterator = BreakIterator.getLineInstance(); iterator.setText(line); int breakPos = 0; for (;;) { int next = iterator.next(); if (next == BreakIterator.DONE) { // No more lines to wrap break; } if (next < maxLength) { // Remember position to break breakPos = next; } else { if (breakPos == 0) { // No remembered position, use this one breakPos = next; } break; } } // Line part to add String singleLine = line.substring(0, breakPos).trim(); if (!(lines.size() == 0 && line.trim().length() == 0)) lines.add(singleLine); line = line.substring(breakPos); } } else { // Avoid the adding if the line is empty and it would be the first line if (!(lines.size() == 0 && line.trim().length() == 0)) lines.add(line); } } // Trim the last lines that do not contain any content for (int i = lines.size(); --i >= 0;) { if (!"".equals(lines.get(i))) break; lines.remove(i); } return lines; } /** * Concatenates several lines into one string. * The lines are separated by a '\n' character. 
* * @param lineIterator Iterator of String objects * @return The result string or null */ public static String concatLines(Iterator lineIterator) { StringBuffer sb = new StringBuffer(); while (lineIterator.hasNext()) { String line = (String) lineIterator.next(); if (line != null) { if (sb.length() != 0) sb.append('\n'); sb.append(line); } } return sb.length() != 0 ? sb.toString() : null; } ////////////////////////////////////////////////// // @@ Decoding/encoding of multi line strings ////////////////////////////////////////////////// /** * Trims the string and converts all newline and tab character to an escape representation. * * Newlines: "\n"
* Tabs: "\t" * * @param s String to encode * @return The encoded string or null if the string is null or contains white space only */ public static String encodeMultiLineString(String s) { if (s != null) { s = s.trim(); if (s.length() == 0) { s = null; } else if (s.indexOf('\n') >= 0 || s.indexOf('\t') >= 0) { StringBuffer sb = new StringBuffer(); boolean seenSpace = false; int n = s.length(); for (int i = 0; i < n; ++i) { char c = s.charAt(i); if (c == '\n') { sb.append("\\n"); seenSpace = true; } else if (c == '\t') { sb.append("\\t"); seenSpace = true; } else if (c == '\\') { sb.append("\\\\"); } else if (c == ' ') { if (seenSpace) { sb.append("\\s"); } else { sb.append(" "); seenSpace = true; } } else { sb.append(c); seenSpace = false; } } s = sb.toString(); } } return s; } /** * Trims the string and converts all newline and tab escape representations to their character values. * * Newlines: "\n"
* Tabs: "\t" * * @param s String to decode * @return The decoded string or null if the string is null or contains white space only */ public static String decodeMultiLineString(String s) { if (s != null) { s = s.trim(); if (s.length() == 0) { s = null; } else if (s.indexOf('\n') >= 0 || s.indexOf('\\') >= 0) { StringBuffer sb = new StringBuffer(); boolean eatSpace = false; int n = s.length(); for (int i = 0; i < n; ++i) { char c = s.charAt(i); if (c == '\\') { if (i + 1 < n) { char c2 = s.charAt(i + 1); switch (c2) { case 'n': c = '\n'; ++i; eatSpace = true; break; case 't': c = '\t'; ++i; eatSpace = true; break; case 's': c = ' '; ++i; eatSpace = true; break; case '\\': c = '\\'; ++i; break; default: eatSpace = false; } } else { eatSpace = false; } } else if (c == ' ' || c == '\t' || c == '\n') { if (eatSpace) { // Eat white space after newlines continue; } if (c == ' ' || c == '\t' || c == '\n') { c = ' '; eatSpace = true; } } else { eatSpace = false; } sb.append(c); } s = sb.length() != 0 ? sb.toString() : null; } } return s; } ////////////////////////////////////////////////// // @@ HTML text processing ////////////////////////////////////////////////// /** * Converts regular text to an HTML form that can be used for display in browsers or tool tips. * Newlines will be substituted by their HTML counterpart. * * @param paragraphs Array of strings, each one denoting a single paragraph or null * @param boldTitle * true Treat the first paragraph as title and print it bold.
* false No special handling for the first paragraph * @param separatorPos Position for the separator line (-1 if no separator should be used, * 0 for separator after paragraphs[0] etc. * @param maxLength Maximum length per line or -1 for unlimited line length * @return The HTML text or null */ public static String convertToHTML(String [] paragraphs, boolean boldTitle, int separatorPos, int maxLength) { if (paragraphs == null || paragraphs.length == 0) return null; StringBuffer output = new StringBuffer(); output.append(""); boolean lineAdded = false; for (int i = 0; i < paragraphs.length; ++i) { if (paragraphs [i] == null) continue; if (lineAdded) { output.append("
"); } if (i == separatorPos + 1 && lineAdded) { // Create separator line output.append("
"); } if (i == 0 && boldTitle) { output.append(""); } appendHTMLLine(paragraphs [i], output, maxLength); lineAdded = true; if (i == 0 && boldTitle) { output.append(""); } } output.append(""); return output.toString(); } /** * Appends the given text as HTML line to the output. * * @param line Line to append * @param output Output string buffer * @param maxLength Maximum length per line or -1 for unlimited line length */ public static void appendHTMLLine(String line, StringBuffer output, int maxLength) { List lines = breakIntoLines(line, false, maxLength); int n = lines.size(); for (int i = 0; i < n; ++i) { if (i > 0) output.append("
"); output.append((String) lines.get(i)); } } }




© 2015 - 2024 Weber Informatics LLC | Privacy Policy